#!/usr/bin/perl -w use POSIX ":sys_wait_h"; use Mail::IMAPClient; use Mail::SpamAssassin; use Getopt::Long; use File::stat; use Data::Dumper; use Digest::SHA1 qw(sha1_hex); use Scalar::Util qw(reftype); use strict; use English; my $Verbosity = 0; my $ConfDir = "$ENV{HOME}/.spamassassin/"; my $ConfFile = "$ConfDir/IMAPAssassin.conf"; my $ProgramFile = $0; my $ProgramModificationTime = get_file_last_modified($ProgramFile); # Read in configuration file my %Conf; open CONF, $ConfFile; while ( my $line = ) { chomp $line; $line =~ s/^\s+//; $line =~ s/\s+$//; if ( substr( $line, 0, 1 ) eq "#" ) { next; } my ( $key, $value ) = split( /\s+/, $line ); $key =~ tr/[A-Z]/[a-z]/; $Conf{$key} = $value; } close CONF; # Set configuration variables with defaults my $INBOX = $Conf{inbox} || "INBOX"; my $SpamFolder = $Conf{spamfolder} || "spam"; my $Server = $Conf{server} || "imap"; my $Port = $Conf{port} || 143; my $User = $Conf{user} || $ENV{'USER'}; my $Password = $Conf{password}; my $Delay = $Conf{delay} || 60; Getopt::Long::Configure("bundling"); # Allow single-dash options to be groups: -vvvv = -v -v -v -v GetOptions( 'user=s' => \$User, 'password=s' => \$Password, 'inbox=s' => \$INBOX, 'spam-folder=s' => \$SpamFolder, 'server=s' => \$Server, 'port=i' => \$Port, 'delay=i' => \$Delay, 'verbose=i' => \$Verbosity, 'v+' => \$Verbosity ); my $PIDFile = "$ConfDir/IMAP.$User.pid"; my $MIDFile = "$ConfDir/IMAP.$User.mid"; die("$PROGRAM_NAME is already running for $User\n") if is_already_running(); print "Running at verbosity level $Verbosity\n" if ($Verbosity); my $ServerURL = "imap://$User\@$Server:$Port"; my $IMAP; my $idle; # Used to store handle returned by $IMAP->idle() my $MessageCount = 0; my $SpamCount = 0; unless ( $Password = $Conf{password} ) { system "stty -echo"; print STDERR "Password for $ServerURL: "; chomp( $Password = ); print STDERR "\n"; system "stty echo"; } my $Secret = $Conf{secret} || sha1_hex( $User . $Password . $Server . $Port ); # Catch signals $SIG{TERM} = \&catch_zap; $SIG{INT} = \&catch_zap; $SIG{PIPE} = \&catch_sigpipe; my $LastMID = get_saved_message_id(); my $PrevMID = $LastMID; # Write new PID file open( PIDFILE, ">$PIDFile" ); print PIDFILE $$, "\n"; close(PIDFILE); # Setup SpamAssassin: my $SpamTest = Mail::SpamAssassin->new( { debug => ( $Verbosity > 9 ) } ); $SpamTest->compile_now(1); # Keep track of when the user's config file was last changed so we can update later: my $SAUserPrefs = $SpamTest->{userprefs_filename} || $SpamTest->first_existing_path(@Mail::SpamAssassin::default_userprefs_path); print "SpamAssassin user_prefs: $SAUserPrefs\n" if $Verbosity > 4; my $LastConfigChange = get_file_last_modified($SAUserPrefs); my %Filtered = (); # Hash of already checked sequence #'s # Used in a header we add to rewritten messages to avoid rechecking without being spoofable: $SpamTest->{conf}->{headers_ham}->{IMAPAssassin} = $Secret; $SpamTest->{conf}->{headers_spam}->{IMAPAssassin} = $Secret; print "X-Spam-IMAPAssassin secret: $Secret\n" if $Verbosity > 6; while (1) { eval { print "Calling check_connection()\n" if $Verbosity > 3; check_connection(); if ( get_file_last_modified($ProgramFile) > $ProgramModificationTime ) { print "$ProgramFile has changed; exiting...\n"; #exec({ $0 } @ARGV); catch_zap(); } if ( get_file_last_modified($SAUserPrefs) > $LastConfigChange ) { print "$SAUserPrefs has changed; reloading...\n"; $SpamTest->read_cf( $SAUserPrefs, "user preferences" ); $SpamTest->compile_now(1); $LastConfigChange = get_file_last_modified($SAUserPrefs); } print "Retrieving IDs for unprocessed messages\n" if $Verbosity > 2; my @AllMIDs = $IMAP->unseen(); if ( @AllMIDs > 0 and ( !$AllMIDs[0] or $AllMIDs[0] < 0 ) ) { die("Error retrieving unseen message IDs: $!\n"); } my @ProcessedMIDs = $IMAP->search("HEADER X-Spam-IMAPAssassin $Secret"); my %seen; # lookup table my @MIDs; # answer # build lookup table @seen{@ProcessedMIDs} = (); foreach my $item (@AllMIDs) { push( @MIDs, $item ) unless exists $seen{$item}; } print "Checking up to " . scalar(@MIDs) . " unseen messages\n" if $Verbosity > 2; foreach my $MID (@MIDs) { if ( !( defined( $Filtered{$MID} ) and $Filtered{$MID} ) and $MID > $PrevMID ) { print "Checking $MID\n" if $Verbosity > 3; $Filtered{$MID} = 1; $LastMID = $MID; my $StartTime = time(); # Get message from IMAP, re-mark unread. Is it spam? $MessageCount++; my $Message = $IMAP->message_string($MID); $IMAP->deny_seeing($MID); if ( $Message =~ m/^X-Spam-Status: Yes/m ) { print "Message has already been flagged as spam - skipping\n" if $Verbosity; $SpamCount++; next; } my $MessageObject = $SpamTest->check_message_text($Message); my $IsSpam = $MessageObject->is_spam(); print "MID: $MID " . ( $IsSpam ? "Spam" : "" ) . " (" . $MessageObject->get_hits() . " / " . $MessageObject->get_required_hits() . " hits)\n"; if ( $IsSpam && $Verbosity > 1 ) { my $report = $MessageObject->get_report(); $report =~ s/\n/\n\t/mg; print "\t$report\n"; } if ($IsSpam) { $SpamCount++; # Get re-written version, make copy in new folder, delete original my $Spam = $MessageObject->rewrite_mail(); my $AppendStatus = $IMAP->append_string( $SpamFolder, $Spam ); if ($AppendStatus) { $IMAP->delete_message($MID); } else { print "Error moving spam message!\n"; } } elsif ( 1 == 1 ) { eval { # Rewrite the original message so we can see the SpamAssassin headers: print "\tRewriting message $MID: " if $Verbosity > 1; my $NewMessage = $MessageObject->rewrite_mail() or die("Couldn't retrieve message body: $!\n"); my @MessageFlags = $IMAP->flags($MID); if ( my $mfType = reftype(@MessageFlags) ) { unless ( $mfType == "ARRAY" ) { warn("Error retrieving message flags - got a $mfType instead of an array!\n"); print STDERR Dumper( \@MessageFlags ); @MessageFlags = (); } } # Check to see WTF the flags are sometimes being filled with the last command executed(!) # instead of the valid message flags: if ( $Verbosity > 4 ) { print "Message flags:\n"; print Dumper( \@MessageFlags ); } my $MessageInternalDate = $IMAP->internaldate($MID) or die("Couldn't retrieve internal date for $MID: $!\n"); if ( $MessageInternalDate =~ m/^\* NO/ ) { print "Error retrieving message date for $MID - message rewrite aborted\n"; exit 0; } print "\tMessage Internal Date: \n\t\t$MessageInternalDate\n" if $Verbosity > 2; my $new_mid = $IMAP->append_string( $INBOX, $NewMessage,, $MessageInternalDate ) or die "Could not rewrite original message: $@\n"; # Don't waste time refiltering the rewritten version: $Filtered{$new_mid} = 1; # Delete the original: $IMAP->delete_message($MID); #$IMAP->expunge(); print "done.\n" if $Verbosity > 1; }; if ($@) { print " error.\n"; if ($Verbosity) { print $IMAP->LastError(); print "-" x 78 . "\nLast Command:\n"; print $IMAP->LastIMAPCommand(); print "-" x 78 . "\nLast Result:\n"; print Dumper( $IMAP->Results ); print "-" x 78 . "\n"; } exit 1; } } $MessageObject->finish(); &cleanupchildren; print "\tProcessed $MID in " . ( time() - $StartTime ) . " seconds\n" if $Verbosity > 1; print_statistics(); $IMAP->expunge(); } else { print "Already processed $MID\n" if $Verbosity > 2; } } print "Done checking messages\n" if $Verbosity > 3; unless ( $idle = $IMAP->idle() ) { # This needs to handle an IDLE being immediately aborted because of new messages: # print "Unable to idle IMAP connection; disconnecting instead\n" if $Verbosity > 2; # $IMAP->disconnect(); } }; if ($@) { warn_and_sleep("Caught error: $@\n"); } sleep $Delay; } sub catch_zap { # Orderly shutdown, remove PID file unlink $PIDFile; open( MIDFILE, ">$MIDFile" ); print MIDFILE "$LastMID\n"; close(MIDFILE); print_statistics(); print "Exiting IMAPAssassin\n"; exit; } sub catch_sigpipe { my $le = $IMAP->LastError; chomp($le); print STDERR "Caught SIGPIPE. Last error: $le\n" if $Verbosity; undef $IMAP; undef $idle; check_connection(); } sub print_statistics { print "Total: $MessageCount messages processed, $SpamCount marked as spam"; if ( $MessageCount > 0 ) { printf( " %0.1f%% spam", 100 * $SpamCount / $MessageCount ); } print "\n"; # TODO: average time per message, min/max, etc. } sub numerically { return $_[0] <=> $_[1]; } sub cleanupchildren { while ( my $kid = waitpid( -1, &WNOHANG ) > 0 ) { print "Cleaned child pid $kid\n" if $Verbosity > 1; } } sub check_connection { if ( $IMAP and $idle ) { print "Resuming idled IMAP connection\n" if $Verbosity > 2; $IMAP->done(); undef $idle; } until ( $IMAP and $IMAP->IsConnected() ) { undef $IMAP; print "Attempting to connect to $ServerURL\n" if $Verbosity > 2; $IMAP = Mail::IMAPClient->new( Debug => ( $Verbosity > 5 ) ) or die("Couldn't instantiate IMAPClient: $!\n"); $IMAP->Timeout(15); $IMAP->Server($Server); $IMAP->User($User); $IMAP->Password($Password); $IMAP->Port($Port); unless ( $IMAP->connect() ) { warn_and_sleep("Couldn't connect to $ServerURL: $@"); next; } unless ( $IMAP->select($INBOX) ) { warn_and_sleep("Couldn't open $ServerURL/$INBOX: $@"); next; } $IMAP->Peek(); last; } print "Connected to $ServerURL\n" if $Verbosity > 2; } sub warn_and_sleep { chomp(@_); print join( "\n", @_ ) . "\n"; if ($IMAP) { print "Closing connection because of error\n" if $Verbosity > 2; undef $IMAP; } &cleanupchildren; print "Sleeping 5 seconds before retrying\n"; sleep 5; } sub get_file_last_modified { my $File = shift; my $st = stat($File) or die("Couldn't stat $File: $!\n"); return $st->mtime; } sub get_saved_message_id { my $mid; if ( -e $MIDFile ) { open( MIDFILE, "$MIDFile" ); $mid = ; chomp($mid); close(MIDFILE); } else { $mid = 0; } return $mid; } sub is_already_running { # See if PID mentioned in PID file (if any) is still around if ( -e $PIDFile ) { my $OldPID; open( PIDFILE, "<$PIDFile" ); $OldPID = ; chomp($OldPID); close(PIDFILE); if ( getpgrp($OldPID) > 0 ) { return 1; } } return 0; }