#!/usr/bin/perl -w # $Id: t-prot,v 1.51 2002/03/23 10:47:32 jochen Exp $ require 5.005; use strict; use Getopt::Mixed qw(nextOption); use vars qw( $VER $REV $REL $EX_OK $EX_USAGE $EX_DATAERR $EX_UNAVAILABLE $EX_BOUNCE $ad $ads $boun $cr $diff $elli $footers $hdrs $indent $lsig $maxsig $mda $ml $ms $mua $ofile $sendmail $sig $sysl $trad $trsp ); # Version info $VER = '0.54'; $REV = ''; $REL = q$Revision: 1.51 $; chop($REL); # From # (you might have to adjust those if not using GNU libc) $EX_OK = 0; $EX_USAGE = 64; $EX_DATAERR = 65; $EX_UNAVAILABLE = 69; $EX_BOUNCE = $EX_UNAVAILABLE; # Please adjust these vals to your needs: $maxsig = 4; # max. valid signature length $indent = '>'; # Indent string, regexp to identify a quoted line $sendmail = '/usr/sbin/sendmail -oi'; # MTA expecting mail on STDIN $boun = "Blocked by $0: This user does not accept TOFUed email. Please see and for more info. Have a nice day!\n"; $ofile = '-'; # use STDOUT if nothing is specified # end of user adjusted vals # help(): print help text and exit with appropriate exit code sub help { print "Usage: $0 [options] -a remove ad footers; requires -A -A=DIRECTORY ad footer directory, treat ad footers as signature -c merge multiple blank lines -d, --debug print notice to syslog when bouncing; requires -p --diff tolerate diffs appended *after* the signature -e force ellipsis for excessive punctuation -h, --help show this short help and exit -i=INFILE file to be read; '-' for STDIN (default) -L=DIRECTORY mailling list footer directory, treat mailing list footers as signature -l delete mailing list footer; requires -L -M, --mua=MUA turn on special treatment for some mail user agents -m delete MS style TOFU; careful: might be too agressive -o=OUTFILE file to be written to; '-' for STDOUT (default) -P=MESSAGE user defined bounce message; requires -p -p=ADDRESS redirect to ADDRESS if no TOFU was found -r delete mail header lines -S[=n] supress signatures with more than n lines; default is $maxsig if n not specified -s delete signature -t delete traditional style TOFU -v, --version show version string and exit -w delete trailing whitespaces\n"; exit($EX_USAGE); } # version(): print version info and exit with appropriate exit code sub version { print "$0 v$VER$REV ($REL), Jochen Striepe Get the latest version at \n"; exit($EX_OK); } # remove_footers(): remove any trailing appearance of footers contained # in the given directory. sub remove_footers { my $L = shift; # array of message lines my $S = shift; # array to store removed lines in my $F = shift; # footers dir name my $O = shift; # remove only one footer? if ($F && scalar(@$L)) { opendir(DIR, $F) || die "Could not open $F: $!"; my @feet = grep { /^[^.]/ && -f "$F/$_" } readdir DIR; closedir DIR; foreach my $f (@feet) { open(IN, "$F/$f") || die "Could not open $F/$f: $!"; my @l = ; close IN; while (scalar(@l)<=scalar(@$L)) { my $y = 0; for(my $x=1; $x<=scalar(@l); $x++) { chomp($l[scalar(@l)-$x]); if (index($$L[scalar(@$L)-$x], $l[scalar(@l)-$x])!=0) { $y = 1; } } if (!$y) { unshift(@$S, @$L[$#$L-$#l..$#$L]); splice(@$L, $#$L-$#l); while (scalar(@$L) && $$L[$#$L] =~ /^\s*$/) { unshift(@$S, pop(@$L)); } if ($O) { last; } } else { last; } } } } } # write_msg(): output sub write_msg { my $O = shift; my $l; open(OUT, $O) || die "Could not open $O: $!"; while (scalar(@_)) { $l = shift; if (defined $l) { $^W = 0; print OUT @$l; $^W = 1; } } close OUT; } # process_msg(): This one proc does *everything* what has to be done with # the lines of the message sub process_msg { my $lines = shift; my ($j, $x, $verb) = (0, 0, 0); my (@ads, @hdr, @bo1, @bo2, @ftr, @sig, @vrb, @att) = ((), (), (), (), (), (), (), (), ()); # First, remove and store lines we might need later... # Remove headers: for ($x=0; $x<$#$lines; $x++) { if (@$lines[$x] =~ /^$/) { last; }; } @hdr = @$lines[0..$x]; splice(@$lines, 0, $x+1); # See if we have a multipart content type. If yes, see if it is already # ripped (e.g. by mutt(1)), otherwise only leave the first part if it # is plain text (if not, we are done - non-text messages are not our # business). if (lc($mua) ne 'mutt') { for ($x=0; $x=scalar(@$lines)) { exit($EX_DATAERR); } if ($bar =~ /^text\/plain/i) { my $z; for ($z=1; $x+$z<@$lines; $z++) { if (index($$lines[$x+$z], '--'.$foo)==0) { last; } } if ($x+$z>=scalar(@$lines)) { exit($EX_DATAERR); } @bo2 = @$lines[$x+$z..$#$lines]; splice(@$lines, $x+$z); if ($$lines[$#$lines] =~ /^\s*$/) { unshift(@bo2, pop @$lines); } @bo1 = @$lines[0..$x]; splice(@$lines, 0, $x+1); last; } else { write_msg(($mda?"|$sendmail $mda":">$ofile"), ($hdrs?undef:\@hdr), $lines); exit; } } } } last; } } } # Protect verbatims: $verb = 0; for ($x=0; $x[]*\[-- Attachment #(\d+)(: .*)? --\]\s*$/ && (($1 ne '1') || ($x[]*\[-- Type: text\/plain/))) || ($$lines[$x] =~ /^[^>[]*\[-- End of .* data --\]\s*$/)) { @att = @$lines[$x..$#$lines]; splice(@$lines, $x); if (scalar(@$lines) && $$lines[$#$lines] =~ /^\s*$/) { unshift(@att, pop(@$lines)); } last; } } # Pipe message/rfc822 parts to another instance of process_msg() # for further processing. # Please note that we cannot see what a hierarchy the original # message had -- if there were message/rfc822 parts within other # message/rfc822 parts constellations can occur which we cannot # resolve. Therefore we simply do not even try to be smart. This # should work for most situations: if (scalar(@att)) { for ($x=0; $x<$#att; $x++) { if ($vrb[scalar(@$lines)+$x]) { next; } # The following regexp is quite ugly because for most # users the line is coloured using termcap... (bah!) if ($att[$x]=~/^[^>[]*\[-- Attachment #\d+(: .*)? --\]\s*$/ && $att[$x+1] =~ /^[^>[]*\[-- Type: message\/rfc822/) { $x += 2; while ($att[$x] !~ /^\s*$/) { $x++; } $x++; my @tmp = @att[$x..$#att]; process_msg(\@tmp); splice(@att, $x, scalar(@att)-$x, @tmp); } } } } # Remove ML footers: remove_footers($lines, \@ftr, $footers, undef); # Remove ad footers: remove_footers($lines, \@ads, $ads, undef); # Remove signature: if (scalar(@$lines)) { for ($x=0; $x$lsig))) { if ($lsig && !$sig) { push(@sig, "[---=| Overlong signature removed by $0: " . (scalar(@$lines)-$x) . " lines snipped |=---]\n"); } splice(@$lines, $x); } elsif ($#$lines-$x<=($lsig?$lsig:$maxsig)) { @sig = @$lines[$x..$#$lines]; splice(@$lines, $x); } last; } } } # Now care about TOFU. # One common mispractice is M$ style TOFU: if ($ms) { # bloat this array if you want more internationalization: my @tofu = ('Original Message', 'Ursprüngliche Nachricht', 'Ursprungliche Nachricht', 'Mensagem original'); DONE: for ($x=0; $x=0; $x--) { if ((!$vrb[$x]) && $$lines[$x] =~ /^\s*$/) { if ($t<2) { $t++; } else { splice(@$lines, $x, 1); } } else { $t = 0; } } } # Everything changing the body is done now. Time to fix the line count # header so naive clients do not get confused. Just to be sure, append # the old line count to X-headers. my $l = scalar(@bo1) + scalar(@$lines) + scalar(@att) + scalar(@bo2) + (!$sig?scalar(@sig):0) + (!$ml?scalar(@ftr):0) + (!$ad?scalar(@ads):0); for ($x=0; $xd diff help>h mua>M version>v'); while (my ($opt, $val, $pretty) = nextOption()) { if ($opt eq 'a') { $ad = 1; } elsif ($opt eq 'A') { $ads = $val; } elsif ($opt eq 'c') { $cr = 1; } elsif ($opt eq 'd') { $sysl = 1; } elsif ($opt eq 'diff') { $diff = 1; } elsif ($opt eq 'e') { $elli = 1; } elsif ($opt eq 'i') { $ifile = $val; } elsif ($opt eq 'L') { $footers = $val; } elsif ($opt eq 'l') { $ml = 1; } elsif ($opt eq 'm') { $ms = 1; } elsif ($opt eq 'M') { $mua = $val; } elsif ($opt eq 'o') { $ofile = $val; } elsif ($opt eq 'P') { $boun = $val; } elsif ($opt eq 'p') { $mda = $val; } elsif ($opt eq 'r') { $hdrs = 1; } elsif ($opt eq 'S') { $lsig = $val ? $val : $maxsig; } elsif ($opt eq 's') { $sig = 1; } elsif ($opt eq 't') { $trad = 1; } elsif ($opt eq 'v') { version(); } elsif ($opt eq 'w') { $trsp = 1; } else { help(); } } Getopt::Mixed::cleanup(); if (($ml && $footers eq '')||($ad && $ads eq '')) { help(); } # Read message: open(IN, $ifile) || die "Could not open $ifile: $!"; my @message = ; close IN; # this should be self-explanatory: process_msg(\@message); # Finally, print clean lines: write_msg(($mda?"|$sendmail $mda":">$ofile"), \@message); # eof