+#!/usr/bin/perl -w
+# $Id: t-prot,v 1.51 2002/03/23 10:47:32 jochen Exp $
+
+require 5.005;
+use strict;
+use Getopt::Mixed qw(nextOption);
+use vars qw(
+ $VER $REV $REL
+ $EX_OK $EX_USAGE $EX_DATAERR $EX_UNAVAILABLE $EX_BOUNCE
+ $ad $ads $boun $cr $diff $elli $footers $hdrs $indent $lsig $maxsig
+ $mda $ml $ms $mua $ofile $sendmail $sig $sysl $trad $trsp
+);
+
+
+# Version info
+$VER = '0.54';
+$REV = '';
+$REL = q$Revision: 1.51 $; chop($REL);
+# From <sysexits.h>
+# (you might have to adjust those if not using GNU libc)
+$EX_OK = 0;
+$EX_USAGE = 64;
+$EX_DATAERR = 65;
+$EX_UNAVAILABLE = 69;
+$EX_BOUNCE = $EX_UNAVAILABLE;
+# Please adjust these vals to your needs:
+$maxsig = 4; # max. valid signature length
+$indent = '>'; # Indent string, regexp to identify a quoted line
+$sendmail = '/usr/sbin/sendmail -oi'; # MTA expecting mail on STDIN
+$boun = "Blocked by $0: This user does not accept TOFUed email. Please see <http://learn.to/edit_messages/> and <http://www.escape.de/users/tolot/mutt/> for more info. Have a nice day!\n";
+$ofile = '-'; # use STDOUT if nothing is specified
+# end of user adjusted vals
+
+
+# help(): print help text and exit with appropriate exit code
+sub help {
+ print "Usage: $0 [options]
+ -a remove ad footers; requires -A
+ -A=DIRECTORY ad footer directory, treat ad footers as signature
+ -c merge multiple blank lines
+ -d, --debug print notice to syslog when bouncing; requires -p
+ --diff tolerate diffs appended *after* the signature
+ -e force ellipsis for excessive punctuation
+ -h, --help show this short help and exit
+ -i=INFILE file to be read; '-' for STDIN (default)
+ -L=DIRECTORY mailling list footer directory, treat mailing list
+ footers as signature
+ -l delete mailing list footer; requires -L
+ -M, --mua=MUA turn on special treatment for some mail user agents
+ -m delete MS style TOFU; careful: might be too agressive
+ -o=OUTFILE file to be written to; '-' for STDOUT (default)
+ -P=MESSAGE user defined bounce message; requires -p
+ -p=ADDRESS redirect to ADDRESS if no TOFU was found
+ -r delete mail header lines
+ -S[=n] supress signatures with more than n lines;
+ default is $maxsig if n not specified
+ -s delete signature
+ -t delete traditional style TOFU
+ -v, --version show version string and exit
+ -w delete trailing whitespaces\n";
+ exit($EX_USAGE);
+}
+
+# version(): print version info and exit with appropriate exit code
+sub version {
+ print "$0 v$VER$REV ($REL), Jochen Striepe <t-prot\@tolot.escape.de>
+Get the latest version at <http://www.escape.de/users/tolot/mutt/>\n";
+ exit($EX_OK);
+}
+
+# remove_footers(): remove any trailing appearance of footers contained
+# in the given directory.
+sub remove_footers {
+ my $L = shift; # array of message lines
+ my $S = shift; # array to store removed lines in
+ my $F = shift; # footers dir name
+ my $O = shift; # remove only one footer?
+
+ if ($F && scalar(@$L)) {
+ opendir(DIR, $F) || die "Could not open $F: $!";
+ my @feet = grep { /^[^.]/ && -f "$F/$_" } readdir DIR;
+ closedir DIR;
+
+ foreach my $f (@feet) {
+ open(IN, "$F/$f") || die "Could not open $F/$f: $!";
+ my @l = <IN>;
+ close IN;
+
+ while (scalar(@l)<=scalar(@$L)) {
+ my $y = 0;
+ for(my $x=1; $x<=scalar(@l); $x++) {
+ chomp($l[scalar(@l)-$x]);
+ if (index($$L[scalar(@$L)-$x], $l[scalar(@l)-$x])!=0) {
+ $y = 1;
+ }
+ }
+ if (!$y) {
+ unshift(@$S, @$L[$#$L-$#l..$#$L]);
+ splice(@$L, $#$L-$#l);
+ while (scalar(@$L) && $$L[$#$L] =~ /^\s*$/) {
+ unshift(@$S, pop(@$L));
+ }
+ if ($O) { last; }
+ }
+ else { last; }
+ }
+ }
+ }
+}
+
+# write_msg(): output
+sub write_msg {
+ my $O = shift;
+ my $l;
+
+ open(OUT, $O) || die "Could not open $O: $!";
+ while (scalar(@_)) {
+ $l = shift;
+ if (defined $l) {
+ $^W = 0;
+ print OUT @$l;
+ $^W = 1;
+ }
+ }
+ close OUT;
+}
+
+# process_msg(): This one proc does *everything* what has to be done with
+# the lines of the message
+sub process_msg {
+ my $lines = shift;
+
+ my ($j, $x, $verb) = (0, 0, 0);
+ my (@ads, @hdr, @bo1, @bo2, @ftr, @sig, @vrb, @att) =
+ ((), (), (), (), (), (), (), (), ());
+
+ # First, remove and store lines we might need later...
+ # Remove headers:
+ for ($x=0; $x<$#$lines; $x++) { if (@$lines[$x] =~ /^$/) { last; }; }
+ @hdr = @$lines[0..$x];
+ splice(@$lines, 0, $x+1);
+
+ # See if we have a multipart content type. If yes, see if it is already
+ # ripped (e.g. by mutt(1)), otherwise only leave the first part if it
+ # is plain text (if not, we are done - non-text messages are not our
+ # business).
+ if (lc($mua) ne 'mutt') {
+ for ($x=0; $x<scalar(@hdr); $x++) {
+ if ($hdr[$x] =~ /^Content-Type:\s+(.*)$/i) {
+ my $foo = $1;
+
+ if ($foo =~ /^multipart\//i) {
+ undef $foo;
+
+ if ($hdr[$x] =~ /\Wboundary="([^"]+)"/i) { $foo = $1; }
+ else {
+ for (my $z=1; $x+$z<@hdr && $hdr[$x+$z]=~/^\s/; $z++) {
+ if ($hdr[$x] =~ /\Wboundary="?([\S]+)"?$/i) {
+ $foo = $1;
+ last;
+ }
+ }
+ }
+
+ if (defined $foo) {
+ for (my $x=0; $x<scalar(@$lines); $x++) {
+ if (index($$lines[$x], '--'.$foo)!=0) { next; }
+
+ my $bar = 'text/plain';
+ for ($x++; $x<@$lines && $$lines[$x]!~/^$/; $x++)
+ {
+ if ($$lines[$x] =~ /^Content-Type:\s+(.*)$/i) {
+ $bar = $1;
+ }
+ }
+ if ($x>=scalar(@$lines)) { exit($EX_DATAERR); }
+
+ if ($bar =~ /^text\/plain/i) {
+ my $z;
+ for ($z=1; $x+$z<@$lines; $z++) {
+ if (index($$lines[$x+$z], '--'.$foo)==0) {
+ last;
+ }
+ }
+ if ($x+$z>=scalar(@$lines)) { exit($EX_DATAERR); }
+
+ @bo2 = @$lines[$x+$z..$#$lines];
+ splice(@$lines, $x+$z);
+ if ($$lines[$#$lines] =~ /^\s*$/) {
+ unshift(@bo2, pop @$lines);
+ }
+ @bo1 = @$lines[0..$x];
+ splice(@$lines, 0, $x+1);
+ last;
+ }
+ else {
+ write_msg(($mda?"|$sendmail $mda":">$ofile"),
+ ($hdrs?undef:\@hdr), $lines);
+ exit;
+ }
+ }
+ }
+ }
+ last;
+ }
+ }
+ }
+
+
+ # Protect verbatims:
+ $verb = 0;
+ for ($x=0; $x<scalar(@$lines); $x++) {
+ if ($$lines[$x] =~ /^\s*#v([+-])\s*$/) {
+ $verb = $1 eq '+' ? 1 : 0;
+ }
+ $vrb[$x] = $verb;
+ }
+
+
+ if (lc($mua) eq 'mutt') {
+ # Remove all but the first attachment (if this is text/plain)
+ # mutt did introduce (bah!). Remember, all this ugliness could
+ # be replaced with a proper and clean edit_filter patch in
+ # mutt(1) itself...
+ for ($x=0; $x<scalar(@$lines); $x++) {
+ if ($vrb[$x]) { next; }
+ # The following regexp's are quite ugly because for most users
+ # these lines are coloured using termcap... (bah!)
+ if (($$lines[$x] =~ /^[^>[]*\[-- Attachment #(\d+)(: .*)? --\]\s*$/ &&
+ (($1 ne '1') ||
+ ($x<scalar(@$lines) &&
+ $$lines[$x+1] !~ /^[^>[]*\[-- Type: text\/plain/))) ||
+ ($$lines[$x] =~ /^[^>[]*\[-- End of .* data --\]\s*$/))
+ {
+ @att = @$lines[$x..$#$lines];
+ splice(@$lines, $x);
+ if (scalar(@$lines) && $$lines[$#$lines] =~ /^\s*$/) {
+ unshift(@att, pop(@$lines));
+ }
+ last;
+ }
+ }
+
+ # Pipe message/rfc822 parts to another instance of process_msg()
+ # for further processing.
+ # Please note that we cannot see what a hierarchy the original
+ # message had -- if there were message/rfc822 parts within other
+ # message/rfc822 parts constellations can occur which we cannot
+ # resolve. Therefore we simply do not even try to be smart. This
+ # should work for most situations:
+ if (scalar(@att)) {
+ for ($x=0; $x<$#att; $x++) {
+ if ($vrb[scalar(@$lines)+$x]) { next; }
+ # The following regexp is quite ugly because for most
+ # users the line is coloured using termcap... (bah!)
+ if ($att[$x]=~/^[^>[]*\[-- Attachment #\d+(: .*)? --\]\s*$/ &&
+ $att[$x+1] =~ /^[^>[]*\[-- Type: message\/rfc822/)
+ {
+ $x += 2;
+ while ($att[$x] !~ /^\s*$/) { $x++; }
+ $x++;
+
+ my @tmp = @att[$x..$#att];
+ process_msg(\@tmp);
+ splice(@att, $x, scalar(@att)-$x, @tmp);
+ }
+ }
+ }
+ }
+
+ # Remove ML footers:
+ remove_footers($lines, \@ftr, $footers, undef);
+
+ # Remove ad footers:
+ remove_footers($lines, \@ads, $ads, undef);
+
+ # Remove signature:
+ if (scalar(@$lines)) {
+ for ($x=0; $x<scalar(@$lines); $x++) {
+ if ((!$vrb[$x]) && $$lines[$x] =~ /^-- $/) {
+ if ($diff) {
+ for (my $i=1; $x+$i+1<scalar(@$lines); $i++) {
+ if ($$lines[$x+$i] =~ /^\-{3}\ .+/ &&
+ $$lines[$x+$i+1] =~ /^\+{3}\ .+/)
+ {
+ $sig = 0;
+ @sig = @$lines[$x..$#$lines];
+ splice(@$lines, $x);
+ last;
+ }
+ }
+ if (scalar(@sig)) { last; }
+ }
+
+ if ($sig || ($lsig && ($#$lines-$x>$lsig))) {
+ if ($lsig && !$sig) {
+ push(@sig, "[---=| Overlong signature removed by $0: " .
+ (scalar(@$lines)-$x) . " lines snipped |=---]\n");
+ }
+ splice(@$lines, $x);
+ }
+ elsif ($#$lines-$x<=($lsig?$lsig:$maxsig)) {
+ @sig = @$lines[$x..$#$lines];
+ splice(@$lines, $x);
+ }
+ last;
+ }
+ }
+ }
+
+ # Now care about TOFU.
+ # One common mispractice is M$ style TOFU:
+ if ($ms) {
+ # bloat this array if you want more internationalization:
+ my @tofu = ('Original Message',
+ 'Ursprüngliche Nachricht',
+ 'Ursprungliche Nachricht',
+ 'Mensagem original');
+
+ DONE: for ($x=0; $x<scalar(@$lines); $x++) {
+ if (!$vrb[$x]) {
+ foreach my $tmp (@tofu) {
+ if ($$lines[$x] =~ /^-+\s?$tmp\s?-+\s*$/) {
+ $x++;
+ last DONE;
+ }
+ }
+ }
+ }
+
+ $j = scalar(@$lines)-$x;
+ splice(@$lines, $x);
+ }
+
+ # Nothing? Then try traditional TOFU:
+ if ($trad && (!$j) && !$vrb[$#$lines]) {
+ if (scalar(@$lines) && $$lines[$#$lines] =~ /^\s*$/) {
+ unshift(@sig, pop(@$lines));
+ }
+ while (scalar(@$lines) && $$lines[$#$lines] =~ /^\s*$/) {
+ pop(@$lines);
+ }
+ while (scalar(@$lines) && $$lines[$#$lines] =~ /^$indent/) {
+ $j++;
+ pop(@$lines);
+ }
+ }
+
+ # OK, if we found TOFU, we will leave a message that we were here...
+ if ($j) {
+ # make sendmail bounce if we shall be picky
+ # and indeed found something:
+ if ($mda) {
+ print STDERR $boun;
+
+ if ($sysl) {
+ eval { require Sys::Syslog; };
+ if ($@) { warn $@; } else {
+ Sys::Syslog::setlogsock('unix');
+ Sys::Syslog::openlog("$0[$$]", 'pid', 'mail');
+ Sys::Syslog::syslog('debug', 'bounced message %s', $hdr[0]);
+ Sys::Syslog::closelog();
+ }
+ }
+
+ exit $EX_BOUNCE;
+ }
+
+ push(@$lines, "[---=| TOFU protection by $0: $j lines snipped |=---]\n");
+ }
+
+
+ # Care for trailing whitespaces:
+ if ($trsp) {
+ for ($x=0; $x<scalar(@$lines); $x++) {
+ if (!$vrb[$x]) { $$lines[$x] =~ s/[\ \t]+$//; }
+ }
+ }
+
+ # Care for punctuation abuse:
+ if ($elli) {
+ for ($x=0; $x<scalar(@$lines); $x++) {
+ if (!$vrb[$x]) { $$lines[$x] =~ s/([.?!])(\1{2})\1+/$1 . $2/eg; }
+ }
+ }
+
+ # (Nearly) at last care for multiple blank lines. (Do not do this
+ # earlier -- the way it is done right now would screw up the verbatim
+ # list)
+ if ($cr) {
+ my $t = 0;
+ for ($x=scalar(@$lines)-1; $x>=0; $x--) {
+ if ((!$vrb[$x]) && $$lines[$x] =~ /^\s*$/) {
+ if ($t<2) { $t++; } else { splice(@$lines, $x, 1); }
+ }
+ else { $t = 0; }
+ }
+ }
+
+ # Everything changing the body is done now. Time to fix the line count
+ # header so naive clients do not get confused. Just to be sure, append
+ # the old line count to X-headers.
+ my $l = scalar(@bo1) + scalar(@$lines) + scalar(@att) + scalar(@bo2) +
+ (!$sig?scalar(@sig):0) + (!$ml?scalar(@ftr):0) +
+ (!$ad?scalar(@ads):0);
+ for ($x=0; $x<scalar(@hdr); $x++) {
+ if ($hdr[$x] =~ s/^(Lines:\s+)(\d+)(\s*)?$/$1 . $l . ($3?$3:'')/e &&
+ $2!=$l)
+ {
+ $hdr[$#hdr] = "X-Old-Lines: $2\n";
+ push(@hdr, "\n");
+ }
+ }
+
+ # Finally, before leaving we put everything back in right order.
+ unshift(@$lines, (!$hdrs?@hdr:()), @bo1);
+ push(@$lines, (!$sig?@sig:()), (!$ad?@ads:()), (!$ml?@ftr:()), @att,
+ @bo2);
+}
+
+
+# command line switches
+($ad, $ads, $cr, $sysl, $diff, $elli, $footers, $ml, $ms, $mda, $mua,
+ $hdrs, $lsig, $sig, $trad, $trsp) =
+ (0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0);
+my $ifile = '-'; # use STDIN if nothing specified
+
+# get command line params:
+$0 =~ s!^.*/!!;
+Getopt::Mixed::init('a A=s c d e h i=s L=s l m M=s o=s P=s p=s r S:i'.
+ ' s t v w debug>d diff help>h mua>M version>v');
+while (my ($opt, $val, $pretty) = nextOption()) {
+ if ($opt eq 'a') { $ad = 1; }
+ elsif ($opt eq 'A') { $ads = $val; }
+ elsif ($opt eq 'c') { $cr = 1; }
+ elsif ($opt eq 'd') { $sysl = 1; }
+ elsif ($opt eq 'diff') { $diff = 1; }
+ elsif ($opt eq 'e') { $elli = 1; }
+ elsif ($opt eq 'i') { $ifile = $val; }
+ elsif ($opt eq 'L') { $footers = $val; }
+ elsif ($opt eq 'l') { $ml = 1; }
+ elsif ($opt eq 'm') { $ms = 1; }
+ elsif ($opt eq 'M') { $mua = $val; }
+ elsif ($opt eq 'o') { $ofile = $val; }
+ elsif ($opt eq 'P') { $boun = $val; }
+ elsif ($opt eq 'p') { $mda = $val; }
+ elsif ($opt eq 'r') { $hdrs = 1; }
+ elsif ($opt eq 'S') { $lsig = $val ? $val : $maxsig; }
+ elsif ($opt eq 's') { $sig = 1; }
+ elsif ($opt eq 't') { $trad = 1; }
+ elsif ($opt eq 'v') { version(); }
+ elsif ($opt eq 'w') { $trsp = 1; }
+ else { help(); }
+}
+Getopt::Mixed::cleanup();
+if (($ml && $footers eq '')||($ad && $ads eq '')) { help(); }
+
+
+# Read message:
+open(IN, $ifile) || die "Could not open $ifile: $!";
+my @message = <IN>;
+close IN;
+
+# this should be self-explanatory:
+process_msg(\@message);
+
+# Finally, print clean lines:
+write_msg(($mda?"|$sendmail $mda":">$ofile"), \@message);
+
+# eof