2 # $Id: t-prot,v 1.51 2002/03/23 10:47:32 jochen Exp $
6 use Getopt::Mixed qw(nextOption);
9 $EX_OK $EX_USAGE $EX_DATAERR $EX_UNAVAILABLE $EX_BOUNCE
10 $ad $ads $boun $cr $diff $elli $footers $hdrs $indent $lsig $maxsig
11 $mda $ml $ms $mua $ofile $sendmail $sig $sysl $trad $trsp
18 $REL = q$Revision: 1.51 $; chop($REL);
20 # (you might have to adjust those if not using GNU libc)
25 $EX_BOUNCE = $EX_UNAVAILABLE;
26 # Please adjust these vals to your needs:
27 $maxsig = 4; # max. valid signature length
28 $indent = '>'; # Indent string, regexp to identify a quoted line
29 $sendmail = '/usr/sbin/sendmail -oi'; # MTA expecting mail on STDIN
30 $boun = "Blocked by $0: This user does not accept TOFUed email. Please see <http://learn.to/edit_messages/> and <http://www.escape.de/users/tolot/mutt/> for more info. Have a nice day!\n";
31 $ofile = '-'; # use STDOUT if nothing is specified
32 # end of user adjusted vals
35 # help(): print help text and exit with appropriate exit code
37 print "Usage: $0 [options]
38 -a remove ad footers; requires -A
39 -A=DIRECTORY ad footer directory, treat ad footers as signature
40 -c merge multiple blank lines
41 -d, --debug print notice to syslog when bouncing; requires -p
42 --diff tolerate diffs appended *after* the signature
43 -e force ellipsis for excessive punctuation
44 -h, --help show this short help and exit
45 -i=INFILE file to be read; '-' for STDIN (default)
46 -L=DIRECTORY mailling list footer directory, treat mailing list
48 -l delete mailing list footer; requires -L
49 -M, --mua=MUA turn on special treatment for some mail user agents
50 -m delete MS style TOFU; careful: might be too agressive
51 -o=OUTFILE file to be written to; '-' for STDOUT (default)
52 -P=MESSAGE user defined bounce message; requires -p
53 -p=ADDRESS redirect to ADDRESS if no TOFU was found
54 -r delete mail header lines
55 -S[=n] supress signatures with more than n lines;
56 default is $maxsig if n not specified
58 -t delete traditional style TOFU
59 -v, --version show version string and exit
60 -w delete trailing whitespaces\n";
64 # version(): print version info and exit with appropriate exit code
66 print "$0 v$VER$REV ($REL), Jochen Striepe <t-prot\@tolot.escape.de>
67 Get the latest version at <http://www.escape.de/users/tolot/mutt/>\n";
71 # remove_footers(): remove any trailing appearance of footers contained
72 # in the given directory.
74 my $L = shift; # array of message lines
75 my $S = shift; # array to store removed lines in
76 my $F = shift; # footers dir name
77 my $O = shift; # remove only one footer?
79 if ($F && scalar(@$L)) {
80 opendir(DIR, $F) || die "Could not open $F: $!";
81 my @feet = grep { /^[^.]/ && -f "$F/$_" } readdir DIR;
84 foreach my $f (@feet) {
85 open(IN, "$F/$f") || die "Could not open $F/$f: $!";
89 while (scalar(@l)<=scalar(@$L)) {
91 for(my $x=1; $x<=scalar(@l); $x++) {
92 chomp($l[scalar(@l)-$x]);
93 if (index($$L[scalar(@$L)-$x], $l[scalar(@l)-$x])!=0) {
98 unshift(@$S, @$L[$#$L-$#l..$#$L]);
99 splice(@$L, $#$L-$#l);
100 while (scalar(@$L) && $$L[$#$L] =~ /^\s*$/) {
101 unshift(@$S, pop(@$L));
111 # write_msg(): output
116 open(OUT, $O) || die "Could not open $O: $!";
128 # process_msg(): This one proc does *everything* what has to be done with
129 # the lines of the message
133 my ($j, $x, $verb) = (0, 0, 0);
134 my (@ads, @hdr, @bo1, @bo2, @ftr, @sig, @vrb, @att) =
135 ((), (), (), (), (), (), (), (), ());
137 # First, remove and store lines we might need later...
139 for ($x=0; $x<$#$lines; $x++) { if (@$lines[$x] =~ /^$/) { last; }; }
140 @hdr = @$lines[0..$x];
141 splice(@$lines, 0, $x+1);
143 # See if we have a multipart content type. If yes, see if it is already
144 # ripped (e.g. by mutt(1)), otherwise only leave the first part if it
145 # is plain text (if not, we are done - non-text messages are not our
147 if (lc($mua) ne 'mutt') {
148 for ($x=0; $x<scalar(@hdr); $x++) {
149 if ($hdr[$x] =~ /^Content-Type:\s+(.*)$/i) {
152 if ($foo =~ /^multipart\//i) {
155 if ($hdr[$x] =~ /\Wboundary="([^"]+)"/i) { $foo = $1; }
157 for (my $z=1; $x+$z<@hdr && $hdr[$x+$z]=~/^\s/; $z++) {
158 if ($hdr[$x] =~ /\Wboundary="?([\S]+)"?$/i) {
166 for (my $x=0; $x<scalar(@$lines); $x++) {
167 if (index($$lines[$x], '--'.$foo)!=0) { next; }
169 my $bar = 'text/plain';
170 for ($x++; $x<@$lines && $$lines[$x]!~/^$/; $x++)
172 if ($$lines[$x] =~ /^Content-Type:\s+(.*)$/i) {
176 if ($x>=scalar(@$lines)) { exit($EX_DATAERR); }
178 if ($bar =~ /^text\/plain/i) {
180 for ($z=1; $x+$z<@$lines; $z++) {
181 if (index($$lines[$x+$z], '--'.$foo)==0) {
185 if ($x+$z>=scalar(@$lines)) { exit($EX_DATAERR); }
187 @bo2 = @$lines[$x+$z..$#$lines];
188 splice(@$lines, $x+$z);
189 if ($$lines[$#$lines] =~ /^\s*$/) {
190 unshift(@bo2, pop @$lines);
192 @bo1 = @$lines[0..$x];
193 splice(@$lines, 0, $x+1);
197 write_msg(($mda?"|$sendmail $mda":">$ofile"),
198 ($hdrs?undef:\@hdr), $lines);
212 for ($x=0; $x<scalar(@$lines); $x++) {
213 if ($$lines[$x] =~ /^\s*#v([+-])\s*$/) {
214 $verb = $1 eq '+' ? 1 : 0;
220 if (lc($mua) eq 'mutt') {
221 # Remove all but the first attachment (if this is text/plain)
222 # mutt did introduce (bah!). Remember, all this ugliness could
223 # be replaced with a proper and clean edit_filter patch in
225 for ($x=0; $x<scalar(@$lines); $x++) {
226 if ($vrb[$x]) { next; }
227 # The following regexp's are quite ugly because for most users
228 # these lines are coloured using termcap... (bah!)
229 if (($$lines[$x] =~ /^[^>[]*\[-- Attachment #(\d+)(: .*)? --\]\s*$/ &&
231 ($x<scalar(@$lines) &&
232 $$lines[$x+1] !~ /^[^>[]*\[-- Type: text\/plain/))) ||
233 ($$lines[$x] =~ /^[^>[]*\[-- End of .* data --\]\s*$/))
235 @att = @$lines[$x..$#$lines];
237 if (scalar(@$lines) && $$lines[$#$lines] =~ /^\s*$/) {
238 unshift(@att, pop(@$lines));
244 # Pipe message/rfc822 parts to another instance of process_msg()
245 # for further processing.
246 # Please note that we cannot see what a hierarchy the original
247 # message had -- if there were message/rfc822 parts within other
248 # message/rfc822 parts constellations can occur which we cannot
249 # resolve. Therefore we simply do not even try to be smart. This
250 # should work for most situations:
252 for ($x=0; $x<$#att; $x++) {
253 if ($vrb[scalar(@$lines)+$x]) { next; }
254 # The following regexp is quite ugly because for most
255 # users the line is coloured using termcap... (bah!)
256 if ($att[$x]=~/^[^>[]*\[-- Attachment #\d+(: .*)? --\]\s*$/ &&
257 $att[$x+1] =~ /^[^>[]*\[-- Type: message\/rfc822/)
260 while ($att[$x] !~ /^\s*$/) { $x++; }
263 my @tmp = @att[$x..$#att];
265 splice(@att, $x, scalar(@att)-$x, @tmp);
272 remove_footers($lines, \@ftr, $footers, undef);
275 remove_footers($lines, \@ads, $ads, undef);
278 if (scalar(@$lines)) {
279 for ($x=0; $x<scalar(@$lines); $x++) {
280 if ((!$vrb[$x]) && $$lines[$x] =~ /^-- $/) {
282 for (my $i=1; $x+$i+1<scalar(@$lines); $i++) {
283 if ($$lines[$x+$i] =~ /^\-{3}\ .+/ &&
284 $$lines[$x+$i+1] =~ /^\+{3}\ .+/)
287 @sig = @$lines[$x..$#$lines];
292 if (scalar(@sig)) { last; }
295 if ($sig || ($lsig && ($#$lines-$x>$lsig))) {
296 if ($lsig && !$sig) {
297 push(@sig, "[---=| Overlong signature removed by $0: " .
298 (scalar(@$lines)-$x) . " lines snipped |=---]\n");
302 elsif ($#$lines-$x<=($lsig?$lsig:$maxsig)) {
303 @sig = @$lines[$x..$#$lines];
311 # Now care about TOFU.
312 # One common mispractice is M$ style TOFU:
314 # bloat this array if you want more internationalization:
315 my @tofu = ('Original Message',
316 'Ursprüngliche Nachricht',
317 'Ursprungliche Nachricht',
318 'Mensagem original');
320 DONE: for ($x=0; $x<scalar(@$lines); $x++) {
322 foreach my $tmp (@tofu) {
323 if ($$lines[$x] =~ /^-+\s?$tmp\s?-+\s*$/) {
331 $j = scalar(@$lines)-$x;
335 # Nothing? Then try traditional TOFU:
336 if ($trad && (!$j) && !$vrb[$#$lines]) {
337 if (scalar(@$lines) && $$lines[$#$lines] =~ /^\s*$/) {
338 unshift(@sig, pop(@$lines));
340 while (scalar(@$lines) && $$lines[$#$lines] =~ /^\s*$/) {
343 while (scalar(@$lines) && $$lines[$#$lines] =~ /^$indent/) {
349 # OK, if we found TOFU, we will leave a message that we were here...
351 # make sendmail bounce if we shall be picky
352 # and indeed found something:
357 eval { require Sys::Syslog; };
358 if ($@) { warn $@; } else {
359 Sys::Syslog::setlogsock('unix');
360 Sys::Syslog::openlog("$0[$$]", 'pid', 'mail');
361 Sys::Syslog::syslog('debug', 'bounced message %s', $hdr[0]);
362 Sys::Syslog::closelog();
369 push(@$lines, "[---=| TOFU protection by $0: $j lines snipped |=---]\n");
373 # Care for trailing whitespaces:
375 for ($x=0; $x<scalar(@$lines); $x++) {
376 if (!$vrb[$x]) { $$lines[$x] =~ s/[\ \t]+$//; }
380 # Care for punctuation abuse:
382 for ($x=0; $x<scalar(@$lines); $x++) {
383 if (!$vrb[$x]) { $$lines[$x] =~ s/([.?!])(\1{2})\1+/$1 . $2/eg; }
387 # (Nearly) at last care for multiple blank lines. (Do not do this
388 # earlier -- the way it is done right now would screw up the verbatim
392 for ($x=scalar(@$lines)-1; $x>=0; $x--) {
393 if ((!$vrb[$x]) && $$lines[$x] =~ /^\s*$/) {
394 if ($t<2) { $t++; } else { splice(@$lines, $x, 1); }
400 # Everything changing the body is done now. Time to fix the line count
401 # header so naive clients do not get confused. Just to be sure, append
402 # the old line count to X-headers.
403 my $l = scalar(@bo1) + scalar(@$lines) + scalar(@att) + scalar(@bo2) +
404 (!$sig?scalar(@sig):0) + (!$ml?scalar(@ftr):0) +
405 (!$ad?scalar(@ads):0);
406 for ($x=0; $x<scalar(@hdr); $x++) {
407 if ($hdr[$x] =~ s/^(Lines:\s+)(\d+)(\s*)?$/$1 . $l . ($3?$3:'')/e &&
410 $hdr[$#hdr] = "X-Old-Lines: $2\n";
415 # Finally, before leaving we put everything back in right order.
416 unshift(@$lines, (!$hdrs?@hdr:()), @bo1);
417 push(@$lines, (!$sig?@sig:()), (!$ad?@ads:()), (!$ml?@ftr:()), @att,
422 # command line switches
423 ($ad, $ads, $cr, $sysl, $diff, $elli, $footers, $ml, $ms, $mda, $mua,
424 $hdrs, $lsig, $sig, $trad, $trsp) =
425 (0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0);
426 my $ifile = '-'; # use STDIN if nothing specified
428 # get command line params:
430 Getopt::Mixed::init('a A=s c d e h i=s L=s l m M=s o=s P=s p=s r S:i'.
431 ' s t v w debug>d diff help>h mua>M version>v');
432 while (my ($opt, $val, $pretty) = nextOption()) {
433 if ($opt eq 'a') { $ad = 1; }
434 elsif ($opt eq 'A') { $ads = $val; }
435 elsif ($opt eq 'c') { $cr = 1; }
436 elsif ($opt eq 'd') { $sysl = 1; }
437 elsif ($opt eq 'diff') { $diff = 1; }
438 elsif ($opt eq 'e') { $elli = 1; }
439 elsif ($opt eq 'i') { $ifile = $val; }
440 elsif ($opt eq 'L') { $footers = $val; }
441 elsif ($opt eq 'l') { $ml = 1; }
442 elsif ($opt eq 'm') { $ms = 1; }
443 elsif ($opt eq 'M') { $mua = $val; }
444 elsif ($opt eq 'o') { $ofile = $val; }
445 elsif ($opt eq 'P') { $boun = $val; }
446 elsif ($opt eq 'p') { $mda = $val; }
447 elsif ($opt eq 'r') { $hdrs = 1; }
448 elsif ($opt eq 'S') { $lsig = $val ? $val : $maxsig; }
449 elsif ($opt eq 's') { $sig = 1; }
450 elsif ($opt eq 't') { $trad = 1; }
451 elsif ($opt eq 'v') { version(); }
452 elsif ($opt eq 'w') { $trsp = 1; }
455 Getopt::Mixed::cleanup();
456 if (($ml && $footers eq '')||($ad && $ads eq '')) { help(); }
460 open(IN, $ifile) || die "Could not open $ifile: $!";
464 # this should be self-explanatory:
465 process_msg(\@message);
467 # Finally, print clean lines:
468 write_msg(($mda?"|$sendmail $mda":">$ofile"), \@message);