+2010-07-01 16:02 Jochen Striepe
+
+ * t-prot: Release as v2.99.
+
+2010-07-01 15:58 Jochen Striepe
+
+ * t-prot, t-prot.1: New command line option --fixind to fix broken
+ quoting (according to RFC 3676). Perhaps this should not be
+ default, as there might be false positives if the message is
+ already quoted correctly. Quotes with this particular brokenness
+ are quite common, though. Patch by Simon Ruderich, many thanks.
+
+2010-06-09 19:32 Jochen Striepe
+
+ * t-prot: Use index() instead of regex for $indent. Patch by Simon
+ Ruderich.
+
+2010-04-20 09:40 Jochen Striepe
+
+ * t-prot: Revert new handling of empty lines at the beginning of
+ the body, as it removes lines where it should not.
+
+2010-04-20 08:58 Jochen Striepe
+
+ * t-prot: Another bug with -c: Empty lines before the signature
+ were not handled properly. Patch by Simon Ruderich.
+
+2010-04-20 08:57 Jochen Striepe
+
+ * t-prot: This causes -c1 to not create an empty line at the
+ beginning of the message if there were empty lines at the start of
+ the body. Patch by Simon Ruderich.
+
+2010-04-20 08:55 Jochen Striepe
+
+ * t-prot, t-prot.1: Typos and documentation fixes regarding
+ $indent. Many thanks to Simon Ruderich.
+
2010-04-09 06:47 Jochen Striepe
* t-prot: Release as v2.98.
#!/usr/bin/perl -w
-# $Id: t-prot,v 1.300 2010/04/09 06:47:11 jochen Exp $
+# $Id: t-prot,v 1.307 2010/07/01 16:02:56 jochen Exp $
require 5.006;
use strict;
use Fcntl qw(O_EXCL O_WRONLY O_CREAT);
use Getopt::Long qw(:config gnu_getopt no_ignore_case);
-use constant VER => '2.98';
+use constant VER => '2.99';
use constant REV => '';
-use constant REL => q$Revision: 1.300 $=~m/(\d+(?:\.\d+)+)/;
+use constant REL => q$Revision: 1.307 $=~m/(\d+(?:\.\d+)+)/;
# MTA expecting mail on STDIN
# (you might have to adjust this if using a different MTA)
use constant SENDMAIL => '/usr/sbin/sendmail -oi';
use constant EX_BOUNCE => EX_UNAVAILABLE;
use vars qw(
$ad $ads $bigqn $bigqx $boun $check $check_ratio $cr $crshrink $diff $elli
- $footers $ftr_ad $ftr_ml $hdrs $indent $kamm $kdiff $kminl $kmaxl
+ $fixind $footers $ftr_ad $ftr_ml $hdrs $indent $kamm $kdiff $kminl $kmaxl
$lax $lsig $maxsig $maxlines $mda $ml $gw $ms $ms_smart $msg_quote
$msg_ratio $mua $nohdr $ofile $pgpshort $pgpmove $pgpmovevrf $reply
$sani $sig $sigint $sign $spass $spass_prefix $sysl $trad $trsp
$maxsig = 4; # max. valid signature length
$maxlines = undef; # no limit of message lines
$crshrink = 2; # multiple blank lines are shrunk to $crshrink lines
-$indent = '>'; # Indent string, regexp to identify a quoted line
+$indent = '>'; # Indent string to identify a quoted line
$kminl = 65; # see decomb() for details
$kmaxl = 80;
$kdiff = 20;
-d, --debug print notice to syslog when bouncing; requires -p
--diff tolerate diffs
-e force ellipsis for excessive punctuation
+ --fixind fix quotes to adhere to RFC 3676
--ftr-ad enable aggressive ad footer matching; requires -A
--ftr-ml enable aggressive mailing list footer matching; req. -L
--groupwise delete Novell Groupwise style TOFU
--lax-security use unsafe writing method; USE ON YOUR OWN RISK!
--locale=LOCALE internationalization; currently only used with -Mmutt
-M, --mua=MUA turn on special treatment for some mail user agents
- -m delete MS style TOFU; careful: might be too agressive
+ -m delete MS style TOFU; careful: might be too aggressive
--max-lines=x maximum number of message lines
--ms-smart try to be smart with MS style TOFU; req. -Mmutt and -m
-o OUTFILE file to be written to; '-' for STDOUT (default)
--pgp-short hide non-relevant pgp key uids; requires -Mmutt
-r delete mail header lines
--reply squeeze multiple reply prefixes in subject line
- -S[n] supress signatures with more than n lines (default $maxsig)
+ -S[n] suppress signatures with more than n lines (default $maxsig)
-s delete signature
--sani sanitize some header fields
--sigsmax[=n] max number of sigs tolerated, no value for unlimited
}
# debigq(): Finds big quotes (more than $n lines quoted) and deletes all
-# but $x lines of them.
+# but the last $x lines of them.
sub debigq {
my $L = shift; # array of message lines
my $V = shift; # array with verbatim list
}
}
+ # Fix quote markers to adhere to RFC 3676: this changes "> >" to ">> " (if
+ # the default $indent is used). The space after ">" is not mandatory by RFC
+ # but makes the result more readable. Lines flagged in @vrb are left
+ # untouched.
+ if ($fixind) {
+ for ($x=0; $x<scalar(@$lines); $x++) {
+ # index() is a cheap prefilter for quoted lines; the regex then splits
+ # the line into the quote prefix ($1) and whatever follows it ($3).
+ # We match space at the beginning to prevent removal of spaces
+ # directly after the last quote mark. $1 and $3 are only read when
+ # the match succeeded, so stale captures can never be used.
+ if (!$vrb[$x] && index($$lines[$x], $indent)==0 &&
+ $$lines[$x] =~ /^((\Q$indent\E| )*\Q$indent\E ?)(.*)$/) {
+ my $tmp = $1;
+ my $len = length $1;
+ my $rest = $3; # if anything follows after the quote
+ $tmp =~ tr/ //d;
+ # Don't create trailing whitespace. Test the length rather than the
+ # truth value: a quoted line whose content is just "0" is false in
+ # boolean context but still needs its separating space.
+ $tmp .= ' ' if length $rest;
+ substr($$lines[$x], 0, $len, $tmp);
+ }
+ }
+ }
+
# See if there is some Kammquoting to fix:
if ($kamm) { decomb($lines, \@vrb); }
for (my $i=$#$lines; $i>=0; $i--) {
if ($vrb[$i]) { last; }
- if ($$lines[$i] =~ /^$indent/o) {
+ if (index($$lines[$i], $indent)==0) {
$j++;
$k = $i;
}
# earlier -- the way it is done right now would screw up the verbatim
# list)
if ($cr) {
+ # When handling regular TOFU above we move the last empty line from
+ # the body to the signature. With -c and -t this would cause one empty
+ # line too few to be removed before the signature, so move that line
+ # back from the signature to the end of the body first.
+ if (scalar(@sig) && $sig[0] =~ /^\s*$/) {
+ push(@$lines, shift(@sig));
+ }
+
my $t = 0;
for ($x=scalar(@$lines)-1; $x>=0; $x--) {
if ((!$vrb[$x]) &&
($ENV{'LC_MESSAGES'}?$ENV{'LC_MESSAGES'}:$ENV{'LANG'});
# command line switches
-($ad, $ads, $bigqn, $bigqx, $check, $cr, $sysl, $diff, $elli, $footers, $lax,
+($ad, $ads, $bigqn, $bigqx, $check, $cr, $sysl, $diff, $elli, $fixind, $footers, $lax,
$ml, $gw, $ms, $ms_smart, $mda, $mua, $hdrs, $kamm, $lsig, $nohdr, $reply,
$sani, $sig, $sigint, $spass, $trad, $trsp) =
- (0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0);
+ (0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0);
my $ifile = '-'; # use STDIN if nothing specified
# temp vals:
'debug|d' => \$sysl,
'diff' => \$diff,
'e' => \$elli,
+ 'fixind' => \$fixind,
'ftr-ad' => \$ftr_ad,
'ftr-ml' => \$ftr_ml,
'groupwise' => \$gw,
-.\" $Id: t-prot.1,v 1.182 2010/04/09 06:45:35 jochen Exp $
+.\" $Id: t-prot.1,v 1.184 2010/07/01 15:58:23 jochen Exp $
.\"
-.TH T-PROT "1" "April 2010" "T-PROT"
+.TH T-PROT "1" "July 2010" "T-PROT"
.SH NAME
t-prot \- TOFU Protection - Display Filter for RFC 5322 messages
.SH SYNOPSIS
Squeezes a sequence of four or more dots, exclamation marks, or question marks
to only three dots or marks, respectively.
.TP
+.BR "\-\-fixind"
+Fix broken quotes to adhere to RFC 3676 by removing spaces between quote
+characters and adding a space after them.
+.br
+.IR NOTE :
+This may produce false positives if spaces in between quote characters
+are intended (thus changing the quoting level, see RFC 3676 for details).
+.TP
.B "\-\-groupwise"
Hides TOFU as produced by Novell Groupwise.
.TP
.TP
.B "\-t"
"TOFU deletion":
-Hides "traditional style" TOFU, where each line begins with an
-indent string like "> ".
-.br
-(You may edit the indent pattern in the script itself to suit your needs,
-but it is surely
-.I not
-recommended at all.)
+Hides "traditional style" TOFU, where each line begins with the
+indent string ">".
.TP
.B "\-w"
"whitespace deletion":
.BR perl (1),
.BR aliases (5),
.sp
-RFCs 2045-2049 and 5322,
+RFCs 2045-2049, 3676 and 5322,
.sp
.I http://freshmeat.net/articles/t\-prot/
(a nice, solid introduction),