#!/usr/bin/perl -w =head1 NAME mhthread - sort an MH folder into 'threaded' order =head1 SYNOPSIS mhthread [options] +folder mhthread [options] /path/to/folder options accepted: [-debug] [-no-write] [-fast] [-lock] =head1 DESCRIPTION This will thread an MH folder. It re-orders the messages (as sortm(1) would do), and annotates each one with a new header, "X-MH-Thread-Markup", which can be displayed by scan(1). Together, this results in the messages being displayed in "threaded" order, as in trn(1) or mutt(1). Sequences will be rewritten appropriately. The folder will also be "packed", as if 'folder -pack' had been run; see folder(1). =head1 RESULTS Here's some sample output from scan(1), after threading the folder: 430 03/23 mathew 3 [Asrg] Re: [OffTopic - NNTP] 431 03/23 Kee Hinckley 5 |- [Asrg] Re: [OffTopic - NNTP] 432 -03/23 Chuq Von Rospach 11 | |- Parameters for success? (was Re: [A 433 03/23 To:Chuq Von Rospa 4 | | \- Re: Parameters for success? (was 434 03/23 Matt Sergeant 3 | \- Re: [Asrg] Re: [OffTopic - NNTP] 435 03/23 Chuq Von Rospach 7 \- Re: [Asrg] Re: [OffTopic - NNTP] =head1 OPTIONS =over 4 =item -fast Use an on-disk cache to speed up operation. =item -lock Use a folder-wide lock-file to synchronize access to folders, so that multiple processes will not stomp on each other's changes or cause folder corruption. If you use this, you should ensure that you also use a locking version of other tools, such as the C script that comes with ExMH (typical location: C). =item -no-write Do not rewrite the messages; instead, output a line for each message noting the actions that would be taken. =item -debug Output debugging info to stderr. =back Note that options will also be read from the C entry in your C<.mh_profile> file, in traditional MH style. =head1 INSTALLATION FOR SCAN To display the results in scan(1) output, use something like the following for the subject-display part of the scan.form file: %(decode{x-mh-thread-markup})%(decode{subject}) If you do not have a "scan.form" file of your own, you will need to set it up. This functionality is accessed using the -form or -format switches to the scan(1) command. To use this, copy the /etc/nmh/scan.default file to your ~/Mail dir and modify it with the above line, then add scan: -form scan.form to your ~/.mh_profile. Copy this script to somewhere in your path, called C. Then run that whenever you want to re-thread the folder, in the same way you would C, C or similar. =head1 INSTALLATION FOR EXMH Copy this script to somewhere in your path, called C. Add the following function to your C<~/.tk/exmh/user.tcl> file: proc Folder_Thread {} { global exmh Background_Wait Exmh_Status "Threading folder..." blue if {[Ftoc_Changes "Thread"] == 0} then { if {[catch {MhExec mhthread +$exmh(folder)} err]} { Exmh_Status $err error } else { # finish off by using the ExMH packing logic to redisplay folder Folder_Pack # then show the first unseen message Msg_ShowUnseen } } } Next, you need to rebuild the C file. Run C and type: auto_mkindex ~/.tk/exmh *.tcl Now add a button to run this function. To do this, you must exit ExMH first, then edit the C<~/.exmh/exmh-defaults> file and add these files at the top of the file: *Fops.ubuttonlist: thread *Fops.thread.text: Thread *Fops.thread.command: Folder_Thread Restart ExMH, and there should be a new button marked B on the folder button-bar. Press this to re-thread the current folder. =head1 NOTES The threading algorithm uses the In-Reply-To, Message-Id and References headers. Thanks to JWZ for guidance, in the form of his page on threading at C. The 'X-MH-Thread-Markup' headers are encoded using RFC-2047 encoding, using 'no-break space' characters for whitespace, as otherwise MH's scan(1) format code will strip them. Here's an example of the results: X-MH-Thread-Markup: =?US-ASCII?Q?=a0=a0=a0=a0=5c=2d=a0?= =head1 TODO dealing with private sequences (stored in .mh_profile); limiting displayed thread-depth to keep UI readable (so far has not been a problem). =head1 BUGS duplicate messages will always be shuffled in order each time C is run, due to handling of identical Message-Ids. =head1 DOWNLOAD Latest version can be found at http://jmason.org/software/mhthread/ . =head1 AUTHOR Justin Mason, C =head1 VERSION version = 1.7, Jul 25 2003 jm =cut sub usage { die " usage: mhthread [options] +folder mhthread [options] /path/to/folder options accepted: [-debug] [-no-write] [-fast] [-lock] "; } use vars qw( $mh_sequences_file_name $mhthread_options ); read_mh_profile_data (); if (defined $mhthread_options) { unshift (@ARGV, split(' ', $mhthread_options)); } my $folder = ''; my $no_write = 0; my $fast = 0; my $lock = 0; my $dbg = 0; use Getopt::Long qw(:config no_ignore_case prefix_pattern=(--|-)); GetOptions( "debug" => \$dbg, "fast" => \$fast, "lock" => \$lock, "no-write" => \$no_write, '<>' => sub { $folder = $_[0]; } ); usage unless ($folder =~ /\S/); my $full_debug_logfile = 0; # hack this line to enable this if ($full_debug_logfile) { $dbg = 1; close STDERR; open (STDERR, ">".$ENV{HOME}."/.mhthread.log"); } ########################################################################### use vars qw( %TZ %MONTH $locked_folder_lockfile ); use strict; use Time::Local; if (!-d $folder) { chomp ($folder = `mhpath $folder`); if (!-d $folder) { usage(); } } init_tz(); if ($lock) { $SIG{INT} = $SIG{TERM} = \&unlock_and_die; mh_lock_folder ($folder); } # trap die()s eval { my $ctx = thread_folder ($folder); mh_rewrite_folder ($folder, $ctx); }; my $err = $@; # always unlock, even if we died if ($lock) { mh_unlock_folder ($folder); } # and finally, propagate the death exception if ($err) { die $err; } if ($full_debug_logfile) { if (open (IN, "<$ENV{HOME}/Mail/inbox/".$mh_sequences_file_name)) { while () { warn "newseqfile: $_"; } close IN; } } # otherwise we're fine, exit 0 exit; ########################################################################### sub mh_lock_folder { $locked_folder_lockfile = $folder."/.lock"; system ("lockfile", $locked_folder_lockfile); if ($? >> 8 != 0) { die "failed to lock folder $folder (lockfile $locked_folder_lockfile)\n"; # $locked_folder_lockfile = undef; # not needed, we're dead ;) } } sub mh_unlock_folder { if (defined $locked_folder_lockfile) { unlink $locked_folder_lockfile; $locked_folder_lockfile = undef; } } sub unlock_and_die { mh_unlock_folder(); die "killed by signal\n"; } ########################################################################### use vars qw(@to_unlink @to_rename %num2seq %newseqs %changedseqs); sub mh_rewrite_folder { my ($folder, $ctx) = @_; my $newnum = 0; my %msg_rewritten = (); foreach my $num (@{$ctx->{all_message_locs}}) { $msg_rewritten{$num} = 0; } # read the mh_sequences file, and create the map of sequences in this # folder... cf. man mh-sequence, man mark. %num2seq = (); %newseqs = (); %changedseqs = (); if (open (IN, "<".$folder."/".$mh_sequences_file_name)) { while () { /^([^:]+): (.+)$/ or next; my $seq = $1; my $msgs = $2; $newseqs{$seq} ||= []; $changedseqs{$seq} ||= 0; $dbg and warn "debug: old sequences: seq=$seq nums=$msgs\n"; foreach my $spec (split (' ', $msgs)) { if ($spec =~ /(\d*)\-(\d*)/) { my $start = $1; $start ||= 1; my $end = $2; if (!$end) { # not supposed to happen with nmh at least warn "oops! no end for sequence: '$_'"; $end = 9999; } my $i; for ($i = $start; $i <= $end; $i++) { $num2seq{$i} ||= []; push (@{$num2seq{$i}}, $seq); } } else { $num2seq{$spec} ||= []; push (@{$num2seq{$spec}}, $seq); } } } close IN; } my $changed = 0; foreach my $line (@{$ctx->{sorted}}) { $newnum++; my $oldnum = $line->{num}; my $oldprefix = $line->{existing_prefix} || ''; my $newprefix = $line->{prefix} || ''; $msg_rewritten{$oldnum} = 1; if (mh_rewrite_reorder_message ($ctx, $folder, $oldnum, $newnum, $oldprefix, $newprefix)) { $changed = 1; } } if ($changed) { # now if we missed any messages we read to start with, something's wrong # in the algorithm, and we could lose mail. Don't modify anything; just # die instead. my $failures = 0; foreach my $num (@{$ctx->{all_message_locs}}) { if (!$msg_rewritten{$num}) { warn "mhthread: message not threaded, adding to end: $num\n"; $newnum++; if (!mh_rewrite_reorder_message ($ctx, $folder, $num, $newnum, '', '')) { $failures++; warn "mhthread: oops! failed to recover: $num\n"; } } } if ($failures) { die "mhthread: not modifying old messages due to errors.\n"; } # otherwise, go right ahead and unlink/rename... foreach my $name (@to_unlink) { unlink $name or warn "unlink $name failed: $!"; } foreach my $name (@to_rename) { rename $name.".new", $name or warn "rename $name.new -> $name failed: $@"; } # now mark the sequences with the new message numbering foreach my $seq (keys %newseqs) { if (!$changedseqs{$seq}) { $dbg and warn "debug: new sequences: seq=$seq unchanged\n"; next; } my @messages = @{$newseqs{$seq}}; next unless (scalar @messages > 0); my @cmd = ('mark', '+'.$folder, @messages, '-sequence', $seq, '-add', '-zero'); $dbg and warn "debug: new sequences: seq=$seq cmd=".join (' ', @cmd)."\n"; system @cmd; if ($? >> 8 != 0) { warn "'".join (' ',@cmd)."' failed\n"; } } } } sub mh_rewrite_reorder_message { my ($ctx, $folder, $oldnum, $newnum, $oldprefix, $newprefix) = @_; my $oldname = $folder."/".$oldnum; my $newname = $folder."/".$newnum; if ($oldnum eq $newnum && $oldprefix eq $newprefix) { if ($no_write) { print "no move for $oldnum; subj-pfx '$newprefix'\n"; } $dbg and warn "debug: $oldnum: no differences, skipping move/rewrite\n"; foreach my $seq (@{$num2seq{$oldnum}}) { $dbg and warn "debug: $oldnum: will keep seq $seq\n"; push (@{$newseqs{$seq}}, $oldnum); } return 0; } if ($no_write) { print "mv $oldnum $newnum; subj-pfx '$newprefix'\n"; return 0; } $dbg and warn "debug: $oldnum->$newnum: move/rewrite\n"; if (!open (IN, "<".$oldname)) { warn "cannot read $oldname: $@"; return 0; } open (OUT, ">".$newname.".new") or die "write to $newname.new failed: $@"; while () { # remove an old thread-subject /^X-MH-Thread-Markup: / and next; /^$/ and last; # end of headers print OUT; } print OUT "X-MH-Thread-Markup: ".$newprefix."\n\n"; # dump the body # TODO: use read()/syswrite() while () { print OUT; } close IN; close OUT or die "write to $newname.new failed: $@"; push (@to_unlink, $oldname); push (@to_rename, $newname); foreach my $seq (@{$num2seq{$oldnum}}) { $dbg and warn "debug: $oldnum->$newnum: will fix seq $seq\n"; push (@{$newseqs{$seq}}, $newnum); $changedseqs{$seq} = 1; } return 1; } ########################################################################### # note: these global vars are ONLY used inside thread_folder (and # inside functions called by that fn). They are undef'd at the end # of that function's scope. # use vars qw(%mid2msg %tree %toplevel %subjtop %subjsets %subjearliest $uniqid ); sub thread_folder { local ($_); %mid2msg = ( ); # message-id to msg object %tree = ( ); # the threaded tree %toplevel = ( ); # top-level nodes of the tree %subjtop = ( ); # top-level nodes for a given subject string %subjsets = ( ); # top-level nodes with the same subject %subjearliest = ( ); # date of earliest message with that subject $uniqid = 1; # used to "unique-ify" duplicate message-Ids # %dupmessages = (); my $ctx = { sorted => [ ], all_message_locs => [ ], done => { } }; if ($fast) { eval { use Storable; $ctx->{fcache} = retrieve ($folder."/.thread.tmp"); }; if ($@) { $ctx->{fcache} = { }; } # kill it if it's corrupt } opendir(DIR, $folder) or warn "cannot opendir $folder: $!\n"; my $num; while (defined ($num = readdir(DIR))) { next unless ($num =~ /^(\d+)$/); my $msgpath = $folder."/".$num; my $stat_details; my $cachedmsg; if (defined $ctx->{fcache}) { my @st = stat($msgpath); if (!defined $st[7]) { warn "cannot stat, skipped: $msgpath\n"; next; } $stat_details = join('|', @st[0 .. 5], $st[7], $st[9], $st[10]); $cachedmsg = $ctx->{fcache}->{$num}; my $cstat = (defined($cachedmsg) ? $cachedmsg->{stat_details} : ''); if ($cstat eq $stat_details) { $dbg and warn "debug: $num cached message matches"; } else { $dbg and warn "debug: $num cached message no match ". "($stat_details vs $cstat)"; undef $cachedmsg; } } my $msg; if (defined ($cachedmsg)) { $msg = $cachedmsg; } else { my $hdrs = mh_read_message_headers ($ctx, $num, $msgpath); next unless defined($hdrs); $msg = parse_message_headers ($ctx, $num, $hdrs, $stat_details); } my ($subj, $sortsubj, $intdate, $irt, $re_in_subj, $mid); $subj = $msg->{subj}; $sortsubj = $msg->{sortsubj}; $intdate = $msg->{intdate}; $irt = $msg->{irt}; $re_in_subj = $msg->{re_in_subj}; $mid = $msg->{mid}; push (@{$ctx->{all_message_locs}}, $num); $mid2msg{$mid} = $msg; # create the node for that mid, if not already existing if (!exists $tree{$mid}) { $tree{$mid} = { }; } if (!defined $irt) { $dbg and warn "debug: $msg->{num} no IRT ($msg->{mid} $msg->{intdate})"; add_to_top_level ($msg); } else { my %seen = (); { next if ($seen{$irt}); $seen{$irt} = 1; if (!exists $tree{$irt}) { $tree{$irt} = { }; } $dbg and warn "debug: $msg->{num} IRT $irt ($mid $msg->{intdate})"; $tree{$irt}->{$mid} = $msg; } } } closedir DIR; # store it here. we don't care if we rewrite the order later on, # we just want to cache the least-changing messages in the folder. # Doing caching post-writes will require more logic in the threading # part to update this cache, and let's not bother with that! if ($fast && !$no_write) { eval { use Storable; store ($ctx->{fcache}, $folder."/.thread.tmp"); }; if ($@) { warn "failed to cache folder data: $@"; } } # now find "orphaned" message trees, and put them into the top level foreach my $mid (keys %tree) { next if (defined $mid2msg{$mid}); # it has a parent # OK, this is a message-id used in an In-Reply-To header, but we # don't have the msg. Reparent all its children down to the top-level # instead. foreach my $kid (keys %{$tree{$mid}}) { my $msg = $mid2msg{$kid}; # if it's already in the top-level, ignore it next if (defined $toplevel{$msg->{mid}}); if (!defined $msg) { warn "oops! nonexistent kid for $mid"; } $dbg and warn "debug: $msg->{num} orphaned ($msg->{mid})"; add_to_top_level ($msg); } } sub add_to_top_level { my ($msg) = @_; my $sortsubj = $msg->{sortsubj}; my $intdate = $msg->{intdate}; $toplevel{$msg->{mid}} = $msg; # if there was no re: tag, add it to the 'top' set for that subject line if (!$msg->{re_in_subj}) { $subjtop{$sortsubj} = $msg; } $subjsets{$sortsubj} ||= [ ]; push (@{$subjsets{$sortsubj}}, $msg); if (!exists ($subjearliest{$sortsubj}) || $subjearliest{$sortsubj} > $intdate) { $subjearliest{$sortsubj} = $intdate; } } # ok; try to figure out a rudimentary tree from the Subject line alone, # for messages that did not use 'In-Reply-To'. foreach my $subj (keys %subjsets) { # is there a suitable candidate for a 'parent' message? my $parent = $subjtop{$subj}; if (!defined $parent) { next; } foreach my $msg (@{$subjsets{$subj}}) { if ($msg->{re_in_subj} != 0) { # this msg has "Re:", but has no In-Reply-To. reparent it $tree{$parent->{mid}}->{$msg->{mid}} = $msg; delete $toplevel{$msg->{mid}}; } } } # now recursively display the tree. Sort by the date of the earliest message # with that subject line, and by existing number if there's a dup. foreach my $top (sort { $subjearliest{$toplevel{$a}->{sortsubj}} <=> $subjearliest{$toplevel{$b}->{sortsubj}} or $toplevel{$a}->{intdate} <=> $toplevel{$b}->{intdate} or $toplevel{$b}->{num} cmp $toplevel{$a}->{num} } keys %toplevel) { dig_thru_tree ($ctx, 0, $top, $toplevel{$top}); } foreach my $mid (keys %mid2msg) { if (!$ctx->{done}->{$mid}) { my $msg = $mid2msg{$mid}; $dbg and warn "debug: $msg->{num} missed ($msg->{mid})"; push (@{$ctx->{sorted}}, { num => $msg->{num}, existing_prefix => $msg->{existing_prefix}, prefix => '', subject => $msg->{subj} }); } } # delete these state arrays, they're unnecessary now undef %mid2msg; undef %tree; undef %toplevel; undef %subjtop; undef %subjsets; undef %subjearliest; return $ctx; } sub dig_thru_tree { my ($ctx, $level, $mid, $msg) = @_; my $num = $msg->{num}; my $subj = $msg->{subj}; # OK, we want a result like this: # X-MH-Thread-Markup: =?US-ASCII?Q?=a0=a0=a0=a0=5c=2d=a0?= # just use the encoded string directly, it's quicker and simpler. my $MARKUP_START = '=?US-ASCII?Q?'; my $MARKUP_NODE_LAST_CHILD = '=5c=2d=a0'; # "\- " my $MARKUP_NODE_CHILD_W_SIBLINGS = '=7c=2d=a0'; # "|- " my $MARKUP_TREE_EMPTY = '=a0=a0'; # " " my $MARKUP_TREE_BRANCH = '=7c=a0'; # "| " my $MARKUP_END = '?='; # TODO: limit levels to 3 for UI reasons my $levelstr = $MARKUP_END; my $iterlev = $level; my $itermsg = $msg; while ($iterlev > 0) { if ($iterlev == $level) { if (!defined $itermsg || $itermsg->{last_in_level}) { $levelstr = $MARKUP_NODE_LAST_CHILD.$levelstr; } else { $levelstr = $MARKUP_NODE_CHILD_W_SIBLINGS.$levelstr; } } else { if (!defined $itermsg || $itermsg->{last_in_level}) { $levelstr = $MARKUP_TREE_EMPTY.$levelstr; } else { $levelstr = $MARKUP_TREE_BRANCH.$levelstr; } } # get the msg object for the parent message my $irt = $itermsg->{irt}; if (defined $irt) { $itermsg = $mid2msg{$irt}; } $iterlev--; } $levelstr = $MARKUP_START.$levelstr; # printf ("%4d %s%s\n", $num, $levelstr, $subj); push (@{$ctx->{sorted}}, { num => $num, prefix => $levelstr, existing_prefix => $msg->{existing_prefix}, subject => $levelstr.$subj }); $ctx->{done}->{$mid} = 1; # within the tree, just sort by post date (or by number if there's # a duplicate). my $kids = $tree{$mid}; my @sorted = (sort { $kids->{$b}->{intdate} <=> $kids->{$a}->{intdate} or $kids->{$b}->{num} cmp $kids->{$a}->{num} } keys %{$kids}); if (scalar @sorted > 0) { # mark the last one in this level, so it can be displayed with "\-", # instead of "|-" $kids->{$sorted[$#sorted]}->{last_in_level} = 1; # and recurse foreach my $kid (@sorted) { dig_thru_tree ($ctx, $level+1, $kid, $kids->{$kid}); } } } ########################################################################### sub mh_read_message_headers { my ($ctx, $num, $msgpath) = @_; if (!open (IN, "<".$msgpath)) { warn "cannot open, skipped: $msgpath\n"; return undef; } my $hdrs = "\n"; while () { $hdrs .= $_; /^$/ and last; } close IN; return $hdrs; } ########################################################################### sub parse_message_headers { my ($ctx, $num, $hdrs, $stat_details) = @_; my $mid = ''; my $subj = ''; # these are not stored in the public "msg" object after parsing my $date = ''; my $pfx = ''; # remove newlines from headers; makes them easier to parse $hdrs =~ s/\n[ \t]+/ /gs; # now parse the headers $hdrs =~ /\nMessage-I[dD]:\s+<([^\n]+)>/ and $mid = $1; $hdrs =~ /\nSubject:\s+([^\n]+)/ and $subj = $1; $hdrs =~ /\nDate:\s+([^\n]+)/ and $date = $1; $hdrs =~ /\nX-MH-Thread-Markup:\s+([^\n]+)\n/ and $pfx = $1; my $intdate = parse_rfc822_date ($date); $intdate ||= 0; # ensure the message-id is unique; if it already exists (a dup msg) then add # some extra bits until it's unique. TODO: figure out a workaround to avoid # the ensuing shuffling of messages; with this algo, if messages 4 and 5 are # identical, then they'll always be swapped each time this is run. if (exists $mid2msg{$mid}) { my $origmid = $mid; while (exists $mid2msg{$mid}) { $uniqid++; $mid .= "|".$uniqid; } # $dupmessages{$origmid} ||= [ ]; # push (@{$dupmessages{$origmid}}, { # 'mid' => $mid, # 'num' => $num # }); } # figure out which message this was a child of. Some In-Reply-To hdrs # contain the email addr of the parent's sender, so add all mid-like # ids; no messages will be found with mid == emailaddr, so it doesn't # matter. For the References hdr, just take the last one. my @irtsary = ($hdrs =~ /\nIn-Reply-To: (?:[^<\n]*<([^\n>]+)>)+/); my @refs = ($hdrs =~ /\nReferences: (?:[^<\n]*<([^\n>]+)>)+/); if (scalar @refs != 0) { unshift (@irtsary, pop @refs); @refs = (); } # In-Reply-To = last of ( reference1, reference2, in-rep-to1, in-rep-to2 ) my $irt = pop @irtsary; # now clean up the subject for sorting, and determine if we have a "Re:" tag my $sortsubj = $subj; my $re_in_subj = 0; while (1) { my $was = $sortsubj; $sortsubj =~ s/^re\[\d+\][:;]\s*//ig and $re_in_subj = 1; $sortsubj =~ s/^re[:;]\s*//ig and $re_in_subj = 1; $sortsubj =~ s/^\s*//ig; ($was eq $sortsubj) and last; } my $msg = { num => $num, subj => $subj, sortsubj => $sortsubj, intdate => $intdate, irt => $irt, re_in_subj => $re_in_subj, existing_prefix => $pfx, mid => $mid }; # cache if we're in "fast" mode if (defined $ctx->{fcache}) { $msg->{stat_details} = $stat_details; $ctx->{fcache}->{$num} = $msg; } return $msg; } ########################################################################### # Parse RFC-822-format dates. sub init_tz { # timezone mappings: in case of conflicts, use RFC 2822, then most # common and least conflicting mapping %TZ = ( # standard 'UT' => '+0000', 'UTC' => '+0000', # US and Canada 'AST' => '-0400', 'ADT' => '-0300', 'EST' => '-0500', 'EDT' => '-0400', 'CST' => '-0600', 'CDT' => '-0500', 'MST' => '-0700', 'MDT' => '-0600', 'PST' => '-0800', 'PDT' => '-0700', 'HST' => '-1000', 'AKST' => '-0900', 'AKDT' => '-0800', # European 'GMT' => '+0000', 'BST' => '+0100', 'IST' => '+0100', 'WET' => '+0000', 'WEST' => '+0100', 'CET' => '+0100', 'CEST' => '+0200', 'EET' => '+0200', 'EEST' => '+0300', 'MSK' => '+0300', 'MSD' => '+0400', # Australian 'AEST' => '+1000', 'AEDT' => '+1100', 'ACST' => '+0930', 'ACDT' => '+1030', 'AWST' => '+0800', ); # month mappings %MONTH = (jan => 1, feb => 2, mar => 3, apr => 4, may => 5, jun => 6, jul => 7, aug => 8, sep => 9, oct => 10, nov => 11, dec => 12); } sub parse_rfc822_date { my ($date) = @_; local ($_); my ($yyyy, $mmm, $dd, $hh, $mm, $ss, $mon, $tzoff); # make it a bit easier to match $_ = " $date "; s/, */ /gs; s/\s+/ /gs; # now match it in parts. Date part first: if (s/ (\d+) (Jan|Feb|Mar|Apr|May|Jun|Jul|Aug|Sep|Oct|Nov|Dec) (\d{4}) / /i) { $dd = $1; $mon = lc($2); $yyyy = $3; } elsif (s/ (Jan|Feb|Mar|Apr|May|Jun|Jul|Aug|Sep|Oct|Nov|Dec) +(\d+) \d+:\d+:\d+ (\d{4}) / /i) { $dd = $2; $mon = lc($1); $yyyy = $3; } elsif (s/ (\d+) (Jan|Feb|Mar|Apr|May|Jun|Jul|Aug|Sep|Oct|Nov|Dec) (\d{2,3}) / /i) { $dd = $1; $mon = lc($2); $yyyy = $3; } else { return undef; } # handle two and three digit dates as specified by RFC 2822 if (defined $yyyy) { if (length($yyyy) == 2 && $yyyy < 50) { $yyyy += 2000; } elsif (length($yyyy) != 4) { # three digit years and two digit years with values between 50 and 99 $yyyy += 1900; } } # hh:mm:ss if (s/ (\d?\d):(\d\d)(:(\d\d))? / /) { $hh = $1; $mm = $2; $ss = $4 || 0; } # numeric timezones if (s/ ([-+]\d{4}) / /) { $tzoff = $1; } # UT, GMT, and North American timezones elsif (s/\b([A-Z]{2,4})\b/ / && exists $TZ{$1}) { $tzoff = $TZ{$1}; } # all other timezones are considered equivalent to "-0000" $tzoff ||= '-0000'; # months if (exists $MONTH{$mon}) { $mmm = $MONTH{$mon}; } $hh ||= 0; $mm ||= 0; $ss ||= 0; $dd ||= 0; $mmm ||= 0; $yyyy ||= 0; my $time; eval { # could croak $time = timegm ($ss, $mm, $hh, $dd, $mmm-1, $yyyy); }; if ($@) { return undef; } if ($tzoff =~ /([-+])(\d\d)(\d\d)$/) # convert to seconds difference { $tzoff = (($2 * 60) + $3) * 60; if ($1 eq '-') { $time += $tzoff; } else { $time -= $tzoff; } } return $time; } sub read_mh_profile_data { my $mhprof = $ENV{HOME}."/.mh_profile"; if (defined $ENV{MH}) { $mhprof = $ENV{MH}; } $mh_sequences_file_name = '.mh_sequences'; $mhthread_options = undef; if (open (IN, "<".$mhprof)) { while () { if (/^mh-sequences: (.+)$/) { $mh_sequences_file_name = $1; next; } if (/^mhthread: (.+)$/) { $mhthread_options = $1; next; } } close IN; } }