RDBC/admin/cvs2cl.pl

0001 #!/bin/sh
0002 exec perl -w -x $0 ${1+"$@"} # -*- mode: perl; perl-indent-level: 2; -*-
0003 #!perl -w
0004
0005 ##############################################################
0006 ###                                                        ###
0007 ### cvs2cl.pl: produce ChangeLog(s) from `cvs log` output. ###
0008 ###                                                        ###
0009 ##############################################################
0010
0011 ## $Revision: 1.1.1.1 $
0012 ## $Date: 2004/02/18 20:58:02 $
0013 ## $Author: dave $
0014 ##
0015 ##   (C) 1999 Karl Fogel <kfogel@red-bean.com>, under the GNU GPL.
0016 ##
0017 ##   (Extensively hacked on by Melissa O'Neill <oneill@cs.sfu.ca>.)
0018 ##
0019 ## cvs2cl.pl is free software; you can redistribute it and/or modify
0020 ## it under the terms of the GNU General Public License as published by
0021 ## the Free Software Foundation; either version 2, or (at your option)
0022 ## any later version.
0023 ##
0024 ## cvs2cl.pl is distributed in the hope that it will be useful,
0025 ## but WITHOUT ANY WARRANTY; without even the implied warranty of
0026 ## MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
0027 ## GNU General Public License for more details.
0028 ##
0029 ## You may have received a copy of the GNU General Public License
0030 ## along with cvs2cl.pl; see the file COPYING.  If not, write to the
0031 ## Free Software Foundation, Inc., 59 Temple Place - Suite 330,
0032 ## Boston, MA 02111-1307, USA.
0033
0034
0035
0036 use strict;
0037 use Text::Wrap;
0038 use Time::Local;
0039 use File::Basename;
0040
0041
0042 # The Plan:
0043 #
0044 # Read in the logs for multiple files, spit out a nice ChangeLog that
0045 # mirrors the information entered during `cvs commit'.
0046 #
0047 # The problem presents some challenges. In an ideal world, we could
0048 # detect files with the same author, log message, and checkin time --
0049 # each <filelist, author, time, logmessage> would be a changelog entry.
0050 # We'd sort them; and spit them out.  Unfortunately, CVS is *not atomic*
0051 # so checkins can span a range of times.  Also, the directory structure
0052 # could be hierarchical.
0053 #
0054 # Another question is whether we really want to have the ChangeLog
0055 # exactly reflect commits. An author could issue two related commits,
0056 # with different log entries, reflecting a single logical change to the
0057 # source. GNU style ChangeLogs group these under a single author/date.
0058 # We try to do the same.
0059 #
0060 # So, we parse the output of `cvs log', storing log messages in a
0061 # multilevel hash that stores the mapping:
0062 #   directory => author => time => message => filelist
0063 # As we go, we notice "nearby" commit times and store them together
0064 # (i.e., under the same timestamp), so they appear in the same log
0065 # entry.
0066 #
0067 # When we've read all the logs, we twist this mapping into
0068 # a time => author => message => filelist mapping for each directory.
0069 #
0070 # If we're not using the `--distributed' flag, the directory is always
0071 # considered to be `./', even as we descend into subdirectories.
0072
0073
############### Globals ################


# The command whose output we parse to build the ChangeLog.
my $Log_Source_Command = 'cvs log';

# Our own version, extracted from the RCS keyword string so that it can
# be printed on request.
my $VERSION = '$Revision: 1.1.1.1 $';
$VERSION =~ s/\S+\s+(\S+)\s+\S+/$1/;

## Vars set by options:

my $Debug              = 0;            # Print debugging messages?
my $Print_Version      = 0;            # Just show version and exit?
my $Print_Usage        = 0;            # Just print usage message and exit?
my $Distributed        = 0;            # One ChangeLog per subdirectory
                                       # instead of a single top-level one?
my $Log_File_Name      = 'ChangeLog';  # Name of the file(s) we generate.
my $User_Map_File      = '';           # Map file for expanding usernames
                                       # to email addresses, if any.
my $Output_To_Stdout   = 0;            # Write to stdout, not to a file?
my $Prune_Empty_Msgs   = 0;            # Eliminate empty log messages?
my $No_Wrap            = 0;            # Skip Text::Wrap on message bodies?
my $After_Header       = ' ';          # Separates header from log message.
my $XML_Output         = 0;            # Format for programs, not humans?
my $UTC_Times          = 0;            # Show UTC instead of local time?
my $Show_Day_Of_Week   = 0;            # Show day of week in output?
my $Show_Revisions     = 0;            # Show revision numbers in output?
my $Show_Tags          = 0;            # Show tags (symbolic names)?
my $Show_Branches      = 0;            # Show branches by symbolic name?

# Show only revisions on these branches or their ancestors.
my @Follow_Branches = ();

# Regexps for files we should not bother with.
my @Ignore_Files = ();

# Controls how entries are matched: the "o" modifier is always wanted,
# and the --case-insensitive option adds "i" on top of it.
my $Case_Insensitive = 0;

# If non-empty, only log messages matching this regular expression are
# shown.
my $Regexp_Gate = '';

# Option string handed to cvs itself (goes to the left of `log').
my $Global_Opts = '';

# Option string handed to the cvs log subcommand.
my $Command_Opts = '';

# Read log output from stdin instead of invoking cvs log?
my $Input_From_Stdin = 0;

# Upper bound, in seconds, on how long one checkin may take.  CVS
# checkins are not atomic, so the revisions of a single commit can carry
# a range of times.  We assume a checkin never lasts longer than this,
# and likewise that the same user never makes two checkins with the same
# message less than this many seconds apart.
my $Max_Checkin_Duration = 180;

# Text placed at the front of [each] ChangeLog.
my $ChangeLog_Header = '';

## end vars set by options.

# `cvs log' puts one long unbroken line of equal signs between files:
my $file_separator = join ('',
                           "=======================================",
                           "======================================");

# ... and a shorter line of dashes between the log messages of a file:
my $logmsg_separator = "----------------------------";


############### End globals ############




# Main program: read the options, then generate the ChangeLog(s).
&parse_options ();
&derive_change_log ();
0185
0186
0187
0188 ### Everything below is subroutine definitions. ###
0189
# Fills up a ChangeLog structure in the current directory.
#
# Reads `cvs log' output -- through a pipe from $Log_Source_Command, or
# from stdin when $Input_From_Stdin is set -- and records one "qunk"
# per file revision in a
#   directory => author => time => message => qunklist
# hash.  Then, for every directory key (always "./" unless $Distributed
# is set), revisions by the same author with the same log message that
# lie within $Max_Checkin_Duration seconds of the first such revision
# are merged into one entry, and the entries are written newest-first
# to $Log_File_Name in that directory (or to stdout).
#
# Takes no arguments and returns nothing meaningful; all input comes
# from the option globals.  Dies if the log source or a temporary output
# file cannot be opened, if writing the temporary file fails, or if the
# finished file cannot be moved into place.
sub derive_change_log ()
{
  # See "The Plan" above for a full explanation.

  my %grand_poobah;

  my $file_full_path;
  my $time;
  my $revision;
  my $author;
  my $msg_txt;
  my $detected_file_separator;

  # We might be expanding usernames
  my %usermap;

  # Parser state for the header of the file currently being read.
  my $collecting_symbolic_names = 0;
  my %symbolic_names;    # Where tag names get stored.
  my %branch_names;      # We'll grab branch names while we're at it.
  my %branch_numbers;    # Save some revisions for @Follow_Branches
  my @branch_roots;      # For showing which files are branch ancestors.

  # Compiled once; both separators are fixed strings.
  my $file_separator_re   = qr/^$file_separator$/;
  my $logmsg_separator_re = qr/^$logmsg_separator$/;

  # Forget the log message just handled (or abandoned) and, if we are
  # sitting on a file separator, everything known about the file too.
  # The per-file tag and branch tables must go as well, otherwise the
  # tags and branch numbers of one file would be applied to the
  # same-numbered revisions of the next.
  my $clear_state = sub {
    undef $msg_txt;
    undef $time;
    undef $revision;
    undef $author;
    undef @branch_roots;

    if ($detected_file_separator) {
      undef $file_full_path;
      undef %branch_names;
      undef %branch_numbers;
      undef %symbolic_names;
    }
  };

  if (! $Input_From_Stdin) {
    open (LOG_SOURCE, "$Log_Source_Command |")
        or die "unable to run \"${Log_Source_Command}\"";
  }
  else {
    open (LOG_SOURCE, "-") or die "unable to open stdin for reading";
  }

  %usermap = &maybe_read_user_map_file ();

  while (<LOG_SOURCE>)
  {
    # If on a new file and don't see filename, skip until we find it, and
    # when we find it, grab it.
    if ((! (defined $file_full_path)) and /^Working file: (.*)/) {
      $file_full_path = $1;
      if (@Ignore_Files) {
        my $ignored = $Case_Insensitive
            ? grep ($file_full_path =~ m|$_|i, @Ignore_Files)
            : grep ($file_full_path =~ m|$_|,  @Ignore_Files);
        undef $file_full_path if $ignored;
      }
      next;
    }

    # Just spin wheels if no file defined yet.  (Test definedness, not
    # truth: a file called "0" is a perfectly good file.)
    next if (! (defined $file_full_path));

    # Collect tag names in case we're asked to print them in the output.
    if (/^symbolic names:$/) {
      $collecting_symbolic_names = 1;
      next;  # There's no more info on this line, so skip to next
    }
    if ($collecting_symbolic_names)
    {
      # All tag names are listed with whitespace in front in cvs log
      # output; so if see non-whitespace, then we're done collecting.
      if (/^\S/) {
        $collecting_symbolic_names = 0;
      }
      # Otherwise we should be looking at a tag name.  According to the
      # Cederqvist manual, in node "Tags", tag names must start with a
      # letter and can contain letters, digits, `-', and `_'.  However,
      # it's not our place to enforce that, so we'll allow anything CVS
      # hands us to be a tag.  Lines that don't look like "name: rev"
      # at all are left alone instead of being parsed into stale
      # captures.
      elsif (/^\s+([^:]+): ([\d.]+)$/)
      {
        my $tag_name = $1;
        my $tag_rev  = $2;

        # You can always tell a branch by the ".0." as the
        # second-to-last component of the revision number.
        if ($tag_rev =~ /^(\d+\.(?:\d+\.)+)0\.(\d+)$/) {
          my $real_branch_rev = $1 . $2;
          $branch_names{$real_branch_rev} = $tag_name;
          if (@Follow_Branches) {
            if (grep ($_ eq $tag_name, @Follow_Branches)) {
              $branch_numbers{$tag_name} = $real_branch_rev;
            }
          }
        }
        else {
          # Else it's just a regular (non-branch) tag.
          push (@{$symbolic_names{$tag_rev}}, $tag_name);
        }
      }
    }
    # End of code for collecting tag names.

    # If have file name, but not revision, and see revision, then grab
    # it.  (We collect unconditionally, even though we may or may not
    # ever use it.)
    if ((! (defined $revision)) and (/^revision (\d+\.[\d.]+)/))
    {
      $revision = $1;

      # When following branches, drop revisions that are not on the
      # path to any of them.
      if (@Follow_Branches
          and (! &_cvs2cl_revision_is_followed ($revision, \%branch_numbers)))
      {
        undef $revision;
      }
      next;
    }

    # If we don't have a revision right now, we couldn't possibly
    # be looking at anything useful.
    if (! (defined ($revision))) {
      $detected_file_separator = /$file_separator_re/;
      # No revisions for this file; can happen, e.g. "cvs log -d DATE"
      &$clear_state () if ($detected_file_separator);
      next;
    }

    # If have file name but not date and author, and see date or
    # author, then grab them:
    unless (defined $time) {
      if (/^date: .*/)
      {
        ($time, $author) = &parse_date_and_author ($_);
        if (defined ($usermap{$author}) and $usermap{$author}) {
          $author = $usermap{$author};
        }
      }
      else {
        $detected_file_separator = /$file_separator_re/;
        # No revisions for this file; can happen, e.g. "cvs log -d DATE"
        &$clear_state () if ($detected_file_separator);
      }
      # If the date/time/author hasn't been found yet, we couldn't
      # possibly care about anything we see.  So skip:
      next;
    }

    # A "branches: ..." line here indicates that one or more branches
    # are rooted at this revision.  If we're showing branches, then we
    # want to show that fact as well, so we collect all the branches
    # that this is the latest ancestor of and store them in
    # @branch_roots.  Just for reference, the format of the line we're
    # seeing at this point is:
    #
    #    branches:  1.5.2;  1.5.4;  ...;
    #
    # If we are not showing branches the line is simply tossed.  That
    # is a punt: `cvs log' output is inherently ambiguous here, since a
    # log message whose first line reads "branches:  1.1.2;" looks
    # exactly the same.  We liberally assume nobody writes log messages
    # like that.
    if (/^branches:\s+(.*);$/)
    {
      if ($Show_Branches)
      {
        @branch_roots = ();
        foreach my $root (split (/;/, $1)) {
          # Trim each number, so the %branch_names lookup later on
          # does not miss because of the blanks between entries.
          $root =~ s/^\s+//;
          $root =~ s/\s+$//;
          next if ($root eq "");
          next if ($root eq "1.1.1");   # ignore the trivial branch 1.1.1
          push (@branch_roots, $root);
        }
      }
      next;
    }

    # If have file name, time, and author, then we're just grabbing
    # log message texts:
    $detected_file_separator = /$file_separator_re/;
    unless ($detected_file_separator || /$logmsg_separator_re/)
    {
      $msg_txt .= $_;   # Normally, just accumulate the message...
      next;
    }
    # ... until a msg separator is encountered:
    # Ensure the message contains something:
    if ((! $msg_txt)
        || ($msg_txt =~ /^\s*\.\s*$|^\s*$/)
        || ($msg_txt =~ /\*\*\* empty log message \*\*\*/)) {
      if ($Prune_Empty_Msgs) {
        &$clear_state ();
        next;
      }
      # else
      $msg_txt = "[no log message]\n";
    }

    ### Store it all in the Grand Poobah:
    {
      my $dir_key;        # key into %grand_poobah
      my %qunk;           # complicated little jobbie, see below

      # Each revision of a file has a little data structure (a `qunk')
      # associated with it.  That data structure holds not only the
      # file's name, but any additional information about the file
      # that might be needed in the output, such as the revision
      # number, tags, branches, etc.  The reason to have these things
      # arranged in a data structure, instead of just appending them
      # textually to the file's name, is that we may want to do a
      # little rearranging later as we write the output.
      #
      # A qunk looks like this:
      #
      #   {
      #     filename    =>    "hello.c",
      #     revision    =>    "1.4.3.2",
      #     time        =>    a timegm() return value (moment of commit)
      #     tags        =>    [ "tag1", "tag2", ... ],
      #     branch      =>    "branchname" # There should be only one, right?
      #     branchroots =>    [ "branchtag1", "branchtag2", ... ]
      #   }

      if ($Distributed) {
        # Just the basename, don't include the path.
        ($qunk{'filename'}, $dir_key, undef) = fileparse ($file_full_path);
      }
      else {
        $dir_key = "./";
        $qunk{'filename'} = $file_full_path;
      }

      # Not used by the common-commit detection below, which works on
      # the hash keys; kept in the qunk for whoever needs it later.
      $qunk{'time'} = $time;

      $qunk{'revision'} = $revision;

      # Grab the branch, even though we may or may not need it.  The
      # branch number is the revision minus its last component; the
      # pattern consumes whole components so that a multi-digit last
      # component (e.g. "1.4.2.13") is removed in one piece.
      if ($revision =~ /^((?:\d+\.)+)\d+$/) {
        my $branch_prefix = $1;
        $branch_prefix =~ s/\.$//;  # strip off final dot
        if ($branch_names{$branch_prefix}) {
          $qunk{'branch'} = $branch_names{$branch_prefix};
        }
      }

      # If there's anything in the @branch_roots array, then this
      # revision is the root of at least one branch.  We'll display
      # them as branch names instead of revision numbers.
      if (@branch_roots) {
        my @roots = map { $branch_names{$_} } @branch_roots;
        $qunk{'branchroots'} = \@roots;
      }

      # Save tags too.
      if (defined ($symbolic_names{$revision})) {
        $qunk{'tags'} = $symbolic_names{$revision};
        delete $symbolic_names{$revision};
      }

      # Add this file to the list
      # (We use many spoonfuls of autovivication magic. Hashes and arrays
      # will spring into existence if they aren't there already.)

      &debug ("(pushing log msg for ${dir_key}$qunk{'filename'})\n");

      # Store with the files in this commit.  Later we'll loop through
      # again, making sure that revisions with the same log message
      # and nearby commit times are grouped together as one commit.
      push (@{$grand_poobah{$dir_key}{$author}{$time}{$msg_txt}}, \%qunk);
    }

    # Make way for the next message, and maybe even for the next file.
    &$clear_state ();
  }

  # For a pipe, a false close() means the command failed or exited
  # non-zero.  Whatever was read is still worth writing out, so only
  # complain.
  if ((! close (LOG_SOURCE)) and (! $Input_From_Stdin)) {
    warn "warning: \"${Log_Source_Command}\" did not finish cleanly\n";
  }

  ### Process each ChangeLog

  while (my ($dir,$authorhash) = each %grand_poobah)
  {
    &debug ("DOING DIR: $dir\n");

    # Here we twist our hash around, from being
    #   author => time => message => filelist
    # in %$authorhash to
    #   time => author => message => filelist
    # in %changelog.
    #
    # This is also where we merge entries.  For each author we walk the
    # commit times in ascending order; an entry whose message was
    # already seen less than $Max_Checkin_Duration seconds after the
    # time it was first stored under is appended to that earlier entry.
    #
    # (To save space, we zap %$authorhash after we've copied
    # everything out of it.)

    my %changelog;
    while (my ($author,$timehash) = each %$authorhash)
    {
      my %stamptime;   # message => time under which it is stored
      foreach my $time (sort {$main::a <=> $main::b} (keys %$timehash))
      {
        my $msghash = $timehash->{$time};
        while (my ($msg,$qunklist) = each %$msghash)
        {
          my $stamptime = $stamptime{$msg};
          if ((defined $stamptime)
              and (($time - $stamptime) < $Max_Checkin_Duration)
              and (defined $changelog{$stamptime}{$author}{$msg}))
          {
            push (@{$changelog{$stamptime}{$author}{$msg}}, @$qunklist);
          }
          else {
            $changelog{$time}{$author}{$msg} = $qunklist;
            $stamptime{$msg} = $time;
          }
        }
      }
    }
    undef (%$authorhash);

    ### Now we can write out the ChangeLog!

    my ($logfile_here, $logfile_bak, $tmpfile);

    if (! $Output_To_Stdout) {
      $logfile_here =  $dir . $Log_File_Name;
      $logfile_here =~ s/^\.\/\//\//;   # fix any leading ".//" problem
      $tmpfile      = "${logfile_here}.cvs2cl$$.tmp";
      $logfile_bak  = "${logfile_here}.bak";

      # Three-argument open: the name is built from directory names
      # found in the log, so it must never be parsed for a mode.
      open (LOG_OUT, ">", $tmpfile)
          or die "Unable to open \"$tmpfile\": $!";
    }
    else {
      open (LOG_OUT, ">-") or die "Unable to open stdout for writing";
    }

    print LOG_OUT $ChangeLog_Header;

    if ($XML_Output) {
      print LOG_OUT "<?xml version=\"1.0\"?>\n\n<changelog>\n\n";
    }

    foreach my $time (sort {$main::b <=> $main::a} (keys %changelog))
    {
      my $authorhash = $changelog{$time};
      while (my ($author,$mesghash) = each %$authorhash)
      {
        # If XML, escape in outer loop to avoid compound quoting:
        if ($XML_Output) {
          $author = &xml_escape ($author);
        }

        while (my ($msg,$qunklist) = each %$mesghash)
        {
          my $files               = &pretty_file_list ($qunklist);
          my $logtext             = &pretty_msg_text ($msg);
          my $header_line;          # date and author
          my $body;                 # see below
          my $wholething;           # $header_line + $body

          # Set up the date/author line.
          # kff todo: do some more XML munging here, on the header
          # part of the entry:
          my ($min,$hour,$mday,$mon,$year,$wday)
              = ($UTC_Times ? gmtime($time) : localtime($time))[1..6];

          # XML output includes everything else, we might as well make
          # it always include Day Of Week too, for consistency.
          if ($Show_Day_Of_Week or $XML_Output) {
            $wday = ("Sunday", "Monday", "Tuesday", "Wednesday",
                     "Thursday", "Friday", "Saturday")[$wday];
            $wday = ($XML_Output) ? "<weekday>${wday}</weekday>\n" : " $wday";
          }
          else {
            $wday = "";
          }

          if ($XML_Output) {
            $header_line =
                sprintf ("<date>%4u-%02u-%02u</date>\n"
                         . "${wday}"
                         . "<time>%02u:%02u</time>\n"
                         . "<author>%s</author>\n",
                         $year+1900, $mon+1, $mday, $hour, $min, $author);
          }
          else {
            $header_line =
                sprintf ("%4u-%02u-%02u${wday} %02u:%02u  %s\n\n",
                         $year+1900, $mon+1, $mday, $hour, $min, $author);
          }

          # Reshape the body according to user preferences.
          if ($XML_Output) {
            $body = $files . $logtext;
          }
          elsif ($No_Wrap) {
            $files = wrap ("\t", "  ", "$files");
            $logtext =~ s/\n(.*)/\n\t$1/g;
            unless ($After_Header eq " ") {
              $logtext =~ s/^(.*)/\t$1/g;
            }
            $body = $files . $After_Header . $logtext;
          }
          else {
            $body = $files . $After_Header . $logtext;
            $body = wrap ("\t", "        ", "$body");
          }

          $wholething = $header_line . $body;

          if ($XML_Output) {
            $wholething = "<entry>\n${wholething}</entry>\n";
          }

          # One last check: make sure it passes the regexp test, if the
          # user asked for that.  We have to do it here, so that the
          # test can match against information in the header as well
          # as in the text of the log message.
          my $passes_gate = 1;
          if ($Regexp_Gate) {
            $passes_gate = $Case_Insensitive
                ? ($wholething =~ /$Regexp_Gate/oi)
                : ($wholething =~ /$Regexp_Gate/o);
          }
          if ($passes_gate) {
            print LOG_OUT "${wholething}\n";
          }
        }
      }
    }

    if ($XML_Output) {
      print LOG_OUT "</changelog>\n";
    }

    if ($Output_To_Stdout) {
      close (LOG_OUT);
    }
    else
    {
      # Buffered write errors only show up at close time; don't
      # replace a good ChangeLog with a truncated one.
      close (LOG_OUT) or die "Unable to finish writing \"$tmpfile\": $!";

      if (-f $logfile_here) {
        rename ($logfile_here, $logfile_bak)
            or warn "warning: unable to back up \"$logfile_here\""
                  . " as \"$logfile_bak\": $!\n";
      }
      rename ($tmpfile, $logfile_here)
          or die "Unable to rename \"$tmpfile\" to \"$logfile_here\": $!";
    }
  }
}


# Private helper for derive_change_log.
#
# Says whether REVISION should be kept given the branches listed in
# @Follow_Branches.  BRANCH_NUMBERS is a reference to a hash mapping
# followed branch names to their real branch numbers (e.g. "1.5.2")
# in the current file.  Returns 1 if the revision is on one of the
# followed branches or is an ancestor of one, 0 otherwise.
sub _cvs2cl_revision_is_followed
{
  my ($revision, $branch_numbers) = @_;

  my @rev = split (/\./, $revision);

  foreach my $branch (@Follow_Branches)
  {
    # Special case for following trunk revisions
    if (($branch =~ /^trunk$/i) and ($revision =~ /^[0-9]+\.[0-9]+$/)) {
      return 1;
    }

    my $branch_number = $$branch_numbers{$branch};
    next unless ($branch_number);

    # Trivial case: is this revision on the branch?
    if (index ($revision, $branch_number . ".") == 0) {
      return 1;
    }

    # Non-trivial case: check if rev is ancestral to branch.  It is
    # when it has fewer components than the branch number, all of its
    # components but the last equal the branch number's, and its last
    # component is no greater than the corresponding one.  Components
    # are compared as whole numbers, so 1.5 is an ancestor of 1.12.2.
    my @br = split (/\./, $branch_number);
    next unless (scalar (@rev) < scalar (@br));

    my $last = $#rev;
    my $same_line = 1;
    foreach my $i (0 .. ($last - 1)) {
      if ($rev[$i] != $br[$i]) {
        $same_line = 0;
        last;
      }
    }
    if ($same_line and ($rev[$last] <= $br[$last])) {
      return 1;
    }
  }

  return 0;
}
0759
0760
# Parses the date/time and author out of a `cvs log' line like:
#
#   date: 1999/02/19 23:29:05;  author: apharris;  state: Exp;
#
# or, in the form with dashes and a UTC offset:
#
#   date: 2004-02-18 20:58:02 +0000;  author: dave;  state: Exp;
#
# Takes the line as its only argument.  Returns the list
# ($time, $author): $time is a timegm() value for the commit moment
# (corrected to UTC when an offset is given) and $author is the author
# field exactly as it appears in the line.  Dies if the line does not
# parse or if the year fails the sanity check.
sub parse_date_and_author
{
  my $line = shift;

  my ($year, $mon, $mday, $hours, $min, $secs, $utc_offset, $author) = $line =~
      m#(\d+)[-/](\d+)[-/](\d+)\s+(\d+):(\d+):(\d+)(?:\s+([+-]\d{4}))?;\s+author:\s+([^;]+);#
          or  die "Couldn't parse date ``$line''";
  die "Bad date or Y2K issues" unless ($year > 1969 and $year < 2258);
  # Kinda arbitrary, but useful as a sanity check

  # Hand timegm the full four-digit year.  With $year-1900 the years
  # 1970..1999 become two-digit values, which Time::Local places in a
  # rolling century around *today* -- so they can come back a hundred
  # years late.
  my $time = timegm($secs,$min,$hours,$mday,$mon-1,$year);

  # An explicit offset means the stamp is local to that zone; move it
  # back to UTC.
  if (defined $utc_offset) {
    my ($sign, $off_hours, $off_mins) = $utc_offset =~ /^([+-])(\d\d)(\d\d)$/;
    my $offset_secs = (($off_hours * 60) + $off_mins) * 60;
    $time += ($sign eq '-') ? $offset_secs : -$offset_secs;
  }

  return ($time, $author);
}
0778
0779
0780 # Here we take a bunch of qunks and convert them into printed
0781 # summary that will include all the information the user asked for.
0782 sub pretty_file_list ()
0783 {
0784   my $qunksref = shift;
0785   my @qunkrefs = @$qunksref;
0786   my @filenames;
0787   my $beauty = "";          # The accumulating header string for this entry.
0788   my %non_unanimous_tags;   # Tags found in a proper subset of qunks
0789   my %unanimous_tags;       # Tags found in all qunks
0790   my %all_branches;         # Branches found in any qunk
0791   my $common_dir;           # Dir of all files, or "" if no common dir
0792   my $fbegun = 0;           # Did we begin printing filenames yet?
0793
0794   # First, loop over the qunks gathering all the tag/branch names.
0795   # We'll put them all in non_unanimous_tags, and take out the
0796   # unanimous ones later.
0797   foreach my $qunkref (@qunkrefs)
0798   {
0799     # Keep track of whether all the files in this commit were in the
0800     # same directory, and memorize it if so.  We can make the output a
0801     # little more compact by mentioning the directory only once.
0802     if ((scalar (@qunkrefs)) > 1)
0803     {
0804       if (! (defined ($common_dir))) {
0805         my ($base, $dir);
0806         ($base, $dir, undef) = fileparse ($$qunkref{'filename'});
0807
0808         if (($dir eq "./") || ($dir eq ".\\")) {
0809           $common_dir = "";
0810         }
0811         else {
0812           $common_dir = $dir;
0813         }
0814
0815         ($dir eq "./") ? ($common_dir = "") : ($common_dir = $dir);
0816       }
0817       elsif ($common_dir) {
0818         $common_dir = &common_path_prefix ($$qunkref{'filename'}, $common_dir);
0819       }
0820     }
0821     else  # only one file in this entry anyway, so common dir not an issue
0822     {
0823       $common_dir = "";
0824     }
0825
0826     if (defined ($$qunkref{'branch'})) {
0827       $all_branches{$$qunkref{'branch'}} = 1;
0828     }
0829     if (defined ($$qunkref{'tags'})) {
0830       foreach my $tag (@{$$qunkref{'tags'}}) {
0831         $non_unanimous_tags{$tag} = 1;
0832       }
0833     }
0834   }
0835
0836   # Any tag held by all qunks will be printed specially... but only if
0837   # there are multiple qunks in the first place!
0838   if ((scalar (@qunkrefs)) > 1) {
0839     foreach my $tag (keys (%non_unanimous_tags)) {
0840       my $everyone_has_this_tag = 1;
0841       foreach my $qunkref (@qunkrefs) {
0842         if ((! (defined ($$qunkref{'tags'})))
0843             or (! (grep ($_ eq $tag, @{$$qunkref{'tags'}})))) {
0844           $everyone_has_this_tag = 0;
0845         }
0846       }
0847       if ($everyone_has_this_tag) {
0848         $unanimous_tags{$tag} = 1;
0849         delete $non_unanimous_tags{$tag};
0850       }
0851     }
0852   }
0853
0854   if ($XML_Output)
0855   {
0856     # If outputting XML, then our task is pretty simple, because we
0857     # don't have to detect common dir, common tags, branch prefixing,
0858     # etc.  We just output exactly what we have, and don't worry about
0859     # redundancy or readability.
0860
0861     foreach my $qunkref (@qunkrefs)
0862     {
0863       my $filename    = $$qunkref{'filename'};
0864       my $revision    = $$qunkref{'revision'};
0865       my $tags        = $$qunkref{'tags'};
0866       my $branch      = $$qunkref{'branch'};
0867       my $branchroots = $$qunkref{'branchroots'};
0868
0869       $filename = &xml_escape ($filename);   # probably paranoia
0870       $revision = &xml_escape ($revision);   # definitely paranoia
0871
0872       $beauty .= "<file>\n";
0873       $beauty .= "<name>${filename}</name>\n";
0874       $beauty .= "<revision>${revision}</revision>\n";
0875       if ($branch) {
0876         $branch   = &xml_escape ($branch);     # more paranoia
0877         $beauty .= "<branch>${branch}</branch>\n";
0878       }
0879       foreach my $tag (@$tags) {
0880         $tag = &xml_escape ($tag);  # by now you're used to the paranoia
0881         $beauty .= "<tag>${tag}</tag>\n";
0882       }
0883       foreach my $root (@$branchroots) {
0884         $root = &xml_escape ($root);  # which is good, because it will continue
0885         $beauty .= "<branchroot>${root}</branchroot>\n";
0886       }
0887       $beauty .= "</file>\n";
0888     }
0889
0890     # Theoretically, we could go home now.  But as long as we're here,
0891     # let's print out the common_dir and utags, as a convenience to
0892     # the receiver (after all, earlier code calculated that stuff
0893     # anyway, so we might as well take advantage of it).
0894
0895     if ((scalar (keys (%unanimous_tags))) > 1) {
0896       foreach my $utag ((keys (%unanimous_tags))) {
0897         $utag = &xml_escape ($utag);   # the usual paranoia
0898         $beauty .= "<utag>${utag}</utag>\n";
0899       }
0900     }
0901     if ($common_dir) {
0902       $common_dir = &xml_escape ($common_dir);
0903       $beauty .= "<commondir>${common_dir}</commondir>\n";
0904     }
0905
0906     # That's enough for XML, time to go home:
0907     return $beauty;
0908   }
0909
0910   # Else not XML output, so complexly compactify for chordate
0911   # consumption.  At this point we have enough global information
0912   # about all the qunks to organize them non-redundantly for output.
0913
0914   if ($common_dir) {
0915     # Note that $common_dir still has its trailing slash
0916     $beauty .= "$common_dir: ";
0917   }
0918
0919   if ($Show_Branches)
0920   {
0921     # For trailing revision numbers.
0922     my @brevisions;
0923
0924     foreach my $branch (keys (%all_branches))
0925     {
0926       foreach my $qunkref (@qunkrefs)
0927       {
0928         if ((defined ($$qunkref{'branch'}))
0929             and ($$qunkref{'branch'} eq $branch))
0930         {
0931           if ($fbegun) {
0932             # kff todo: comma-delimited in XML too?  Sure.
0933             $beauty .= ", ";
0934           }
0935           else {
0936             $fbegun = 1;
0937           }
0938           my $fname = substr ($$qunkref{'filename'}, length ($common_dir));
0939           $beauty .= $fname;
0940           $$qunkref{'printed'} = 1;  # Just setting a mark bit, basically
0941
0942           if ($Show_Tags && (defined @{$$qunkref{'tags'}})) {
0943             my @tags = grep ($non_unanimous_tags{$_}, @{$$qunkref{'tags'}});
0944             if (@tags) {
0945               $beauty .= " (tags: ";
0946               $beauty .= join (', ', @tags);
0947               $beauty .= ")";
0948             }
0949           }
0950
0951           if ($Show_Revisions) {
0952             # Collect the revision numbers' last components, but don't
0953             # print them -- they'll get printed with the branch name
0954             # later.
0955             $$qunkref{'revision'} =~ /.+\.([\d])+$/;
0956             push (@brevisions, $1);
0957
0958             # todo: we're still collecting branch roots, but we're not
0959             # showing them anywhere.  If we do show them, it would be
0960             # nifty to just call them revision "0" on a the branch.
0961             # Yeah, that's the ticket.
0962           }
0963         }
0964       }
0965       $beauty .= " ($branch";
0966       if (@brevisions) {
0967         if ((scalar (@brevisions)) > 1) {
0968           $beauty .= ".[";
0969           $beauty .= (join (',', @brevisions));
0970           $beauty .= "]";
0971         }
0972         else {
0973           $beauty .= ".$brevisions[0]";
0974         }
0975       }
0976       $beauty .= ")";
0977     }
0978   }
0979
0980   # Okay; any qunks that were done according to branch are taken care
0981   # of, and marked as printed.  Now print everyone else.
0982
0983   foreach my $qunkref (@qunkrefs)
0984   {
0985     next if (defined ($$qunkref{'printed'}));   # skip if already printed
0986
0987     if ($fbegun) {
0988       $beauty .= ", ";
0989     }
0990     else {
0991       $fbegun = 1;
0992     }
0993     $beauty .= substr ($$qunkref{'filename'}, length ($common_dir));
0994     $$qunkref{'printed'} = 1;  # Set a mark bit.
0995
0996     if ($Show_Revisions || $Show_Tags)
0997     {
0998       my $started_addendum = 0;
0999
1000       if ($Show_Revisions) {
1001         $started_addendum = 1;
1002         $beauty .= " (";
1003         $beauty .= "$$qunkref{'revision'}";
1004       }
1005       if ($Show_Tags && (defined $$qunkref{'tags'})) {
1006         my @tags = grep ($non_unanimous_tags{$_}, @{$$qunkref{'tags'}});
1007         if ((scalar (@tags)) > 0) {
1008           if ($started_addendum) {
1009             $beauty .= ", ";
1010           }
1011           else {
1012             $beauty .= " (tags: ";
1013           }
1014           $beauty .= join (', ', @tags);
1015           $started_addendum = 1;
1016         }
1017       }
1018       if ($started_addendum) {
1019         $beauty .= ")";
1020       }
1021     }
1022   }
1023
1024   # Unanimous tags always come last.
1025   if ($Show_Tags && %unanimous_tags)
1026   {
1027     $beauty .= " (utags: ";
1028     $beauty .= join (', ', keys (%unanimous_tags));
1029     $beauty .= ")";
1030   }
1031
1032   # todo: still have to take care of branch_roots?
1033
1034   $beauty = "* $beauty:";
1035
1036   return $beauty;
1037 }
1038
1039
# Return the longest run of leading directory components shared by the
# directory parts of two paths.
#
#   $path1, $path2 -- file paths; only the directory portion of each
#                     (as split off by fileparse) takes part in the
#                     comparison.
#
# The result keeps its trailing slash (e.g. "src/lib/"), and is "" when
# the two directories have no leading component in common.  Backslashes
# are compared and returned as forward slashes, so the result has the
# same length as the matching prefix of either input.
#
# Callers must use the "&common_path_prefix (...)" form: the empty
# prototype would otherwise reject the arguments.
sub common_path_prefix ()
{
  my $path1 = shift;
  my $path2 = shift;

  my ($dir1, $dir2);
  (undef, $dir1, undef) = fileparse ($path1);
  (undef, $dir2, undef) = fileparse ($path2);

  # Transmogrify Windows filenames to look like Unix.
  # (It is far more likely that someone is running cvs2cl.pl under
  # Windows than that they would genuinely have backslashes in their
  # filenames.)
  $dir1 =~ tr#\\#/#;
  $dir2 =~ tr#\\#/#;

  # Cut each directory into segments that carry their own trailing
  # slash ("/a//b/" becomes "/", "a/", "/", "b/").  Comparing whole
  # segments means that empty components (absolute paths, doubled
  # slashes) and components that are false in Perl (a directory named
  # "0") are handled like any other, and the walk always terminates.
  my @segments1 = ($dir1 =~ m#([^/]*/)#g);
  my @segments2 = ($dir2 =~ m#([^/]*/)#g);

  my $common_prefix = "";
  while (@segments1 and @segments2 and ($segments1[0] eq $segments2[0]))
  {
    $common_prefix .= shift (@segments1);
    shift (@segments2);
  }

  return $common_prefix;
}
1072
1073
# Tidy up a raw log message for output and return the result.
#
# Line endings and blank-line runs are normalized first.  After that:
#   - with $XML_Output set, the message is XML-escaped and wrapped in a
#     <msg> element (followed by a newline);
#   - with $No_Wrap set, the normalized message is returned as is;
#   - otherwise lone newlines inside paragraphs are folded into spaces
#     so that the text can be refilled later.
sub pretty_msg_text ()
{
  my $msg = shift;

  # DOSsy editors leave CR LF pairs behind; reduce them to plain LF.
  $msg =~ s/\r\n/\n/g;

  # Anything that merely *looks* like a blank line becomes a real one.
  $msg =~ s/\n\s*\n/\n\n/g;

  # XML consumers get the text verbatim (escaped), never refilled.
  if ($XML_Output) {
    return "<msg>" . &xml_escape ($msg) . "</msg>\n";
  }

  # The user asked us to leave the line structure alone.
  return $msg if ($No_Wrap);

  # Join a line to its successor with one space, except when the line
  # starts with whitespace or a mail-quoting ">" (that formatting is
  # worth keeping) or ends with a period (handled just below).
  1 while ($msg =~ s/(^|\n)([^>\s].*[^.\n])\n([^>\n])/$1$2 $3/g);

  # What is left are the joins after a period: the sentence that gets
  # pulled up is separated by two spaces.
  1 while ($msg =~ s/(^|\n)([^>\s].*)\n([^>\n])/$1$2  $3/g);

  return $msg;
}
1104
1105
# Return a copy of the given text with the characters "&", "<" and ">"
# replaced by the entities "&amp;", "&lt;" and "&gt;".
sub xml_escape ()
{
  my ($escaped) = @_;

  # Ampersands go first, so that the ampersands introduced by the
  # other two replacements are not escaped a second time.
  for ($escaped)
  {
    s/&/&amp;/g;
    s/</&lt;/g;
    s/>/&gt;/g;
  }

  return $escaped;
}
1114
1115
# Read the username map named by $User_Map_File (set by -U/--usermap),
# if there is one, and return it as a hash of username => expansion.
#
# Each useful line of the map file looks like "username:expansion".
# The expansion may be wrapped in single or double quotes, which are
# removed.  An expansion that starts with an email address is combined
# with the username ("username <address>"); any other expansion is
# taken to contain a real name already and is used as is.
#
# Returns an empty hash when no map file was requested.  Dies when the
# map file cannot be opened.
sub maybe_read_user_map_file ()
{
  my %expansions;

  if ($User_Map_File)
  {
    open (MAPFILE, "<$User_Map_File")
        or die ("Unable to open $User_Map_File ($!)");

    # A lexical line variable keeps us from clobbering the caller's $_.
    while (defined (my $line = <MAPFILE>))
    {
      chomp $line;

      # Split at the first colon only: the expansion itself may
      # legitimately contain colons.
      my ($username, $expansion) = split (/:/, $line, 2);

      # Blank lines and lines without a colon carry no mapping.
      next unless ((defined ($username)) and (defined ($expansion)));

      $expansion =~ s/^'(.*)'$/$1/;
      $expansion =~ s/^"(.*)"$/$1/;

      # If it looks like the expansion has a real name already, then
      # we toss the username we got from CVS log.  Otherwise, keep
      # it to use in combination with the email address.

      if ($expansion =~ /^\s*<{0,1}\S+@.*/) {
        # Also, add angle brackets if none present
        if (! ($expansion =~ /<\S+@\S+>/)) {
          $expansions{$username} = "$username <$expansion>";
        }
        else {
          $expansions{$username} = "$username $expansion";
        }
      }
      else {
        $expansions{$username} = $expansion;
      }
    }

    close (MAPFILE);
  }

  return %expansions;
}
1155
1156
# Process the command line in @ARGV and record the result in the
# option globals ($Show_Tags, $XML_Output, $Log_Source_Command, ...).
#
# Arguments that are not recognized as options are appended to the log
# command as file names.  Takes no parameters and returns nothing
# useful.  Exits after printing usage or version information when
# asked to, or when contradictory options were given; dies when an
# option that needs a value is the last argument.
sub parse_options ()
{
  # Check this internally before setting the global variable.
  my $output_file;

  # If this gets set, we encountered contradictory options and will
  # exit at the end of this subroutine.
  my $exit_with_admonishment = 0;

  # Take the value that belongs to option $opt off @ARGV.  The test is
  # for presence rather than truth, because "0" is a perfectly good
  # value (think "-W 0" or a file named "0").
  my $take_value = sub {
    my $opt = shift;
    my $value = shift (@ARGV);
    if ((! (defined ($value))) or ($value eq "")) {
      die "$opt needs argument.\n";
    }
    return $value;
  };

  # Loop on the number of arguments left, not on the truth of the
  # argument, so that an argument of "0" does not end option parsing.
  while (@ARGV)
  {
    my $arg = shift (@ARGV);

    # An empty argument means nothing to us or to the log command.
    next if ($arg eq "");

    # "-?" is the documented spelling; a bare "-" has always been
    # accepted here as well, so it still is.
    if ($arg =~ /^-h$|^-help$|^--help$|^--usage$|^-\??$/) {
      $Print_Usage = 1;
    }
    elsif ($arg =~ /^--debug$/) {        # unadvertised option, heh
      $Debug = 1;
    }
    elsif ($arg =~ /^--version$/) {
      $Print_Version = 1;
    }
    elsif ($arg =~ /^-g$|^--global-opts$/) {
      my $narg = &$take_value ($arg);
      # Don't assume CVS is called "cvs" on the user's system:
      $Log_Source_Command =~ s/(^\S*)/$1 $narg/;
    }
    elsif ($arg =~ /^-l$|^--log-opts$/) {
      my $narg = &$take_value ($arg);
      $Log_Source_Command .= " $narg";
    }
    elsif ($arg =~ /^-f$|^--file$/) {
      $output_file = &$take_value ($arg);
    }
    elsif ($arg =~ /^-U$|^--usermap$/) {
      $User_Map_File = &$take_value ($arg);
    }
    elsif ($arg =~ /^-W$|^--window$/) {
      $Max_Checkin_Duration = &$take_value ($arg);
    }
    elsif ($arg =~ /^-I$|^--ignore$/) {
      push (@Ignore_Files, &$take_value ($arg));
    }
    elsif ($arg =~ /^-C$|^--case-insensitive$/) {
      $Case_Insensitive = 1;
    }
    elsif ($arg =~ /^-R$|^--regexp$/) {
      $Regexp_Gate = &$take_value ($arg);
    }
    elsif ($arg =~ /^--stdout$/) {
      $Output_To_Stdout = 1;
    }
    elsif ($arg =~ /^-d$|^--distributed$/) {
      $Distributed = 1;
    }
    elsif ($arg =~ /^-P$|^--prune$/) {
      $Prune_Empty_Msgs = 1;
    }
    elsif ($arg =~ /^-S$|^--separate-header$/) {
      #$After_Header = "\n\n";
      $After_Header = "\n";
    }
    elsif ($arg =~ /^--no-wrap$/) {
      $No_Wrap = 1;
    }
    elsif ($arg =~ /^--gmt$|^--utc$/) {
      $UTC_Times = 1;
    }
    elsif ($arg =~ /^-w$|^--day-of-week$/) {
      $Show_Day_Of_Week = 1;
    }
    elsif ($arg =~ /^-r$|^--revisions$/) {
      $Show_Revisions = 1;
    }
    elsif ($arg =~ /^-t$|^--tags$/) {
      $Show_Tags = 1;
    }
    elsif ($arg =~ /^-b$|^--branches$/) {
      $Show_Branches = 1;
    }
    elsif ($arg =~ /^-F$|^--follow$/) {
      push (@Follow_Branches, &$take_value ($arg));
    }
    elsif ($arg =~ /^--stdin$/) {
      $Input_From_Stdin = 1;
    }
    elsif ($arg =~ /^--header$/) {
      my $narg = &$take_value ($arg);
      $ChangeLog_Header = &slurp_file ($narg);
      if (! defined ($ChangeLog_Header)) {
        $ChangeLog_Header = "";
      }
    }
    elsif ($arg =~ /^--xml$/) {
      $XML_Output = 1;
    }
    else {
      # Just add a filename as argument to the log command
      $Log_Source_Command .= " $arg";
    }
  }

  ## Check for contradictions...

  if ($Output_To_Stdout && $Distributed) {
    print STDERR "cannot pass both --stdout and --distributed\n";
    $exit_with_admonishment = 1;
  }

  if ($Output_To_Stdout && $output_file) {
    print STDERR "cannot pass both --stdout and --file\n";
    $exit_with_admonishment = 1;
  }

  # Or if any other error message has already been printed out, we
  # just leave now:
  if ($exit_with_admonishment) {
    &usage ();
    exit (1);
  }
  elsif ($Print_Usage) {
    &usage ();
    exit (0);
  }
  elsif ($Print_Version) {
    &version ();
    exit (0);
  }

  ## Else no problems, so proceed.

  if ($Output_To_Stdout) {
    undef $Log_File_Name;       # not actually necessary
  }
  elsif ($output_file) {
    $Log_File_Name = $output_file;
  }
}
1302
1303
# Return the entire contents of the named file as one string.
#
#   $filename -- name of the file to read.  Because the two-argument
#                form of open is used, the name "-" reads standard
#                input, which is what "--header -" relies on.
#
# Dies when no file name is given or when the file cannot be opened.
sub slurp_file ()
{
  my $filename = shift;

  # Test for presence, not truth: "0" is a valid file name.
  if ((! (defined ($filename))) or ($filename eq "")) {
    die ("no filename passed to slurp_file()");
  }

  open (SLURPEE, "<${filename}") or die ("unable to open $filename ($!)");

  my $retstr;
  {
    # Slurp mode for this one read only; local puts the caller's
    # record separator back however this block is left.
    local $/;
    $retstr = <SLURPEE>;
  }

  close (SLURPEE);
  return $retstr;
}
1317
1318
# Print the given message on standard error, but only when debugging
# output has been switched on through $Debug (see --debug).  The
# message is printed exactly as given; no newline is added.
sub debug ()
{
  my ($msg) = @_;
  $Debug and print STDERR $msg;
}
1326
1327
# Print the one-line version and license banner on standard output.
sub version ()
{
  my $banner = "cvs2cl.pl version ${VERSION}; distributed under the GNU GPL.\n";
  print $banner;
}
1332
1333
# Print the version banner followed by the full usage message on
# standard output.  The message text is a single-quoted here-document,
# so nothing in it is interpolated.
sub usage ()
{
  &version ();
  print <<'END_OF_INFO';
Generate GNU-style ChangeLogs in CVS working copies.

Notes about the output format(s):

   The default output of cvs2cl.pl is designed to be compact, formally
   unambiguous, but still easy for humans to read.  It is largely
   self-explanatory, I hope; the one abbreviation that might not be
   obvious is "utags".  That stands for "universal tags" -- a
   universal tag is one held by all the files in a given change entry.

   If you need output that's easy for a program to parse, use the
   --xml option.  Note that with XML output, just about all available
   information is included with each change entry, whether you asked
   for it or not, on the theory that your parser can ignore anything
   it's not looking for.

Notes about the options and arguments (the actual options are listed
last in this usage message):

  * The -I and -F options may appear multiple times.

  * To follow trunk revisions, use "-F trunk" ("-F TRUNK" also works).
    This is okay because no one would ever, ever be crazy enough to name a
    branch "trunk", right?  Right.

  * For the -U option, the UFILE should be formatted like
    CVSROOT/users. That is, each line of UFILE looks like this
       jrandom:jrandom@red-bean.com
    or maybe even like this
       jrandom:'Jesse Q. Random <jrandom@red-bean.com>'
    Don't forget to quote the portion after the colon if necessary.

  * Many people want to filter by date.  To do so, invoke cvs2cl.pl
    like this:
       cvs2cl.pl -l "-d'DATESPEC'"
    where DATESPEC is any date specification valid for "cvs log -d".
    (Note that CVS 1.10.7 and below requires there be no space between
    -d and its argument).

Options/Arguments:

  -h, -help, --help, or -?     Show this usage and exit
  --version                    Show version and exit
  -r, --revisions              Show revision numbers in output
  -b, --branches               Show branch names in revisions when possible
  -t, --tags                   Show tags (symbolic names) in output
  --stdin                      Read from stdin, don't run cvs log
  --stdout                     Output to stdout not to ChangeLog
  -d, --distributed            Put ChangeLogs in subdirs
  -f FILE, --file FILE         Write to FILE instead of "ChangeLog"
  -W SECS, --window SECS       Window of time within which log entries unify
  -U UFILE, --usermap UFILE    Expand usernames to email addresses from UFILE
  -R REGEXP, --regexp REGEXP   Include only entries that match REGEXP
  -I REGEXP, --ignore REGEXP   Ignore files whose names match REGEXP
  -C, --case-insensitive       Any regexp matching is done case-insensitively
  -F BRANCH, --follow BRANCH   Show only revisions on or ancestral to BRANCH
  -S, --separate-header        Blank line between each header and log message
  --no-wrap                    Don't auto-wrap log message (recommend -S also)
  --gmt, --utc                 Show times in GMT/UTC instead of local time
  -w, --day-of-week            Show day of week
  --header FILE                Get ChangeLog header from FILE ("-" means stdin)
  --xml                        Output XML instead of ChangeLog format
  -P, --prune                  Don't show empty log messages
  -g OPTS, --global-opts OPTS  Invoke like this "cvs OPTS log ..."
  -l OPTS, --log-opts OPTS     Invoke like this "cvs ... log OPTS"
  FILE1 [FILE2 ...]            Show only log information for the named FILE(s)

See http://www.red-bean.com/~kfogel/cvs2cl.shtml for maintenance and bug info.
END_OF_INFO
}
1473
1474 __END__
1475
1476 =head1 NAME
1477
1478 cvs2cl.pl - produces GNU-style ChangeLogs in CVS working copies, by
1479     running "cvs log" and parsing the output.  Shared log entries are
1480     unified in an intuitive way.
1481
1482 =head1 DESCRIPTION
1483
1484 This script generates GNU-style ChangeLog files from CVS log
1485 information.  Basic usage: just run it inside a working copy and a
1486 ChangeLog will appear.  It requires repository access (i.e., 'cvs log'
1487 must work).  Run "cvs2cl.pl --help" to see more advanced options.
1488
1489 See http://www.red-bean.com/~kfogel/cvs2cl.shtml for updates, and
1490 for instructions on getting anonymous CVS access to this script.
1491
1492 Maintainer: Karl Fogel <kfogel@red-bean.com>
1493 Please report bugs to <cvs2cl-bugs@red-bean.com>.
1494
1495 =head1 README
1496
1497 This script generates GNU-style ChangeLog files from CVS log
1498 information.  Basic usage: just run it inside a working copy and a
1499 ChangeLog will appear.  It requires repository access (i.e., 'cvs log'
1500 must work).  Run "cvs2cl.pl --help" to see more advanced options.
1501
1502 See http://www.red-bean.com/~kfogel/cvs2cl.shtml for updates, and
1503 for instructions on getting anonymous CVS access to this script.
1504
1505 Maintainer: Karl Fogel <kfogel@red-bean.com>
1506 Please report bugs to <cvs2cl-bugs@red-bean.com>.
1507
1508 =head1 PREREQUISITES
1509
1510 This script requires C<Text::Wrap>, C<Time::Local>, and
1511 C<File::Basename>.
1512 It also seems to require C<Perl 5.004_04> or higher.
1513
1514 =pod OSNAMES
1515
1516 any
1517
1518 =pod SCRIPT CATEGORIES
1519
1520 Version_Control/CVS
1521
1522 =cut
1523
1524
1525 -*- -*- -*- -*- -*- -*- -*- -*- -*- -*- -*- -*- -*- -*- -*- -*- -*- -*-
1526
1527 Note about a bug-slash-opportunity:
1528 -----------------------------------
1529
1530 There's a bug in Text::Wrap, which affects cvs2cl.  This script
1531 reveals it:
1532
1533   #!/usr/bin/perl -w
1534
1535   use Text::Wrap;
1536
1537   my $test_text =
1538   "This script demonstrates a bug in Text::Wrap.  The very long line
1539   following this paragraph will be relocated relative to the surrounding
1540   text:
1541
1542   ====================================================================
1543
1544   See?  When the bug happens, we'll get the line of equal signs below
1545   this paragraph, even though it should be above.";
1546
1547
1548   # Print out the test text with no wrapping:
1549   print "$test_text";
1550   print "\n";
1551   print "\n";
1552
1553   # Now print it out wrapped, and see the bug:
1554   print wrap ("\t", "        ", "$test_text");
1555   print "\n";
1556   print "\n";
1557
1558 If the line of equal signs were one character shorter, the bug would
1559 not happen.  Interesting.
1560
1561 Anyway, rather than fix this in Text::Wrap, we might as well write a
1562 new wrap() which has the following much-needed features:
1563
1564 * initial indentation, like current Text::Wrap()
1565 * subsequent line indentation, like current Text::Wrap()
1566 * user chooses among: force-break long words, leave them alone, or die()?
1567 * preserve existing indentation: chopped chunks from an indented line
1568   are indented by same (like this line, not counting the asterisk!)
1569 * optional list of things to preserve on line starts, default ">"
1570
1571 Note that the last two are essentially the same concept, so unify them
1572 in the implementation and give a good interface for controlling them.
1573
1574 And how about:
1575
1576 Optionally, when encountering a line pre-indented by the same amount as
1577 the previous line, strip the newline and refill, but keep that same
1578 indentation.  Yeah...