Commits

Anonymous committed 58dd491 Merge

Merge branch 'mo/cvsserver'

* mo/cvsserver:
Documentation: Fix skipped section level
git-cvsserver: add ability to guess -kb from contents
implement gitcvs.usecrlfattr
git-cvsserver: add mechanism for managing working tree and current directory

  • Participants
  • Parent commits 6a491a1, b592d88

Comments (0)

Files changed (4)

Documentation/config.txt

 	Path to a log file where the CVS server interface well... logs
 	various stuff. See linkgit:git-cvsserver[1].
 
+gitcvs.usecrlfattr
+	If true, the server will look up the `crlf` attribute for
+	files to determine the '-k' modes to use. If `crlf` is set,
+	the '-k' mode will be left blank, so cvs clients will
+	treat it as text. If `crlf` is explicitly unset, the file
+	will be set with '-kb' mode, which supresses any newline munging
+	the client might otherwise do. If `crlf` is not specified,
+	then 'gitcvs.allbinary' is used. See linkgit:gitattribute[5].
+
 gitcvs.allbinary::
-	If true, all files are sent to the client in mode '-kb'. This
-	causes the client to treat all files as binary files which suppresses
-	any newline munging it otherwise might do. A work-around for the
-	fact that there is no way yet to set single files to mode '-kb'.
+	This is used if 'gitcvs.usecrlfattr' does not resolve
+	the correct '-kb' mode to use. If true, all
+	unresolved files are sent to the client in
+	mode '-kb'. This causes the client to treat them
+	as binary files, which suppresses any newline munging it
+	otherwise might do. Alternatively, if it is set to "guess",
+	then the contents of the file are examined to decide if
+	it is binary, similar to 'core.autocrlf'.
 
 gitcvs.dbname::
 	Database used by git-cvsserver to cache revision information
 	linkgit:git-cvsserver[1] for details).  Any non-alphabetic
 	characters will be replaced with underscores.
 
-All gitcvs variables except for 'gitcvs.allbinary' can also be
-specified as 'gitcvs.<access_method>.<varname>' (where 'access_method'
+All gitcvs variables except for 'gitcvs.usecrlfattr' and
+'gitcvs.allbinary' can also be specified as
+'gitcvs.<access_method>.<varname>' (where 'access_method'
 is one of "ext" and "pserver") to make them apply only for the given
 access method.
 

Documentation/git-cvsserver.txt

 Legacy monitoring operations are not supported (edit, watch and related).
 Exports and tagging (tags and branches) are not supported at this stage.
 
-The server should set the '-k' mode to binary when relevant, however,
-this is not really implemented yet. For now, you can force the server
-to set '-kb' for all files by setting the `gitcvs.allbinary` config
-variable. In proper GIT tradition, the contents of the files are
-always respected. No keyword expansion or newline munging is supported.
+CRLF Line Ending Conversions
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+By default the server leaves the '-k' mode blank for all files,
+which causes the cvs client to treat them as a text files, subject
+to crlf conversion on some platforms.
+
+You can make the server use `crlf` attributes to set the '-k' modes
+for files by setting the `gitcvs.usecrlfattr` config variable.
+In this case, if `crlf` is explicitly unset ('-crlf'), then the
+server will set '-kb' mode for binary files. If `crlf` is set,
+then the '-k' mode will explicitly be left blank.  See
+also linkgit:gitattributes[5] for more information about the `crlf`
+attribute.
+
+Alternatively, if `gitcvs.usecrlfattr` config is not enabled
+or if the `crlf` attribute is unspecified for a filename, then
+the server uses the `gitcvs.allbinary` config for the default setting.
+If `gitcvs.allbinary` is set, then file not otherwise
+specified will default to '-kb' mode. Otherwise the '-k' mode
+is left blank. But if `gitcvs.allbinary` is set to "guess", then
+the correct '-k' mode will be guessed based on the contents of
+the file.
+
+For best consistency with cvs, it is probably best to override the
+defaults by setting `gitcvs.usecrlfattr` to true,
+and `gitcvs.allbinary` to "guess".
 
 Dependencies
 ------------

git-cvsserver.perl

 
 use Fcntl;
 use File::Temp qw/tempdir tempfile/;
+use File::Path qw/rmtree/;
 use File::Basename;
 use Getopt::Long qw(:config require_order no_ignore_case);
 
 # $state holds all the bits of information the clients sends us that could
 # potentially be useful when it comes to actually _doing_ something.
 my $state = { prependdir => '' };
+
+# Work is for managing temporary working directory
+my $work =
+    {
+        state => undef,  # undef, 1 (empty), 2 (with stuff)
+        workDir => undef,
+        index => undef,
+        emptyDir => undef,
+        tmpDir => undef
+    };
+
 $log->info("--------------- STARTING -----------------");
 
 my $usage =
 $log->debug("Processing time : user=" . (times)[0] . " system=" . (times)[1]);
 $log->info("--------------- FINISH -----------------");
 
+chdir '/';
+exit 0;
+
 # Magic catchall method.
 #    This is the method that will handle all commands we haven't yet
 #    implemented. It simply sends a warning to the log file indicating a
                 print $state->{CVSROOT} . "/$state->{module}/$filename\n";
 
                 # this is an "entries" line
-                my $kopts = kopts_from_path($filepart);
+                my $kopts = kopts_from_path($filename,"sha1",$meta->{filehash});
                 $log->debug("/$filepart/1.$meta->{revision}//$kopts/");
                 print "/$filepart/1.$meta->{revision}//$kopts/\n";
                 # permissions
 
         print "Checked-in $dirpart\n";
         print "$filename\n";
-        my $kopts = kopts_from_path($filepart);
+        my $kopts = kopts_from_path($filename,"file",
+                        $state->{entries}{$filename}{modified_filename});
         print "/$filepart/0//$kopts/\n";
 
+        my $requestedKopts = $state->{opt}{k};
+        if(defined($requestedKopts))
+        {
+            $requestedKopts = "-k$requestedKopts";
+        }
+        else
+        {
+            $requestedKopts = "";
+        }
+        if( $kopts ne $requestedKopts )
+        {
+            $log->warn("Ignoring requested -k='$requestedKopts'"
+                        . " for '$filename'; detected -k='$kopts' instead");
+            #TODO: Also have option to send warning to user?
+        }
+
         $addcount++;
     }
 
 
         print "Checked-in $dirpart\n";
         print "$filename\n";
-        my $kopts = kopts_from_path($filepart);
+        my $kopts = kopts_from_path($filename,"sha1",$meta->{filehash});
         print "/$filepart/-1.$wrev//$kopts/\n";
 
         $rmcount++;
     argsplit("co");
 
     my $module = $state->{args}[0];
+    $state->{module} = $module;
     my $checkout_path = $module;
 
     # use the user specified directory if we're given it
         # Don't want to check out deleted files
         next if ( $git->{filehash} eq "deleted" );
 
+        my $fullName = $git->{name};
         ( $git->{name}, $git->{dir} ) = filenamesplit($git->{name});
 
        if (length($git->{dir}) && $git->{dir} ne './'
        print $state->{CVSROOT} . "/$module/" . ( defined ( $git->{dir} ) and $git->{dir} ne "./" ? $git->{dir} . "/" : "" ) . "$git->{name}\n";
 
         # this is an "entries" line
-        my $kopts = kopts_from_path($git->{name});
+        my $kopts = kopts_from_path($fullName,"sha1",$git->{filehash});
         print "/$git->{name}/1.$git->{revision}//$kopts/\n";
         # permissions
         print "u=$git->{mode},g=$git->{mode},o=$git->{mode}\n";
 		print $state->{CVSROOT} . "/$state->{module}/$filename\n";
 
 		# this is an "entries" line
-		my $kopts = kopts_from_path($filepart);
+		my $kopts = kopts_from_path($filename,"sha1",$meta->{filehash});
 		$log->debug("/$filepart/1.$meta->{revision}//$kopts/");
 		print "/$filepart/1.$meta->{revision}//$kopts/\n";
 
             $log->info("Updating '$filename'");
             my ( $filepart, $dirpart ) = filenamesplit($meta->{name},1);
 
-            my $dir = tempdir( DIR => $TEMP_DIR, CLEANUP => 1 ) . "/";
+            my $mergeDir = setupTmpDir();
 
-            chdir $dir;
             my $file_local = $filepart . ".mine";
+            my $mergedFile = "$mergeDir/$file_local";
             system("ln","-s",$state->{entries}{$filename}{modified_filename}, $file_local);
             my $file_old = $filepart . "." . $oldmeta->{revision};
             transmitfile($oldmeta->{filehash}, { targetfile => $file_old });
             $log->info("Merging $file_local, $file_old, $file_new");
             print "M Merging differences between 1.$oldmeta->{revision} and 1.$meta->{revision} into $filename\n";
 
-            $log->debug("Temporary directory for merge is $dir");
+            $log->debug("Temporary directory for merge is $mergeDir");
 
             my $return = system("git", "merge-file", $file_local, $file_old, $file_new);
             $return >>= 8;
 
+            cleanupTmpDir();
+
             if ( $return == 0 )
             {
                 $log->info("Merged successfully");
                     print "Merged $dirpart\n";
                     $log->debug($state->{CVSROOT} . "/$state->{module}/$filename");
                     print $state->{CVSROOT} . "/$state->{module}/$filename\n";
-                    my $kopts = kopts_from_path($filepart);
+                    my $kopts = kopts_from_path("$dirpart/$filepart",
+                                                "file",$mergedFile);
                     $log->debug("/$filepart/1.$meta->{revision}//$kopts/");
                     print "/$filepart/1.$meta->{revision}//$kopts/\n";
                 }
                 {
                     print "Merged $dirpart\n";
                     print $state->{CVSROOT} . "/$state->{module}/$filename\n";
-                    my $kopts = kopts_from_path($filepart);
+                    my $kopts = kopts_from_path("$dirpart/$filepart",
+                                                "file",$mergedFile);
                     print "/$filepart/1.$meta->{revision}/+/$kopts/\n";
                 }
             }
                 # transmit file, format is single integer on a line by itself (file
                 # size) followed by the file contents
                 # TODO : we should copy files in blocks
-                my $data = `cat $file_local`;
+                my $data = `cat $mergedFile`;
                 $log->debug("File size : " . length($data));
                 print length($data) . "\n";
                 print $data;
             }
-
-            chdir "/";
         }
 
     }
     if ( $state->{method} eq 'pserver')
     {
         print "error 1 pserver access cannot commit\n";
+        cleanupWorkTree();
         exit;
     }
 
     {
         $log->warn("file 'index' already exists in the git repository");
         print "error 1 Index already exists in git repo\n";
+        cleanupWorkTree();
         exit;
     }
 
     my $updater = GITCVS::updater->new($state->{CVSROOT}, $state->{module}, $log);
     $updater->update();
 
-    my $tmpdir = tempdir ( DIR => $TEMP_DIR );
-    my ( undef, $file_index ) = tempfile ( DIR => $TEMP_DIR, OPEN => 0 );
-    $log->info("Lockless commit start, basing commit on '$tmpdir', index file is '$file_index'");
-
-    $ENV{GIT_DIR} = $state->{CVSROOT} . "/";
-    $ENV{GIT_WORK_TREE} = ".";
-    $ENV{GIT_INDEX_FILE} = $file_index;
-
     # Remember where the head was at the beginning.
     my $parenthash = `git show-ref -s refs/heads/$state->{module}`;
     chomp $parenthash;
     if ($parenthash !~ /^[0-9a-f]{40}$/) {
 	    print "error 1 pserver cannot find the current HEAD of module";
+	    cleanupWorkTree();
 	    exit;
     }
 
-    chdir $tmpdir;
+    setupWorkTree($parenthash);
 
-    # populate the temporary index
-    system("git-read-tree", $parenthash);
-    unless ($? == 0)
-    {
-	die "Error running git-read-tree $state->{module} $file_index $!";
-    }
-    $log->info("Created index '$file_index' for head $state->{module} - exit status $?");
+    $log->info("Lockless commit start, basing commit on '$work->{workDir}', index file is '$work->{index}'");
+
+    $log->info("Created index '$work->{index}' for head $state->{module} - exit status $?");
 
     my @committedfiles = ();
     my %oldmeta;
         {
             # fail everything if an up to date check fails
             print "error 1 Up to date check failed for $filename\n";
-            chdir "/";
+            cleanupWorkTree();
             exit;
         }
 
     {
         print "E No files to commit\n";
         print "ok\n";
-        chdir "/";
+        cleanupWorkTree();
         return;
     }
 
     {
         $log->warn("Commit failed (Invalid commit hash)");
         print "error 1 Commit failed (unknown reason)\n";
-        chdir "/";
+        cleanupWorkTree();
         exit;
     }
 
 		{
 			$log->warn("Commit failed (update hook declined to update ref)");
 			print "error 1 Commit failed (update hook declined)\n";
-			chdir "/";
+			cleanupWorkTree();
 			exit;
 		}
 	}
 			"refs/heads/$state->{module}", $commithash, $parenthash)) {
 		$log->warn("update-ref for $state->{module} failed.");
 		print "error 1 Cannot commit -- update first\n";
+		cleanupWorkTree();
 		exit;
 	}
 
             }
             print "Checked-in $dirpart\n";
             print "$filename\n";
-            my $kopts = kopts_from_path($filepart);
+            my $kopts = kopts_from_path($filename,"sha1",$meta->{filehash});
             print "/$filepart/1.$meta->{revision}//$kopts/\n";
         }
     }
 
-    chdir "/";
+    cleanupWorkTree();
     print "ok\n";
 }
 
     argsfromdir($updater);
 
     # we'll need a temporary checkout dir
-    my $tmpdir = tempdir ( DIR => $TEMP_DIR );
-    my ( undef, $file_index ) = tempfile ( DIR => $TEMP_DIR, OPEN => 0 );
-    $log->info("Temp checkoutdir creation successful, basing annotate session work on '$tmpdir', index file is '$file_index'");
-
-    $ENV{GIT_DIR} = $state->{CVSROOT} . "/";
-    $ENV{GIT_WORK_TREE} = ".";
-    $ENV{GIT_INDEX_FILE} = $file_index;
+    setupWorkTree();
 
-    chdir $tmpdir;
+    $log->info("Temp checkoutdir creation successful, basing annotate session work on '$work->{workDir}', index file is '$ENV{GIT_INDEX_FILE}'");
 
     # foreach file specified on the command line ...
     foreach my $filename ( @{$state->{args}} )
 	system("git-read-tree", $lastseenin);
 	unless ($? == 0)
 	{
-	    print "E error running git-read-tree $lastseenin $file_index $!\n";
+	    print "E error running git-read-tree $lastseenin $ENV{GIT_INDEX_FILE} $!\n";
 	    return;
 	}
-	$log->info("Created index '$file_index' with commit $lastseenin - exit status $?");
+	$log->info("Created index '$ENV{GIT_INDEX_FILE}' with commit $lastseenin - exit status $?");
 
         # do a checkout of the file
         system('git-checkout-index', '-f', '-u', $filename);
         # git-jsannotate telling us about commits we are hiding
         # from the client.
 
-        my $a_hints = "$tmpdir/.annotate_hints";
+        my $a_hints = "$work->{workDir}/.annotate_hints";
         if (!open(ANNOTATEHINTS, '>', $a_hints)) {
             print "E failed to open '$a_hints' for writing: $!\n";
             return;
     }
 
     # done; get out of the tempdir
-    chdir "/";
+    cleanupWorkDir();
 
     print "ok\n";
 
     return $filename;
 }
 
+sub validateGitDir
+{
+    if( !defined($state->{CVSROOT}) )
+    {
+        print "error 1 CVSROOT not specified\n";
+        cleanupWorkTree();
+        exit;
+    }
+    if( $ENV{GIT_DIR} ne ($state->{CVSROOT} . '/') )
+    {
+        print "error 1 Internally inconsistent CVSROOT\n";
+        cleanupWorkTree();
+        exit;
+    }
+}
+
+# Setup working directory in a work tree with the requested version
+# loaded in the index.
+sub setupWorkTree
+{
+    my ($ver) = @_;
+
+    validateGitDir();
+
+    if( ( defined($work->{state}) && $work->{state} != 1 ) ||
+        defined($work->{tmpDir}) )
+    {
+        $log->warn("Bad work tree state management");
+        print "error 1 Internal setup multiple work trees without cleanup\n";
+        cleanupWorkTree();
+        exit;
+    }
+
+    $work->{workDir} = tempdir ( DIR => $TEMP_DIR );
+
+    if( !defined($work->{index}) )
+    {
+        (undef, $work->{index}) = tempfile ( DIR => $TEMP_DIR, OPEN => 0 );
+    }
+
+    chdir $work->{workDir} or
+        die "Unable to chdir to $work->{workDir}\n";
+
+    $log->info("Setting up GIT_WORK_TREE as '.' in '$work->{workDir}', index file is '$work->{index}'");
+
+    $ENV{GIT_WORK_TREE} = ".";
+    $ENV{GIT_INDEX_FILE} = $work->{index};
+    $work->{state} = 2;
+
+    if($ver)
+    {
+        system("git","read-tree",$ver);
+        unless ($? == 0)
+        {
+            $log->warn("Error running git-read-tree");
+            die "Error running git-read-tree $ver in $work->{workDir} $!\n";
+        }
+    }
+    # else # req_annotate reads tree for each file
+}
+
+# Ensure current directory is in some kind of working directory,
+# with a recent version loaded in the index.
+sub ensureWorkTree
+{
+    if( defined($work->{tmpDir}) )
+    {
+        $log->warn("Bad work tree state management [ensureWorkTree()]");
+        print "error 1 Internal setup multiple dirs without cleanup\n";
+        cleanupWorkTree();
+        exit;
+    }
+    if( $work->{state} )
+    {
+        return;
+    }
+
+    validateGitDir();
+
+    if( !defined($work->{emptyDir}) )
+    {
+        $work->{emptyDir} = tempdir ( DIR => $TEMP_DIR, OPEN => 0);
+    }
+    chdir $work->{emptyDir} or
+        die "Unable to chdir to $work->{emptyDir}\n";
+
+    my $ver = `git show-ref -s refs/heads/$state->{module}`;
+    chomp $ver;
+    if ($ver !~ /^[0-9a-f]{40}$/)
+    {
+        $log->warn("Error from git show-ref -s refs/head$state->{module}");
+        print "error 1 cannot find the current HEAD of module";
+        cleanupWorkTree();
+        exit;
+    }
+
+    if( !defined($work->{index}) )
+    {
+        (undef, $work->{index}) = tempfile ( DIR => $TEMP_DIR, OPEN => 0 );
+    }
+
+    $ENV{GIT_WORK_TREE} = ".";
+    $ENV{GIT_INDEX_FILE} = $work->{index};
+    $work->{state} = 1;
+
+    system("git","read-tree",$ver);
+    unless ($? == 0)
+    {
+        die "Error running git-read-tree $ver $!\n";
+    }
+}
+
+# Cleanup working directory that is not needed any longer.
+sub cleanupWorkTree
+{
+    if( ! $work->{state} )
+    {
+        return;
+    }
+
+    chdir "/" or die "Unable to chdir '/'\n";
+
+    if( defined($work->{workDir}) )
+    {
+        rmtree( $work->{workDir} );
+        undef $work->{workDir};
+    }
+    undef $work->{state};
+}
+
+# Setup a temporary directory (not a working tree), typically for
+# merging dirty state as in req_update.
+sub setupTmpDir
+{
+    $work->{tmpDir} = tempdir ( DIR => $TEMP_DIR );
+    chdir $work->{tmpDir} or die "Unable to chdir $work->{tmpDir}\n";
+
+    return $work->{tmpDir};
+}
+
+# Clean up a previously setupTmpDir.  Restore previous work tree if
+# appropriate.
+sub cleanupTmpDir
+{
+    if ( !defined($work->{tmpDir}) )
+    {
+        $log->warn("cleanup tmpdir that has not been setup");
+        die "Cleanup tmpDir that has not been setup\n";
+    }
+    if( defined($work->{state}) )
+    {
+        if( $work->{state} == 1 )
+        {
+            chdir $work->{emptyDir} or
+                die "Unable to chdir to $work->{emptyDir}\n";
+        }
+        elsif( $work->{state} == 2 )
+        {
+            chdir $work->{workDir} or
+                die "Unable to chdir to $work->{emptyDir}\n";
+        }
+        else
+        {
+            $log->warn("Inconsistent work dir state");
+            die "Inconsistent work dir state\n";
+        }
+    }
+    else
+    {
+        chdir "/" or die "Unable to chdir '/'\n";
+    }
+}
+
 # Given a path, this function returns a string containing the kopts
 # that should go into that path's Entries line.  For example, a binary
 # file should get -kb.
 sub kopts_from_path
 {
-	my ($path) = @_;
+    my ($path, $srcType, $name) = @_;
 
-	# Once it exists, the git attributes system should be used to look up
-	# what attributes apply to this path.
+    if ( defined ( $cfg->{gitcvs}{usecrlfattr} ) and
+         $cfg->{gitcvs}{usecrlfattr} =~ /\s*(1|true|yes)\s*$/i )
+    {
+        my ($val) = check_attr( "crlf", $path );
+        if ( $val eq "set" )
+        {
+            return "";
+        }
+        elsif ( $val eq "unset" )
+        {
+            return "-kb"
+        }
+        else
+        {
+            $log->info("Unrecognized check_attr crlf $path : $val");
+        }
+    }
 
-	# Until then, take the setting from the config file
-    unless ( defined ( $cfg->{gitcvs}{allbinary} ) and $cfg->{gitcvs}{allbinary} =~ /^\s*(1|true|yes)\s*$/i )
+    if ( defined ( $cfg->{gitcvs}{allbinary} ) )
     {
-		# Return "" to give no special treatment to any path
-		return "";
-    } else {
-		# Alternatively, to have all files treated as if they are binary (which
-		# is more like git itself), always return the "-kb" option
-		return "-kb";
+        if( ($cfg->{gitcvs}{allbinary} =~ /^\s*(1|true|yes)\s*$/i) )
+        {
+            return "-kb";
+        }
+        elsif( ($cfg->{gitcvs}{allbinary} =~ /^\s*guess\s*$/i) )
+        {
+            if( $srcType eq "sha1Or-k" &&
+                !defined($name) )
+            {
+                my ($ret)=$state->{entries}{$path}{options};
+                if( !defined($ret) )
+                {
+                    $ret=$state->{opt}{k};
+                    if(defined($ret))
+                    {
+                        $ret="-k$ret";
+                    }
+                    else
+                    {
+                        $ret="";
+                    }
+                }
+                if( ! ($ret=~/^(|-kb|-kkv|-kkvl|-kk|-ko|-kv)$/) )
+                {
+                    print "E Bad -k option\n";
+                    $log->warn("Bad -k option: $ret");
+                    die "Error: Bad -k option: $ret\n";
+                }
+
+                return $ret;
+            }
+            else
+            {
+                if( is_binary($srcType,$name) )
+                {
+                    $log->debug("... as binary");
+                    return "-kb";
+                }
+                else
+                {
+                    $log->debug("... as text");
+                }
+            }
+        }
+    }
+    # Return "" to give no special treatment to any path
+    return "";
+}
+
+sub check_attr
+{
+    my ($attr,$path) = @_;
+    ensureWorkTree();
+    if ( open my $fh, '-|', "git", "check-attr", $attr, "--", $path )
+    {
+        my $val = <$fh>;
+        close $fh;
+        $val =~ s/.*: ([^:\r\n]*)\s*$/$1/;
+        return $val;
+    }
+    else
+    {
+        return undef;
+    }
+}
+
+# This should have the same heuristics as convert.c:is_binary() and related.
+# Note that the bare CR test is done by callers in convert.c.
+sub is_binary
+{
+    my ($srcType,$name) = @_;
+    $log->debug("is_binary($srcType,$name)");
+
+    # Minimize amount of interpreted code run in the inner per-character
+    # loop for large files, by totalling each character value and
+    # then analyzing the totals.
+    my @counts;
+    my $i;
+    for($i=0;$i<256;$i++)
+    {
+        $counts[$i]=0;
+    }
+
+    my $fh = open_blob_or_die($srcType,$name);
+    my $line;
+    while( defined($line=<$fh>) )
+    {
+        # Any '\0' and bare CR are considered binary.
+        if( $line =~ /\0|(\r[^\n])/ )
+        {
+            close($fh);
+            return 1;
+        }
+
+        # Count up each character in the line:
+        my $len=length($line);
+        for($i=0;$i<$len;$i++)
+        {
+            $counts[ord(substr($line,$i,1))]++;
+        }
+    }
+    close $fh;
+
+    # Don't count CR and LF as either printable/nonprintable
+    $counts[ord("\n")]=0;
+    $counts[ord("\r")]=0;
+
+    # Categorize individual character count into printable and nonprintable:
+    my $printable=0;
+    my $nonprintable=0;
+    for($i=0;$i<256;$i++)
+    {
+        if( $i < 32 &&
+            $i != ord("\b") &&
+            $i != ord("\t") &&
+            $i != 033 &&       # ESC
+            $i != 014 )        # FF
+        {
+            $nonprintable+=$counts[$i];
+        }
+        elsif( $i==127 )  # DEL
+        {
+            $nonprintable+=$counts[$i];
+        }
+        else
+        {
+            $printable+=$counts[$i];
+        }
+    }
+
+    return ($printable >> 7) < $nonprintable;
+}
+
+# Returns open file handle.  Possible invocations:
+#  - open_blob_or_die("file",$filename);
+#  - open_blob_or_die("sha1",$filehash);
+sub open_blob_or_die
+{
+    my ($srcType,$name) = @_;
+    my ($fh);
+    if( $srcType eq "file" )
+    {
+        if( !open $fh,"<",$name )
+        {
+            $log->warn("Unable to open file $name: $!");
+            die "Unable to open file $name: $!\n";
+        }
+    }
+    elsif( $srcType eq "sha1" || $srcType eq "sha1Or-k" )
+    {
+        unless ( defined ( $name ) and $name =~ /^[a-zA-Z0-9]{40}$/ )
+        {
+            $log->warn("Need filehash");
+            die "Need filehash\n";
+        }
+
+        my $type = `git cat-file -t $name`;
+        chomp $type;
+
+        unless ( defined ( $type ) and $type eq "blob" )
+        {
+            $log->warn("Invalid type '$type' for '$name'");
+            die ( "Invalid type '$type' (expected 'blob')" )
+        }
+
+        my $size = `git cat-file -s $name`;
+        chomp $size;
+
+        $log->debug("open_blob_or_die($name) size=$size, type=$type");
+
+        unless( open $fh, '-|', "git", "cat-file", "blob", $name )
+        {
+            $log->warn("Unable to open sha1 $name");
+            die "Unable to open sha1 $name\n";
+        }
+    }
+    else
+    {
+        $log->warn("Unknown type of blob source: $srcType");
+        die "Unknown type of blob source: $srcType\n";
     }
+    return $fh;
 }
 
 # Generate a CVS author name from Git author information, by taking

t/t9401-git-cvsserver-crlf.sh

+#!/bin/sh
+#
+# Copyright (c) 2008 Matthew Ogilvie
+# Parts adapted from other tests.
+#
+
+test_description='git-cvsserver -kb modes
+
+tests -kb mode for binary files when accessing a git
+repository using cvs CLI client via git-cvsserver server'
+
+. ./test-lib.sh
+
+q_to_nul () {
+    perl -pe 'y/Q/\000/'
+}
+
+q_to_cr () {
+    tr Q '\015'
+}
+
+marked_as () {
+    foundEntry="$(grep "^/$2/" "$1/CVS/Entries")"
+    if [ x"$foundEntry" = x"" ] ; then
+       echo "NOT FOUND: $1 $2 1 $3" >> "${WORKDIR}/marked.log"
+       return 1
+    fi
+    test x"$(grep "^/$2/" "$1/CVS/Entries" | cut -d/ -f5)" = x"$3"
+    stat=$?
+    echo "$1 $2 $stat '$3'" >> "${WORKDIR}/marked.log"
+    return $stat
+}
+
+not_present() {
+    foundEntry="$(grep "^/$2/" "$1/CVS/Entries")"
+    if [ -r "$1/$2" ] ; then
+        echo "Error: File still exists: $1 $2" >> "${WORKDIR}/marked.log"
+        return 1;
+    fi
+    if [ x"$foundEntry" != x"" ] ; then
+        echo "Error: should not have found: $1 $2" >> "${WORKDIR}/marked.log"
+        return 1;
+    else
+        echo "Correctly not found: $1 $2" >> "${WORKDIR}/marked.log"
+        return 0;
+    fi
+}
+
+cvs >/dev/null 2>&1
+if test $? -ne 1
+then
+    test_expect_success 'skipping git-cvsserver tests, cvs not found' :
+    test_done
+    exit
+fi
+perl -e 'use DBI; use DBD::SQLite' >/dev/null 2>&1 || {
+    test_expect_success 'skipping git-cvsserver tests, Perl SQLite interface unavailable' :
+    test_done
+    exit
+}
+
+unset GIT_DIR GIT_CONFIG
+WORKDIR=$(pwd)
+SERVERDIR=$(pwd)/gitcvs.git
+git_config="$SERVERDIR/config"
+CVSROOT=":fork:$SERVERDIR"
+CVSWORK="$(pwd)/cvswork"
+CVS_SERVER=git-cvsserver
+export CVSROOT CVS_SERVER
+
+rm -rf "$CVSWORK" "$SERVERDIR"
+test_expect_success 'setup' '
+    echo "Simple text file" >textfile.c &&
+    echo "File with embedded NUL: Q <- there" | q_to_nul > binfile.bin &&
+    mkdir subdir &&
+    echo "Another text file" > subdir/file.h &&
+    echo "Another binary: Q (this time CR)" | q_to_cr > subdir/withCr.bin &&
+    echo "Mixed up NUL, but marked text: Q <- there" | q_to_nul > mixedUp.c
+    echo "Unspecified" > subdir/unspecified.other &&
+    echo "/*.bin -crlf" > .gitattributes &&
+    echo "/*.c crlf" >> .gitattributes &&
+    echo "subdir/*.bin -crlf" >> .gitattributes &&
+    echo "subdir/*.c crlf" >> .gitattributes &&
+    echo "subdir/file.h crlf" >> .gitattributes &&
+    git add .gitattributes textfile.c binfile.bin mixedUp.c subdir/* &&
+    git commit -q -m "First Commit" &&
+    git clone -q --local --bare "$WORKDIR/.git" "$SERVERDIR" >/dev/null 2>&1 &&
+    GIT_DIR="$SERVERDIR" git config --bool gitcvs.enabled true &&
+    GIT_DIR="$SERVERDIR" git config gitcvs.logfile "$SERVERDIR/gitcvs.log"
+'
+
+test_expect_success 'cvs co (default crlf)' '
+    GIT_CONFIG="$git_config" cvs -Q co -d cvswork master >cvs.log 2>&1 &&
+    test x"$(grep '/-k' cvswork/CVS/Entries cvswork/subdir/CVS/Entries)" = x""
+'
+
+rm -rf cvswork
+test_expect_success 'cvs co (allbinary)' '
+    GIT_DIR="$SERVERDIR" git config --bool gitcvs.allbinary true &&
+    GIT_CONFIG="$git_config" cvs -Q co -d cvswork master >cvs.log 2>&1 &&
+    marked_as cvswork textfile.c -kb &&
+    marked_as cvswork binfile.bin -kb &&
+    marked_as cvswork .gitattributes -kb &&
+    marked_as cvswork mixedUp.c -kb &&
+    marked_as cvswork/subdir withCr.bin -kb &&
+    marked_as cvswork/subdir file.h -kb &&
+    marked_as cvswork/subdir unspecified.other -kb
+'
+
+rm -rf cvswork cvs.log
+test_expect_success 'cvs co (use attributes/allbinary)' '
+    GIT_DIR="$SERVERDIR" git config --bool gitcvs.usecrlfattr true &&
+    GIT_CONFIG="$git_config" cvs -Q co -d cvswork master >cvs.log 2>&1 &&
+    marked_as cvswork textfile.c "" &&
+    marked_as cvswork binfile.bin -kb &&
+    marked_as cvswork .gitattributes -kb &&
+    marked_as cvswork mixedUp.c "" &&
+    marked_as cvswork/subdir withCr.bin -kb &&
+    marked_as cvswork/subdir file.h "" &&
+    marked_as cvswork/subdir unspecified.other -kb
+'
+
+rm -rf cvswork
+test_expect_success 'cvs co (use attributes)' '
+    GIT_DIR="$SERVERDIR" git config --bool gitcvs.allbinary false &&
+    GIT_CONFIG="$git_config" cvs -Q co -d cvswork master >cvs.log 2>&1 &&
+    marked_as cvswork textfile.c "" &&
+    marked_as cvswork binfile.bin -kb &&
+    marked_as cvswork .gitattributes "" &&
+    marked_as cvswork mixedUp.c "" &&
+    marked_as cvswork/subdir withCr.bin -kb &&
+    marked_as cvswork/subdir file.h "" &&
+    marked_as cvswork/subdir unspecified.other ""
+'
+
+test_expect_success 'adding files' '
+    cd cvswork/subdir &&
+    echo "more text" > src.c &&
+    GIT_CONFIG="$git_config" cvs -Q add src.c >cvs.log 2>&1 &&
+    marked_as . src.c "" &&
+    echo "psuedo-binary" > temp.bin &&
+    cd .. &&
+    GIT_CONFIG="$git_config" cvs -Q add subdir/temp.bin >cvs.log 2>&1 &&
+    marked_as subdir temp.bin "-kb" &&
+    cd subdir &&
+    GIT_CONFIG="$git_config" cvs -Q ci -m "adding files" >cvs.log 2>&1 &&
+    marked_as . temp.bin "-kb" &&
+    marked_as . src.c ""
+'
+
+cd "$WORKDIR"
+test_expect_success 'updating' '
+    git pull gitcvs.git &&
+    echo 'hi' > subdir/newfile.bin &&
+    echo 'junk' > subdir/file.h &&
+    echo 'hi' > subdir/newfile.c &&
+    echo 'hello' >> binfile.bin &&
+    git add subdir/newfile.bin subdir/file.h subdir/newfile.c binfile.bin &&
+    git commit -q -m "Add and change some files" &&
+    git push gitcvs.git >/dev/null &&
+    cd cvswork &&
+    GIT_CONFIG="$git_config" cvs -Q update &&
+    cd .. &&
+    marked_as cvswork textfile.c "" &&
+    marked_as cvswork binfile.bin -kb &&
+    marked_as cvswork .gitattributes "" &&
+    marked_as cvswork mixedUp.c "" &&
+    marked_as cvswork/subdir withCr.bin -kb &&
+    marked_as cvswork/subdir file.h "" &&
+    marked_as cvswork/subdir unspecified.other "" &&
+    marked_as cvswork/subdir newfile.bin -kb &&
+    marked_as cvswork/subdir newfile.c "" &&
+    echo "File with embedded NUL: Q <- there" | q_to_nul > tmpExpect1 &&
+    echo "hello" >> tmpExpect1 &&
+    cmp cvswork/binfile.bin tmpExpect1
+'
+
+rm -rf cvswork
+test_expect_success 'cvs co (use attributes/guess)' '
+    GIT_DIR="$SERVERDIR" git config gitcvs.allbinary guess &&
+    GIT_CONFIG="$git_config" cvs -Q co -d cvswork master >cvs.log 2>&1 &&
+    marked_as cvswork textfile.c "" &&
+    marked_as cvswork binfile.bin -kb &&
+    marked_as cvswork .gitattributes "" &&
+    marked_as cvswork mixedUp.c "" &&
+    marked_as cvswork/subdir withCr.bin -kb &&
+    marked_as cvswork/subdir file.h "" &&
+    marked_as cvswork/subdir unspecified.other "" &&
+    marked_as cvswork/subdir newfile.bin -kb &&
+    marked_as cvswork/subdir newfile.c ""
+'
+
+test_expect_success 'setup multi-line files' '
+    ( echo "line 1" &&
+      echo "line 2" &&
+      echo "line 3" &&
+      echo "line 4 with NUL: Q <-" ) | q_to_nul > multiline.c &&
+    git add multiline.c &&
+    ( echo "line 1" &&
+      echo "line 2" &&
+      echo "line 3" &&
+      echo "line 4" ) | q_to_nul > multilineTxt.c &&
+    git add multilineTxt.c &&
+    git commit -q -m "multiline files" &&
+    git push gitcvs.git >/dev/null
+'
+
+rm -rf cvswork
+test_expect_success 'cvs co (guess)' '
+    GIT_DIR="$SERVERDIR" git config --bool gitcvs.usecrlfattr false &&
+    GIT_CONFIG="$git_config" cvs -Q co -d cvswork master >cvs.log 2>&1 &&
+    marked_as cvswork textfile.c "" &&
+    marked_as cvswork binfile.bin -kb &&
+    marked_as cvswork .gitattributes "" &&
+    marked_as cvswork mixedUp.c -kb &&
+    marked_as cvswork multiline.c -kb &&
+    marked_as cvswork multilineTxt.c "" &&
+    marked_as cvswork/subdir withCr.bin -kb &&
+    marked_as cvswork/subdir file.h "" &&
+    marked_as cvswork/subdir unspecified.other "" &&
+    marked_as cvswork/subdir newfile.bin "" &&
+    marked_as cvswork/subdir newfile.c ""
+'
+
+test_expect_success 'cvs co another copy (guess)' '
+    GIT_CONFIG="$git_config" cvs -Q co -d cvswork2 master >cvs.log 2>&1 &&
+    marked_as cvswork2 textfile.c "" &&
+    marked_as cvswork2 binfile.bin -kb &&
+    marked_as cvswork2 .gitattributes "" &&
+    marked_as cvswork2 mixedUp.c -kb &&
+    marked_as cvswork2 multiline.c -kb &&
+    marked_as cvswork2 multilineTxt.c "" &&
+    marked_as cvswork2/subdir withCr.bin -kb &&
+    marked_as cvswork2/subdir file.h "" &&
+    marked_as cvswork2/subdir unspecified.other "" &&
+    marked_as cvswork2/subdir newfile.bin "" &&
+    marked_as cvswork2/subdir newfile.c ""
+'
+
+test_expect_success 'add text (guess)' '
+    cd cvswork &&
+    echo "simpleText" > simpleText.c &&
+    GIT_CONFIG="$git_config" cvs -Q add simpleText.c &&
+    cd .. &&
+    marked_as cvswork simpleText.c ""
+'
+
+test_expect_success 'add bin (guess)' '
+    cd cvswork &&
+    echo "simpleBin: NUL: Q <- there" | q_to_nul > simpleBin.bin &&
+    GIT_CONFIG="$git_config" cvs -Q add simpleBin.bin &&
+    cd .. &&
+    marked_as cvswork simpleBin.bin -kb
+'
+
+test_expect_success 'remove files (guess)' '
+    cd cvswork &&
+    GIT_CONFIG="$git_config" cvs -Q rm -f subdir/file.h &&
+    cd subdir &&
+    GIT_CONFIG="$git_config" cvs -Q rm -f withCr.bin &&
+    cd ../.. &&
+    marked_as cvswork/subdir withCr.bin -kb &&
+    marked_as cvswork/subdir file.h ""
+'
+
+test_expect_success 'cvs ci (guess)' '
+    cd cvswork &&
+    GIT_CONFIG="$git_config" cvs -Q ci -m "add/rm files" >cvs.log 2>&1 &&
+    cd .. &&
+    marked_as cvswork textfile.c "" &&
+    marked_as cvswork binfile.bin -kb &&
+    marked_as cvswork .gitattributes "" &&
+    marked_as cvswork mixedUp.c -kb &&
+    marked_as cvswork multiline.c -kb &&
+    marked_as cvswork multilineTxt.c "" &&
+    not_present cvswork/subdir withCr.bin &&
+    not_present cvswork/subdir file.h &&
+    marked_as cvswork/subdir unspecified.other "" &&
+    marked_as cvswork/subdir newfile.bin "" &&
+    marked_as cvswork/subdir newfile.c "" &&
+    marked_as cvswork simpleBin.bin -kb &&
+    marked_as cvswork simpleText.c ""
+'
+
+test_expect_success 'update subdir of other copy (guess)' '
+    cd cvswork2/subdir &&
+    GIT_CONFIG="$git_config" cvs -Q update &&
+    cd ../.. &&
+    marked_as cvswork2 textfile.c "" &&
+    marked_as cvswork2 binfile.bin -kb &&
+    marked_as cvswork2 .gitattributes "" &&
+    marked_as cvswork2 mixedUp.c -kb &&
+    marked_as cvswork2 multiline.c -kb &&
+    marked_as cvswork2 multilineTxt.c "" &&
+    not_present cvswork2/subdir withCr.bin &&
+    not_present cvswork2/subdir file.h &&
+    marked_as cvswork2/subdir unspecified.other "" &&
+    marked_as cvswork2/subdir newfile.bin "" &&
+    marked_as cvswork2/subdir newfile.c "" &&
+    not_present cvswork2 simpleBin.bin &&
+    not_present cvswork2 simpleText.c
+'
+
+echo "starting update/merge" >> "${WORKDIR}/marked.log"
+test_expect_success 'update/merge full other copy (guess)' '
+    git pull gitcvs.git master &&
+    sed "s/3/replaced_3/" < multilineTxt.c > ml.temp &&
+    mv ml.temp multilineTxt.c &&
+    git add multilineTxt.c &&
+    git commit -q -m "modify multiline file" >> "${WORKDIR}/marked.log" &&
+    git push gitcvs.git >/dev/null &&
+    cd cvswork2 &&
+    sed "s/1/replaced_1/" < multilineTxt.c > ml.temp &&
+    mv ml.temp multilineTxt.c &&
+    GIT_CONFIG="$git_config" cvs update > cvs.log 2>&1 &&
+    cd .. &&
+    marked_as cvswork2 textfile.c "" &&
+    marked_as cvswork2 binfile.bin -kb &&
+    marked_as cvswork2 .gitattributes "" &&
+    marked_as cvswork2 mixedUp.c -kb &&
+    marked_as cvswork2 multiline.c -kb &&
+    marked_as cvswork2 multilineTxt.c "" &&
+    not_present cvswork2/subdir withCr.bin &&
+    not_present cvswork2/subdir file.h &&
+    marked_as cvswork2/subdir unspecified.other "" &&
+    marked_as cvswork2/subdir newfile.bin "" &&
+    marked_as cvswork2/subdir newfile.c "" &&
+    marked_as cvswork2 simpleBin.bin -kb &&
+    marked_as cvswork2 simpleText.c "" &&
+    echo "line replaced_1" > tmpExpect2 &&
+    echo "line 2" >> tmpExpect2 &&
+    echo "line replaced_3" >> tmpExpect2 &&
+    echo "line 4" | q_to_nul >> tmpExpect2 &&
+    cmp cvswork2/multilineTxt.c tmpExpect2
+'
+
+test_done