Commits

Anonymous committed 331eb92 Merge

Merge branch 'mm/mediawiki-usability'

* mm/mediawiki-usability:
git-remote-mediawiki: allow page names with a ':'
git-remote-mediawiki: fix incorrect test usage in test
git-remote-mediawiki: properly deal with invalid remote revisions
git-remote-mediawiki: show progress information when getting last remote revision
git-remote-mediawiki: show progress information when listing pages
git-remote-mediawiki: use --force when adding notes
git-remote-mediawiki: get rid of O(N^2) loop
git-remote-mediawiki: make mediafiles export optional
git-remote-mediawiki: actually send empty comment when they're empty
git-remote-mediawiki: don't split namespaces with spaces

Comments (0)

Files changed (3)

contrib/mw-to-git/git-remote-mediawiki

 # used to reflect file creation or deletion in diff.
 use constant NULL_SHA1 => "0000000000000000000000000000000000000000";
 
+# Used on Git's side to reflect empty edit messages on the wiki
+use constant EMPTY_MESSAGE => '*Empty MediaWiki Message*';
+
 my $remotename = $ARGV[0];
 my $url = $ARGV[1];
 
 my @tracked_categories = split(/[ \n]/, run_git("config --get-all remote.". $remotename .".categories"));
 chomp(@tracked_categories);
 
-# Import media files too.
+# Import media files on pull
 my $import_media = run_git("config --get --bool remote.". $remotename .".mediaimport");
 chomp($import_media);
 $import_media = ($import_media eq "true");
 
+# Export media files on push
+my $export_media = run_git("config --get --bool remote.". $remotename .".mediaexport");
+chomp($export_media);
+$export_media = !($export_media eq "false");
+
 my $wiki_login = run_git("config --get remote.". $remotename .".mwLogin");
 # Note: mwPassword is discouraged. Use the credential system instead.
 my $wiki_passwd = run_git("config --get remote.". $remotename .".mwPassword");
 sub get_mw_pages {
 	mw_connect_maybe();
 
+	print STDERR "Listing pages on remote wiki...\n";
+
 	my %pages; # hash on page titles to avoid duplicates
 	my $user_defined;
 	if (@tracked_pages) {
 			get_all_mediafiles(\%pages);
 		}
 	}
+	print STDERR (scalar keys %pages) . " pages found.\n";
 	return %pages;
 }
 
 
 	my $max_rev_num = 0;
 
+	print STDERR "Getting last revision id on tracked pages...\n";
+
 	foreach my $page (@pages) {
 		my $id = $page->{pageid};
 
 	my $last_timestamp = 0; # Placeholder in case $rev->timestamp is undefined
 
 	foreach my $pagerevid (@$revision_ids) {
+	        # Count page even if we skip it, since we display
+		# $n/$total and $total includes skipped pages.
+		$n++;
+
 		# fetch the content of the pages
 		my $query = {
 			action => 'query',
 			die "Failed to retrieve modified page for revision $pagerevid";
 		}
 
+		if (defined($result->{query}->{badrevids}->{$pagerevid})) {
+			# The revision id does not exist on the remote wiki.
+			next;
+		}
+
 		if (!defined($result->{query}->{pages})) {
 			die "Invalid revision $pagerevid.";
 		}
 		my $result_page = $result_pages[0];
 		my $rev = $result_pages[0]->{revisions}->[0];
 
-	        # Count page even if we skip it, since we display
-		# $n/$total and $total includes skipped pages.
-		$n++;
-
 		my $page_title = $result_page->{title};
 
 		if (!exists($pages->{$page_title})) {
 
 		my %commit;
 		$commit{author} = $rev->{user} || 'Anonymous';
-		$commit{comment} = $rev->{comment} || '*Empty MediaWiki Message*';
+		$commit{comment} = $rev->{comment} || EMPTY_MESSAGE;
 		$commit{title} = mediawiki_smudge_filename($page_title);
 		$commit{mw_revision} = $rev->{revid};
 		$commit{content} = mediawiki_smudge($rev->{'*'});
 		# Differentiates classic pages and media files.
 		my ($namespace, $filename) = $page_title =~ /^([^:]*):(.*)$/;
 		my %mediafile;
-		if ($namespace && get_mw_namespace_id($namespace) == get_mw_namespace_id("File")) {
-			%mediafile = get_mw_mediafile_for_page_revision($filename, $rev->{timestamp});
+		if ($namespace) {
+			my $id = get_mw_namespace_id($namespace);
+			if ($id && $id == get_mw_namespace_id("File")) {
+				%mediafile = get_mw_mediafile_for_page_revision($filename, $rev->{timestamp});
+			}
 		}
 		# If this is a revision of the media page for new version
 		# of a file do one common commit for both file and media page.
 	my $oldrevid = shift;
 	my $newrevid;
 
+	if ($summary eq EMPTY_MESSAGE) {
+		$summary = '';
+	}
+
 	my $new_sha1 = $diff_info_split[3];
 	my $old_sha1 = $diff_info_split[2];
 	my $page_created = ($old_sha1 eq NULL_SHA1);
 		$extension = "";
 	}
 	if ($extension eq "mw") {
+		my $ns = get_mw_namespace_id_for_page($complete_file_name);
+		if ($ns && $ns == get_mw_namespace_id("File") && (!$export_media)) {
+			print STDERR "Ignoring media file related page: $complete_file_name\n";
+			return ($oldrevid, "ok");
+		}
 		my $file_content;
 		if ($page_deleted) {
 			# Deleting a page usually requires
 		}
 		$newrevid = $result->{edit}->{newrevid};
 		print STDERR "Pushed file: $new_sha1 - $title\n";
-	} else {
+	} elsif ($export_media) {
 		$newrevid = mw_upload_file($complete_file_name, $new_sha1,
 					   $extension, $page_deleted,
 					   $summary);
+	} else {
+		print STDERR "Ignoring media file $title\n";
 	}
 	$newrevid = ($newrevid or $oldrevid);
 	return ($newrevid, "ok");
 	if ($last_local_revid > 0) {
 		my $parsed_sha1 = $remoteorigin_sha1;
 		# Find a path from last MediaWiki commit to pushed commit
+		print STDERR "Computing path from local to remote ...\n";
+		my @local_ancestry = split(/\n/, run_git("rev-list --boundary --parents $local ^$parsed_sha1"));
+		my %local_ancestry;
+		foreach my $line (@local_ancestry) {
+			if (my ($child, $parents) = $line =~ m/^-?([a-f0-9]+) ([a-f0-9 ]+)/) {
+				foreach my $parent (split(' ', $parents)) {
+					$local_ancestry{$parent} = $child;
+				}
+			} elsif (!$line =~ m/^([a-f0-9]+)/) {
+				die "Unexpected output from git rev-list: $line";
+			}
+		}
 		while ($parsed_sha1 ne $HEAD_sha1) {
-			my @commit_info =  grep(/^$parsed_sha1/, split(/\n/, run_git("rev-list --children $local")));
-			if (!@commit_info) {
+			my $child = $local_ancestry{$parsed_sha1};
+			if (!$child) {
+				printf STDERR "Cannot find a path in history from remote commit to last commit\n";
 				return error_non_fast_forward($remote);
 			}
-			my @commit_info_split = split(/ |\n/, $commit_info[0]);
-			# $commit_info_split[1] is the sha1 of the commit to export
-			# $commit_info_split[0] is the sha1 of its direct child
-			push(@commit_pairs, \@commit_info_split);
-			$parsed_sha1 = $commit_info_split[1];
+			push(@commit_pairs, [$parsed_sha1, $child]);
+			$parsed_sha1 = $child;
 		}
 	} else {
 		# No remote mediawiki revision. Export the whole
 			}
 		}
 		unless ($dumb_push) {
-			run_git("notes --ref=$remotename/mediawiki add -m \"mediawiki_revision: $mw_revision\" $sha1_commit");
+			run_git("notes --ref=$remotename/mediawiki add -f -m \"mediawiki_revision: $mw_revision\" $sha1_commit");
 			run_git("update-ref -m \"Git-MediaWiki push\" refs/mediawiki/$remotename/master $sha1_commit $sha1_child");
 		}
 	}
 		# Look at configuration file, if the record for that namespace is
 		# already cached. Namespaces are stored in form:
 		# "Name_of_namespace:Id_namespace", ex.: "File:6".
-		my @temp = split(/[ \n]/, run_git("config --get-all remote."
+		my @temp = split(/[\n]/, run_git("config --get-all remote."
 						. $remotename .".namespaceCache"));
 		chomp(@temp);
 		foreach my $ns (@temp) {
 			my ($n, $id) = split(/:/, $ns);
-			$namespace_id{$n} = $id;
+			if ($id eq 'notANameSpace') {
+				$namespace_id{$n} = {is_namespace => 0};
+			} else {
+				$namespace_id{$n} = {is_namespace => 1, id => $id};
+			}
 			$cached_mw_namespace_id{$n} = 1;
 		}
 	}
 
 	        while (my ($id, $ns) = each(%{$result->{query}->{namespaces}})) {
 	                if (defined($ns->{id}) && defined($ns->{canonical})) {
-				$namespace_id{$ns->{canonical}} = $ns->{id};
+				$namespace_id{$ns->{canonical}} = {is_namespace => 1, id => $ns->{id}};
 				if ($ns->{'*'}) {
 					# alias (e.g. french Fichier: as alias for canonical File:)
-					$namespace_id{$ns->{'*'}} = $ns->{id};
+					$namespace_id{$ns->{'*'}} = {is_namespace => 1, id => $ns->{id}};
 				}
 			}
 	        }
 	}
 
-	my $id = $namespace_id{$name};
+	my $ns = $namespace_id{$name};
+	my $id;
 
-	if (defined $id) {
-		# Store explicitely requested namespaces on disk
-		if (!exists $cached_mw_namespace_id{$name}) {
-			run_git("config --add remote.". $remotename
-				.".namespaceCache \"". $name .":". $id ."\"");
-			$cached_mw_namespace_id{$name} = 1;
-		}
-		return $id;
+	unless (defined $ns) {
+		print STDERR "No such namespace $name on MediaWiki.\n";
+		$ns = {is_namespace => 0};
+		$namespace_id{$name} = $ns;
+	}
+
+	if ($ns->{is_namespace}) {
+		$id = $ns->{id};
+	}
+
+	# Store "notANameSpace" as special value for nonexistent namespaces
+	my $store_id = ($id || 'notANameSpace');
+
+	# Store explicitly requested namespaces on disk
+	if (!exists $cached_mw_namespace_id{$name}) {
+		run_git("config --add remote.". $remotename
+			.".namespaceCache \"". $name .":". $store_id ."\"");
+		$cached_mw_namespace_id{$name} = 1;
+	}
+	return $id;
+}
+
+sub get_mw_namespace_id_for_page {
+	if (my ($namespace) = $_[0] =~ /^([^:]*):/) {
+		return get_mw_namespace_id($namespace);
 	} else {
-		die "No such namespace $name on MediaWiki.";
+		return;
 	}
 }

contrib/mw-to-git/t/push-pull-tests.sh

 			git push
 		) &&
 
-		test ! wiki_page_exist Foo
+		test_must_fail wiki_page_exist Foo
 	'
 
 	test_expect_success 'Merge conflict expected and solving it' '

contrib/mw-to-git/t/t9362-mw-to-git-utf8.sh

 	test_path_is_file mw_dir_11/[char_2
 '
 
+test_expect_success 'Pull page with title containing ":" other than namespace separator' '
+	wiki_editpage Foo:Bar content false &&
+	(
+		cd mw_dir_11 &&
+		git pull
+	) &&
+	test_path_is_file mw_dir_11/Foo:Bar.mw
+'
+
+test_expect_success 'Push page with title containing ":" other than namespace separator' '
+	(
+		cd mw_dir_11 &&
+		echo content >NotANameSpace:Page.mw &&
+		git add NotANameSpace:Page.mw &&
+		git commit -m "add page with colon" &&
+		git push
+	) &&
+	wiki_page_exist NotANameSpace:Page
+'
+
 test_expect_success 'test of correct formating for file name from mw to git' '
 	wiki_reset &&
 	git clone mediawiki::'"$WIKI_URL"' mw_dir_12 &&