Commits

Junio C Hamano  committed 2f31068 Merge with conflicts

Merge branch 'jc/maint-name-rev' into pu

"git name-rev" names the given revision based on a ref that can be
reached in the smallest number of steps from the rev, but that is
not useful when the caller wants to know which tag is the oldest one
that contains the rev. This teaches a new mode to the command that
uses the oldest ref among those which contain the rev.

I am not sure if this is worth it; for one thing, even with the help
from notes-cache, it seems to make the "describe --contains" even
slower. Also the command will be unusably slow for a user who does
not have a write access (hence unable to create or update the
notes-cache).

Stalled mostly due to lack of responses.

* jc/maint-name-rev:
describe --contains: use "name-rev --algorithm=weight"
name-rev --algorithm=weight: tests and documentation
name-rev --algorithm=weight: cache the computed weight in notes
name-rev --algorithm=weight: trivial optimization
name-rev: --algorithm option
name_rev: clarify the logic to assign a new tip-name to a commit
name-rev: lose unnecessary typedef

Conflicts:
builtin/name-rev.c

  • Participants
  • Parent commits b161376, ce34cc7
  • Branches pu

Comments (0)

Files changed (4)

File Documentation/git-name-rev.txt

 SYNOPSIS
 --------
 [verse]
-'git name-rev' [--tags] [--refs=<pattern>]
+'git name-rev' [--tags] [--refs=<pattern>] [--algorithm=[default|weight]]
 	       ( --all | --stdin | <committish>... )
 
 DESCRIPTION
 -----------
-Finds symbolic names suitable for human digestion for revisions given in any
-format parsable by 'git rev-parse'.
+Finds symbolic names suitable for human digestion for revisions
+given in any format parsable by 'git rev-parse'.
 
 
 OPTIONS
 --always::
 	Show uniquely abbreviated commit object as fallback.
 
+--algorithm=default::
+	Name commits based on the ref that is reachable with the
+	smallest number of hops.  This is reasonably fast, but can
+	give surprisingly irrelevant results when you merged a side
+	branch that is based on an ancient codebase and with very
+	small number of commits on it.
+
+--algorithm=weight::
+	Name commits based on the oldest ref that contains it.  This
+	is way more expensive than the default behaviour of the
+	command, in which commits are named based on the ref that
+	is topologically the closest, but gives more intuitive
+	answer to the question: what is the oldest tag that contains
+	this commit?
+
+
 EXAMPLE
 -------
 

File builtin/describe.c

 		die(_("--long is incompatible with --abbrev=0"));
 
 	if (contains) {
-		const char **args = xmalloc((7 + argc) * sizeof(char *));
+		const char **args = xmalloc((8 + argc) * sizeof(char *));
 		int i = 0;
 		args[i++] = "name-rev";
 		args[i++] = "--name-only";
 		args[i++] = "--no-undefined";
+		args[i++] = "--algorithm=weight";
 		if (always)
 			args[i++] = "--always";
 		if (!all) {

File builtin/name-rev.c

 #include "tag.h"
 #include "refs.h"
 #include "parse-options.h"
+#include "diff.h"
+#include "revision.h"
+#include "notes-cache.h"
 
 #define CUTOFF_DATE_SLOP 86400 /* one day */
 
-typedef struct rev_name {
+struct rev_name {
 	const char *tip_name;
 	int generation;
 	int distance;
-} rev_name;
+	int weight;
+};
+
+/*
+ * Historically, "name-rev" named a rev based on the tip that is
+ * topologically closest to it.
+ *
+ * It does not give a good answer to "what is the earliest tag that
+ * contains the commit?", however, because you can build a new commit
+ * on top of an ancient commit X, merge it to the tip and tag the
+ * result, which would make X reachable from the new tag in two hops,
+ * even though it appears in the part of the history that is contained
+ * in other ancient tags.
+ *
+ * In order to answer that question, "name-rev" can be told to use
+ * NAME_WEIGHT algorithm to pick the tip with the smallest number of
+ * commits behind it.
+ */
+#define NAME_DEFAULT 0
+#define NAME_WEIGHT 1
+
+static int evaluate_algo = NAME_DEFAULT;
+
+static int parse_algorithm(const char *algo)
+{
+	if (!algo || !strcmp(algo, "default"))
+		return NAME_DEFAULT;
+	if (!strcmp(algo, "weight"))
+		return NAME_WEIGHT;
+	die("--algorithm can take 'weight' or 'default'");
+}
+
+/* To optimize revision traversal */
+static struct commit *painted_commit;
+
+static int compute_ref_weight(struct commit *commit)
+{
+	struct rev_info revs;
+	int weight = 1; /* give root the weight of 1 */
+
+	reset_revision_walk();
+	init_revisions(&revs, NULL);
+	add_pending_object(&revs, (struct object *)commit, NULL);
+	prepare_revision_walk(&revs);
+	while (get_revision(&revs))
+		weight++;
+	painted_commit = commit;
+	return weight;
+}
+
+static struct notes_cache weight_cache;
+static int weight_cache_updated;
+
+static int get_ref_weight(struct commit *commit)
+{
+	struct strbuf buf = STRBUF_INIT;
+	size_t sz;
+	int weight;
+	char *note;
+
+	note = notes_cache_get(&weight_cache, commit->object.sha1, &sz);
+	if (note && !strtol_i(note, 10, &weight)) {
+		free(note);
+		return weight;
+	}
+	free(note);
+
+	weight = compute_ref_weight(commit);
+	strbuf_addf(&buf, "%d", weight);
+	notes_cache_put(&weight_cache, commit->object.sha1,
+			buf.buf, buf.len);
+	strbuf_release(&buf);
+	weight_cache_updated = 1;
+	return weight;
+}
+
+static struct commit *ref_commit(const char *refname, size_t reflen)
+{
+	struct strbuf buf = STRBUF_INIT;
+	unsigned char sha1[20];
+	struct commit *commit;
+
+	strbuf_add(&buf, refname, reflen);
+	if (get_sha1(buf.buf, sha1))
+		die("Internal error: cannot parse tip '%s'", buf.buf);
+	strbuf_release(&buf);
+
+	commit = lookup_commit_reference_gently(sha1, 0);
+	if (!commit)
+		die("Internal error: cannot look up commit '%s'", buf.buf);
+	return commit;
+}
+
+static int ref_weight(struct commit *commit, const char *refname, size_t reflen)
+{
+	struct rev_name *name;
+
+	name = commit->util;
+	if (!name)
+		die("Internal error: a tip without name '%.*s'", (int) reflen, refname);
+	if (!name->weight)
+		name->weight = get_ref_weight(commit);
+	return name->weight;
+}
+
+static int tip_weight_cmp(const char *a, const char *b)
+{
+	size_t reflen_a, reflen_b;
+	struct commit *commit_a, *commit_b;
+	static const char traversal[] = "^~";
+
+	/*
+	 * A "tip" may look like <refname> followed by traversal
+	 * instruction (e.g. ^2~74).  We only are interested in
+	 * the weight of the ref part.
+	 */
+	reflen_a = strcspn(a, traversal);
+	reflen_b = strcspn(b, traversal);
+
+	if (reflen_a == reflen_b && !memcmp(a, b, reflen_a))
+		return 0;
+
+	commit_a = ref_commit(a, reflen_a);
+	commit_b = ref_commit(b, reflen_b);
+
+	/* Have we painted either one of these recently? */
+	if (commit_a == painted_commit &&
+	    (commit_b->object.flags & SHOWN)) {
+		/*
+		 * We know b can be reached from a, so b must be older
+		 * (lighter, as it has fewer commits behind it) than
+		 * a.
+		 */
+		return 1;
+	} else if (commit_b == painted_commit &&
+		   (commit_a->object.flags & SHOWN)) {
+		/* Likewise */
+		return -1;
+	}
+
+	return ref_weight(commit_a, a, reflen_a) - ref_weight(commit_b, b, reflen_b);
+}
 
 static long cutoff = LONG_MAX;
 
 #define MERGE_TRAVERSAL_WEIGHT 65535
 
 static void name_rev(struct commit *commit,
-		const char *tip_name, int generation, int distance,
-		int deref)
+		     const char *tip_name, int generation, int distance,
+		     int deref)
 {
 	struct rev_name *name = (struct rev_name *)commit->util;
 	struct commit_list *parents;
-	int parent_number = 1;
+	int parent_number;
+	int use_this_tip = 0;
 
 	if (!commit->object.parsed)
 		parse_commit(commit);
 			die("generation: %d, but deref?", generation);
 	}
 
-	if (name == NULL) {
-		name = xmalloc(sizeof(rev_name));
+	if (!name) {
+		name = xcalloc(1, sizeof(struct rev_name));
 		commit->util = name;
-		goto copy_data;
-	} else if (name->distance > distance) {
-copy_data:
-		name->tip_name = tip_name;
-		name->generation = generation;
-		name->distance = distance;
-	} else
+		use_this_tip = 1;
+	}
+
+	switch (evaluate_algo) {
+	default:
+		if (distance < name->distance)
+			use_this_tip = 1;
+		break;
+	case NAME_WEIGHT:
+		if (!name->tip_name)
+			use_this_tip = 1;
+		else {
+			/*
+			 * Pick a name based on the ref that is older,
+			 * i.e. having smaller number of commits
+			 * behind it.  Break the tie by picking the
+			 * path with smaller numer of steps to reach
+			 * that ref from the commit.
+			 */
+			int cmp = tip_weight_cmp(name->tip_name, tip_name);
+			if (0 < cmp)
+				use_this_tip = 1;
+			else if (!cmp && distance < name->distance)
+				use_this_tip = 1;
+		}
+		break;
+	}
+
+	if (!use_this_tip)
 		return;
 
-	for (parents = commit->parents;
-			parents;
-			parents = parents->next, parent_number++) {
+	name->tip_name = tip_name;
+	name->generation = generation;
+	name->distance = distance;
+
+	/* Propagate our name to our parents */
+	for (parents = commit->parents, parent_number = 1;
+	     parents;
+	     parents = parents->next, parent_number++) {
 		if (parent_number > 1) {
 			int len = strlen(tip_name);
 			char *new_name = xmalloc(len +
 				len -= 2;
 			if (generation > 0)
 				sprintf(new_name, "%.*s~%d^%d", len, tip_name,
-						generation, parent_number);
+					generation, parent_number);
 			else
 				sprintf(new_name, "%.*s^%d", len, tip_name,
-						parent_number);
-
+					parent_number);
 			name_rev(parents->item, new_name, 0,
-				distance + MERGE_TRAVERSAL_WEIGHT, 0);
+				 distance + MERGE_TRAVERSAL_WEIGHT, 0);
 		} else {
 			name_rev(parents->item, tip_name, generation + 1,
-				distance + 1, 0);
+				 distance + 1, 0);
 		}
 	}
 }
 		fwrite(p_start, p - p_start, 1, stdout);
 }
 
+static const char *get_validity_token(void)
+{
+	/*
+	 * In future versions, we may want to automatically invalidate
+	 * the cached weight data whenever grafts and replacement
+	 * changes.  We could do so by (1) reading the contents of the
+	 * grafts file, (2) enumerating the replacement data (original
+	 * object name and replacement object name) and sorting the
+	 * result, and (3) concatenating (1) and (2) and hashing it,
+	 * to come up with "dynamic validity: [0-9a-f]{40}" or something.
+	 *
+	 * In this verison, we simply do not bother ;-).
+	 */
+	return "static validity token";
+}
+
 int cmd_name_rev(int argc, const char **argv, const char *prefix)
 {
 	struct object_array revs = OBJECT_ARRAY_INIT;
 	int all = 0, transform_stdin = 0, allow_undefined = 1, always = 0;
 	struct name_ref_data data = { 0, 0, NULL };
+	char *eval_algo = NULL;
 	struct option opts[] = {
 		OPT_BOOLEAN(0, "name-only", &data.name_only, N_("print only names (no SHA-1)")),
 		OPT_BOOLEAN(0, "tags", &data.tags_only, N_("only use tags to name the commits")),
 		OPT_BOOLEAN(0, "undefined", &allow_undefined, N_("allow to print `undefined` names")),
 		OPT_BOOLEAN(0, "always",     &always,
 			   N_("show abbreviated commit object as fallback")),
+		OPT_STRING(0, "algorithm", &eval_algo, "algorithm",
+			   N_("algorithm to choose which tips to use to name")),
 		OPT_END(),
 	};
 
 	}
 	if (all || transform_stdin)
 		cutoff = 0;
+	evaluate_algo = parse_algorithm(eval_algo);
+
+	if (evaluate_algo == NAME_WEIGHT)
+		notes_cache_init(&weight_cache, "name-rev-weight",
+				 get_validity_token());
 
 	for (; argc; argc--, argv++) {
 		unsigned char sha1[20];
 				  always, allow_undefined, data.name_only);
 	}
 
+	if (weight_cache_updated)
+		notes_cache_write(&weight_cache);
+
 	return 0;
 }

File t/t6039-name-rev.sh

+#!/bin/sh
+
+test_description='name-rev'
+. ./test-lib.sh
+
+# Prepare a history with this shape
+#
+# ---0--1--2--3--4--Y--5---7---Z
+#     \   /               /
+#      \ /               /
+#       X---------------6
+#
+
+test_expect_success setup '
+	test_tick &&
+	git commit --allow-empty -m 0 &&
+	git branch side &&
+	git commit --allow-empty -m 1 &&
+	git checkout side &&
+	git commit --allow-empty -m X &&
+	git branch X &&
+	git commit --allow-empty -m 6 &&
+	git checkout master &&
+	git merge -m 2 X &&
+	git commit --allow-empty -m 3 &&
+	git commit --allow-empty -m 4 &&
+	git commit --allow-empty -m Y &&
+	git tag Y &&
+	git commit --allow-empty -m 5 &&
+	git merge -m 7 side &&
+	git commit --allow-empty -m Z &&
+	git tag Z
+'
+
+test_expect_success 'name-rev (plain)' '
+	# We expect "X tags/Z~1^2~1" but it could
+	# be written as "X tags/Z^^2^"; the only two
+	# important things that matter are that it
+	# is named after Z (not Y), and it correctly
+	# names X.
+	git name-rev --tags X >actual &&
+	read X T <actual &&
+	test "z$X" = zX &&
+	expr "$T" : 'tags/Z[~^]' &&
+	H1=$(git rev-parse --verify "$T") &&
+	H2=$(git rev-parse --verify X) &&
+	test "z$H1" = "z$H2"
+'
+
+test_expect_success 'name-rev --algorithm=weight' '
+	# Likewise; "X tags/Y~3^2" but we only care
+	# that it is based on Y.
+	git name-rev --algorithm=weight --tags X >actual &&
+	read X T <actual &&
+	test "z$X" = zX &&
+	expr "$T" : 'tags/Y[~^]' &&
+	H1=$(git rev-parse --verify "$T") &&
+	H2=$(git rev-parse --verify X) &&
+	test "z$H1" = "z$H2"
+'
+
+test_done