Commits

Junio C Hamano  committed 026680f Merge with conflicts

Merge branch 'jc/fix-tree-walk'

* jc/fix-tree-walk:
read-tree --debug-unpack
unpack-trees.c: look ahead in the index
unpack-trees.c: prepare for looking ahead in the index
Aggressive three-way merge: fix D/F case
traverse_trees(): handle D/F conflict case sanely
more D/F conflict tests
tests: move convenience regexp to match object names to test-lib.sh

Conflicts:
builtin-read-tree.c
unpack-trees.c
unpack-trees.h

  • Participants
  • Parent commits eca9388, ba655da

Comments (0)

Files changed (19)

File builtin-read-tree.c

 	return 0;
 }
 
+static void debug_stage(const char *label, struct cache_entry *ce,
+			struct unpack_trees_options *o)
+{
+	printf("%s ", label);
+	if (!ce)
+		printf("(missing)\n");
+	else if (ce == o->df_conflict_entry)
+		printf("(conflict)\n");
+	else
+		printf("%06o #%d %s %.8s\n",
+		       ce->ce_mode, ce_stage(ce), ce->name,
+		       sha1_to_hex(ce->sha1));
+}
+
+static int debug_merge(struct cache_entry **stages, struct unpack_trees_options *o)
+{
+	int i;
+
+	printf("* %d-way merge\n", o->merge_size);
+	debug_stage("index", stages[0], o);
+	for (i = 1; i <= o->merge_size; i++) {
+		char buf[24];
+		sprintf(buf, "ent#%d", i);
+		debug_stage(buf, stages[i], o);
+	}
+	return 0;
+}
+
 static struct lock_file lock_file;
 
 int cmd_read_tree(int argc, const char **argv, const char *unused_prefix)
 			    "don't check the working tree after merging", 1),
 		OPT_SET_INT(0, "no-sparse-checkout", &opts.skip_sparse_checkout,
 			    "skip applying sparse checkout filter", 1),
+		OPT_SET_INT(0, "debug-unpack", &opts.debug_unpack,
+			    "debug unpack-trees", 1),
 		OPT_END()
 	};
 
 			opts.head_idx = 1;
 	}
 
+	if (opts.debug_unpack)
+		opts.fn = debug_merge;
+
 	cache_tree_free(&active_cache_tree);
 	for (i = 0; i < nr_trees; i++) {
 		struct tree *tree = trees[i];
 	if (unpack_trees(nr_trees, t, &opts))
 		return 128;
 
+	if (opts.debug_unpack)
+		return 0; /* do not write the index out */
+
 	/*
 	 * When reading only one tree (either the most basic form,
 	 * "-m ent" or "--reset ent" form), we can obtain a fully
 /* Only remove in work directory, not index */
 #define CE_WT_REMOVE (0x400000)
 
+#define CE_UNPACKED  (0x1000000)
+
 /*
  * Extended on-disk flags
  */
 	show_modified(revs, tree, idx, 1, cached, match_missing);
 }
 
-static inline void skip_same_name(struct cache_entry *ce, struct unpack_trees_options *o)
-{
-	int len = ce_namelen(ce);
-	const struct index_state *index = o->src_index;
-
-	while (o->pos < index->cache_nr) {
-		struct cache_entry *next = index->cache[o->pos];
-		if (len != ce_namelen(next))
-			break;
-		if (memcmp(ce->name, next->name, len))
-			break;
-		o->pos++;
-	}
-}
-
 /*
  * The unpack_trees() interface is designed for merging, so
  * the different source entries are designed primarily for
 	struct cache_entry *tree = src[1];
 	struct rev_info *revs = o->unpack_data;
 
-	if (idx && ce_stage(idx))
-		skip_same_name(idx, o);
-
 	/*
 	 * Unpack-trees generates a DF/conflict entry if
 	 * there was a directory in the index and a tree
 		exit(128);
 
 	diff_set_mnemonic_prefix(&revs->diffopt, "c/", cached ? "i/" : "w/");
+	diffcore_fix_diff_index(&revs->diffopt);
 	diffcore_std(&revs->diffopt);
 	diff_flush(&revs->diffopt);
 	return 0;
 	*q = outq;
 }
 
+static int diffnamecmp(const void *a_, const void *b_)
+{
+	const struct diff_filepair *a = *((const struct diff_filepair **)a_);
+	const struct diff_filepair *b = *((const struct diff_filepair **)b_);
+	const char *name_a, *name_b;
+
+	name_a = a->one ? a->one->path : a->two->path;
+	name_b = b->one ? b->one->path : b->two->path;
+	return strcmp(name_a, name_b);
+}
+
+void diffcore_fix_diff_index(struct diff_options *options)
+{
+	struct diff_queue_struct *q = &diff_queued_diff;
+	qsort(q->queue, q->nr, sizeof(q->queue[0]), diffnamecmp);
+}
+
 void diffcore_std(struct diff_options *options)
 {
 	if (options->skip_stat_unmatch)
 #define DIFF_PICKAXE_REGEX	2
 
 extern void diffcore_std(struct diff_options *);
+extern void diffcore_fix_diff_index(struct diff_options *);
 
 #define COMMON_DIFF_OPTIONS_HELP \
 "\ncommon diff options:\n" \

File t/diff-lib.sh

 :
 
-_x40='[0-9a-f][0-9a-f][0-9a-f][0-9a-f][0-9a-f]'
-_x40="$_x40$_x40$_x40$_x40$_x40$_x40$_x40$_x40"
 sanitize_diff_raw='/^:/s/ '"$_x40"' '"$_x40"' \([A-Z]\)[0-9]*	/ X X \1#	/'
 compare_diff_raw () {
     # When heuristics are improved, the score numbers would change.

File t/t1000-read-tree-m-3way.sh

 100644 X 0	Z/NN
 EOF
 
-_x40='[0-9a-f][0-9a-f][0-9a-f][0-9a-f][0-9a-f]'
-_x40="$_x40$_x40$_x40$_x40$_x40$_x40$_x40$_x40"
-
 check_result () {
     git ls-files --stage | sed -e 's/ '"$_x40"' / X /' >current &&
     test_cmp expected current

File t/t1001-read-tree-m-2way.sh

     git read-tree -m "$1" "$2" && git ls-files --stage
 }
 
-_x40='[0-9a-f][0-9a-f][0-9a-f][0-9a-f][0-9a-f]'
-_x40="$_x40$_x40$_x40$_x40$_x40$_x40$_x40$_x40"
 compare_change () {
 	sed -n >current \
 	    -e '/^--- /d; /^+++ /d; /^@@ /d;' \

File t/t1002-read-tree-m-u-2way.sh

 '
 . ./test-lib.sh
 
-_x40='[0-9a-f][0-9a-f][0-9a-f][0-9a-f][0-9a-f]'
-_x40="$_x40$_x40$_x40$_x40$_x40$_x40$_x40$_x40"
 compare_change () {
 	sed >current \
 	    -e '1{/^diff --git /d;}' \

File t/t1012-read-tree-df.sh

+#!/bin/sh
+
+test_description='read-tree D/F conflict corner cases'
+
+. ./test-lib.sh
+
+maketree () {
+	(
+		rm -f .git/index .git/index.lock &&
+		git clean -d -f -f -q -x &&
+		name="$1" &&
+		shift &&
+		for it
+		do
+			path=$(expr "$it" : '\([^:]*\)') &&
+			mkdir -p $(dirname "$path") &&
+			echo "$it" >"$path" &&
+			git update-index --add "$path" || exit
+		done &&
+		git tag "$name" $(git write-tree)
+	)
+}
+
+settree () {
+	rm -f .git/index .git/index.lock &&
+	git clean -d -f -f -q -x &&
+	git read-tree "$1" &&
+	git checkout-index -f -q -u -a &&
+	git update-index --refresh
+}
+
+checkindex () {
+	git ls-files -s |
+	sed "s|^[0-7][0-7]* $_x40 \([0-3]\)	|\1 |" >current &&
+	cat >expect &&
+	test_cmp expect current
+}
+
+test_expect_success setup '
+	maketree O-000 a/b-2/c/d a/b/c/d a/x &&
+	maketree A-000 a/b-2/c/d a/b/c/d a/x &&
+	maketree A-001 a/b-2/c/d a/b/c/d a/b/c/e a/x &&
+	maketree B-000 a/b-2/c/d a/b     a/x &&
+
+	maketree O-010 t-0     t/1  t/2 t=3 &&
+	maketree A-010 t-0 t            t=3 &&
+	maketree B-010         t/1:     t=3: &&
+
+	maketree O-020 ds/dma/ioat.c ds/dma/ioat_dca.c &&
+	maketree A-020 ds/dma/ioat/Makefile ds/dma/ioat/registers.h &&
+	:
+'
+
+test_expect_success '3-way (1)' '
+	settree A-000 &&
+	git read-tree -m -u O-000 A-000 B-000 &&
+	checkindex <<-EOF
+	3 a/b
+	0 a/b-2/c/d
+	1 a/b/c/d
+	2 a/b/c/d
+	0 a/x
+	EOF
+'
+
+test_expect_success '3-way (2)' '
+	settree A-001 &&
+	git read-tree -m -u O-000 A-001 B-000 &&
+	checkindex <<-EOF
+	3 a/b
+	0 a/b-2/c/d
+	1 a/b/c/d
+	2 a/b/c/d
+	2 a/b/c/e
+	0 a/x
+	EOF
+'
+
+test_expect_success '3-way (3)' '
+	settree A-010 &&
+	git read-tree -m -u O-010 A-010 B-010 &&
+	checkindex <<-EOF
+	2 t
+	1 t-0
+	2 t-0
+	1 t/1
+	3 t/1
+	1 t/2
+	0 t=3
+	EOF
+'
+
+test_expect_success '2-way (1)' '
+	settree O-020 &&
+	git read-tree -m -u O-020 A-020 &&
+	checkindex <<-EOF
+	0 ds/dma/ioat/Makefile
+	0 ds/dma/ioat/registers.h
+	EOF
+'
+
+test_done

File t/t3100-ls-tree-restrict.sh

      tree=`git write-tree` &&
      echo $tree'
 
-_x40='[0-9a-f][0-9a-f][0-9a-f][0-9a-f][0-9a-f]'
-_x40="$_x40$_x40$_x40$_x40$_x40$_x40$_x40$_x40"
 test_output () {
     sed -e "s/ $_x40	/ X	/" <current >check
     test_cmp expected check

File t/t3101-ls-tree-dirname.sh

      tree=`git write-tree` &&
      echo $tree'
 
-_x05='[0-9a-f][0-9a-f][0-9a-f][0-9a-f][0-9a-f]'
-_x40="$_x05$_x05$_x05$_x05$_x05$_x05$_x05$_x05"
 test_output () {
     sed -e "s/ $_x40	/ X	/" <current >check
     test_cmp expected check

File t/t4006-diff-mode.sh

     'test_chmod +x rezrov &&
      git diff-index $tree >current'
 
-_x40='[0-9a-f][0-9a-f][0-9a-f][0-9a-f][0-9a-f]'
-_x40="$_x40$_x40$_x40$_x40$_x40$_x40$_x40$_x40"
 sed -e 's/\(:100644 100755\) \('"$_x40"'\) \2 /\1 X X /' <current >check
 echo ":100644 100755 X X M	rezrov" >expected
 

File t/t6012-rev-list-simplify.sh

 	git tag "$1"
 }
 
-_x40='[0-9a-f][0-9a-f][0-9a-f][0-9a-f][0-9a-f]'
-_x40="$_x40$_x40$_x40$_x40$_x40$_x40$_x40$_x40"
-
 unnote () {
 	git name-rev --tags --stdin | sed -e "s|$_x40 (tags/\([^)]*\)) |\1 |g"
 }

File t/t6035-merge-dir-to-symlink.sh

 	git tag baseline
 '
 
-test_expect_success 'do not lose a/b-2/c/d in merge (resolve)' '
+test_expect_failure 'do not lose a/b-2/c/d in merge (resolve)' '
 	git reset --hard &&
 	git checkout baseline^0 &&
 	git merge -s resolve master &&
 	git tag test2
 '
 
-test_expect_failure 'merge should not have conflicts (resolve)' '
+test_expect_success 'merge should not have conflicts (resolve)' '
 	git reset --hard &&
 	git checkout baseline^0 &&
 	git merge -s resolve test2 &&

File t/test-lib.sh

 		;;
 esac
 
+# Convenience
+#
+# A regexp to match 5 and 40 hexdigits
+_x05='[0-9a-f][0-9a-f][0-9a-f][0-9a-f][0-9a-f]'
+_x40="$_x05$_x05$_x05$_x05$_x05$_x05$_x05$_x05"
+
 # Each test should start with something like this, after copyright notices:
 #
 # test_description='Description of this test...
 	return buf;
 }
 
-static int entry_compare(struct name_entry *a, struct name_entry *b)
-{
-	return df_name_compare(
-			a->path, tree_entry_len(a->path, a->sha1), a->mode,
-			b->path, tree_entry_len(b->path, b->sha1), b->mode);
-}
-
 static void entry_clear(struct name_entry *a)
 {
 	memset(a, 0, sizeof(*a));
 	return path;
 }
 
+struct tree_desc_skip {
+	struct tree_desc_skip *prev;
+	const void *ptr;
+};
+
+struct tree_desc_x {
+	struct tree_desc d;
+	struct tree_desc_skip *skip;
+};
+
+static int name_compare(const char *a, int a_len,
+			const char *b, int b_len)
+{
+	int len = (a_len < b_len) ? a_len : b_len;
+	int cmp = memcmp(a, b, len);
+	if (cmp)
+		return cmp;
+	return (a_len - b_len);
+}
+
+static int check_entry_match(const char *a, int a_len, const char *b, int b_len)
+{
+	/*
+	 * The caller wants to pick *a* from a tree or nothing.
+	 * We are looking at *b* in a tree.
+	 *
+	 * (0) If a and b are the same name, we are trivially happy.
+	 *
+	 * There are three possibilities where *a* could be hiding
+	 * behind *b*.
+	 *
+	 * (1) *a* == "t",   *b* == "ab"  i.e. *b* sorts earlier than *a* no
+	 *                                matter what.
+	 * (2) *a* == "t",   *b* == "t-2" and "t" is a subtree in the tree;
+	 * (3) *a* == "t-2", *b* == "t"   and "t-2" is a blob in the tree.
+	 *
+	 * Otherwise we know *a* won't appear in the tree without
+	 * scanning further.
+	 */
+
+	int cmp = name_compare(a, a_len, b, b_len);
+
+	/* Most common case first -- reading sync'd trees */
+	if (!cmp)
+		return cmp;
+
+	if (0 < cmp) {
+		/* a comes after b; it does not matter if it is case (3)
+		if (b_len < a_len && !memcmp(a, b, b_len) && a[b_len] < '/')
+			return 1;
+		*/
+		return 1; /* keep looking */
+	}
+
+	/* b comes after a; are we looking at case (2)? */
+	if (a_len < b_len && !memcmp(a, b, a_len) && b[a_len] < '/')
+		return 1; /* keep looking */
+
+	return -1; /* a cannot appear in the tree */
+}
+
+/*
+ * From the extended tree_desc, extract the first name entry, while
+ * paying attention to the candidate "first" name.  Most importantly,
+ * when looking for an entry, if there are entries that sorts earlier
+ * in the tree object representation than that name, skip them and
+ * process the named entry first.  We will remember that we haven't
+ * processed the first entry yet, and in the later call skip the
+ * entry we processed early when update_extended_entry() is called.
+ *
+ * E.g. if the underlying tree object has these entries:
+ *
+ *    blob    "t-1"
+ *    blob    "t-2"
+ *    tree    "t"
+ *    blob    "t=1"
+ *
+ * and the "first" asks for "t", remember that we still need to
+ * process "t-1" and "t-2" but extract "t".  After processing the
+ * entry "t" from this call, the caller will let us know by calling
+ * update_extended_entry() that we can remember "t" has been processed
+ * already.
+ */
+
+static void extended_entry_extract(struct tree_desc_x *t,
+				   struct name_entry *a,
+				   const char *first,
+				   int first_len)
+{
+	const char *path;
+	int len;
+	struct tree_desc probe;
+	struct tree_desc_skip *skip;
+
+	/*
+	 * Extract the first entry from the tree_desc, but skip the
+	 * ones that we already returned in earlier rounds.
+	 */
+	while (1) {
+		if (!t->d.size) {
+			entry_clear(a);
+			break; /* not found */
+		}
+		entry_extract(&t->d, a);
+		for (skip = t->skip; skip; skip = skip->prev)
+			if (a->path == skip->ptr)
+				break; /* found */
+		if (!skip)
+			break;
+		/* We have processed this entry already. */
+		update_tree_entry(&t->d);
+	}
+
+	if (!first || !a->path)
+		return;
+
+	/*
+	 * The caller wants "first" from this tree, or nothing.
+	 */
+	path = a->path;
+	len = tree_entry_len(a->path, a->sha1);
+	switch (check_entry_match(first, first_len, path, len)) {
+	case -1:
+		entry_clear(a);
+	case 0:
+		return;
+	default:
+		break;
+	}
+
+	/*
+	 * We need to look-ahead -- we suspect that a subtree whose
+	 * name is "first" may be hiding behind the current entry "path".
+	 */
+	probe = t->d;
+	while (probe.size) {
+		entry_extract(&probe, a);
+		path = a->path;
+		len = tree_entry_len(a->path, a->sha1);
+		switch (check_entry_match(first, first_len, path, len)) {
+		case -1:
+			entry_clear(a);
+		case 0:
+			return;
+		default:
+			update_tree_entry(&probe);
+			break;
+		}
+		/* keep looking */
+	}
+	entry_clear(a);
+}
+
+static void update_extended_entry(struct tree_desc_x *t, struct name_entry *a)
+{
+	if (t->d.entry.path == a->path) {
+		update_tree_entry(&t->d);
+	} else {
+		/* we have returned this entry early */
+		struct tree_desc_skip *skip = xmalloc(sizeof(*skip));
+		skip->ptr = a->path;
+		skip->prev = t->skip;
+		t->skip = skip;
+	}
+}
+
+static void free_extended_entry(struct tree_desc_x *t)
+{
+	struct tree_desc_skip *p, *s;
+
+	for (s = t->skip; s; s = p) {
+		p = s->prev;
+		free(s);
+	}
+}
+
 int traverse_trees(int n, struct tree_desc *t, struct traverse_info *info)
 {
 	int ret = 0;
 	struct name_entry *entry = xmalloc(n*sizeof(*entry));
+	int i;
+	struct tree_desc_x *tx = xcalloc(n, sizeof(*tx));
+
+	for (i = 0; i < n; i++)
+		tx[i].d = t[i];
 
 	for (;;) {
-		unsigned long mask = 0;
-		unsigned long dirmask = 0;
-		int i, last;
+		unsigned long mask, dirmask;
+		const char *first = NULL;
+		int first_len = 0;
+		struct name_entry *e;
+		int len;
 
-		last = -1;
 		for (i = 0; i < n; i++) {
-			if (!t[i].size)
+			e = entry + i;
+			extended_entry_extract(tx + i, e, NULL, 0);
+		}
+
+		/*
+		 * A tree may have "t-2" at the current location even
+		 * though it may have "t" that is a subtree behind it,
+		 * and another tree may return "t".  We want to grab
+		 * all "t" from all trees to match in such a case.
+		 */
+		for (i = 0; i < n; i++) {
+			e = entry + i;
+			if (!e->path)
 				continue;
-			entry_extract(t+i, entry+i);
-			if (last >= 0) {
-				int cmp = entry_compare(entry+i, entry+last);
-
-				/*
-				 * Is the new name bigger than the old one?
-				 * Ignore it
-				 */
-				if (cmp > 0)
+			len = tree_entry_len(e->path, e->sha1);
+			if (!first) {
+				first = e->path;
+				first_len = len;
+				continue;
+			}
+			if (name_compare(e->path, len, first, first_len) < 0) {
+				first = e->path;
+				first_len = len;
+			}
+		}
+
+		if (first) {
+			for (i = 0; i < n; i++) {
+				e = entry + i;
+				extended_entry_extract(tx + i, e, first, first_len);
+				/* Cull the ones that are not the earliest */
+				if (!e->path)
 					continue;
-				/*
-				 * Is the new name smaller than the old one?
-				 * Ignore all old ones
-				 */
-				if (cmp < 0)
-					mask = 0;
+				len = tree_entry_len(e->path, e->sha1);
+				if (name_compare(e->path, len, first, first_len))
+					entry_clear(e);
 			}
+		}
+
+		/* Now we have in entry[i] the earliest name from the trees */
+		mask = 0;
+		dirmask = 0;
+		for (i = 0; i < n; i++) {
+			if (!entry[i].path)
+				continue;
 			mask |= 1ul << i;
 			if (S_ISDIR(entry[i].mode))
 				dirmask |= 1ul << i;
-			last = i;
 		}
 		if (!mask)
 			break;
-		dirmask &= mask;
-
-		/*
-		 * Clear all the unused name-entries.
-		 */
-		for (i = 0; i < n; i++) {
-			if (mask & (1ul << i))
-				continue;
-			entry_clear(entry + i);
-		}
 		ret = info->fn(n, mask, dirmask, entry, info);
 		if (ret < 0)
 			break;
-		if (ret)
-			mask &= ret;
+		mask &= ret;
 		ret = 0;
-		for (i = 0; i < n; i++) {
+		for (i = 0; i < n; i++)
 			if (mask & (1ul << i))
-				update_tree_entry(t + i);
-		}
+				update_extended_entry(tx + i, entry + i);
 	}
 	free(entry);
+	for (i = 0; i < n; i++)
+		free_extended_entry(tx + i);
+	free(tx);
 	return ret;
 }
 

File unpack-trees.c

 	return ret;
 }
 
-static int unpack_index_entry(struct cache_entry *ce, struct unpack_trees_options *o)
+static void mark_ce_used(struct cache_entry *ce, struct unpack_trees_options *o)
+{
+	ce->ce_flags |= CE_UNPACKED;
+
+	if (o->cache_bottom < o->src_index->cache_nr &&
+	    o->src_index->cache[o->cache_bottom] == ce) {
+		int bottom = o->cache_bottom;
+		while (bottom < o->src_index->cache_nr &&
+		       o->src_index->cache[bottom]->ce_flags & CE_UNPACKED)
+			bottom++;
+		o->cache_bottom = bottom;
+	}
+}
+
+static void mark_all_ce_unused(struct index_state *index)
+{
+	int i;
+	for (i = 0; i < index->cache_nr; i++)
+		index->cache[i]->ce_flags &= ~CE_UNPACKED;
+}
+
+static int locate_in_src_index(struct cache_entry *ce,
+			       struct unpack_trees_options *o)
+{
+	struct index_state *index = o->src_index;
+	int len = ce_namelen(ce);
+	int pos = index_name_pos(index, ce->name, len);
+	if (pos < 0)
+		pos = -1 - pos;
+	return pos;
+}
+
+/*
+ * We call unpack_index_entry() with an unmerged cache entry
+ * only in diff-index, and it wants a single callback.  Skip
+ * the other unmerged entry with the same name.
+ */
+static void mark_ce_used_same_name(struct cache_entry *ce,
+				   struct unpack_trees_options *o)
+{
+	struct index_state *index = o->src_index;
+	int len = ce_namelen(ce);
+	int pos;
+
+	for (pos = locate_in_src_index(ce, o); pos < index->cache_nr; pos++) {
+		struct cache_entry *next = index->cache[pos];
+		if (len != ce_namelen(next) ||
+		    memcmp(ce->name, next->name, len))
+			break;
+		mark_ce_used(next, o);
+	}
+}
+
+static struct cache_entry *next_cache_entry(struct unpack_trees_options *o)
+{
+	const struct index_state *index = o->src_index;
+	int pos = o->cache_bottom;
+
+	while (pos < index->cache_nr) {
+		struct cache_entry *ce = index->cache[pos];
+		if (!(ce->ce_flags & CE_UNPACKED))
+			return ce;
+		pos++;
+	}
+	return NULL;
+}
+
+static void add_same_unmerged(struct cache_entry *ce,
+			      struct unpack_trees_options *o)
+{
+	struct index_state *index = o->src_index;
+	int len = ce_namelen(ce);
+	int pos = index_name_pos(index, ce->name, len);
+
+	if (0 <= pos)
+		die("programming error in a caller of mark_ce_used_same_name");
+	for (pos = -pos - 1; pos < index->cache_nr; pos++) {
+		struct cache_entry *next = index->cache[pos];
+		if (len != ce_namelen(next) ||
+		    memcmp(ce->name, next->name, len))
+			break;
+		add_entry(o, next, 0, 0);
+		mark_ce_used(next, o);
+	}
+}
+
+static int unpack_index_entry(struct cache_entry *ce,
+			      struct unpack_trees_options *o)
 {
 	struct cache_entry *src[5] = { ce, NULL, };
+	int ret;
 
-	o->pos++;
+	mark_ce_used(ce, o);
 	if (ce_stage(ce)) {
 		if (o->skip_unmerged) {
 			add_entry(o, ce, 0, 0);
 			return 0;
 		}
 	}
-	return call_unpack_fn(src, o);
+	ret = call_unpack_fn(src, o);
+	if (ce_stage(ce))
+		mark_ce_used_same_name(ce, o);
+	return ret;
+}
+
+static int find_cache_pos(struct traverse_info *, const struct name_entry *);
+
+static void restore_cache_bottom(struct traverse_info *info, int bottom)
+{
+	struct unpack_trees_options *o = info->data;
+
+	if (o->diff_index_cached)
+		return;
+	o->cache_bottom = bottom;
+}
+
+static int switch_cache_bottom(struct traverse_info *info)
+{
+	struct unpack_trees_options *o = info->data;
+	int ret, pos;
+
+	if (o->diff_index_cached)
+		return 0;
+	ret = o->cache_bottom;
+	pos = find_cache_pos(info->prev, &info->name);
+
+	if (pos < -1)
+		o->cache_bottom = -2 - pos;
+	else if (pos < 0)
+		o->cache_bottom = o->src_index->cache_nr;
+	return ret;
 }
 
 static int traverse_trees_recursive(int n, unsigned long dirmask, unsigned long df_conflicts, struct name_entry *names, struct traverse_info *info)
 {
-	int i;
+	int i, ret, bottom;
 	struct tree_desc t[MAX_UNPACK_TREES];
 	struct traverse_info newinfo;
 	struct name_entry *p;
 			sha1 = names[i].sha1;
 		fill_tree_descriptor(t+i, sha1);
 	}
-	return traverse_trees(n, t, &newinfo);
+
+	bottom = switch_cache_bottom(&newinfo);
+	ret = traverse_trees(n, t, &newinfo);
+	restore_cache_bottom(&newinfo, bottom);
+	return ret;
 }
 
 /*
 	return ce_namelen(ce) > traverse_path_len(info, n);
 }
 
+static int ce_in_traverse_path(const struct cache_entry *ce,
+			       const struct traverse_info *info)
+{
+	if (!info->prev)
+		return 1;
+	if (do_compare_entry(ce, info->prev, &info->name))
+		return 0;
+	/*
+	 * If ce (blob) is the same name as the path (which is a tree
+	 * we will be descending into), it won't be inside it.
+	 */
+	return (info->pathlen < ce_namelen(ce));
+}
+
 static struct cache_entry *create_ce_entry(const struct traverse_info *info, const struct name_entry *n, int stage)
 {
 	int len = traverse_path_len(info, n);
 	return -1;
 }
 
+/* NEEDSWORK: give this a better name and share with tree-walk.c */
+static int name_compare(const char *a, int a_len,
+			const char *b, int b_len)
+{
+	int len = (a_len < b_len) ? a_len : b_len;
+	int cmp = memcmp(a, b, len);
+	if (cmp)
+		return cmp;
+	return (a_len - b_len);
+}
+
+/*
+ * The tree traversal is looking at name p.  If we have a matching entry,
+ * return it.  If name p is a directory in the index, do not return
+ * anything, as we will want to match it when the traversal descends into
+ * the directory.
+ */
+static int find_cache_pos(struct traverse_info *info,
+			  const struct name_entry *p)
+{
+	int pos;
+	struct unpack_trees_options *o = info->data;
+	struct index_state *index = o->src_index;
+	int pfxlen = info->pathlen;
+	int p_len = tree_entry_len(p->path, p->sha1);
+
+	for (pos = o->cache_bottom; pos < index->cache_nr; pos++) {
+		struct cache_entry *ce = index->cache[pos];
+		const char *ce_name, *ce_slash;
+		int cmp, ce_len;
+
+		if (!ce_in_traverse_path(ce, info))
+			continue;
+		if (ce->ce_flags & CE_UNPACKED)
+			continue;
+		ce_name = ce->name + pfxlen;
+		ce_slash = strchr(ce_name, '/');
+		if (ce_slash)
+			ce_len = ce_slash - ce_name;
+		else
+			ce_len = ce_namelen(ce) - pfxlen;
+		cmp = name_compare(p->path, p_len, ce_name, ce_len);
+		/*
+		 * Exact match; if we have a directory we need to
+		 * delay returning it.
+		 */
+		if (!cmp)
+			return ce_slash ? -2 - pos : pos;
+		if (0 < cmp)
+			continue; /* keep looking */
+		/*
+		 * ce_name sorts after p->path; could it be that we
+		 * have files under p->path directory in the index?
+		 * E.g.  ce_name == "t-i", and p->path == "t"; we may
+		 * have "t/a" in the index.
+		 */
+		if (p_len < ce_len && !memcmp(ce_name, p->path, p_len) &&
+		    ce_name[p_len] < '/')
+			continue; /* keep looking */
+		break;
+	}
+	return -1;
+}
+
+static struct cache_entry *find_cache_entry(struct traverse_info *info,
+					    const struct name_entry *p)
+{
+	int pos = find_cache_pos(info, p);
+	struct unpack_trees_options *o = info->data;
+
+	if (0 <= pos)
+		return o->src_index->cache[pos];
+	else
+		return NULL;
+}
+
+static void debug_path(struct traverse_info *info)
+{
+	if (info->prev) {
+		debug_path(info->prev);
+		if (*info->prev->name.path)
+			putchar('/');
+	}
+	printf("%s", info->name.path);
+}
+
+static void debug_name_entry(int i, struct name_entry *n)
+{
+	printf("ent#%d %06o %s\n", i,
+	       n->path ? n->mode : 0,
+	       n->path ? n->path : "(missing)");
+}
+
+static void debug_unpack_callback(int n,
+				  unsigned long mask,
+				  unsigned long dirmask,
+				  struct name_entry *names,
+				  struct traverse_info *info)
+{
+	int i;
+	printf("* unpack mask %lu, dirmask %lu, cnt %d ",
+	       mask, dirmask, n);
+	debug_path(info);
+	putchar('\n');
+	for (i = 0; i < n; i++)
+		debug_name_entry(i, names + i);
+}
+
 static int unpack_callback(int n, unsigned long mask, unsigned long dirmask, struct name_entry *names, struct traverse_info *info)
 {
 	struct cache_entry *src[MAX_UNPACK_TREES + 1] = { NULL, };
 	while (!p->mode)
 		p++;
 
+	if (o->debug_unpack)
+		debug_unpack_callback(n, mask, dirmask, names, info);
+
 	/* Are we supposed to look at the index too? */
 	if (o->merge) {
-		while (o->pos < o->src_index->cache_nr) {
-			struct cache_entry *ce = o->src_index->cache[o->pos];
-			int cmp = compare_entry(ce, info, p);
+		while (1) {
+			int cmp;
+			struct cache_entry *ce;
+
+			if (o->diff_index_cached)
+				ce = next_cache_entry(o);
+			else
+				ce = find_cache_entry(info, p);
+
+			if (!ce)
+				break;
+			cmp = compare_entry(ce, info, p);
 			if (cmp < 0) {
 				if (unpack_index_entry(ce, o) < 0)
 					return unpack_failed(o, NULL);
 				continue;
 			}
 			if (!cmp) {
-				o->pos++;
 				if (ce_stage(ce)) {
 					/*
-					 * If we skip unmerged index entries, we'll skip this
-					 * entry *and* the tree entries associated with it!
+					 * If we skip unmerged index
+					 * entries, we'll skip this
+					 * entry *and* the tree
+					 * entries associated with it!
 					 */
 					if (o->skip_unmerged) {
-						add_entry(o, ce, 0, 0);
+						add_same_unmerged(ce, o);
 						return mask;
 					}
 				}
 	if (unpack_nondirectories(n, mask, dirmask, src, names, info) < 0)
 		return -1;
 
+	if (src[0]) {
+		if (ce_stage(src[0]))
+			mark_ce_used_same_name(src[0], o);
+		else
+			mark_ce_used(src[0], o);
+	}
+
 	/* Now handle any directories.. */
 	if (dirmask) {
 		unsigned long conflicts = mask & ~dirmask;
 			matches = cache_tree_matches_traversal(o->src_index->cache_tree,
 							       names, info);
 			/*
-			 * Everything under the name matches.  Adjust o->pos to
-			 * skip the entire hierarchy.
+			 * Everything under the name matches; skip the
+			 * entire hierarchy.  diff_index_cached codepath
+			 * special cases D/F conflicts in such a way that
+			 * it does not do any look-ahead, so this is safe.
 			 */
 			if (matches) {
-				o->pos += matches;
+				o->cache_bottom += matches;
 				return mask;
 			}
 		}
 
 	memset(&o->result, 0, sizeof(o->result));
 	o->result.initialized = 1;
-	if (o->src_index) {
-		o->result.timestamp.sec = o->src_index->timestamp.sec;
-		o->result.timestamp.nsec = o->src_index->timestamp.nsec;
-	}
+	o->result.timestamp.sec = o->src_index->timestamp.sec;
+	o->result.timestamp.nsec = o->src_index->timestamp.nsec;
 	o->merge_size = len;
+	mark_all_ce_unused(o->src_index);
 
 	if (!dfc)
 		dfc = xcalloc(1, cache_entry_size(0));
 		info.fn = unpack_callback;
 		info.data = o;
 
-		if (traverse_trees(len, t, &info) < 0) {
-			ret = unpack_failed(o, NULL);
-			goto done;
+		if (o->prefix) {
+			/*
+			 * Unpack existing index entries that sort before the
+			 * prefix the tree is spliced into.  Note that o->merge
+			 * is always true in this case.
+			 */
+			while (1) {
+				struct cache_entry *ce = next_cache_entry(o);
+				if (!ce)
+					break;
+				if (ce_in_traverse_path(ce, &info))
+					break;
+				if (unpack_index_entry(ce, o) < 0)
+					goto return_failed;
+			}
 		}
+
+		if (traverse_trees(len, t, &info) < 0)
+			goto return_failed;
 	}
 
 	/* Any left-over entries in the index? */
 	if (o->merge) {
-		while (o->pos < o->src_index->cache_nr) {
-			struct cache_entry *ce = o->src_index->cache[o->pos];
-			if (unpack_index_entry(ce, o) < 0) {
-				ret = unpack_failed(o, NULL);
-				goto done;
-			}
+		while (1) {
+			struct cache_entry *ce = next_cache_entry(o);
+			if (!ce)
+				break;
+			if (unpack_index_entry(ce, o) < 0)
+				goto return_failed;
 		}
 	}
+	mark_all_ce_unused(o->src_index);
 
 	if (o->trivial_merges_only && o->nontrivial_merge) {
 		ret = unpack_failed(o, "Merge requires file-level merging");
 		free(el.excludes);
 
 	return ret;
+
+return_failed:
+	mark_all_ce_unused(o->src_index);
+	ret = unpack_failed(o, NULL);
+	goto done;
 }
 
 /* Here come the merge functions */
 	 * in that directory.
 	 */
 	namelen = strlen(ce->name);
-	for (i = o->pos; i < o->src_index->cache_nr; i++) {
+	for (i = locate_in_src_index(ce, o);
+	     i < o->src_index->cache_nr;
+	     i++) {
 		struct cache_entry *ce2 = o->src_index->cache[i];
 		int len = ce_namelen(ce2);
 		if (len < namelen ||
 		    ce2->name[namelen] != '/')
 			break;
 		/*
-		 * ce2->name is an entry in the subdirectory.
+		 * ce2->name is an entry in the subdirectory to be
+		 * removed.
 		 */
 		if (!ce_stage(ce2)) {
 			if (verify_uptodate(ce2, o))
 				return -1;
 			add_entry(o, ce2, CE_REMOVE, 0);
+			mark_ce_used(ce2, o);
 		}
 		cnt++;
 	}
 		return 0;
 
 	if (!lstat(ce->name, &st)) {
-		int ret;
 		int dtype = ce_to_dtype(ce);
 		struct cache_entry *result;
 
 			 * files that are in "foo/" we would lose
 			 * them.
 			 */
-			ret = verify_clean_subdirectory(ce, action, o);
-			if (ret < 0)
-				return ret;
-
-			/*
-			 * If this removed entries from the index,
-			 * what that means is:
-			 *
-			 * (1) the caller unpack_callback() saw path/foo
-			 * in the index, and it has not removed it because
-			 * it thinks it is handling 'path' as blob with
-			 * D/F conflict;
-			 * (2) we will return "ok, we placed a merged entry
-			 * in the index" which would cause o->pos to be
-			 * incremented by one;
-			 * (3) however, original o->pos now has 'path/foo'
-			 * marked with "to be removed".
-			 *
-			 * We need to increment it by the number of
-			 * deleted entries here.
-			 */
-			o->pos += ret;
+			if (verify_clean_subdirectory(ce, action, o) < 0)
+				return -1;
 			return 0;
 		}
 
 		remote = NULL;
 	}
 
-	/* First, if there's a #16 situation, note that to prevent #13
+	/*
+	 * First, if there's a #16 situation, note that to prevent #13
 	 * and #14.
 	 */
 	if (!same(remote, head)) {
 		}
 	}
 
-	/* We start with cases where the index is allowed to match
+	/*
+	 * We start with cases where the index is allowed to match
 	 * something other than the head: #14(ALT) and #2ALT, where it
 	 * is permitted to match the result instead.
 	 */
 	if (!head && !remote && any_anc_missing)
 		return 0;
 
-	/* Under the new "aggressive" rule, we resolve mostly trivial
+	/*
+	 * Under the "aggressive" rule, we resolve mostly trivial
 	 * cases that we historically had git-merge-one-file resolve.
 	 */
 	if (o->aggressive) {
-		int head_deleted = !head && !df_conflict_head;
-		int remote_deleted = !remote && !df_conflict_remote;
+		int head_deleted = !head;
+		int remote_deleted = !remote;
 		struct cache_entry *ce = NULL;
 
 		if (index)

File unpack-trees.h

 		     skip_unmerged,
 		     initial_checkout,
 		     diff_index_cached,
+		     debug_unpack,
 		     skip_sparse_checkout,
 		     gently;
 	const char *prefix;
-	int pos;
+	int cache_bottom;
 	struct dir_struct *dir;
 	merge_fn_t fn;
 	struct unpack_trees_error_msgs msgs;