Commits

Junio C Hamano  committed 85d106c

Improve the safety check used in fetch.c

The recent safety check to trust only the commits we have made
things impossibly slow and turn out to waste a lot of memory.

This commit fixes it with the following improvements:

- mark already scanned objects and avoid rescanning the same
object again;

- free the tree entries when we have scanned the tree entries;
this is the same as b0d8923ec01fd91b75ab079034f89ced91500157
which reduced memory usage by rev-list;

- plug memory leak from the object_list dequeuing code;

- use the process_queue not just for fetching but for scanning,
to make things tail recursive to avoid deep recursion; the
deep recursion was especially prominent when we cloned a big
pack.

- avoid has_sha1_file() call when we already know we do not have
that object.

Signed-off-by: Junio C Hamano <junkio@cox.net>

  • Participants
  • Parent commits 490e23d

Comments (0)

Files changed (1)

 		what, missing_hex, sha1_to_hex(current_commit_sha1));
 }
 
-static int make_sure_we_have_it(const char *what, unsigned char *sha1)
-{
-	int status = 0;
-
-	if (!has_sha1_file(sha1)) {
-		status = fetch(sha1);
-		if (status && what)
-			report_missing(what, sha1);
-	}
-	return status;
-}
-
 static int process(unsigned char *sha1, const char *type);
 
 static int process_tree(struct tree *tree)
 {
-	struct tree_entry_list *entries;
+	struct tree_entry_list *entry;
 
 	if (parse_tree(tree))
 		return -1;
 
-	for (entries = tree->entries; entries; entries = entries->next) {
-		if (process(entries->item.any->sha1,
-			    entries->directory ? tree_type : blob_type))
+	entry = tree->entries;
+	tree->entries = NULL;
+	while (entry) {
+		struct tree_entry_list *next = entry->next;
+		if (process(entry->item.any->sha1,
+			    entry->directory ? tree_type : blob_type))
 			return -1;
+		free(entry);
+		entry = next;
 	}
 	return 0;
 }
 
 #define COMPLETE	1U
+#define TO_FETCH	2U
+#define TO_SCAN		4U
+#define SCANNED		8U
+
 static struct commit_list *complete = NULL;
 
 static int process_commit(struct commit *commit)
 	while (complete && complete->item->date >= commit->date) {
 		pop_most_recent_commit(&complete, COMPLETE);
 	}
-		
 
 	if (commit->object.flags & COMPLETE)
 		return 0;
 
 	memcpy(current_commit_sha1, commit->object.sha1, 20);
 
+	pull_say("walk %s\n", sha1_to_hex(commit->object.sha1));
+
 	if (get_tree) {
 		if (process(commit->tree->object.sha1, tree_type))
 			return -1;
 	if (get_history) {
 		struct commit_list *parents = commit->parents;
 		for (; parents; parents = parents->next) {
-			if (process(parents->item->object.sha1,
-				    commit_type))
+			if (process(parents->item->object.sha1, commit_type))
 				return -1;
 		}
 	}
 
 static int process_object(struct object *obj)
 {
+	if (obj->flags & SCANNED)
+		return 0;
+	obj->flags |= SCANNED;
+
 	if (obj->type == commit_type) {
 		if (process_commit((struct commit *)obj))
 			return -1;
 	if (has_sha1_file(sha1)) {
 		parse_object(sha1);
 		/* We already have it, so we should scan it now. */
-		return process_object(obj);
+		if (obj->flags & (SCANNED | TO_SCAN))
+			return 0;
+		object_list_insert(obj, process_queue_end);
+		process_queue_end = &(*process_queue_end)->next;
+		obj->flags |= TO_SCAN;
+		return 0;
 	}
-	if (object_list_contains(process_queue, obj))
+	if (obj->flags & (COMPLETE | TO_FETCH))
 		return 0;
 	object_list_insert(obj, process_queue_end);
 	process_queue_end = &(*process_queue_end)->next;
+	obj->flags |= TO_FETCH;
 
-	//fprintf(stderr, "prefetch %s\n", sha1_to_hex(sha1));
 	prefetch(sha1);
 		
 	return 0;
 
 static int loop(void)
 {
+	struct object_list *elem;
+
 	while (process_queue) {
 		struct object *obj = process_queue->item;
-		/*
-		fprintf(stderr, "%d objects to pull\n", 
-			object_list_length(process_queue));
-		*/
-		process_queue = process_queue->next;
+		elem = process_queue;
+		process_queue = elem->next;
+		free(elem);
 		if (!process_queue)
 			process_queue_end = &process_queue;
 
-		//fprintf(stderr, "fetch %s\n", sha1_to_hex(obj->sha1));
-		
-		if (make_sure_we_have_it(obj->type ? obj->type : "object", 
-					 obj->sha1))
-			return -1;
+		/* If we are not scanning this object, we placed it in
+		 * the queue because we needed to fetch it first.
+		 */
+		if (! (obj->flags & TO_SCAN)) {
+			if (fetch(obj->sha1)) {
+				report_missing(obj->type
+					       ? obj->type
+					       : "object", obj->sha1);
+				return -1;
+			}
+		}
 		if (!obj->type)
 			parse_object(obj->sha1);
 		if (process_object(obj))