Anonymous avatar Anonymous committed 85d106c

Improve the safety check used in fetch.c

The recent safety check to trust only the commits we have made
things impossibly slow and turn out to waste a lot of memory.

This commit fixes it with the following improvements:

- mark already scanned objects and avoid rescanning the same
object again;

- free the tree entries when we have scanned the tree entries;
this is the same as b0d8923ec01fd91b75ab079034f89ced91500157
which reduced memory usage by rev-list;

- plug memory leak from the object_list dequeuing code;

- use the process_queue not just for fetching but for scanning,
to make things tail recursive to avoid deep recursion; the
deep recursion was especially prominent when we cloned a big
pack.

- avoid has_sha1_file() call when we already know we do not have
that object.

Signed-off-by: Junio C Hamano <junkio@cox.net>;

Comments (0)

Files changed (1)

 		what, missing_hex, sha1_to_hex(current_commit_sha1));
 }
 
-static int make_sure_we_have_it(const char *what, unsigned char *sha1)
-{
-	int status = 0;
-
-	if (!has_sha1_file(sha1)) {
-		status = fetch(sha1);
-		if (status && what)
-			report_missing(what, sha1);
-	}
-	return status;
-}
-
 static int process(unsigned char *sha1, const char *type);
 
 static int process_tree(struct tree *tree)
 {
-	struct tree_entry_list *entries;
+	struct tree_entry_list *entry;
 
 	if (parse_tree(tree))
 		return -1;
 
-	for (entries = tree->entries; entries; entries = entries->next) {
-		if (process(entries->item.any->sha1,
-			    entries->directory ? tree_type : blob_type))
+	entry = tree->entries;
+	tree->entries = NULL;
+	while (entry) {
+		struct tree_entry_list *next = entry->next;
+		if (process(entry->item.any->sha1,
+			    entry->directory ? tree_type : blob_type))
 			return -1;
+		free(entry);
+		entry = next;
 	}
 	return 0;
 }
 
 #define COMPLETE	1U
+#define TO_FETCH	2U
+#define TO_SCAN		4U
+#define SCANNED		8U
+
 static struct commit_list *complete = NULL;
 
 static int process_commit(struct commit *commit)
 	while (complete && complete->item->date >= commit->date) {
 		pop_most_recent_commit(&complete, COMPLETE);
 	}
-		
 
 	if (commit->object.flags & COMPLETE)
 		return 0;
 
 	memcpy(current_commit_sha1, commit->object.sha1, 20);
 
+	pull_say("walk %s\n", sha1_to_hex(commit->object.sha1));
+
 	if (get_tree) {
 		if (process(commit->tree->object.sha1, tree_type))
 			return -1;
 	if (get_history) {
 		struct commit_list *parents = commit->parents;
 		for (; parents; parents = parents->next) {
-			if (process(parents->item->object.sha1,
-				    commit_type))
+			if (process(parents->item->object.sha1, commit_type))
 				return -1;
 		}
 	}
 
 static int process_object(struct object *obj)
 {
+	if (obj->flags & SCANNED)
+		return 0;
+	obj->flags |= SCANNED;
+
 	if (obj->type == commit_type) {
 		if (process_commit((struct commit *)obj))
 			return -1;
 	if (has_sha1_file(sha1)) {
 		parse_object(sha1);
 		/* We already have it, so we should scan it now. */
-		return process_object(obj);
+		if (obj->flags & (SCANNED | TO_SCAN))
+			return 0;
+		object_list_insert(obj, process_queue_end);
+		process_queue_end = &(*process_queue_end)->next;
+		obj->flags |= TO_SCAN;
+		return 0;
 	}
-	if (object_list_contains(process_queue, obj))
+	if (obj->flags & (COMPLETE | TO_FETCH))
 		return 0;
 	object_list_insert(obj, process_queue_end);
 	process_queue_end = &(*process_queue_end)->next;
+	obj->flags |= TO_FETCH;
 
-	//fprintf(stderr, "prefetch %s\n", sha1_to_hex(sha1));
 	prefetch(sha1);
 		
 	return 0;
 
 static int loop(void)
 {
+	struct object_list *elem;
+
 	while (process_queue) {
 		struct object *obj = process_queue->item;
-		/*
-		fprintf(stderr, "%d objects to pull\n", 
-			object_list_length(process_queue));
-		*/
-		process_queue = process_queue->next;
+		elem = process_queue;
+		process_queue = elem->next;
+		free(elem);
 		if (!process_queue)
 			process_queue_end = &process_queue;
 
-		//fprintf(stderr, "fetch %s\n", sha1_to_hex(obj->sha1));
-		
-		if (make_sure_we_have_it(obj->type ? obj->type : "object", 
-					 obj->sha1))
-			return -1;
+		/* If we are not scanning this object, we placed it in
+		 * the queue because we needed to fetch it first.
+		 */
+		if (! (obj->flags & TO_SCAN)) {
+			if (fetch(obj->sha1)) {
+				report_missing(obj->type
+					       ? obj->type
+					       : "object", obj->sha1);
+				return -1;
+			}
+		}
 		if (!obj->type)
 			parse_object(obj->sha1);
 		if (process_object(obj))
Tip: Filter by directory path e.g. /media app.js to search for public/media/app.js.
Tip: Use camelCasing e.g. ProjME to search for ProjectModifiedEvent.java.
Tip: Filter by extension type e.g. /repo .js to search for all .js files in the /repo directory.
Tip: Separate your search with spaces e.g. /ssh pom.xml to search for src/ssh/pom.xml.
Tip: Use ↑ and ↓ arrow keys to navigate and return to view the file.
Tip: You can also navigate files with Ctrl+j (next) and Ctrl+k (previous) and view the file with Ctrl+o.
Tip: You can also navigate files with Alt+j (next) and Alt+k (previous) and view the file with Alt+o.