Anonymous avatar Anonymous committed 92392b4

index-pack: Honor core.deltaBaseCacheLimit when resolving deltas

If we are trying to resolve deltas for a long delta chain composed
of multi-megabyte objects we can easily run into requiring 500M+
of memory to hold each object in the chain on the call stack while
we recurse into the dependent objects and resolve them.

We now use a simple delta cache that discards objects near the
bottom of the call stack first, as they are the least recently
used objects in this current delta chain. If we recurse out of a
chain we may find the base object is no longer available, as it was
free'd to keep memory under the deltaBaseCacheLimit. In such cases
we must unpack the base object again, which will require recursing
back to the root of the delta chain, as we released that root
first.

The astute reader will probably realize that we can still exceed
the delta base cache limit, but this happens only if the most
recent base plus the delta plus the inflated dependent sum up to
more than the base cache limit. Due to the way patch_delta is
currently implemented we cannot operate in less memory anyway.

Signed-off-by: Shawn O. Pearce <spearce@spearce.org>
Signed-off-by: Junio C Hamano <gitster@pobox.com>

Comments (0)

Files changed (1)

 static struct object_entry *objects;
 static struct delta_entry *deltas;
 static struct base_data *base_cache;
+static size_t base_cache_used;
 static int nr_objects;
 static int nr_deltas;
 static int nr_resolved_deltas;
 	die("pack has bad object at offset %lu: %s", offset, buf);
 }
 
+static void prune_base_data(struct base_data *retain)
+{
+	struct base_data *b = base_cache;
+	for (b = base_cache;
+	     base_cache_used > delta_base_cache_limit && b;
+	     b = b->child) {
+		if (b->data && b != retain) {
+			free(b->data);
+			b->data = NULL;
+			base_cache_used -= b->size;
+		}
+	}
+}
+
 static void link_base_data(struct base_data *base, struct base_data *c)
 {
 	if (base)
 
 	c->base = base;
 	c->child = NULL;
+	base_cache_used += c->size;
+	prune_base_data(c);
 }
 
 static void unlink_base_data(struct base_data *c)
 		base->child = NULL;
 	else
 		base_cache = NULL;
-	free(c->data);
+	if (c->data) {
+		free(c->data);
+		base_cache_used -= c->size;
+	}
 }
 
 static void *unpack_entry_data(unsigned long offset, unsigned long size)
 	}
 }
 
+static void *get_base_data(struct base_data *c)
+{
+	if (!c->data) {
+		struct object_entry *obj = c->obj;
+
+		if (obj->type == OBJ_REF_DELTA || obj->type == OBJ_OFS_DELTA) {
+			void *base = get_base_data(c->base);
+			void *raw = get_data_from_pack(obj);
+			c->data = patch_delta(
+				base, c->base->size,
+				raw, obj->size,
+				&c->size);
+			free(raw);
+			if (!c->data)
+				bad_object(obj->idx.offset, "failed to apply delta");
+		} else
+			c->data = get_data_from_pack(obj);
+
+		base_cache_used += c->size;
+		prune_base_data(c);
+	}
+	return c->data;
+}
+
 static void resolve_delta(struct object_entry *delta_obj,
 			  struct base_data *base_obj, enum object_type type)
 {
 	delta_obj->real_type = type;
 	delta_data = get_data_from_pack(delta_obj);
 	delta_size = delta_obj->size;
-	result.data = patch_delta(base_obj->data, base_obj->size,
+	result.data = patch_delta(get_base_data(base_obj), base_obj->size,
 			     delta_data, delta_size,
 			     &result.size);
 	free(delta_data);
Tip: Filter by directory path e.g. /media app.js to search for public/media/app.js.
Tip: Use camelCasing e.g. ProjME to search for ProjectModifiedEvent.java.
Tip: Filter by extension type e.g. /repo .js to search for all .js files in the /repo directory.
Tip: Separate your search with spaces e.g. /ssh pom.xml to search for src/ssh/pom.xml.
Tip: Use ↑ and ↓ arrow keys to navigate and return to view the file.
Tip: You can also navigate files with Ctrl+j (next) and Ctrl+k (previous) and view the file with Ctrl+o.
Tip: You can also navigate files with Alt+j (next) and Alt+k (previous) and view the file with Alt+o.