From dd3693eb0859274d62feac8047e1d486b3beaf31 Mon Sep 17 00:00:00 2001 From: Andrew Au Date: Thu, 12 Mar 2026 21:49:37 +0000 Subject: [PATCH 01/34] transport-helper, connect: use clean_on_exit to reap children on abnormal exit When a long-running service (e.g., a source indexer) runs as PID 1 inside a container and repeatedly spawns git, git may in turn spawn child processes such as git-remote-https or ssh. If git exits abnormally (e.g., via exit(128) on a transport error), the normal cleanup paths (disconnect_helper, finish_connect) are bypassed, and these children are never waited on. The children are reparented to PID 1, which does not reap them, so they accumulate as zombies over time. Set clean_on_exit and wait_after_clean on child_process structs in both transport-helper.c and connect.c so that the existing run-command cleanup infrastructure handles reaping on any exit path. This avoids rolling custom atexit handlers that call finish_command(), which could deadlock if the child is blocked waiting for the parent to close a pipe. The clean_on_exit mechanism sends SIGTERM first, then waits, ensuring the child terminates promptly. It also handles signal-based exits, not just atexit. Signed-off-by: Andrew Au Signed-off-by: Junio C Hamano --- connect.c | 4 ++++ transport-helper.c | 2 ++ 2 files changed, 6 insertions(+) diff --git a/connect.c b/connect.c index a02583a1027241..fcd35c5539a76e 100644 --- a/connect.c +++ b/connect.c @@ -1054,6 +1054,8 @@ static struct child_process *git_proxy_connect(int fd[2], char *host) strvec_push(&proxy->args, port); proxy->in = -1; proxy->out = -1; + proxy->clean_on_exit = 1; + proxy->wait_after_clean = 1; if (start_command(proxy)) die(_("cannot start proxy %s"), git_proxy_command); fd[0] = proxy->out; /* read from proxy stdout */ @@ -1515,6 +1517,8 @@ struct child_process *git_connect(int fd[2], const char *url, } strvec_push(&conn->args, cmd.buf); + conn->clean_on_exit = 1; + conn->wait_after_clean = 1; if (start_command(conn)) die(_("unable to fork")); diff --git a/transport-helper.c b/transport-helper.c index 4d95d84f9e4d05..570d7c6439569a 100644 --- a/transport-helper.c +++ b/transport-helper.c @@ -154,6 +154,8 @@ static struct child_process *get_helper(struct transport *transport) helper->trace2_child_class = helper->args.v[0]; /* "remote-" */ + helper->clean_on_exit = 1; + helper->wait_after_clean = 1; code = start_command(helper); if (code < 0 && errno == ENOENT) die(_("unable to find remote helper for '%s'"), data->name); From 736cef847cf788d90f39d15bb4be684bc4ba1013 Mon Sep 17 00:00:00 2001 From: Ramsay Jones Date: Thu, 19 Mar 2026 22:49:06 +0000 Subject: [PATCH 02/34] object-file: fix sparse 'plain integer as NULL pointer' error Signed-off-by: Ramsay Jones Signed-off-by: Junio C Hamano --- object-file.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/object-file.c b/object-file.c index 569ce6eaed95d4..fa2ca60a5941e8 100644 --- a/object-file.c +++ b/object-file.c @@ -1916,7 +1916,7 @@ int odb_source_loose_count_objects(struct odb_source *source, } else { *out = 0; ret = odb_source_loose_for_each_object(source, NULL, count_loose_object, - out, 0); + out, NULL); } out: From 1382e54a9c9e5f98271a943af9c10299c6ba934b Mon Sep 17 00:00:00 2001 From: Patrick Steinhardt Date: Fri, 20 Mar 2026 08:07:27 +0100 Subject: [PATCH 03/34] oidtree: modernize the code a bit The "oidtree.c" subsystem is rather small and self-contained and tends to just work. It thus doesn't typically receive a lot of attention, which has as a consequence that it's coding style is somewhat dated nowadays. Modernize the style of this subsystem a bit: - Rename the `oidtree_iter()` function to `oidtree_each_cb()`. - Rename `struct oidtree_iter_data` to `struct oidtree_each_data` to match the renamed callback function type. - Rename parameters and variables to clarify their intent. - Add comments that explain what some of the functions do. - Adapt the return value of `oidtree_contains()` to be a boolean. This prepares for some changes to the subsystem that'll happen in the next commit. Signed-off-by: Patrick Steinhardt Signed-off-by: Junio C Hamano --- oidtree.c | 61 ++++++++++++++++++++-------------------- oidtree.h | 42 ++++++++++++++++++++++----- t/unit-tests/u-oidtree.c | 14 ++++----- 3 files changed, 73 insertions(+), 44 deletions(-) diff --git a/oidtree.c b/oidtree.c index 324de949344e69..a4d10cd429c908 100644 --- a/oidtree.c +++ b/oidtree.c @@ -6,14 +6,6 @@ #include "oidtree.h" #include "hash.h" -struct oidtree_iter_data { - oidtree_iter fn; - void *arg; - size_t *last_nibble_at; - uint32_t algo; - uint8_t last_byte; -}; - void oidtree_init(struct oidtree *ot) { cb_init(&ot->tree); @@ -54,8 +46,7 @@ void oidtree_insert(struct oidtree *ot, const struct object_id *oid) cb_insert(&ot->tree, on, sizeof(*oid)); } - -int oidtree_contains(struct oidtree *ot, const struct object_id *oid) +bool oidtree_contains(struct oidtree *ot, const struct object_id *oid) { struct object_id k; size_t klen = sizeof(k); @@ -69,41 +60,51 @@ int oidtree_contains(struct oidtree *ot, const struct object_id *oid) klen += BUILD_ASSERT_OR_ZERO(offsetof(struct object_id, hash) < offsetof(struct object_id, algo)); - return cb_lookup(&ot->tree, (const uint8_t *)&k, klen) ? 1 : 0; + return !!cb_lookup(&ot->tree, (const uint8_t *)&k, klen); } -static enum cb_next iter(struct cb_node *n, void *arg) +struct oidtree_each_data { + oidtree_each_cb cb; + void *cb_data; + size_t *last_nibble_at; + uint32_t algo; + uint8_t last_byte; +}; + +static enum cb_next iter(struct cb_node *n, void *cb_data) { - struct oidtree_iter_data *x = arg; + struct oidtree_each_data *data = cb_data; struct object_id k; /* Copy to provide 4-byte alignment needed by struct object_id. */ memcpy(&k, n->k, sizeof(k)); - if (x->algo != GIT_HASH_UNKNOWN && x->algo != k.algo) + if (data->algo != GIT_HASH_UNKNOWN && data->algo != k.algo) return CB_CONTINUE; - if (x->last_nibble_at) { - if ((k.hash[*x->last_nibble_at] ^ x->last_byte) & 0xf0) + if (data->last_nibble_at) { + if ((k.hash[*data->last_nibble_at] ^ data->last_byte) & 0xf0) return CB_CONTINUE; } - return x->fn(&k, x->arg); + return data->cb(&k, data->cb_data); } -void oidtree_each(struct oidtree *ot, const struct object_id *oid, - size_t oidhexsz, oidtree_iter fn, void *arg) +void oidtree_each(struct oidtree *ot, const struct object_id *prefix, + size_t prefix_hex_len, oidtree_each_cb cb, void *cb_data) { - size_t klen = oidhexsz / 2; - struct oidtree_iter_data x = { 0 }; - assert(oidhexsz <= GIT_MAX_HEXSZ); - - x.fn = fn; - x.arg = arg; - x.algo = oid->algo; - if (oidhexsz & 1) { - x.last_byte = oid->hash[klen]; - x.last_nibble_at = &klen; + struct oidtree_each_data data = { + .cb = cb, + .cb_data = cb_data, + .algo = prefix->algo, + }; + size_t klen = prefix_hex_len / 2; + assert(prefix_hex_len <= GIT_MAX_HEXSZ); + + if (prefix_hex_len & 1) { + data.last_byte = prefix->hash[klen]; + data.last_nibble_at = &klen; } - cb_each(&ot->tree, (const uint8_t *)oid, klen, iter, &x); + + cb_each(&ot->tree, prefix->hash, klen, iter, &data); } diff --git a/oidtree.h b/oidtree.h index 77898f510a1227..06514010176beb 100644 --- a/oidtree.h +++ b/oidtree.h @@ -5,18 +5,46 @@ #include "hash.h" #include "mem-pool.h" +/* + * OID trees are an efficient storage for object IDs that use a critbit tree + * internally. Common prefixes are duplicated and object IDs are stored in a + * way that allow easy iteration over the objects in lexicographic order. As a + * consequence, operations that want to enumerate all object IDs that match a + * given prefix can be answered efficiently. + * + * Note that it is not (yet) possible to store data other than the object IDs + * themselves in this tree. + */ struct oidtree { struct cb_tree tree; struct mem_pool mem_pool; }; -void oidtree_init(struct oidtree *); -void oidtree_clear(struct oidtree *); -void oidtree_insert(struct oidtree *, const struct object_id *); -int oidtree_contains(struct oidtree *, const struct object_id *); +/* Initialize the oidtree so that it is ready for use. */ +void oidtree_init(struct oidtree *ot); -typedef enum cb_next (*oidtree_iter)(const struct object_id *, void *data); -void oidtree_each(struct oidtree *, const struct object_id *, - size_t oidhexsz, oidtree_iter, void *data); +/* + * Release all memory associated with the oidtree and reinitialize it for + * subsequent use. + */ +void oidtree_clear(struct oidtree *ot); + +/* Insert the object ID into the tree. */ +void oidtree_insert(struct oidtree *ot, const struct object_id *oid); + +/* Check whether the tree contains the given object ID. */ +bool oidtree_contains(struct oidtree *ot, const struct object_id *oid); + +/* Callback function used for `oidtree_each()`. */ +typedef enum cb_next (*oidtree_each_cb)(const struct object_id *oid, + void *cb_data); + +/* + * Iterate through all object IDs in the tree whose prefix matches the given + * object ID prefix and invoke the callback function on each of them. + */ +void oidtree_each(struct oidtree *ot, + const struct object_id *prefix, size_t prefix_hex_len, + oidtree_each_cb cb, void *cb_data); #endif /* OIDTREE_H */ diff --git a/t/unit-tests/u-oidtree.c b/t/unit-tests/u-oidtree.c index e6eede27404842..def47c67953bcf 100644 --- a/t/unit-tests/u-oidtree.c +++ b/t/unit-tests/u-oidtree.c @@ -24,7 +24,7 @@ static int fill_tree_loc(struct oidtree *ot, const char *hexes[], size_t n) return 0; } -static void check_contains(struct oidtree *ot, const char *hex, int expected) +static void check_contains(struct oidtree *ot, const char *hex, bool expected) { struct object_id oid; @@ -88,12 +88,12 @@ void test_oidtree__cleanup(void) void test_oidtree__contains(void) { FILL_TREE(&ot, "444", "1", "2", "3", "4", "5", "a", "b", "c", "d", "e"); - check_contains(&ot, "44", 0); - check_contains(&ot, "441", 0); - check_contains(&ot, "440", 0); - check_contains(&ot, "444", 1); - check_contains(&ot, "4440", 1); - check_contains(&ot, "4444", 0); + check_contains(&ot, "44", false); + check_contains(&ot, "441", false); + check_contains(&ot, "440", false); + check_contains(&ot, "444", true); + check_contains(&ot, "4440", true); + check_contains(&ot, "4444", false); } void test_oidtree__each(void) From fe446b01aeaab307adcbfb39d4aaa72c37afbcda Mon Sep 17 00:00:00 2001 From: Patrick Steinhardt Date: Fri, 20 Mar 2026 08:07:28 +0100 Subject: [PATCH 04/34] oidtree: extend iteration to allow for arbitrary return codes The interface `cb_each()` iterates through a crit-bit tree and calls a specific callback function for each of the contained items. The callback function is expected to return either: - `CB_CONTINUE` in case iteration shall continue. - `CB_BREAK` to abort iteration. This is needlessly restrictive though, as callers may want to return arbitrary values and have them be bubbled up to the `cb_each()` call site. In fact, this is a rather common pattern we have: whenever such a callback function returns a non-zero error code, we abort iteration and bubble up the code as-is. Refactor both the crit-bit tree and oidtree subsystems to behave accordingly. Signed-off-by: Patrick Steinhardt Signed-off-by: Junio C Hamano --- cbtree.c | 21 ++++++++++++--------- cbtree.h | 17 +++++++++-------- object-name.c | 4 ++-- oidtree.c | 12 ++++++------ oidtree.h | 18 ++++++++++++------ t/unit-tests/u-oidtree.c | 4 ++-- 6 files changed, 43 insertions(+), 33 deletions(-) diff --git a/cbtree.c b/cbtree.c index cf8cf75b895e95..4ab794bddce0c6 100644 --- a/cbtree.c +++ b/cbtree.c @@ -96,26 +96,28 @@ struct cb_node *cb_lookup(struct cb_tree *t, const uint8_t *k, size_t klen) return p && !memcmp(p->k, k, klen) ? p : NULL; } -static enum cb_next cb_descend(struct cb_node *p, cb_iter fn, void *arg) +static int cb_descend(struct cb_node *p, cb_iter fn, void *arg) { if (1 & (uintptr_t)p) { struct cb_node *q = cb_node_of(p); - enum cb_next n = cb_descend(q->child[0], fn, arg); - - return n == CB_BREAK ? n : cb_descend(q->child[1], fn, arg); + int ret = cb_descend(q->child[0], fn, arg); + if (ret) + return ret; + return cb_descend(q->child[1], fn, arg); } else { return fn(p, arg); } } -void cb_each(struct cb_tree *t, const uint8_t *kpfx, size_t klen, - cb_iter fn, void *arg) +int cb_each(struct cb_tree *t, const uint8_t *kpfx, size_t klen, + cb_iter fn, void *arg) { struct cb_node *p = t->root; struct cb_node *top = p; size_t i = 0; - if (!p) return; /* empty tree */ + if (!p) + return 0; /* empty tree */ /* Walk tree, maintaining top pointer */ while (1 & (uintptr_t)p) { @@ -130,7 +132,8 @@ void cb_each(struct cb_tree *t, const uint8_t *kpfx, size_t klen, for (i = 0; i < klen; i++) { if (p->k[i] != kpfx[i]) - return; /* "best" match failed */ + return 0; /* "best" match failed */ } - cb_descend(top, fn, arg); + + return cb_descend(top, fn, arg); } diff --git a/cbtree.h b/cbtree.h index 43193abdda23ce..c374b1b3db9d82 100644 --- a/cbtree.h +++ b/cbtree.h @@ -30,11 +30,6 @@ struct cb_tree { struct cb_node *root; }; -enum cb_next { - CB_CONTINUE = 0, - CB_BREAK = 1 -}; - #define CBTREE_INIT { 0 } static inline void cb_init(struct cb_tree *t) @@ -46,9 +41,15 @@ static inline void cb_init(struct cb_tree *t) struct cb_node *cb_lookup(struct cb_tree *, const uint8_t *k, size_t klen); struct cb_node *cb_insert(struct cb_tree *, struct cb_node *, size_t klen); -typedef enum cb_next (*cb_iter)(struct cb_node *, void *arg); +/* + * Callback invoked by `cb_each()` for each node in the critbit tree. A return + * value of 0 will cause the iteration to continue, a non-zero return code will + * cause iteration to abort. The error code will be relayed back from + * `cb_each()` in that case. + */ +typedef int (*cb_iter)(struct cb_node *, void *arg); -void cb_each(struct cb_tree *, const uint8_t *kpfx, size_t klen, - cb_iter, void *arg); +int cb_each(struct cb_tree *, const uint8_t *kpfx, size_t klen, + cb_iter, void *arg); #endif /* CBTREE_H */ diff --git a/object-name.c b/object-name.c index e5adec4c9d5084..a24a1b48e12ae9 100644 --- a/object-name.c +++ b/object-name.c @@ -103,12 +103,12 @@ static void update_candidates(struct disambiguate_state *ds, const struct object static int match_hash(unsigned, const unsigned char *, const unsigned char *); -static enum cb_next match_prefix(const struct object_id *oid, void *arg) +static int match_prefix(const struct object_id *oid, void *arg) { struct disambiguate_state *ds = arg; /* no need to call match_hash, oidtree_each did prefix match */ update_candidates(ds, oid); - return ds->ambiguous ? CB_BREAK : CB_CONTINUE; + return ds->ambiguous; } static void find_short_object_filename(struct disambiguate_state *ds) diff --git a/oidtree.c b/oidtree.c index a4d10cd429c908..ab9fe7ec7aecce 100644 --- a/oidtree.c +++ b/oidtree.c @@ -71,7 +71,7 @@ struct oidtree_each_data { uint8_t last_byte; }; -static enum cb_next iter(struct cb_node *n, void *cb_data) +static int iter(struct cb_node *n, void *cb_data) { struct oidtree_each_data *data = cb_data; struct object_id k; @@ -80,18 +80,18 @@ static enum cb_next iter(struct cb_node *n, void *cb_data) memcpy(&k, n->k, sizeof(k)); if (data->algo != GIT_HASH_UNKNOWN && data->algo != k.algo) - return CB_CONTINUE; + return 0; if (data->last_nibble_at) { if ((k.hash[*data->last_nibble_at] ^ data->last_byte) & 0xf0) - return CB_CONTINUE; + return 0; } return data->cb(&k, data->cb_data); } -void oidtree_each(struct oidtree *ot, const struct object_id *prefix, - size_t prefix_hex_len, oidtree_each_cb cb, void *cb_data) +int oidtree_each(struct oidtree *ot, const struct object_id *prefix, + size_t prefix_hex_len, oidtree_each_cb cb, void *cb_data) { struct oidtree_each_data data = { .cb = cb, @@ -106,5 +106,5 @@ void oidtree_each(struct oidtree *ot, const struct object_id *prefix, data.last_nibble_at = &klen; } - cb_each(&ot->tree, prefix->hash, klen, iter, &data); + return cb_each(&ot->tree, prefix->hash, klen, iter, &data); } diff --git a/oidtree.h b/oidtree.h index 06514010176beb..2b7bad2e60a51d 100644 --- a/oidtree.h +++ b/oidtree.h @@ -35,16 +35,22 @@ void oidtree_insert(struct oidtree *ot, const struct object_id *oid); /* Check whether the tree contains the given object ID. */ bool oidtree_contains(struct oidtree *ot, const struct object_id *oid); -/* Callback function used for `oidtree_each()`. */ -typedef enum cb_next (*oidtree_each_cb)(const struct object_id *oid, - void *cb_data); +/* + * Callback function used for `oidtree_each()`. Returning a non-zero exit code + * will cause iteration to stop. The exit code will be propagated to the caller + * of `oidtree_each()`. + */ +typedef int (*oidtree_each_cb)(const struct object_id *oid, + void *cb_data); /* * Iterate through all object IDs in the tree whose prefix matches the given * object ID prefix and invoke the callback function on each of them. + * + * Returns any non-zero exit code from the provided callback function. */ -void oidtree_each(struct oidtree *ot, - const struct object_id *prefix, size_t prefix_hex_len, - oidtree_each_cb cb, void *cb_data); +int oidtree_each(struct oidtree *ot, + const struct object_id *prefix, size_t prefix_hex_len, + oidtree_each_cb cb, void *cb_data); #endif /* OIDTREE_H */ diff --git a/t/unit-tests/u-oidtree.c b/t/unit-tests/u-oidtree.c index def47c67953bcf..d4d05c7dc3e4f7 100644 --- a/t/unit-tests/u-oidtree.c +++ b/t/unit-tests/u-oidtree.c @@ -38,7 +38,7 @@ struct expected_hex_iter { const char *query; }; -static enum cb_next check_each_cb(const struct object_id *oid, void *data) +static int check_each_cb(const struct object_id *oid, void *data) { struct expected_hex_iter *hex_iter = data; struct object_id expected; @@ -49,7 +49,7 @@ static enum cb_next check_each_cb(const struct object_id *oid, void *data) &expected); cl_assert_equal_s(oid_to_hex(oid), oid_to_hex(&expected)); hex_iter->i += 1; - return CB_CONTINUE; + return 0; } LAST_ARG_MUST_BE_NULL From cfd575f0a9730712107e4ee6799a37665bcd8204 Mon Sep 17 00:00:00 2001 From: Patrick Steinhardt Date: Fri, 20 Mar 2026 08:07:29 +0100 Subject: [PATCH 05/34] odb: introduce `struct odb_for_each_object_options` The `odb_for_each_object()` function only accepts a bitset of flags. In a subsequent commit we'll want to change object iteration to also support iterating over only those objects that have a specific prefix. While we could of course add the prefix to the function signature, or alternatively introduce a new function, both of these options don't really seem to be that sensible. Instead, introduce a new `struct odb_for_each_object_options` that can be passed to a new `odb_for_each_object_ext()` function. Splice through the options structure into the respective object database sources. Signed-off-by: Patrick Steinhardt Signed-off-by: Junio C Hamano --- builtin/cat-file.c | 7 +++++-- builtin/pack-objects.c | 12 +++++++----- commit-graph.c | 5 ++++- object-file.c | 9 +++++---- object-file.h | 2 +- odb.c | 26 +++++++++++++++++++------- odb.h | 16 ++++++++++++++++ odb/source-files.c | 8 ++++---- odb/source.h | 6 +++--- packfile.c | 12 ++++++------ packfile.h | 2 +- 11 files changed, 71 insertions(+), 34 deletions(-) diff --git a/builtin/cat-file.c b/builtin/cat-file.c index b6f12f41d6070a..cd13a3a89f1851 100644 --- a/builtin/cat-file.c +++ b/builtin/cat-file.c @@ -848,6 +848,9 @@ static void batch_each_object(struct batch_options *opt, .callback = callback, .payload = _payload, }; + struct odb_for_each_object_options opts = { + .flags = flags, + }; struct bitmap_index *bitmap = NULL; struct odb_source *source; @@ -860,7 +863,7 @@ static void batch_each_object(struct batch_options *opt, odb_prepare_alternates(the_repository->objects); for (source = the_repository->objects->sources; source; source = source->next) { int ret = odb_source_loose_for_each_object(source, NULL, batch_one_object_oi, - &payload, flags); + &payload, &opts); if (ret) break; } @@ -884,7 +887,7 @@ static void batch_each_object(struct batch_options *opt, for (source = the_repository->objects->sources; source; source = source->next) { struct odb_source_files *files = odb_source_files_downcast(source); int ret = packfile_store_for_each_object(files->packed, &oi, - batch_one_object_oi, &payload, flags); + batch_one_object_oi, &payload, &opts); if (ret) break; } diff --git a/builtin/pack-objects.c b/builtin/pack-objects.c index cd013c0b68a6a3..3bb57ff1837290 100644 --- a/builtin/pack-objects.c +++ b/builtin/pack-objects.c @@ -4344,6 +4344,12 @@ static void add_objects_in_unpacked_packs(void) { struct odb_source *source; time_t mtime; + struct odb_for_each_object_options opts = { + .flags = ODB_FOR_EACH_OBJECT_PACK_ORDER | + ODB_FOR_EACH_OBJECT_LOCAL_ONLY | + ODB_FOR_EACH_OBJECT_SKIP_IN_CORE_KEPT_PACKS | + ODB_FOR_EACH_OBJECT_SKIP_ON_DISK_KEPT_PACKS, + }; struct object_info oi = { .mtimep = &mtime, }; @@ -4356,11 +4362,7 @@ static void add_objects_in_unpacked_packs(void) continue; if (packfile_store_for_each_object(files->packed, &oi, - add_object_in_unpacked_pack, NULL, - ODB_FOR_EACH_OBJECT_PACK_ORDER | - ODB_FOR_EACH_OBJECT_LOCAL_ONLY | - ODB_FOR_EACH_OBJECT_SKIP_IN_CORE_KEPT_PACKS | - ODB_FOR_EACH_OBJECT_SKIP_ON_DISK_KEPT_PACKS)) + add_object_in_unpacked_pack, NULL, &opts)) die(_("cannot open pack index")); } } diff --git a/commit-graph.c b/commit-graph.c index c0300033304404..df4b4a125e977b 100644 --- a/commit-graph.c +++ b/commit-graph.c @@ -1969,6 +1969,9 @@ static void fill_oids_from_all_packs(struct write_commit_graph_context *ctx) { struct odb_source *source; enum object_type type; + struct odb_for_each_object_options opts = { + .flags = ODB_FOR_EACH_OBJECT_PACK_ORDER, + }; struct object_info oi = { .typep = &type, }; @@ -1983,7 +1986,7 @@ static void fill_oids_from_all_packs(struct write_commit_graph_context *ctx) for (source = ctx->r->objects->sources; source; source = source->next) { struct odb_source_files *files = odb_source_files_downcast(source); packfile_store_for_each_object(files->packed, &oi, add_packed_commits_oi, - ctx, ODB_FOR_EACH_OBJECT_PACK_ORDER); + ctx, &opts); } if (ctx->progress_done < ctx->approx_nr_objects) diff --git a/object-file.c b/object-file.c index 9764c8dd06797f..56cbb27ab908a8 100644 --- a/object-file.c +++ b/object-file.c @@ -1849,7 +1849,7 @@ int odb_source_loose_for_each_object(struct odb_source *source, const struct object_info *request, odb_for_each_object_cb cb, void *cb_data, - unsigned flags) + const struct odb_for_each_object_options *opts) { struct for_each_object_wrapper_data data = { .source = source, @@ -1859,9 +1859,9 @@ int odb_source_loose_for_each_object(struct odb_source *source, }; /* There are no loose promisor objects, so we can return immediately. */ - if ((flags & ODB_FOR_EACH_OBJECT_PROMISOR_ONLY)) + if ((opts->flags & ODB_FOR_EACH_OBJECT_PROMISOR_ONLY)) return 0; - if ((flags & ODB_FOR_EACH_OBJECT_LOCAL_ONLY) && !source->local) + if ((opts->flags & ODB_FOR_EACH_OBJECT_LOCAL_ONLY) && !source->local) return 0; return for_each_loose_file_in_source(source, for_each_object_wrapper_cb, @@ -1914,9 +1914,10 @@ int odb_source_loose_count_objects(struct odb_source *source, *out = count * 256; ret = 0; } else { + struct odb_for_each_object_options opts = { 0 }; *out = 0; ret = odb_source_loose_for_each_object(source, NULL, count_loose_object, - out, NULL); + out, &opts); } out: diff --git a/object-file.h b/object-file.h index f8d8805a18cc8c..46dfa7b632b334 100644 --- a/object-file.h +++ b/object-file.h @@ -137,7 +137,7 @@ int odb_source_loose_for_each_object(struct odb_source *source, const struct object_info *request, odb_for_each_object_cb cb, void *cb_data, - unsigned flags); + const struct odb_for_each_object_options *opts); /* * Count the number of loose objects in this source. diff --git a/odb.c b/odb.c index 350e23f3c0798d..3019957b8747a9 100644 --- a/odb.c +++ b/odb.c @@ -896,20 +896,20 @@ int odb_freshen_object(struct object_database *odb, return 0; } -int odb_for_each_object(struct object_database *odb, - const struct object_info *request, - odb_for_each_object_cb cb, - void *cb_data, - unsigned flags) +int odb_for_each_object_ext(struct object_database *odb, + const struct object_info *request, + odb_for_each_object_cb cb, + void *cb_data, + const struct odb_for_each_object_options *opts) { int ret; odb_prepare_alternates(odb); for (struct odb_source *source = odb->sources; source; source = source->next) { - if (flags & ODB_FOR_EACH_OBJECT_LOCAL_ONLY && !source->local) + if (opts->flags & ODB_FOR_EACH_OBJECT_LOCAL_ONLY && !source->local) continue; - ret = odb_source_for_each_object(source, request, cb, cb_data, flags); + ret = odb_source_for_each_object(source, request, cb, cb_data, opts); if (ret) return ret; } @@ -917,6 +917,18 @@ int odb_for_each_object(struct object_database *odb, return 0; } +int odb_for_each_object(struct object_database *odb, + const struct object_info *request, + odb_for_each_object_cb cb, + void *cb_data, + unsigned flags) +{ + struct odb_for_each_object_options opts = { + .flags = flags, + }; + return odb_for_each_object_ext(odb, request, cb, cb_data, &opts); +} + int odb_count_objects(struct object_database *odb, enum odb_count_objects_flags flags, unsigned long *out) diff --git a/odb.h b/odb.h index 9aee260105ae54..a19a8bb50d86e7 100644 --- a/odb.h +++ b/odb.h @@ -481,6 +481,15 @@ typedef int (*odb_for_each_object_cb)(const struct object_id *oid, struct object_info *oi, void *cb_data); +/* + * Options that can be passed to `odb_for_each_object()` and its + * backend-specific implementations. + */ +struct odb_for_each_object_options { + /* A bitfield of `odb_for_each_object_flags`. */ + enum odb_for_each_object_flags flags; +}; + /* * Iterate through all objects contained in the object database. Note that * objects may be iterated over multiple times in case they are either stored @@ -495,6 +504,13 @@ typedef int (*odb_for_each_object_cb)(const struct object_id *oid, * Returns 0 on success, a negative error code in case a failure occurred, or * an arbitrary non-zero error code returned by the callback itself. */ +int odb_for_each_object_ext(struct object_database *odb, + const struct object_info *request, + odb_for_each_object_cb cb, + void *cb_data, + const struct odb_for_each_object_options *opts); + +/* Same as `odb_for_each_object_ext()` with `opts.flags` set to the given flags. */ int odb_for_each_object(struct object_database *odb, const struct object_info *request, odb_for_each_object_cb cb, diff --git a/odb/source-files.c b/odb/source-files.c index c08d8993e378a0..e90bb689bb01e6 100644 --- a/odb/source-files.c +++ b/odb/source-files.c @@ -75,18 +75,18 @@ static int odb_source_files_for_each_object(struct odb_source *source, const struct object_info *request, odb_for_each_object_cb cb, void *cb_data, - unsigned flags) + const struct odb_for_each_object_options *opts) { struct odb_source_files *files = odb_source_files_downcast(source); int ret; - if (!(flags & ODB_FOR_EACH_OBJECT_PROMISOR_ONLY)) { - ret = odb_source_loose_for_each_object(source, request, cb, cb_data, flags); + if (!(opts->flags & ODB_FOR_EACH_OBJECT_PROMISOR_ONLY)) { + ret = odb_source_loose_for_each_object(source, request, cb, cb_data, opts); if (ret) return ret; } - ret = packfile_store_for_each_object(files->packed, request, cb, cb_data, flags); + ret = packfile_store_for_each_object(files->packed, request, cb, cb_data, opts); if (ret) return ret; diff --git a/odb/source.h b/odb/source.h index 96c906e7a1b350..ee5d6ed530d519 100644 --- a/odb/source.h +++ b/odb/source.h @@ -140,7 +140,7 @@ struct odb_source { const struct object_info *request, odb_for_each_object_cb cb, void *cb_data, - unsigned flags); + const struct odb_for_each_object_options *opts); /* * This callback is expected to count objects in the given object @@ -343,9 +343,9 @@ static inline int odb_source_for_each_object(struct odb_source *source, const struct object_info *request, odb_for_each_object_cb cb, void *cb_data, - unsigned flags) + const struct odb_for_each_object_options *opts) { - return source->for_each_object(source, request, cb, cb_data, flags); + return source->for_each_object(source, request, cb, cb_data, opts); } /* diff --git a/packfile.c b/packfile.c index d4de9f3ffe831e..a6f3d2035d44fd 100644 --- a/packfile.c +++ b/packfile.c @@ -2375,7 +2375,7 @@ int packfile_store_for_each_object(struct packfile_store *store, const struct object_info *request, odb_for_each_object_cb cb, void *cb_data, - unsigned flags) + const struct odb_for_each_object_options *opts) { struct packfile_store_for_each_object_wrapper_data data = { .store = store, @@ -2391,15 +2391,15 @@ int packfile_store_for_each_object(struct packfile_store *store, for (e = packfile_store_get_packs(store); e; e = e->next) { struct packed_git *p = e->pack; - if ((flags & ODB_FOR_EACH_OBJECT_LOCAL_ONLY) && !p->pack_local) + if ((opts->flags & ODB_FOR_EACH_OBJECT_LOCAL_ONLY) && !p->pack_local) continue; - if ((flags & ODB_FOR_EACH_OBJECT_PROMISOR_ONLY) && + if ((opts->flags & ODB_FOR_EACH_OBJECT_PROMISOR_ONLY) && !p->pack_promisor) continue; - if ((flags & ODB_FOR_EACH_OBJECT_SKIP_IN_CORE_KEPT_PACKS) && + if ((opts->flags & ODB_FOR_EACH_OBJECT_SKIP_IN_CORE_KEPT_PACKS) && p->pack_keep_in_core) continue; - if ((flags & ODB_FOR_EACH_OBJECT_SKIP_ON_DISK_KEPT_PACKS) && + if ((opts->flags & ODB_FOR_EACH_OBJECT_SKIP_ON_DISK_KEPT_PACKS) && p->pack_keep) continue; if (open_pack_index(p)) { @@ -2408,7 +2408,7 @@ int packfile_store_for_each_object(struct packfile_store *store, } ret = for_each_object_in_pack(p, packfile_store_for_each_object_wrapper, - &data, flags); + &data, opts->flags); if (ret) goto out; } diff --git a/packfile.h b/packfile.h index a16ec3950d2507..fa41dfda389327 100644 --- a/packfile.h +++ b/packfile.h @@ -367,7 +367,7 @@ int packfile_store_for_each_object(struct packfile_store *store, const struct object_info *request, odb_for_each_object_cb cb, void *cb_data, - unsigned flags); + const struct odb_for_each_object_options *opts); /* A hook to report invalid files in pack directory */ #define PACKDIR_FILE_PACK 1 From 284b7862be735bb47276ac288ace153ae3d06938 Mon Sep 17 00:00:00 2001 From: Patrick Steinhardt Date: Fri, 20 Mar 2026 08:07:30 +0100 Subject: [PATCH 06/34] object-name: move logic to iterate through loose prefixed objects The logic to iterate through loose objects that have a certain prefix is currently hosted in "object-name.c". This logic reaches into specifics of the loose object source, so it breaks once a different backend is used for the object storage. Move the logic to iterate through loose objects with a prefix into "object-file.c". This is done by extending the for-each-object options to support an optional prefix that is then honored by the loose source. Naturally, we'll also have this support in the packfile store. This is done in the next commit. Furthermore, there are no users of the loose cache outside of "object-file.c" anymore. As such, convert `odb_source_loose_cache()` to have file scope. Signed-off-by: Patrick Steinhardt Signed-off-by: Junio C Hamano --- object-file.c | 29 +++++++++++++++++++++++++++-- object-file.h | 7 ------- object-name.c | 10 ++++++---- odb.h | 7 +++++++ 4 files changed, 40 insertions(+), 13 deletions(-) diff --git a/object-file.c b/object-file.c index 56cbb27ab908a8..13732f324f5bcc 100644 --- a/object-file.c +++ b/object-file.c @@ -33,6 +33,9 @@ /* The maximum size for an object header. */ #define MAX_HEADER_LEN 32 +static struct oidtree *odb_source_loose_cache(struct odb_source *source, + const struct object_id *oid); + static int get_conv_flags(unsigned flags) { if (flags & INDEX_RENORMALIZE) @@ -1845,6 +1848,23 @@ static int for_each_object_wrapper_cb(const struct object_id *oid, } } +static int for_each_prefixed_object_wrapper_cb(const struct object_id *oid, + void *cb_data) +{ + struct for_each_object_wrapper_data *data = cb_data; + if (data->request) { + struct object_info oi = *data->request; + + if (odb_source_loose_read_object_info(data->source, + oid, &oi, 0) < 0) + return -1; + + return data->cb(oid, &oi, data->cb_data); + } else { + return data->cb(oid, NULL, data->cb_data); + } +} + int odb_source_loose_for_each_object(struct odb_source *source, const struct object_info *request, odb_for_each_object_cb cb, @@ -1864,6 +1884,11 @@ int odb_source_loose_for_each_object(struct odb_source *source, if ((opts->flags & ODB_FOR_EACH_OBJECT_LOCAL_ONLY) && !source->local) return 0; + if (opts->prefix) + return oidtree_each(odb_source_loose_cache(source, opts->prefix), + opts->prefix, opts->prefix_hex_len, + for_each_prefixed_object_wrapper_cb, &data); + return for_each_loose_file_in_source(source, for_each_object_wrapper_cb, NULL, NULL, &data); } @@ -1935,8 +1960,8 @@ static int append_loose_object(const struct object_id *oid, return 0; } -struct oidtree *odb_source_loose_cache(struct odb_source *source, - const struct object_id *oid) +static struct oidtree *odb_source_loose_cache(struct odb_source *source, + const struct object_id *oid) { struct odb_source_files *files = odb_source_files_downcast(source); int subdir_nr = oid->hash[0]; diff --git a/object-file.h b/object-file.h index 46dfa7b632b334..f11ad58f6c0874 100644 --- a/object-file.h +++ b/object-file.h @@ -74,13 +74,6 @@ int odb_source_loose_write_stream(struct odb_source *source, struct odb_write_stream *stream, size_t len, struct object_id *oid); -/* - * Populate and return the loose object cache array corresponding to the - * given object ID. - */ -struct oidtree *odb_source_loose_cache(struct odb_source *source, - const struct object_id *oid); - /* * Put in `buf` the name of the file in the local object database that * would be used to store a loose object with the specified oid. diff --git a/object-name.c b/object-name.c index a24a1b48e12ae9..929a68dbd0d32b 100644 --- a/object-name.c +++ b/object-name.c @@ -16,7 +16,6 @@ #include "remote.h" #include "dir.h" #include "oid-array.h" -#include "oidtree.h" #include "packfile.h" #include "pretty.h" #include "object-file.h" @@ -103,7 +102,7 @@ static void update_candidates(struct disambiguate_state *ds, const struct object static int match_hash(unsigned, const unsigned char *, const unsigned char *); -static int match_prefix(const struct object_id *oid, void *arg) +static int match_prefix(const struct object_id *oid, struct object_info *oi UNUSED, void *arg) { struct disambiguate_state *ds = arg; /* no need to call match_hash, oidtree_each did prefix match */ @@ -113,11 +112,14 @@ static int match_prefix(const struct object_id *oid, void *arg) static void find_short_object_filename(struct disambiguate_state *ds) { + struct odb_for_each_object_options opts = { + .prefix = &ds->bin_pfx, + .prefix_hex_len = ds->len, + }; struct odb_source *source; for (source = ds->repo->objects->sources; source && !ds->ambiguous; source = source->next) - oidtree_each(odb_source_loose_cache(source, &ds->bin_pfx), - &ds->bin_pfx, ds->len, match_prefix, ds); + odb_source_loose_for_each_object(source, NULL, match_prefix, ds, &opts); } static int match_hash(unsigned len, const unsigned char *a, const unsigned char *b) diff --git a/odb.h b/odb.h index a19a8bb50d86e7..e80fd8f7abd534 100644 --- a/odb.h +++ b/odb.h @@ -488,6 +488,13 @@ typedef int (*odb_for_each_object_cb)(const struct object_id *oid, struct odb_for_each_object_options { /* A bitfield of `odb_for_each_object_flags`. */ enum odb_for_each_object_flags flags; + + /* + * If set, only iterate through objects whose first `prefix_hex_len` + * hex characters matches the given prefix. + */ + const struct object_id *prefix; + size_t prefix_hex_len; }; /* From e30bff8f8402f0f147a08fe00b75e24f293fa870 Mon Sep 17 00:00:00 2001 From: Patrick Steinhardt Date: Fri, 20 Mar 2026 08:07:31 +0100 Subject: [PATCH 07/34] object-name: move logic to iterate through packed prefixed objects Similar to the preceding commit, move the logic to iterate through objects that have a given prefix into "packfile.c". Signed-off-by: Patrick Steinhardt Signed-off-by: Junio C Hamano --- object-name.c | 94 ++------------------------- packfile.c | 174 ++++++++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 181 insertions(+), 87 deletions(-) diff --git a/object-name.c b/object-name.c index 929a68dbd0d32b..ff0de06ff9bd87 100644 --- a/object-name.c +++ b/object-name.c @@ -100,8 +100,6 @@ static void update_candidates(struct disambiguate_state *ds, const struct object /* otherwise, current can be discarded and candidate is still good */ } -static int match_hash(unsigned, const unsigned char *, const unsigned char *); - static int match_prefix(const struct object_id *oid, struct object_info *oi UNUSED, void *arg) { struct disambiguate_state *ds = arg; @@ -122,103 +120,25 @@ static void find_short_object_filename(struct disambiguate_state *ds) odb_source_loose_for_each_object(source, NULL, match_prefix, ds, &opts); } -static int match_hash(unsigned len, const unsigned char *a, const unsigned char *b) -{ - do { - if (*a != *b) - return 0; - a++; - b++; - len -= 2; - } while (len > 1); - if (len) - if ((*a ^ *b) & 0xf0) - return 0; - return 1; -} - -static void unique_in_midx(struct multi_pack_index *m, - struct disambiguate_state *ds) -{ - for (; m; m = m->base_midx) { - uint32_t num, i, first = 0; - const struct object_id *current = NULL; - int len = ds->len > ds->repo->hash_algo->hexsz ? - ds->repo->hash_algo->hexsz : ds->len; - - if (!m->num_objects) - continue; - - num = m->num_objects + m->num_objects_in_base; - - bsearch_one_midx(&ds->bin_pfx, m, &first); - - /* - * At this point, "first" is the location of the lowest - * object with an object name that could match - * "bin_pfx". See if we have 0, 1 or more objects that - * actually match(es). - */ - for (i = first; i < num && !ds->ambiguous; i++) { - struct object_id oid; - current = nth_midxed_object_oid(&oid, m, i); - if (!match_hash(len, ds->bin_pfx.hash, current->hash)) - break; - update_candidates(ds, current); - } - } -} - -static void unique_in_pack(struct packed_git *p, - struct disambiguate_state *ds) -{ - uint32_t num, i, first = 0; - int len = ds->len > ds->repo->hash_algo->hexsz ? - ds->repo->hash_algo->hexsz : ds->len; - - if (p->multi_pack_index) - return; - - if (open_pack_index(p) || !p->num_objects) - return; - - num = p->num_objects; - bsearch_pack(&ds->bin_pfx, p, &first); - - /* - * At this point, "first" is the location of the lowest object - * with an object name that could match "bin_pfx". See if we have - * 0, 1 or more objects that actually match(es). - */ - for (i = first; i < num && !ds->ambiguous; i++) { - struct object_id oid; - nth_packed_object_id(&oid, p, i); - if (!match_hash(len, ds->bin_pfx.hash, oid.hash)) - break; - update_candidates(ds, &oid); - } -} - static void find_short_packed_object(struct disambiguate_state *ds) { + struct odb_for_each_object_options opts = { + .prefix = &ds->bin_pfx, + .prefix_hex_len = ds->len, + }; struct odb_source *source; - struct packed_git *p; /* Skip, unless oids from the storage hash algorithm are wanted */ if (ds->bin_pfx.algo && (&hash_algos[ds->bin_pfx.algo] != ds->repo->hash_algo)) return; odb_prepare_alternates(ds->repo->objects); - for (source = ds->repo->objects->sources; source && !ds->ambiguous; source = source->next) { - struct multi_pack_index *m = get_multi_pack_index(source); - if (m) - unique_in_midx(m, ds); - } + for (source = ds->repo->objects->sources; source; source = source->next) { + struct odb_source_files *files = odb_source_files_downcast(source); - repo_for_each_pack(ds->repo, p) { + packfile_store_for_each_object(files->packed, NULL, match_prefix, ds, &opts); if (ds->ambiguous) break; - unique_in_pack(p, ds); } } diff --git a/packfile.c b/packfile.c index a6f3d2035d44fd..2539a371c13e9a 100644 --- a/packfile.c +++ b/packfile.c @@ -2371,6 +2371,177 @@ static int packfile_store_for_each_object_wrapper(const struct object_id *oid, } } +static int match_hash(unsigned len, const unsigned char *a, const unsigned char *b) +{ + do { + if (*a != *b) + return 0; + a++; + b++; + len -= 2; + } while (len > 1); + if (len) + if ((*a ^ *b) & 0xf0) + return 0; + return 1; +} + +static int for_each_prefixed_object_in_midx( + struct packfile_store *store, + struct multi_pack_index *m, + const struct odb_for_each_object_options *opts, + struct packfile_store_for_each_object_wrapper_data *data) +{ + int ret; + + for (; m; m = m->base_midx) { + uint32_t num, i, first = 0; + int len = opts->prefix_hex_len > m->source->odb->repo->hash_algo->hexsz ? + m->source->odb->repo->hash_algo->hexsz : opts->prefix_hex_len; + + if (!m->num_objects) + continue; + + num = m->num_objects + m->num_objects_in_base; + + bsearch_one_midx(opts->prefix, m, &first); + + /* + * At this point, "first" is the location of the lowest + * object with an object name that could match "opts->prefix". + * See if we have 0, 1 or more objects that actually match(es). + */ + for (i = first; i < num; i++) { + const struct object_id *current = NULL; + struct object_id oid; + + current = nth_midxed_object_oid(&oid, m, i); + + if (!match_hash(len, opts->prefix->hash, current->hash)) + break; + + if (data->request) { + struct object_info oi = *data->request; + + ret = packfile_store_read_object_info(store, current, + &oi, 0); + if (ret) + goto out; + + ret = data->cb(&oid, &oi, data->cb_data); + if (ret) + goto out; + } else { + ret = data->cb(&oid, NULL, data->cb_data); + if (ret) + goto out; + } + } + } + + ret = 0; + +out: + return ret; +} + +static int for_each_prefixed_object_in_pack( + struct packfile_store *store, + struct packed_git *p, + const struct odb_for_each_object_options *opts, + struct packfile_store_for_each_object_wrapper_data *data) +{ + uint32_t num, i, first = 0; + int len = opts->prefix_hex_len > p->repo->hash_algo->hexsz ? + p->repo->hash_algo->hexsz : opts->prefix_hex_len; + int ret; + + num = p->num_objects; + bsearch_pack(opts->prefix, p, &first); + + /* + * At this point, "first" is the location of the lowest object + * with an object name that could match "bin_pfx". See if we have + * 0, 1 or more objects that actually match(es). + */ + for (i = first; i < num; i++) { + struct object_id oid; + + nth_packed_object_id(&oid, p, i); + if (!match_hash(len, opts->prefix->hash, oid.hash)) + break; + + if (data->request) { + struct object_info oi = *data->request; + + ret = packfile_store_read_object_info(store, &oid, &oi, 0); + if (ret) + goto out; + + ret = data->cb(&oid, &oi, data->cb_data); + if (ret) + goto out; + } else { + ret = data->cb(&oid, NULL, data->cb_data); + if (ret) + goto out; + } + } + + ret = 0; + +out: + return ret; +} + +static int packfile_store_for_each_prefixed_object( + struct packfile_store *store, + const struct odb_for_each_object_options *opts, + struct packfile_store_for_each_object_wrapper_data *data) +{ + struct packfile_list_entry *e; + struct multi_pack_index *m; + bool pack_errors = false; + int ret; + + if (opts->flags) + BUG("flags unsupported"); + + store->skip_mru_updates = true; + + m = get_multi_pack_index(store->source); + if (m) { + ret = for_each_prefixed_object_in_midx(store, m, opts, data); + if (ret) + goto out; + } + + for (e = packfile_store_get_packs(store); e; e = e->next) { + if (e->pack->multi_pack_index) + continue; + + if (open_pack_index(e->pack)) { + pack_errors = true; + continue; + } + + if (!e->pack->num_objects) + continue; + + ret = for_each_prefixed_object_in_pack(store, e->pack, opts, data); + if (ret) + goto out; + } + + ret = 0; + +out: + store->skip_mru_updates = false; + if (!ret && pack_errors) + ret = -1; + return ret; +} + int packfile_store_for_each_object(struct packfile_store *store, const struct object_info *request, odb_for_each_object_cb cb, @@ -2386,6 +2557,9 @@ int packfile_store_for_each_object(struct packfile_store *store, struct packfile_list_entry *e; int pack_errors = 0, ret; + if (opts->prefix) + return packfile_store_for_each_prefixed_object(store, opts, &data); + store->skip_mru_updates = true; for (e = packfile_store_get_packs(store); e; e = e->next) { From 28c9254e3b3e1304b5a6c36146cf6fec29f3f5d3 Mon Sep 17 00:00:00 2001 From: Patrick Steinhardt Date: Fri, 20 Mar 2026 08:07:32 +0100 Subject: [PATCH 08/34] object-name: extract function to parse object ID prefixes Extract the logic that parses an object ID prefix into a new function. This function will be used by a second callsite in a subsequent commit. Signed-off-by: Patrick Steinhardt Signed-off-by: Junio C Hamano --- object-name.c | 60 ++++++++++++++++++++++++++++++++------------------- 1 file changed, 38 insertions(+), 22 deletions(-) diff --git a/object-name.c b/object-name.c index ff0de06ff9bd87..fd1b010ab3b35c 100644 --- a/object-name.c +++ b/object-name.c @@ -270,41 +270,57 @@ int set_disambiguate_hint_config(const char *var, const char *value) return error("unknown hint type for '%s': %s", var, value); } +static int parse_oid_prefix(const char *name, int len, + const struct git_hash_algo *algo, + char *hex_out, + struct object_id *oid_out) +{ + for (int i = 0; i < len; i++) { + unsigned char c = name[i]; + unsigned char val; + if (c >= '0' && c <= '9') { + val = c - '0'; + } else if (c >= 'a' && c <= 'f') { + val = c - 'a' + 10; + } else if (c >= 'A' && c <='F') { + val = c - 'A' + 10; + c -= 'A' - 'a'; + } else { + return -1; + } + + if (hex_out) + hex_out[i] = c; + if (oid_out) { + if (!(i & 1)) + val <<= 4; + oid_out->hash[i >> 1] |= val; + } + } + + if (hex_out) + hex_out[len] = '\0'; + if (oid_out) + oid_out->algo = algo ? hash_algo_by_ptr(algo) : GIT_HASH_UNKNOWN; + + return 0; +} + static int init_object_disambiguation(struct repository *r, const char *name, int len, const struct git_hash_algo *algo, struct disambiguate_state *ds) { - int i; - if (len < MINIMUM_ABBREV || len > GIT_MAX_HEXSZ) return -1; memset(ds, 0, sizeof(*ds)); - for (i = 0; i < len ;i++) { - unsigned char c = name[i]; - unsigned char val; - if (c >= '0' && c <= '9') - val = c - '0'; - else if (c >= 'a' && c <= 'f') - val = c - 'a' + 10; - else if (c >= 'A' && c <='F') { - val = c - 'A' + 10; - c -= 'A' - 'a'; - } - else - return -1; - ds->hex_pfx[i] = c; - if (!(i & 1)) - val <<= 4; - ds->bin_pfx.hash[i >> 1] |= val; - } + if (parse_oid_prefix(name, len, algo, ds->hex_pfx, &ds->bin_pfx) < 0) + return -1; ds->len = len; - ds->hex_pfx[len] = '\0'; ds->repo = r; - ds->bin_pfx.algo = algo ? hash_algo_by_ptr(algo) : GIT_HASH_UNKNOWN; odb_prepare_alternates(r->objects); return 0; } From d2612fe59e605102cb422fadbb0cb8bea499daee Mon Sep 17 00:00:00 2001 From: Patrick Steinhardt Date: Fri, 20 Mar 2026 08:07:33 +0100 Subject: [PATCH 09/34] object-name: backend-generic `repo_collect_ambiguous()` The function `repo_collect_ambiguous()` is responsible for collecting objects whose IDs match a specific prefix. The information is then used to inform the user about which objects they could have meant in case a short object ID is ambiguous. The logic to do this uses the object disambiguation infrastructure and calls into backend-specific functions to iterate through loose and packed objects. This isn't really required anymore though: all we want to do is to enumerate objects that have such a prefix and then append those objects to a `struct oid_array`. This can be trivially achieved in a generic way now that `odb_for_each_object()` has learned to yield only objects that match such a prefix. Refactor the code to use the backend-generic infrastructure instead. Signed-off-by: Patrick Steinhardt Signed-off-by: Junio C Hamano --- object-name.c | 19 ++++++++++--------- 1 file changed, 10 insertions(+), 9 deletions(-) diff --git a/object-name.c b/object-name.c index fd1b010ab3b35c..4c3ace150ecdc2 100644 --- a/object-name.c +++ b/object-name.c @@ -448,8 +448,8 @@ static int collect_ambiguous(const struct object_id *oid, void *data) return 0; } -static int repo_collect_ambiguous(struct repository *r UNUSED, - const struct object_id *oid, +static int repo_collect_ambiguous(const struct object_id *oid, + struct object_info *oi UNUSED, void *data) { return collect_ambiguous(oid, data); @@ -586,18 +586,19 @@ int repo_for_each_abbrev(struct repository *r, const char *prefix, const struct git_hash_algo *algo, each_abbrev_fn fn, void *cb_data) { + struct object_id prefix_oid = { 0 }; + struct odb_for_each_object_options opts = { + .prefix = &prefix_oid, + .prefix_hex_len = strlen(prefix), + }; struct oid_array collect = OID_ARRAY_INIT; - struct disambiguate_state ds; int ret; - if (init_object_disambiguation(r, prefix, strlen(prefix), algo, &ds) < 0) + if (parse_oid_prefix(prefix, opts.prefix_hex_len, algo, NULL, &prefix_oid) < 0) return -1; - ds.always_call_fn = 1; - ds.fn = repo_collect_ambiguous; - ds.cb_data = &collect; - find_short_object_filename(&ds); - find_short_packed_object(&ds); + if (odb_for_each_object_ext(r->objects, NULL, repo_collect_ambiguous, &collect, &opts) < 0) + return -1; ret = oid_array_for_each_unique(&collect, fn, cb_data); oid_array_clear(&collect); From eac58debd9f61391a61b832e3cee349a20bd2c4a Mon Sep 17 00:00:00 2001 From: Patrick Steinhardt Date: Fri, 20 Mar 2026 08:07:34 +0100 Subject: [PATCH 10/34] object-name: backend-generic `get_short_oid()` The function `get_short_oid()` takes as input an abbreviated object ID and tries to turn that object ID into the full object ID. This is done by iterating through all objects that have the user-provided prefix. If that yields exactly one object we know that the abbreviated object ID is unambiguous, otherwise it is ambiguous and we print the list of objects that match the prefix. We iterate through all objects with the given prefix by calling both `find_short_packed_object()` and `find_short_object_filename()`, which is of course specific to the "files" backend. But we now have a generic way to iterate through objects with a specific prefix. Refactor the code to use `odb_for_each_object()` instead so that it works with object backends different than the "files" backend. Remove the now-unused `find_short_packed_object()` function. Signed-off-by: Patrick Steinhardt Signed-off-by: Junio C Hamano --- object-name.c | 32 ++++++-------------------------- 1 file changed, 6 insertions(+), 26 deletions(-) diff --git a/object-name.c b/object-name.c index 4c3ace150ecdc2..7a224ab4afc0c6 100644 --- a/object-name.c +++ b/object-name.c @@ -120,28 +120,6 @@ static void find_short_object_filename(struct disambiguate_state *ds) odb_source_loose_for_each_object(source, NULL, match_prefix, ds, &opts); } -static void find_short_packed_object(struct disambiguate_state *ds) -{ - struct odb_for_each_object_options opts = { - .prefix = &ds->bin_pfx, - .prefix_hex_len = ds->len, - }; - struct odb_source *source; - - /* Skip, unless oids from the storage hash algorithm are wanted */ - if (ds->bin_pfx.algo && (&hash_algos[ds->bin_pfx.algo] != ds->repo->hash_algo)) - return; - - odb_prepare_alternates(ds->repo->objects); - for (source = ds->repo->objects->sources; source; source = source->next) { - struct odb_source_files *files = odb_source_files_downcast(source); - - packfile_store_for_each_object(files->packed, NULL, match_prefix, ds, &opts); - if (ds->ambiguous) - break; - } -} - static int finish_object_disambiguation(struct disambiguate_state *ds, struct object_id *oid) { @@ -499,6 +477,7 @@ static enum get_oid_result get_short_oid(struct repository *r, struct object_id *oid, unsigned flags) { + struct odb_for_each_object_options opts = { 0 }; int status; struct disambiguate_state ds; int quietly = !!(flags & GET_OID_QUIETLY); @@ -526,8 +505,10 @@ static enum get_oid_result get_short_oid(struct repository *r, else ds.fn = default_disambiguate_hint; - find_short_object_filename(&ds); - find_short_packed_object(&ds); + opts.prefix = &ds.bin_pfx; + opts.prefix_hex_len = ds.len; + + odb_for_each_object_ext(r->objects, NULL, match_prefix, &ds, &opts); status = finish_object_disambiguation(&ds, oid); /* @@ -537,8 +518,7 @@ static enum get_oid_result get_short_oid(struct repository *r, */ if (status == MISSING_OBJECT) { odb_reprepare(r->objects); - find_short_object_filename(&ds); - find_short_packed_object(&ds); + odb_for_each_object_ext(r->objects, NULL, match_prefix, &ds, &opts); status = finish_object_disambiguation(&ds, oid); } From e9b7caa1b14bc1fe825b216941a0655d6afdffe5 Mon Sep 17 00:00:00 2001 From: Patrick Steinhardt Date: Fri, 20 Mar 2026 08:07:35 +0100 Subject: [PATCH 11/34] object-name: merge `update_candidates()` and `match_prefix()` There's only a single callsite for `match_prefix()`, and that function is a rather trivial wrapper of `update_candidates()`. Merge these two functions into a single `update_disambiguate_state()` function. Signed-off-by: Patrick Steinhardt Signed-off-by: Junio C Hamano --- object-name.c | 34 ++++++++++++++++++---------------- 1 file changed, 18 insertions(+), 16 deletions(-) diff --git a/object-name.c b/object-name.c index 7a224ab4afc0c6..f55a332032fc20 100644 --- a/object-name.c +++ b/object-name.c @@ -51,27 +51,31 @@ struct disambiguate_state { unsigned always_call_fn:1; }; -static void update_candidates(struct disambiguate_state *ds, const struct object_id *current) +static int update_disambiguate_state(const struct object_id *current, + struct object_info *oi UNUSED, + void *cb_data) { + struct disambiguate_state *ds = cb_data; + /* The hash algorithm of current has already been filtered */ if (ds->always_call_fn) { ds->ambiguous = ds->fn(ds->repo, current, ds->cb_data) ? 1 : 0; - return; + return ds->ambiguous; } if (!ds->candidate_exists) { /* this is the first candidate */ oidcpy(&ds->candidate, current); ds->candidate_exists = 1; - return; + return 0; } else if (oideq(&ds->candidate, current)) { /* the same as what we already have seen */ - return; + return 0; } if (!ds->fn) { /* cannot disambiguate between ds->candidate and current */ ds->ambiguous = 1; - return; + return ds->ambiguous; } if (!ds->candidate_checked) { @@ -84,7 +88,7 @@ static void update_candidates(struct disambiguate_state *ds, const struct object /* discard the candidate; we know it does not satisfy fn */ oidcpy(&ds->candidate, current); ds->candidate_checked = 0; - return; + return 0; } /* if we reach this point, we know ds->candidate satisfies fn */ @@ -95,17 +99,12 @@ static void update_candidates(struct disambiguate_state *ds, const struct object */ ds->candidate_ok = 0; ds->ambiguous = 1; + return ds->ambiguous; } /* otherwise, current can be discarded and candidate is still good */ -} -static int match_prefix(const struct object_id *oid, struct object_info *oi UNUSED, void *arg) -{ - struct disambiguate_state *ds = arg; - /* no need to call match_hash, oidtree_each did prefix match */ - update_candidates(ds, oid); - return ds->ambiguous; + return 0; } static void find_short_object_filename(struct disambiguate_state *ds) @@ -117,7 +116,8 @@ static void find_short_object_filename(struct disambiguate_state *ds) struct odb_source *source; for (source = ds->repo->objects->sources; source && !ds->ambiguous; source = source->next) - odb_source_loose_for_each_object(source, NULL, match_prefix, ds, &opts); + odb_source_loose_for_each_object(source, NULL, update_disambiguate_state, + ds, &opts); } static int finish_object_disambiguation(struct disambiguate_state *ds, @@ -508,7 +508,8 @@ static enum get_oid_result get_short_oid(struct repository *r, opts.prefix = &ds.bin_pfx; opts.prefix_hex_len = ds.len; - odb_for_each_object_ext(r->objects, NULL, match_prefix, &ds, &opts); + odb_for_each_object_ext(r->objects, NULL, update_disambiguate_state, + &ds, &opts); status = finish_object_disambiguation(&ds, oid); /* @@ -518,7 +519,8 @@ static enum get_oid_result get_short_oid(struct repository *r, */ if (status == MISSING_OBJECT) { odb_reprepare(r->objects); - odb_for_each_object_ext(r->objects, NULL, match_prefix, &ds, &opts); + odb_for_each_object_ext(r->objects, NULL, update_disambiguate_state, + &ds, &opts); status = finish_object_disambiguation(&ds, oid); } From 67f47eab61c3a2c14f2d0351c3844f12fbd95dd2 Mon Sep 17 00:00:00 2001 From: Patrick Steinhardt Date: Fri, 20 Mar 2026 08:07:36 +0100 Subject: [PATCH 12/34] object-name: abbreviate loose object names without `disambiguate_state` The function `find_short_object_filename()` takes an object ID and computes the minimum required object name length to make it unique. This is done by reusing the object disambiguation infrastructure, where we iterate through every loose object and then update the disambiguate state one by one. Ultimately, we don't care about the disambiguate state though. It is used because this infrastructure knows how to enumerate only those objects that match a given prefix. But now that we have extended the `odb_for_each_object()` function to do this for us we have an easier way to do this. Consequently, we really only use the disambiguate state now to propagate `struct min_abbrev_data`. Refactor the code and drop this indirection so that we use `struct min_abbrev_data` directly. This also allows us to drop some now-unused logic from the disambiguate infrastructure. Signed-off-by: Patrick Steinhardt Signed-off-by: Junio C Hamano --- object-name.c | 54 +++++++++++++++++++-------------------------------- 1 file changed, 20 insertions(+), 34 deletions(-) diff --git a/object-name.c b/object-name.c index f55a332032fc20..d82fb49f39de44 100644 --- a/object-name.c +++ b/object-name.c @@ -48,7 +48,6 @@ struct disambiguate_state { unsigned candidate_ok:1; unsigned disambiguate_fn_used:1; unsigned ambiguous:1; - unsigned always_call_fn:1; }; static int update_disambiguate_state(const struct object_id *current, @@ -58,10 +57,6 @@ static int update_disambiguate_state(const struct object_id *current, struct disambiguate_state *ds = cb_data; /* The hash algorithm of current has already been filtered */ - if (ds->always_call_fn) { - ds->ambiguous = ds->fn(ds->repo, current, ds->cb_data) ? 1 : 0; - return ds->ambiguous; - } if (!ds->candidate_exists) { /* this is the first candidate */ oidcpy(&ds->candidate, current); @@ -107,19 +102,6 @@ static int update_disambiguate_state(const struct object_id *current, return 0; } -static void find_short_object_filename(struct disambiguate_state *ds) -{ - struct odb_for_each_object_options opts = { - .prefix = &ds->bin_pfx, - .prefix_hex_len = ds->len, - }; - struct odb_source *source; - - for (source = ds->repo->objects->sources; source && !ds->ambiguous; source = source->next) - odb_source_loose_for_each_object(source, NULL, update_disambiguate_state, - ds, &opts); -} - static int finish_object_disambiguation(struct disambiguate_state *ds, struct object_id *oid) { @@ -632,11 +614,26 @@ static int extend_abbrev_len(const struct object_id *oid, return 0; } -static int repo_extend_abbrev_len(struct repository *r UNUSED, - const struct object_id *oid, - void *cb_data) +static int extend_abbrev_len_loose(const struct object_id *oid, + struct object_info *oi UNUSED, + void *cb_data) { - return extend_abbrev_len(oid, cb_data); + struct min_abbrev_data *data = cb_data; + extend_abbrev_len(oid, data); + return 0; +} + +static void find_abbrev_len_loose(struct min_abbrev_data *mad) +{ + struct odb_for_each_object_options opts = { + .prefix = mad->oid, + .prefix_hex_len = mad->cur_len, + }; + struct odb_source *source; + + for (source = mad->repo->objects->sources; source; source = source->next) + odb_source_loose_for_each_object(source, NULL, extend_abbrev_len_loose, + mad, &opts); } static void find_abbrev_len_for_midx(struct multi_pack_index *m, @@ -752,9 +749,7 @@ int repo_find_unique_abbrev_r(struct repository *r, char *hex, { const struct git_hash_algo *algo = oid->algo ? &hash_algos[oid->algo] : r->hash_algo; - struct disambiguate_state ds; struct min_abbrev_data mad; - struct object_id oid_ret; const unsigned hexsz = algo->hexsz; if (len < 0) { @@ -794,16 +789,7 @@ int repo_find_unique_abbrev_r(struct repository *r, char *hex, mad.oid = oid; find_abbrev_len_packed(&mad); - - if (init_object_disambiguation(r, hex, mad.cur_len, algo, &ds) < 0) - return -1; - - ds.fn = repo_extend_abbrev_len; - ds.always_call_fn = 1; - ds.cb_data = (void *)&mad; - - find_short_object_filename(&ds); - (void)finish_object_disambiguation(&ds, &oid_ret); + find_abbrev_len_loose(&mad); hex[mad.cur_len] = 0; return mad.cur_len; From 1a2842d1b1e91d5e068d231cf4df4e783bdf9205 Mon Sep 17 00:00:00 2001 From: Patrick Steinhardt Date: Fri, 20 Mar 2026 08:07:37 +0100 Subject: [PATCH 13/34] object-name: simplify computing common prefixes The function `extend_abbrev_len()` computes the length of common hex characters between two object IDs. This is done by: - Making the caller provide the `hex` string for the needle object ID. - Comparing every hex position of the haystack object ID with `get_hex_char_from_oid()`. Turning the binary representation into hex first is roundabout though: we can simply compare the binary representation and give some special attention to the final nibble. Introduce a new function `oid_common_prefix_hexlen()` that does exactly this and refactor the code to use the new function. This allows us to drop the `struct min_abbrev_data::hex` field. Furthermore, this function will be used in by some other callsites in subsequent commits. Signed-off-by: Patrick Steinhardt Signed-off-by: Junio C Hamano --- hash.c | 18 ++++++++++++++++++ hash.h | 3 +++ object-name.c | 23 +++-------------------- 3 files changed, 24 insertions(+), 20 deletions(-) diff --git a/hash.c b/hash.c index 553f2008eaab3e..e925b9754e04fc 100644 --- a/hash.c +++ b/hash.c @@ -317,3 +317,21 @@ const struct git_hash_algo *unsafe_hash_algo(const struct git_hash_algo *algop) /* Otherwise use the default one. */ return algop; } + +unsigned oid_common_prefix_hexlen(const struct object_id *a, + const struct object_id *b) +{ + unsigned rawsz = hash_algos[a->algo].rawsz; + + for (unsigned i = 0; i < rawsz; i++) { + if (a->hash[i] == b->hash[i]) + continue; + + if ((a->hash[i] ^ b->hash[i]) & 0xf0) + return i * 2; + else + return i * 2 + 1; + } + + return rawsz * 2; +} diff --git a/hash.h b/hash.h index d51efce1d3c792..c082a53c9ac93d 100644 --- a/hash.h +++ b/hash.h @@ -396,6 +396,9 @@ static inline int oideq(const struct object_id *oid1, const struct object_id *oi return !memcmp(oid1->hash, oid2->hash, GIT_MAX_RAWSZ); } +unsigned oid_common_prefix_hexlen(const struct object_id *a, + const struct object_id *b); + static inline void oidcpy(struct object_id *dst, const struct object_id *src) { memcpy(dst->hash, src->hash, GIT_MAX_RAWSZ); diff --git a/object-name.c b/object-name.c index d82fb49f39de44..32e9c23e402ab9 100644 --- a/object-name.c +++ b/object-name.c @@ -585,32 +585,16 @@ static unsigned msb(unsigned long val) struct min_abbrev_data { unsigned int init_len; unsigned int cur_len; - char *hex; struct repository *repo; const struct object_id *oid; }; -static inline char get_hex_char_from_oid(const struct object_id *oid, - unsigned int pos) -{ - static const char hex[] = "0123456789abcdef"; - - if ((pos & 1) == 0) - return hex[oid->hash[pos >> 1] >> 4]; - else - return hex[oid->hash[pos >> 1] & 0xf]; -} - static int extend_abbrev_len(const struct object_id *oid, struct min_abbrev_data *mad) { - unsigned int i = mad->init_len; - while (mad->hex[i] && mad->hex[i] == get_hex_char_from_oid(oid, i)) - i++; - - if (mad->hex[i] && i >= mad->cur_len) - mad->cur_len = i + 1; - + unsigned len = oid_common_prefix_hexlen(oid, mad->oid); + if (len != hash_algos[oid->algo].hexsz && len >= mad->cur_len) + mad->cur_len = len + 1; return 0; } @@ -785,7 +769,6 @@ int repo_find_unique_abbrev_r(struct repository *r, char *hex, mad.repo = r; mad.init_len = len; mad.cur_len = len; - mad.hex = hex; mad.oid = oid; find_abbrev_len_packed(&mad); From ab3ab1038dd38d2be62e3bacf39a3248929a7a98 Mon Sep 17 00:00:00 2001 From: Patrick Steinhardt Date: Fri, 20 Mar 2026 08:07:38 +0100 Subject: [PATCH 14/34] object-name: move logic to compute loose abbreviation length The function `repo_find_unique_abbrev_r()` takes as input an object ID as well as a minimum object ID length and returns the minimum required prefix to make the object ID unique. The logic that computes the abbreviation length for loose objects is deeply tied to the loose object storage format. As such, it would fail in case a different object storage format was used. Prepare for making this logic generic to the backend by moving the logic into a new `odb_source_loose_find_abbrev_len()` function that is part of "object-file.c". Signed-off-by: Patrick Steinhardt Signed-off-by: Junio C Hamano --- object-file.c | 38 ++++++++++++++++++++++++++++++++++++++ object-file.h | 12 ++++++++++++ object-name.c | 27 ++++----------------------- 3 files changed, 54 insertions(+), 23 deletions(-) diff --git a/object-file.c b/object-file.c index 13732f324f5bcc..4f77ce0982625d 100644 --- a/object-file.c +++ b/object-file.c @@ -1952,6 +1952,44 @@ int odb_source_loose_count_objects(struct odb_source *source, return ret; } +struct find_abbrev_len_data { + const struct object_id *oid; + unsigned len; +}; + +static int find_abbrev_len_cb(const struct object_id *oid, + struct object_info *oi UNUSED, + void *cb_data) +{ + struct find_abbrev_len_data *data = cb_data; + unsigned len = oid_common_prefix_hexlen(oid, data->oid); + if (len != hash_algos[oid->algo].hexsz && len >= data->len) + data->len = len + 1; + return 0; +} + +int odb_source_loose_find_abbrev_len(struct odb_source *source, + const struct object_id *oid, + unsigned min_len, + unsigned *out) +{ + struct odb_for_each_object_options opts = { + .prefix = oid, + .prefix_hex_len = min_len, + }; + struct find_abbrev_len_data data = { + .oid = oid, + .len = min_len, + }; + int ret; + + ret = odb_source_loose_for_each_object(source, NULL, find_abbrev_len_cb, + &data, &opts); + *out = data.len; + + return ret; +} + static int append_loose_object(const struct object_id *oid, const char *path UNUSED, void *data) diff --git a/object-file.h b/object-file.h index f11ad58f6c0874..3686f182e4deb0 100644 --- a/object-file.h +++ b/object-file.h @@ -146,6 +146,18 @@ int odb_source_loose_count_objects(struct odb_source *source, enum odb_count_objects_flags flags, unsigned long *out); +/* + * Find the shortest unique prefix for the given object ID, where `min_len` is + * the minimum length that the prefix should have. + * + * Returns 0 on success, in which case the computed length will be written to + * `out`. Otherwise, a negative error code is returned. + */ +int odb_source_loose_find_abbrev_len(struct odb_source *source, + const struct object_id *oid, + unsigned min_len, + unsigned *out); + /** * format_object_header() is a thin wrapper around s xsnprintf() that * writes the initial " " part of the loose object diff --git a/object-name.c b/object-name.c index 32e9c23e402ab9..4e21dbfa97bf1c 100644 --- a/object-name.c +++ b/object-name.c @@ -598,28 +598,6 @@ static int extend_abbrev_len(const struct object_id *oid, return 0; } -static int extend_abbrev_len_loose(const struct object_id *oid, - struct object_info *oi UNUSED, - void *cb_data) -{ - struct min_abbrev_data *data = cb_data; - extend_abbrev_len(oid, data); - return 0; -} - -static void find_abbrev_len_loose(struct min_abbrev_data *mad) -{ - struct odb_for_each_object_options opts = { - .prefix = mad->oid, - .prefix_hex_len = mad->cur_len, - }; - struct odb_source *source; - - for (source = mad->repo->objects->sources; source; source = source->next) - odb_source_loose_for_each_object(source, NULL, extend_abbrev_len_loose, - mad, &opts); -} - static void find_abbrev_len_for_midx(struct multi_pack_index *m, struct min_abbrev_data *mad) { @@ -772,7 +750,10 @@ int repo_find_unique_abbrev_r(struct repository *r, char *hex, mad.oid = oid; find_abbrev_len_packed(&mad); - find_abbrev_len_loose(&mad); + + odb_prepare_alternates(r->objects); + for (struct odb_source *s = r->objects->sources; s; s = s->next) + odb_source_loose_find_abbrev_len(s, mad.oid, mad.cur_len, &mad.cur_len); hex[mad.cur_len] = 0; return mad.cur_len; From 6c2ede6e4abed754bb5891c2904212c05efcfb11 Mon Sep 17 00:00:00 2001 From: Patrick Steinhardt Date: Fri, 20 Mar 2026 08:07:39 +0100 Subject: [PATCH 15/34] object-file: move logic to compute packed abbreviation length Same as the preceding commit, move the logic that computes the minimum required prefix length to make a given object ID unique for the packfile store into a new function `packfile_store_find_abbrev_len()` that is part of "packfile.c". This prepares for making the logic fully generic via pluggable object databases. Signed-off-by: Patrick Steinhardt Signed-off-by: Junio C Hamano --- object-name.c | 135 +++++--------------------------------------------- packfile.c | 111 +++++++++++++++++++++++++++++++++++++++++ packfile.h | 5 ++ 3 files changed, 128 insertions(+), 123 deletions(-) diff --git a/object-name.c b/object-name.c index 4e21dbfa97bf1c..bb2294a1932a8d 100644 --- a/object-name.c +++ b/object-name.c @@ -582,115 +582,6 @@ static unsigned msb(unsigned long val) return r; } -struct min_abbrev_data { - unsigned int init_len; - unsigned int cur_len; - struct repository *repo; - const struct object_id *oid; -}; - -static int extend_abbrev_len(const struct object_id *oid, - struct min_abbrev_data *mad) -{ - unsigned len = oid_common_prefix_hexlen(oid, mad->oid); - if (len != hash_algos[oid->algo].hexsz && len >= mad->cur_len) - mad->cur_len = len + 1; - return 0; -} - -static void find_abbrev_len_for_midx(struct multi_pack_index *m, - struct min_abbrev_data *mad) -{ - for (; m; m = m->base_midx) { - int match = 0; - uint32_t num, first = 0; - struct object_id oid; - const struct object_id *mad_oid; - - if (!m->num_objects) - continue; - - num = m->num_objects + m->num_objects_in_base; - mad_oid = mad->oid; - match = bsearch_one_midx(mad_oid, m, &first); - - /* - * first is now the position in the packfile where we - * would insert mad->hash if it does not exist (or the - * position of mad->hash if it does exist). Hence, we - * consider a maximum of two objects nearby for the - * abbreviation length. - */ - mad->init_len = 0; - if (!match) { - if (nth_midxed_object_oid(&oid, m, first)) - extend_abbrev_len(&oid, mad); - } else if (first < num - 1) { - if (nth_midxed_object_oid(&oid, m, first + 1)) - extend_abbrev_len(&oid, mad); - } - if (first > 0) { - if (nth_midxed_object_oid(&oid, m, first - 1)) - extend_abbrev_len(&oid, mad); - } - mad->init_len = mad->cur_len; - } -} - -static void find_abbrev_len_for_pack(struct packed_git *p, - struct min_abbrev_data *mad) -{ - int match = 0; - uint32_t num, first = 0; - struct object_id oid; - const struct object_id *mad_oid; - - if (p->multi_pack_index) - return; - - if (open_pack_index(p) || !p->num_objects) - return; - - num = p->num_objects; - mad_oid = mad->oid; - match = bsearch_pack(mad_oid, p, &first); - - /* - * first is now the position in the packfile where we would insert - * mad->hash if it does not exist (or the position of mad->hash if - * it does exist). Hence, we consider a maximum of two objects - * nearby for the abbreviation length. - */ - mad->init_len = 0; - if (!match) { - if (!nth_packed_object_id(&oid, p, first)) - extend_abbrev_len(&oid, mad); - } else if (first < num - 1) { - if (!nth_packed_object_id(&oid, p, first + 1)) - extend_abbrev_len(&oid, mad); - } - if (first > 0) { - if (!nth_packed_object_id(&oid, p, first - 1)) - extend_abbrev_len(&oid, mad); - } - mad->init_len = mad->cur_len; -} - -static void find_abbrev_len_packed(struct min_abbrev_data *mad) -{ - struct packed_git *p; - - odb_prepare_alternates(mad->repo->objects); - for (struct odb_source *source = mad->repo->objects->sources; source; source = source->next) { - struct multi_pack_index *m = get_multi_pack_index(source); - if (m) - find_abbrev_len_for_midx(m, mad); - } - - repo_for_each_pack(mad->repo, p) - find_abbrev_len_for_pack(p, mad); -} - void strbuf_repo_add_unique_abbrev(struct strbuf *sb, struct repository *repo, const struct object_id *oid, int abbrev_len) { @@ -707,14 +598,14 @@ void strbuf_add_unique_abbrev(struct strbuf *sb, const struct object_id *oid, } int repo_find_unique_abbrev_r(struct repository *r, char *hex, - const struct object_id *oid, int len) + const struct object_id *oid, int min_len) { const struct git_hash_algo *algo = oid->algo ? &hash_algos[oid->algo] : r->hash_algo; - struct min_abbrev_data mad; const unsigned hexsz = algo->hexsz; + unsigned len; - if (len < 0) { + if (min_len < 0) { unsigned long count; if (odb_count_objects(r->objects, ODB_COUNT_OBJECTS_APPROXIMATE, &count) < 0) @@ -738,25 +629,23 @@ int repo_find_unique_abbrev_r(struct repository *r, char *hex, */ if (len < FALLBACK_DEFAULT_ABBREV) len = FALLBACK_DEFAULT_ABBREV; + } else { + len = min_len; } oid_to_hex_r(hex, oid); if (len >= hexsz || !len) return hexsz; - mad.repo = r; - mad.init_len = len; - mad.cur_len = len; - mad.oid = oid; - - find_abbrev_len_packed(&mad); - odb_prepare_alternates(r->objects); - for (struct odb_source *s = r->objects->sources; s; s = s->next) - odb_source_loose_find_abbrev_len(s, mad.oid, mad.cur_len, &mad.cur_len); + for (struct odb_source *s = r->objects->sources; s; s = s->next) { + struct odb_source_files *files = odb_source_files_downcast(s); + packfile_store_find_abbrev_len(files->packed, oid, len, &len); + odb_source_loose_find_abbrev_len(s, oid, len, &len); + } - hex[mad.cur_len] = 0; - return mad.cur_len; + hex[len] = 0; + return len; } const char *repo_find_unique_abbrev(struct repository *r, diff --git a/packfile.c b/packfile.c index 2539a371c13e9a..ee9c7ea1d142ec 100644 --- a/packfile.c +++ b/packfile.c @@ -2597,6 +2597,117 @@ int packfile_store_for_each_object(struct packfile_store *store, return ret; } +static int extend_abbrev_len(const struct object_id *a, + const struct object_id *b, + unsigned *out) +{ + unsigned len = oid_common_prefix_hexlen(a, b); + if (len != hash_algos[a->algo].hexsz && len >= *out) + *out = len + 1; + return 0; +} + +static void find_abbrev_len_for_midx(struct multi_pack_index *m, + const struct object_id *oid, + unsigned min_len, + unsigned *out) +{ + unsigned len = min_len; + + for (; m; m = m->base_midx) { + int match = 0; + uint32_t num, first = 0; + struct object_id found_oid; + + if (!m->num_objects) + continue; + + num = m->num_objects + m->num_objects_in_base; + match = bsearch_one_midx(oid, m, &first); + + /* + * first is now the position in the packfile where we + * would insert the object ID if it does not exist (or the + * position of the object ID if it does exist). Hence, we + * consider a maximum of two objects nearby for the + * abbreviation length. + */ + + if (!match) { + if (nth_midxed_object_oid(&found_oid, m, first)) + extend_abbrev_len(&found_oid, oid, &len); + } else if (first < num - 1) { + if (nth_midxed_object_oid(&found_oid, m, first + 1)) + extend_abbrev_len(&found_oid, oid, &len); + } + if (first > 0) { + if (nth_midxed_object_oid(&found_oid, m, first - 1)) + extend_abbrev_len(&found_oid, oid, &len); + } + } + + *out = len; +} + +static void find_abbrev_len_for_pack(struct packed_git *p, + const struct object_id *oid, + unsigned min_len, + unsigned *out) +{ + int match; + uint32_t num, first = 0; + struct object_id found_oid; + unsigned len = min_len; + + num = p->num_objects; + match = bsearch_pack(oid, p, &first); + + /* + * first is now the position in the packfile where we would insert + * the object ID if it does not exist (or the position of mad->hash if + * it does exist). Hence, we consider a maximum of two objects + * nearby for the abbreviation length. + */ + if (!match) { + if (!nth_packed_object_id(&found_oid, p, first)) + extend_abbrev_len(&found_oid, oid, &len); + } else if (first < num - 1) { + if (!nth_packed_object_id(&found_oid, p, first + 1)) + extend_abbrev_len(&found_oid, oid, &len); + } + if (first > 0) { + if (!nth_packed_object_id(&found_oid, p, first - 1)) + extend_abbrev_len(&found_oid, oid, &len); + } + + *out = len; +} + +int packfile_store_find_abbrev_len(struct packfile_store *store, + const struct object_id *oid, + unsigned min_len, + unsigned *out) +{ + struct packfile_list_entry *e; + struct multi_pack_index *m; + + m = get_multi_pack_index(store->source); + if (m) + find_abbrev_len_for_midx(m, oid, min_len, &min_len); + + for (e = packfile_store_get_packs(store); e; e = e->next) { + if (e->pack->multi_pack_index) + continue; + if (open_pack_index(e->pack) || !e->pack->num_objects) + continue; + + find_abbrev_len_for_pack(e->pack, oid, min_len, &min_len); + } + + *out = min_len; + return 0; +} + struct add_promisor_object_data { struct repository *repo; struct oidset *set; diff --git a/packfile.h b/packfile.h index fa41dfda389327..45b35973f07b37 100644 --- a/packfile.h +++ b/packfile.h @@ -369,6 +369,11 @@ int packfile_store_for_each_object(struct packfile_store *store, void *cb_data, const struct odb_for_each_object_options *opts); +int packfile_store_find_abbrev_len(struct packfile_store *store, + const struct object_id *oid, + unsigned min_len, + unsigned *out); + /* A hook to report invalid files in pack directory */ #define PACKDIR_FILE_PACK 1 #define PACKDIR_FILE_IDX 2 From 83869e15fa9ef3b0ea2adbfe2fe68a309f95b856 Mon Sep 17 00:00:00 2001 From: Patrick Steinhardt Date: Fri, 20 Mar 2026 08:07:40 +0100 Subject: [PATCH 16/34] odb: introduce generic `odb_find_abbrev_len()` Introduce a new generic `odb_find_abbrev_len()` function as well as source-specific callback functions. This makes the logic to compute the required prefix length to make a given object unique fully pluggable. Signed-off-by: Patrick Steinhardt Signed-off-by: Junio C Hamano --- object-name.c | 57 +++--------------------------------- odb.c | 73 ++++++++++++++++++++++++++++++++++++++++++++++ odb.h | 16 ++++++++++ odb/source-files.c | 25 ++++++++++++++++ odb/source.h | 24 +++++++++++++++ 5 files changed, 142 insertions(+), 53 deletions(-) diff --git a/object-name.c b/object-name.c index bb2294a1932a8d..f6e1f29e1fee65 100644 --- a/object-name.c +++ b/object-name.c @@ -15,10 +15,9 @@ #include "refs.h" #include "remote.h" #include "dir.h" +#include "odb.h" #include "oid-array.h" -#include "packfile.h" #include "pretty.h" -#include "object-file.h" #include "read-cache-ll.h" #include "repo-settings.h" #include "repository.h" @@ -569,19 +568,6 @@ int repo_for_each_abbrev(struct repository *r, const char *prefix, return ret; } -/* - * Return the slot of the most-significant bit set in "val". There are various - * ways to do this quickly with fls() or __builtin_clzl(), but speed is - * probably not a big deal here. - */ -static unsigned msb(unsigned long val) -{ - unsigned r = 0; - while (val >>= 1) - r++; - return r; -} - void strbuf_repo_add_unique_abbrev(struct strbuf *sb, struct repository *repo, const struct object_id *oid, int abbrev_len) { @@ -602,49 +588,14 @@ int repo_find_unique_abbrev_r(struct repository *r, char *hex, { const struct git_hash_algo *algo = oid->algo ? &hash_algos[oid->algo] : r->hash_algo; - const unsigned hexsz = algo->hexsz; unsigned len; - if (min_len < 0) { - unsigned long count; - - if (odb_count_objects(r->objects, ODB_COUNT_OBJECTS_APPROXIMATE, &count) < 0) - count = 0; - - /* - * Add one because the MSB only tells us the highest bit set, - * not including the value of all the _other_ bits (so "15" - * is only one off of 2^4, but the MSB is the 3rd bit. - */ - len = msb(count) + 1; - /* - * We now know we have on the order of 2^len objects, which - * expects a collision at 2^(len/2). But we also care about hex - * chars, not bits, and there are 4 bits per hex. So all - * together we need to divide by 2 and round up. - */ - len = DIV_ROUND_UP(len, 2); - /* - * For very small repos, we stick with our regular fallback. - */ - if (len < FALLBACK_DEFAULT_ABBREV) - len = FALLBACK_DEFAULT_ABBREV; - } else { - len = min_len; - } + if (odb_find_abbrev_len(r->objects, oid, min_len, &len) < 0) + len = algo->hexsz; oid_to_hex_r(hex, oid); - if (len >= hexsz || !len) - return hexsz; - - odb_prepare_alternates(r->objects); - for (struct odb_source *s = r->objects->sources; s; s = s->next) { - struct odb_source_files *files = odb_source_files_downcast(s); - packfile_store_find_abbrev_len(files->packed, oid, len, &len); - odb_source_loose_find_abbrev_len(s, oid, len, &len); - } - hex[len] = 0; + return len; } diff --git a/odb.c b/odb.c index 3019957b8747a9..3f94a53df157e1 100644 --- a/odb.c +++ b/odb.c @@ -12,6 +12,7 @@ #include "midx.h" #include "object-file-convert.h" #include "object-file.h" +#include "object-name.h" #include "odb.h" #include "packfile.h" #include "path.h" @@ -964,6 +965,78 @@ int odb_count_objects(struct object_database *odb, return ret; } +/* + * Return the slot of the most-significant bit set in "val". There are various + * ways to do this quickly with fls() or __builtin_clzl(), but speed is + * probably not a big deal here. + */ +static unsigned msb(unsigned long val) +{ + unsigned r = 0; + while (val >>= 1) + r++; + return r; +} + +int odb_find_abbrev_len(struct object_database *odb, + const struct object_id *oid, + int min_length, + unsigned *out) +{ + const struct git_hash_algo *algo = + oid->algo ? &hash_algos[oid->algo] : odb->repo->hash_algo; + const unsigned hexsz = algo->hexsz; + unsigned len; + int ret; + + if (min_length < 0) { + unsigned long count; + + if (odb_count_objects(odb, ODB_COUNT_OBJECTS_APPROXIMATE, &count) < 0) + count = 0; + + /* + * Add one because the MSB only tells us the highest bit set, + * not including the value of all the _other_ bits (so "15" + * is only one off of 2^4, but the MSB is the 3rd bit. + */ + len = msb(count) + 1; + /* + * We now know we have on the order of 2^len objects, which + * expects a collision at 2^(len/2). But we also care about hex + * chars, not bits, and there are 4 bits per hex. So all + * together we need to divide by 2 and round up. + */ + len = DIV_ROUND_UP(len, 2); + /* + * For very small repos, we stick with our regular fallback. + */ + if (len < FALLBACK_DEFAULT_ABBREV) + len = FALLBACK_DEFAULT_ABBREV; + } else { + len = min_length; + } + + if (len >= hexsz || !len) { + *out = hexsz; + ret = 0; + goto out; + } + + odb_prepare_alternates(odb); + for (struct odb_source *source = odb->sources; source; source = source->next) { + ret = odb_source_find_abbrev_len(source, oid, len, &len); + if (ret) + goto out; + } + + ret = 0; + *out = len; + +out: + return ret; +} + void odb_assert_oid_type(struct object_database *odb, const struct object_id *oid, enum object_type expect) { diff --git a/odb.h b/odb.h index e80fd8f7abd534..984bafca9d652b 100644 --- a/odb.h +++ b/odb.h @@ -545,6 +545,22 @@ int odb_count_objects(struct object_database *odb, enum odb_count_objects_flags flags, unsigned long *out); +/* + * Given an object ID, find the minimum required length required to make the + * object ID unique across the whole object database. + * + * The `min_len` determines the minimum abbreviated length that'll be returned + * by this function. If `min_len < 0`, then the function will set a sensible + * default minimum abbreviation length. + * + * Returns 0 on success, a negative error code otherwise. The computed length + * will be assigned to `*out`. + */ +int odb_find_abbrev_len(struct object_database *odb, + const struct object_id *oid, + int min_len, + unsigned *out); + enum { /* * By default, `odb_write_object()` does not actually write anything diff --git a/odb/source-files.c b/odb/source-files.c index e90bb689bb01e6..76797569de4280 100644 --- a/odb/source-files.c +++ b/odb/source-files.c @@ -122,6 +122,30 @@ static int odb_source_files_count_objects(struct odb_source *source, return ret; } +static int odb_source_files_find_abbrev_len(struct odb_source *source, + const struct object_id *oid, + unsigned min_len, + unsigned *out) +{ + struct odb_source_files *files = odb_source_files_downcast(source); + unsigned len = min_len; + int ret; + + ret = packfile_store_find_abbrev_len(files->packed, oid, len, &len); + if (ret < 0) + goto out; + + ret = odb_source_loose_find_abbrev_len(source, oid, len, &len); + if (ret < 0) + goto out; + + *out = len; + ret = 0; + +out: + return ret; +} + static int odb_source_files_freshen_object(struct odb_source *source, const struct object_id *oid) { @@ -250,6 +274,7 @@ struct odb_source_files *odb_source_files_new(struct object_database *odb, files->base.read_object_stream = odb_source_files_read_object_stream; files->base.for_each_object = odb_source_files_for_each_object; files->base.count_objects = odb_source_files_count_objects; + files->base.find_abbrev_len = odb_source_files_find_abbrev_len; files->base.freshen_object = odb_source_files_freshen_object; files->base.write_object = odb_source_files_write_object; files->base.write_object_stream = odb_source_files_write_object_stream; diff --git a/odb/source.h b/odb/source.h index ee5d6ed530d519..a9d7d0b96fde72 100644 --- a/odb/source.h +++ b/odb/source.h @@ -157,6 +157,18 @@ struct odb_source { enum odb_count_objects_flags flags, unsigned long *out); + /* + * This callback is expected to find the minimum required length to + * make the given object ID unique. + * + * The callback is expected to return a negative error code in case it + * failed, 0 otherwise. + */ + int (*find_abbrev_len)(struct odb_source *source, + const struct object_id *oid, + unsigned min_length, + unsigned *out); + /* * This callback is expected to freshen the given object so that its * last access time is set to the current time. This is used to ensure @@ -360,6 +372,18 @@ static inline int odb_source_count_objects(struct odb_source *source, return source->count_objects(source, flags, out); } +/* + * Determine the minimum required length to make the given object ID unique in + * the given source. Returns 0 on success, a negative error code otherwise. + */ +static inline int odb_source_find_abbrev_len(struct odb_source *source, + const struct object_id *oid, + unsigned min_len, + unsigned *out) +{ + return source->find_abbrev_len(source, oid, min_len, out); +} + /* * Freshen an object in the object database by updating its timestamp. * Returns 1 in case the object has been freshened, 0 in case the object does From e8b79a96ebaa2113391d14bfcdabe239f6ff8611 Mon Sep 17 00:00:00 2001 From: Toon Claes Date: Tue, 24 Mar 2026 20:35:41 +0100 Subject: [PATCH 17/34] replay: support replaying down from root commit git-replay(1) doesn't allow replaying commits all the way down to the root commit. Fix that. Signed-off-by: Toon Claes Signed-off-by: Junio C Hamano --- replay.c | 27 +++++++++++++++++---------- t/t3650-replay-basics.sh | 10 +++++++--- 2 files changed, 24 insertions(+), 13 deletions(-) diff --git a/replay.c b/replay.c index a63f6714c4c2ec..92f2279156d69a 100644 --- a/replay.c +++ b/replay.c @@ -209,7 +209,10 @@ static struct commit *mapped_commit(kh_oid_map_t *replayed_commits, struct commit *commit, struct commit *fallback) { - khint_t pos = kh_get_oid_map(replayed_commits, commit->object.oid); + khint_t pos; + if (!commit) + return fallback; + pos = kh_get_oid_map(replayed_commits, commit->object.oid); if (pos == kh_end(replayed_commits)) return fallback; return kh_value(replayed_commits, pos); @@ -225,16 +228,24 @@ static struct commit *pick_regular_commit(struct repository *repo, struct commit *base, *replayed_base; struct tree *pickme_tree, *base_tree, *replayed_base_tree; - base = pickme->parents->item; - replayed_base = mapped_commit(replayed_commits, base, onto); + if (pickme->parents) { + base = pickme->parents->item; + base_tree = repo_get_commit_tree(repo, base); + } else { + base = NULL; + base_tree = lookup_tree(repo, repo->hash_algo->empty_tree); + } + replayed_base = mapped_commit(replayed_commits, base, onto); replayed_base_tree = repo_get_commit_tree(repo, replayed_base); pickme_tree = repo_get_commit_tree(repo, pickme); - base_tree = repo_get_commit_tree(repo, base); merge_opt->branch1 = short_commit_name(repo, replayed_base); merge_opt->branch2 = short_commit_name(repo, pickme); - merge_opt->ancestor = xstrfmt("parent of %s", merge_opt->branch2); + if (pickme->parents) + merge_opt->ancestor = xstrfmt("parent of %s", merge_opt->branch2); + else + merge_opt->ancestor = xstrdup("empty tree"); merge_incore_nonrecursive(merge_opt, base_tree, @@ -293,8 +304,6 @@ int replay_revisions(struct rev_info *revs, set_up_replay_mode(revs->repo, &revs->cmdline, opts->onto, &detached_head, &advance, &onto, &update_refs); - /* FIXME: Should allow replaying commits with the first as a root commit */ - if (prepare_revision_walk(revs) < 0) { ret = error(_("error preparing revisions")); goto out; @@ -309,9 +318,7 @@ int replay_revisions(struct rev_info *revs, khint_t pos; int hr; - if (!commit->parents) - die(_("replaying down from root commit is not supported yet!")); - if (commit->parents->next) + if (commit->parents && commit->parents->next) die(_("replaying merge commits is not supported yet!")); last_commit = pick_regular_commit(revs->repo, commit, replayed_commits, diff --git a/t/t3650-replay-basics.sh b/t/t3650-replay-basics.sh index a03f8f9293eb12..9c55b62757625b 100755 --- a/t/t3650-replay-basics.sh +++ b/t/t3650-replay-basics.sh @@ -81,9 +81,13 @@ test_expect_success 'option --onto or --advance is mandatory' ' test_cmp expect actual ' -test_expect_success 'no base or negative ref gives no-replaying down to root error' ' - echo "fatal: replaying down from root commit is not supported yet!" >expect && - test_must_fail git replay --onto=topic1 topic2 2>actual && +test_expect_success 'replay down to root onto another branch' ' + git replay --ref-action=print --onto main topic2 >result && + + test_line_count = 1 result && + + git log --format=%s $(cut -f 3 -d " " result) >actual && + test_write_lines E D C M L B A >expect && test_cmp expect actual ' From 4d5fb9377bba1f45a940e10b0b7354fe7db2b301 Mon Sep 17 00:00:00 2001 From: Jeff King Date: Thu, 26 Mar 2026 15:04:44 -0400 Subject: [PATCH 18/34] revision: make handle_dotdot() interface less confusing There are two very subtle bits to the way we parse ".." (and "...") range operators: 1. In handle_dotdot_1(), we assume that the incoming arguments "dotdot" and "arg" are part of the same string, with the first digit of the range-operator blanked to a NUL. Then when we want the full name (e.g., to report an error), we replace the NUL with a dot to restore the original string. 2. In handle_dotdot(), we take in a const string, but then we modify it by overwriting the range operator with a NUL. This has worked OK in practice since we tend to pass in buffers that are actually writeable (including argv), but segfaults with something like: handle_revision_arg("..HEAD", &revs, 0, 0); On top of that, building with recent versions of glibc causes the compiler to complain, because it notices when we use strchr() or strstr() to launder away constness (basically detecting the possibility of the segfault above via the type system). Instead of munging the buffer, let's instead make a temporary copy of the left-hand side of the range operator. That avoids any const violations, and lets us pass around the parsed elements independently: the left-hand side, the right-hand side, the number of dots (via the "symmetric" flag), and the original full string for error messages. Signed-off-by: Jeff King Signed-off-by: Junio C Hamano --- revision.c | 42 +++++++++++++++++++----------------------- 1 file changed, 19 insertions(+), 23 deletions(-) diff --git a/revision.c b/revision.c index 31808e3df055c7..f61262436f5678 100644 --- a/revision.c +++ b/revision.c @@ -2038,41 +2038,32 @@ static void prepare_show_merge(struct rev_info *revs) free(prune); } -static int dotdot_missing(const char *arg, char *dotdot, +static int dotdot_missing(const char *full_name, struct rev_info *revs, int symmetric) { if (revs->ignore_missing) return 0; - /* de-munge so we report the full argument */ - *dotdot = '.'; die(symmetric ? "Invalid symmetric difference expression %s" - : "Invalid revision range %s", arg); + : "Invalid revision range %s", full_name); } -static int handle_dotdot_1(const char *arg, char *dotdot, +static int handle_dotdot_1(const char *a_name, const char *b_name, + const char *full_name, int symmetric, struct rev_info *revs, int flags, int cant_be_filename, struct object_context *a_oc, struct object_context *b_oc) { - const char *a_name, *b_name; struct object_id a_oid, b_oid; struct object *a_obj, *b_obj; unsigned int a_flags, b_flags; - int symmetric = 0; unsigned int flags_exclude = flags ^ (UNINTERESTING | BOTTOM); unsigned int oc_flags = GET_OID_COMMITTISH | GET_OID_RECORD_PATH; - a_name = arg; if (!*a_name) a_name = "HEAD"; - b_name = dotdot + 2; - if (*b_name == '.') { - symmetric = 1; - b_name++; - } if (!*b_name) b_name = "HEAD"; @@ -2081,15 +2072,13 @@ static int handle_dotdot_1(const char *arg, char *dotdot, return -1; if (!cant_be_filename) { - *dotdot = '.'; - verify_non_filename(revs->prefix, arg); - *dotdot = '\0'; + verify_non_filename(revs->prefix, full_name); } a_obj = parse_object(revs->repo, &a_oid); b_obj = parse_object(revs->repo, &b_oid); if (!a_obj || !b_obj) - return dotdot_missing(arg, dotdot, revs, symmetric); + return dotdot_missing(full_name, revs, symmetric); if (!symmetric) { /* just A..B */ @@ -2103,7 +2092,7 @@ static int handle_dotdot_1(const char *arg, char *dotdot, a = lookup_commit_reference(revs->repo, &a_obj->oid); b = lookup_commit_reference(revs->repo, &b_obj->oid); if (!a || !b) - return dotdot_missing(arg, dotdot, revs, symmetric); + return dotdot_missing(full_name, revs, symmetric); if (repo_get_merge_bases(the_repository, a, b, &exclude) < 0) { commit_list_free(exclude); @@ -2132,16 +2121,23 @@ static int handle_dotdot(const char *arg, int cant_be_filename) { struct object_context a_oc = {0}, b_oc = {0}; - char *dotdot = strstr(arg, ".."); + const char *dotdot = strstr(arg, ".."); + char *tmp; + int symmetric = 0; int ret; if (!dotdot) return -1; - *dotdot = '\0'; - ret = handle_dotdot_1(arg, dotdot, revs, flags, cant_be_filename, - &a_oc, &b_oc); - *dotdot = '.'; + tmp = xmemdupz(arg, dotdot - arg); + dotdot += 2; + if (*dotdot == '.') { + symmetric = 1; + dotdot++; + } + ret = handle_dotdot_1(tmp, dotdot, arg, symmetric, revs, flags, + cant_be_filename, &a_oc, &b_oc); + free(tmp); object_context_release(&a_oc); object_context_release(&b_oc); From 268a9caaf29f0269147dacbea2c8d439c505c5ee Mon Sep 17 00:00:00 2001 From: Jeff King Date: Thu, 26 Mar 2026 15:05:25 -0400 Subject: [PATCH 19/34] rev-parse: simplify dotdot parsing The previous commit simplified the way that revision.c parses ".." and "..." range operators. But there's roughly similar code in rev-parse. This is less likely to trigger a segfault, as there is no library function which we'd pass a string literal to, but it still causes the compiler to complain about laundering away constness via strstr(). Let's give it the same treatment, copying the left-hand side of the range operator into its own string. Signed-off-by: Jeff King Signed-off-by: Junio C Hamano --- builtin/rev-parse.c | 15 +++++++-------- 1 file changed, 7 insertions(+), 8 deletions(-) diff --git a/builtin/rev-parse.c b/builtin/rev-parse.c index 01a62800e87938..5da95371134027 100644 --- a/builtin/rev-parse.c +++ b/builtin/rev-parse.c @@ -267,21 +267,20 @@ static int show_file(const char *arg, int output_prefix) static int try_difference(const char *arg) { - char *dotdot; + const char *dotdot; struct object_id start_oid; struct object_id end_oid; const char *end; const char *start; + char *to_free; int symmetric; static const char head_by_default[] = "HEAD"; if (!(dotdot = strstr(arg, ".."))) return 0; + start = to_free = xmemdupz(arg, dotdot - arg); end = dotdot + 2; - start = arg; symmetric = (*end == '.'); - - *dotdot = 0; end += symmetric; if (!*end) @@ -295,7 +294,7 @@ static int try_difference(const char *arg) * Just ".."? That is not a range but the * pathspec for the parent directory. */ - *dotdot = '.'; + free(to_free); return 0; } @@ -308,7 +307,7 @@ static int try_difference(const char *arg) a = lookup_commit_reference(the_repository, &start_oid); b = lookup_commit_reference(the_repository, &end_oid); if (!a || !b) { - *dotdot = '.'; + free(to_free); return 0; } if (repo_get_merge_bases(the_repository, a, b, &exclude) < 0) @@ -318,10 +317,10 @@ static int try_difference(const char *arg) show_rev(REVERSED, &commit->object.oid, NULL); } } - *dotdot = '.'; + free(to_free); return 1; } - *dotdot = '.'; + free(to_free); return 0; } From 22b985ef193a18ec0e6602ea1838e3290a351dd6 Mon Sep 17 00:00:00 2001 From: Jeff King Date: Thu, 26 Mar 2026 15:13:18 -0400 Subject: [PATCH 20/34] revision: avoid writing to const string for parent marks We take in a "const char *", but may write a NUL into it when parsing parent marks like "foo^-", since we want to isolate "foo" as a string for further parsing. This is usually OK, as our "const" strings are often actually argv strings which are technically writeable, but we'd segfault with a string literal like: handle_revision_arg("HEAD^-", &revs, 0, 0); Similar to how we handled dotdot in a previous commit, we can avoid this by making a temporary copy of the left-hand side of the string. The cost should negligible compared to the rest of the parsing (like actually parsing commits to create their parent linked-lists). There is one slightly tricky thing, though. We parse some of the marks progressively, so that if we see "foo^!" for example, we'll strip that down to "foo" not just for calling add_parents_only(), but also for the rest of the function. That makes sense since we eventually want to pass "foo" to get_oid_with_context(). But it also means that we'll keep looking for other marks. In particular, "foo^-^!" is valid, though oddly "foo^!^-" would ignore the "^-". I'm not sure if this is a weird historical artifact of the implementation, or if there are important corner cases. So I've left the behavior unchanged. Each mark we find allocates a string with the mark stripped, which means we could allocate multiple times (and carry a free-able pointer for each to the end). But in practice we won't, because of the three marks, "^@" jumps immediately to the end without further parsing, and "^-^!" is nonsense that nobody would pass. So you'd get one allocation in general, and never more than two. Another obvious option would be to just copy "arg" up front and be OK with munging it. But that means we pay the cost even when we find no marks. We could make a single copy upon finding a mark and then munge, but that adds extra code to each site (checking whether somebody else allocated, and if not, adjusting our "mark" pointer to be relative to the copied string). I aimed for something that was clear and obvious, if a bit verbose. Signed-off-by: Jeff King Signed-off-by: Junio C Hamano --- revision.c | 25 +++++++++++++++---------- 1 file changed, 15 insertions(+), 10 deletions(-) diff --git a/revision.c b/revision.c index f61262436f5678..fda405bf65326a 100644 --- a/revision.c +++ b/revision.c @@ -2147,7 +2147,10 @@ static int handle_dotdot(const char *arg, static int handle_revision_arg_1(const char *arg_, struct rev_info *revs, int flags, unsigned revarg_opt) { struct object_context oc = {0}; - char *mark; + const char *mark; + char *arg_minus_at = NULL; + char *arg_minus_excl = NULL; + char *arg_minus_dash = NULL; struct object *object; struct object_id oid; int local_flags; @@ -2174,18 +2177,17 @@ static int handle_revision_arg_1(const char *arg_, struct rev_info *revs, int fl mark = strstr(arg, "^@"); if (mark && !mark[2]) { - *mark = 0; - if (add_parents_only(revs, arg, flags, 0)) { + arg_minus_at = xmemdupz(arg, mark - arg); + if (add_parents_only(revs, arg_minus_at, flags, 0)) { ret = 0; goto out; } - *mark = '^'; } mark = strstr(arg, "^!"); if (mark && !mark[2]) { - *mark = 0; - if (!add_parents_only(revs, arg, flags ^ (UNINTERESTING | BOTTOM), 0)) - *mark = '^'; + arg_minus_excl = xmemdupz(arg, mark - arg); + if (add_parents_only(revs, arg_minus_excl, flags ^ (UNINTERESTING | BOTTOM), 0)) + arg = arg_minus_excl; } mark = strstr(arg, "^-"); if (mark) { @@ -2199,9 +2201,9 @@ static int handle_revision_arg_1(const char *arg_, struct rev_info *revs, int fl } } - *mark = 0; - if (!add_parents_only(revs, arg, flags ^ (UNINTERESTING | BOTTOM), exclude_parent)) - *mark = '^'; + arg_minus_dash = xmemdupz(arg, mark - arg); + if (add_parents_only(revs, arg_minus_dash, flags ^ (UNINTERESTING | BOTTOM), exclude_parent)) + arg = arg_minus_dash; } local_flags = 0; @@ -2236,6 +2238,9 @@ static int handle_revision_arg_1(const char *arg_, struct rev_info *revs, int fl out: object_context_release(&oc); + free(arg_minus_at); + free(arg_minus_excl); + free(arg_minus_dash); return ret; } From 213b2138770d820bc28fde839f3e4df90a5d5d81 Mon Sep 17 00:00:00 2001 From: Jeff King Date: Thu, 26 Mar 2026 15:14:24 -0400 Subject: [PATCH 21/34] rev-parse: avoid writing to const string for parent marks The previous commit cleared up some const confusion in handling parent marks in revision.c, but we have roughly the same code duplicated in rev-parse. This one is much easier to fix, because the handling of the shortened string is all done in one place, after detecting any marks (but without shortening the string between marks). As a side note, I suspect this means that it behaves differently than the revision.c parser for weird stuff like "foo^!^@^-", but that is outside the scope of this patch. While we are here, let's also rename the variable "dotdot", which is totally misleading (and which we already fixed in revision.c long ago via f632dedd8d (handle_revision_arg: stop using "dotdot" as a generic pointer, 2017-05-19)). Doing that here makes the diff a little messier, but it also lets the compiler help us make sure we did not miss any stray mentions of the variable while we are changing its semantics. Signed-off-by: Jeff King Signed-off-by: Junio C Hamano --- builtin/rev-parse.c | 25 +++++++++++++------------ 1 file changed, 13 insertions(+), 12 deletions(-) diff --git a/builtin/rev-parse.c b/builtin/rev-parse.c index 5da95371134027..218b5f34d6a893 100644 --- a/builtin/rev-parse.c +++ b/builtin/rev-parse.c @@ -326,7 +326,7 @@ static int try_difference(const char *arg) static int try_parent_shorthands(const char *arg) { - char *dotdot; + const char *mark; struct object_id oid; struct commit *commit; struct commit_list *parents; @@ -334,38 +334,39 @@ static int try_parent_shorthands(const char *arg) int include_rev = 0; int include_parents = 0; int exclude_parent = 0; + char *to_free; - if ((dotdot = strstr(arg, "^!"))) { + if ((mark = strstr(arg, "^!"))) { include_rev = 1; - if (dotdot[2]) + if (mark[2]) return 0; - } else if ((dotdot = strstr(arg, "^@"))) { + } else if ((mark = strstr(arg, "^@"))) { include_parents = 1; - if (dotdot[2]) + if (mark[2]) return 0; - } else if ((dotdot = strstr(arg, "^-"))) { + } else if ((mark = strstr(arg, "^-"))) { include_rev = 1; exclude_parent = 1; - if (dotdot[2]) { + if (mark[2]) { char *end; - exclude_parent = strtoul(dotdot + 2, &end, 10); + exclude_parent = strtoul(mark + 2, &end, 10); if (*end != '\0' || !exclude_parent) return 0; } } else return 0; - *dotdot = 0; + arg = to_free = xmemdupz(arg, mark - arg); if (repo_get_oid_committish(the_repository, arg, &oid) || !(commit = lookup_commit_reference(the_repository, &oid))) { - *dotdot = '^'; + free(to_free); return 0; } if (exclude_parent && exclude_parent > commit_list_count(commit->parents)) { - *dotdot = '^'; + free(to_free); return 0; } @@ -386,7 +387,7 @@ static int try_parent_shorthands(const char *arg) free(name); } - *dotdot = '^'; + free(to_free); return 1; } From d385845d55e0e3a775fc47ac8d73a5ec41308db3 Mon Sep 17 00:00:00 2001 From: Jeff King Date: Thu, 26 Mar 2026 15:23:20 -0400 Subject: [PATCH 22/34] config: store allocated string in non-const pointer When git-config matches a url, we copy the variable section name and store it in the "section" member of a urlmatch_config struct. That member is const, since the url-matcher will not touch it (and other callers really will have a const string). But that means that we have only a const pointer to our allocated string. We have to cast away the constness when we free it, and likewise when we assign NUL to tie off the "." separating the subsection and key. This latter happens implicitly via a strchr() call, but recent versions of glibc have added annotations that let the compiler detect that and complain. Let's keep our own "section" pointer for the non-const string, and then just point config.section at it. That avoids all of the casting, both explicit and implicit. Signed-off-by: Jeff King Signed-off-by: Junio C Hamano --- builtin/config.c | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/builtin/config.c b/builtin/config.c index 7c4857be622904..cf4ba0f7cc6f22 100644 --- a/builtin/config.c +++ b/builtin/config.c @@ -838,6 +838,7 @@ static int get_urlmatch(const struct config_location_options *opts, const char *var, const char *url) { int ret; + char *section; char *section_tail; struct config_display_options display_opts = *_display_opts; struct string_list_item *item; @@ -851,8 +852,8 @@ static int get_urlmatch(const struct config_location_options *opts, if (!url_normalize(url, &config.url)) die("%s", config.url.err); - config.section = xstrdup_tolower(var); - section_tail = strchr(config.section, '.'); + config.section = section = xstrdup_tolower(var); + section_tail = strchr(section, '.'); if (section_tail) { *section_tail = '\0'; config.key = section_tail + 1; @@ -886,7 +887,7 @@ static int get_urlmatch(const struct config_location_options *opts, string_list_clear(&values, 1); free(config.url.url); - free((void *)config.section); + free(section); return ret; } From 81e29064371c4b4599b171ac71e73d1e21475a63 Mon Sep 17 00:00:00 2001 From: Taylor Blau Date: Fri, 27 Mar 2026 16:06:43 -0400 Subject: [PATCH 23/34] pack-objects: plug leak in `read_stdin_packs()` The `read_stdin_packs()` function added originally via 339bce27f4f (builtin/pack-objects.c: add '--stdin-packs' option, 2021-02-22) declares a `rev_info` struct but neglects to call `release_revisions()` on it before returning, creating the potential for a leak. The related change in 97ec43247c0 (pack-objects: declare 'rev_info' for '--stdin-packs' earlier, 2025-06-23) carried forward this oversight and did not address it. Ensure that we call `release_revisions()` appropriately to prevent a potential leak from this function. Note that in practice our `rev_info` here does not have a present leak, hence t5331 passes cleanly before this commit, even when built with SANITIZE=leak. Signed-off-by: Taylor Blau Signed-off-by: Junio C Hamano --- builtin/pack-objects.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/builtin/pack-objects.c b/builtin/pack-objects.c index cd013c0b68a6a3..9a89bc5c4c924a 100644 --- a/builtin/pack-objects.c +++ b/builtin/pack-objects.c @@ -3968,6 +3968,8 @@ static void read_stdin_packs(enum stdin_packs_mode mode, int rev_list_unpacked) show_object_pack_hint, &mode); + release_revisions(&revs); + trace2_data_intmax("pack-objects", the_repository, "stdin_packs_found", stdin_packs_found_nr); trace2_data_intmax("pack-objects", the_repository, "stdin_packs_hints", From d31d1f2e06942046b5220942c77245725d7df2c1 Mon Sep 17 00:00:00 2001 From: Taylor Blau Date: Fri, 27 Mar 2026 16:06:46 -0400 Subject: [PATCH 24/34] pack-objects: refactor `read_packs_list_from_stdin()` to use `strmap` The '--stdin-packs' mode of pack-objects maintains two separate string_lists: one for included packs, and one for excluded packs. Each list stores the pack basename as a string and the corresponding `packed_git` pointer in its `->util` field. This works, but makes it awkward to extend the set of pack "kinds" that pack-objects can accept via stdin, since each new kind would need its own string_list and duplicated handling. A future commit will want to do just this, so prepare for that change by handling the various "kinds" of packs specified over stdin in a more generic fashion. Namely, replace the two `string_list`s with a single `strmap` keyed on the pack basename, with values pointing to a new `struct stdin_pack_info`. This struct tracks both the `packed_git` pointer and a `kind` bitfield indicating whether the pack was specified as included or excluded. Extract the logic for sorting packs by mtime and adding their objects into a separate `stdin_packs_add_pack_entries()` helper. While we could have used a `string_list`, we must handle the case where the same pack is specified more than once. With a `string_list` only, we would have to pay a quadratic cost to either (a) insert elements into their sorted positions, or (b) a repeated linear search, which is accidentally quadratic. For that reason, use a strmap instead. This patch does not include any functional changes. Signed-off-by: Taylor Blau Signed-off-by: Junio C Hamano --- builtin/pack-objects.c | 179 ++++++++++++++++++++++++++--------------- 1 file changed, 112 insertions(+), 67 deletions(-) diff --git a/builtin/pack-objects.c b/builtin/pack-objects.c index 9a89bc5c4c924a..945100b405c536 100644 --- a/builtin/pack-objects.c +++ b/builtin/pack-objects.c @@ -28,6 +28,7 @@ #include "reachable.h" #include "oid-array.h" #include "strvec.h" +#include "strmap.h" #include "list.h" #include "packfile.h" #include "object-file.h" @@ -3820,107 +3821,151 @@ static void show_commit_pack_hint(struct commit *commit, void *data) } +/* + * stdin_pack_info_kind specifies how a pack specified over stdin + * should be treated when pack-objects is invoked with --stdin-packs. + * + * - STDIN_PACK_INCLUDE: objects in any packs with this flag bit set + * should be included in the output pack, unless they appear in an + * excluded pack. + * + * - STDIN_PACK_EXCLUDE_CLOSED: objects in any packs with this flag + * bit set should be excluded from the output pack. + * + * Objects in packs whose 'kind' bits include STDIN_PACK_INCLUDE are + * used as traversal tips when invoked with --stdin-packs=follow. + */ +enum stdin_pack_info_kind { + STDIN_PACK_INCLUDE = (1<<0), + STDIN_PACK_EXCLUDE_CLOSED = (1<<1), +}; + +struct stdin_pack_info { + struct packed_git *p; + enum stdin_pack_info_kind kind; +}; + static int pack_mtime_cmp(const void *_a, const void *_b) { - struct packed_git *a = ((const struct string_list_item*)_a)->util; - struct packed_git *b = ((const struct string_list_item*)_b)->util; + struct stdin_pack_info *a = ((const struct string_list_item*)_a)->util; + struct stdin_pack_info *b = ((const struct string_list_item*)_b)->util; /* * order packs by descending mtime so that objects are laid out * roughly as newest-to-oldest */ - if (a->mtime < b->mtime) + if (a->p->mtime < b->p->mtime) return 1; - else if (b->mtime < a->mtime) + else if (b->p->mtime < a->p->mtime) return -1; else return 0; } -static void read_packs_list_from_stdin(struct rev_info *revs) +static void stdin_packs_add_pack_entries(struct strmap *packs, + struct rev_info *revs) +{ + struct string_list keys = STRING_LIST_INIT_NODUP; + struct string_list_item *item; + struct hashmap_iter iter; + struct strmap_entry *entry; + + strmap_for_each_entry(packs, &iter, entry) { + struct stdin_pack_info *info = entry->value; + if (!info->p) + die(_("could not find pack '%s'"), entry->key); + + string_list_append(&keys, entry->key)->util = info; + } + + /* + * Order packs by ascending mtime; use QSORT directly to access the + * string_list_item's ->util pointer, which string_list_sort() does not + * provide. + */ + QSORT(keys.items, keys.nr, pack_mtime_cmp); + + for_each_string_list_item(item, &keys) { + struct stdin_pack_info *info = item->util; + + if (info->kind & STDIN_PACK_INCLUDE) + for_each_object_in_pack(info->p, + add_object_entry_from_pack, + revs, + ODB_FOR_EACH_OBJECT_PACK_ORDER); + } + + string_list_clear(&keys, 0); +} + +static void stdin_packs_read_input(struct rev_info *revs) { struct strbuf buf = STRBUF_INIT; - struct string_list include_packs = STRING_LIST_INIT_DUP; - struct string_list exclude_packs = STRING_LIST_INIT_DUP; - struct string_list_item *item = NULL; + struct strmap packs = STRMAP_INIT; struct packed_git *p; while (strbuf_getline(&buf, stdin) != EOF) { - if (!buf.len) + struct stdin_pack_info *info; + enum stdin_pack_info_kind kind = STDIN_PACK_INCLUDE; + const char *key = buf.buf; + + if (!*key) continue; + else if (*key == '^') + kind = STDIN_PACK_EXCLUDE_CLOSED; - if (*buf.buf == '^') - string_list_append(&exclude_packs, buf.buf + 1); - else - string_list_append(&include_packs, buf.buf); + if (kind != STDIN_PACK_INCLUDE) + key++; + + info = strmap_get(&packs, key); + if (!info) { + CALLOC_ARRAY(info, 1); + strmap_put(&packs, key, info); + } + + info->kind |= kind; strbuf_reset(&buf); } - string_list_sort_u(&include_packs, 0); - string_list_sort_u(&exclude_packs, 0); - repo_for_each_pack(the_repository, p) { - const char *pack_name = pack_basename(p); + struct stdin_pack_info *info; + + info = strmap_get(&packs, pack_basename(p)); + if (!info) + continue; - if ((item = string_list_lookup(&include_packs, pack_name))) { + if (info->kind & STDIN_PACK_INCLUDE) { if (exclude_promisor_objects && p->pack_promisor) die(_("packfile %s is a promisor but --exclude-promisor-objects was given"), p->pack_name); - item->util = p; + + /* + * Arguments we got on stdin may not even be + * packs. First check that to avoid segfaulting + * later on in e.g. pack_mtime_cmp(), excluded + * packs are handled below. + */ + if (!is_pack_valid(p)) + die(_("packfile %s cannot be accessed"), p->pack_name); } - if ((item = string_list_lookup(&exclude_packs, pack_name))) - item->util = p; - } - /* - * Arguments we got on stdin may not even be packs. First - * check that to avoid segfaulting later on in - * e.g. pack_mtime_cmp(), excluded packs are handled below. - * - * Since we first parsed our STDIN and then sorted the input - * lines the pack we error on will be whatever line happens to - * sort first. This is lazy, it's enough that we report one - * bad case here, we don't need to report the first/last one, - * or all of them. - */ - for_each_string_list_item(item, &include_packs) { - struct packed_git *p = item->util; - if (!p) - die(_("could not find pack '%s'"), item->string); - if (!is_pack_valid(p)) - die(_("packfile %s cannot be accessed"), p->pack_name); - } + if (info->kind & STDIN_PACK_EXCLUDE_CLOSED) { + /* + * Marking excluded packs as kept in-core so + * that later calls to add_object_entry() + * discards any objects that are also found in + * excluded packs. + */ + p->pack_keep_in_core = 1; + } - /* - * Then, handle all of the excluded packs, marking them as - * kept in-core so that later calls to add_object_entry() - * discards any objects that are also found in excluded packs. - */ - for_each_string_list_item(item, &exclude_packs) { - struct packed_git *p = item->util; - if (!p) - die(_("could not find pack '%s'"), item->string); - p->pack_keep_in_core = 1; + info->p = p; } - /* - * Order packs by ascending mtime; use QSORT directly to access the - * string_list_item's ->util pointer, which string_list_sort() does not - * provide. - */ - QSORT(include_packs.items, include_packs.nr, pack_mtime_cmp); - - for_each_string_list_item(item, &include_packs) { - struct packed_git *p = item->util; - for_each_object_in_pack(p, - add_object_entry_from_pack, - revs, - ODB_FOR_EACH_OBJECT_PACK_ORDER); - } + stdin_packs_add_pack_entries(&packs, revs); strbuf_release(&buf); - string_list_clear(&include_packs, 0); - string_list_clear(&exclude_packs, 0); + strmap_clear(&packs, 1); } static void add_unreachable_loose_objects(struct rev_info *revs); @@ -3957,7 +4002,7 @@ static void read_stdin_packs(enum stdin_packs_mode mode, int rev_list_unpacked) /* avoids adding objects in excluded packs */ ignore_packed_keep_in_core = 1; - read_packs_list_from_stdin(&revs); + stdin_packs_read_input(&revs); if (rev_list_unpacked) add_unreachable_loose_objects(&revs); From 5a4381f093508f0504a99b1abc9ca5e8ebd0901f Mon Sep 17 00:00:00 2001 From: Taylor Blau Date: Fri, 27 Mar 2026 16:06:49 -0400 Subject: [PATCH 25/34] t7704: demonstrate failure with once-cruft objects above the geometric split Add a test demonstrating a case where geometric repacking fails to produce a pack with full object closure, thus making it impossible to write a reachability bitmap. Mark the test with 'test_expect_failure' for now. The subsequent commit will explain the precise failure mode, and implement a fix. Signed-off-by: Taylor Blau Signed-off-by: Junio C Hamano --- t/t7704-repack-cruft.sh | 22 ++++++++++++++++++++++ 1 file changed, 22 insertions(+) diff --git a/t/t7704-repack-cruft.sh b/t/t7704-repack-cruft.sh index aa2e2e6ad887f2..77133395b5d0cc 100755 --- a/t/t7704-repack-cruft.sh +++ b/t/t7704-repack-cruft.sh @@ -869,4 +869,26 @@ test_expect_success 'repack --write-midx includes cruft when already geometric' ) ' +test_expect_failure 'repack rescues once-cruft objects above geometric split' ' + git config repack.midxMustContainCruft false && + + test_commit reachable && + test_commit unreachable && + + unreachable="$(git rev-parse HEAD)" && + + git reset --hard HEAD^ && + git tag -d unreachable && + git reflog expire --all --expire=all && + + git repack --cruft -d && + + echo $unreachable | git pack-objects .git/objects/pack/pack && + + test_commit new && + + git update-ref refs/heads/other $unreachable && + git repack --geometric=2 -d --write-midx --write-bitmap-index +' + test_done From 3f7c0e722e2733aede32b1e531caf83e7043d1bd Mon Sep 17 00:00:00 2001 From: Taylor Blau Date: Fri, 27 Mar 2026 16:06:51 -0400 Subject: [PATCH 26/34] pack-objects: support excluded-open packs with --stdin-packs In cd846bacc7d (pack-objects: introduce '--stdin-packs=follow', 2025-06-23), pack-objects learned to traverse through commits in included packs when using '--stdin-packs=follow', rescuing reachable objects from unlisted packs into the output. When we encounter a commit in an excluded pack during this rescuing phase we will traverse through its parents. But because we set `revs.no_kept_objects = 1`, commit simplification will prevent us from showing it via `get_revision()`. (In practice, `--stdin-packs=follow` walks commits down to the roots, but only opens up trees for ones that do not appear in an excluded pack.) But there are certain cases where we *do* need to see the parents of an object in an excluded pack. Namely, if an object is rescue-able, but only reachable from object(s) which appear in excluded packs, then commit simplification will exclude those commits from the object traversal, and we will never see a copy of that object, and thus not rescue it. This is what causes the failure in the previous commit during repacking. When performing a geometric repack, packs above the geometric split that weren't part of the previous MIDX (e.g., packs pushed directly into `$GIT_DIR/objects/pack`) may not have full object closure. When those packs are listed as excluded via the '^' marker, the reachability traversal encounters the sequence described above, and may miss objects which we expect to rescue with `--stdin-packs=follow`. Introduce a new "excluded-open" pack prefix, '!'. Like '^'-prefixed packs, objects from '!'-prefixed packs are excluded from the resulting pack. But unlike '^', commits in '!'-prefixed packs *are* used as starting points for the follow traversal, and the traversal does not treat them as a closure boundary. In order to distinguish excluded-closed from excluded-open packs during the traversal, introduce a new `pack_keep_in_core_open` bit on `struct packed_git`, along with a corresponding `KEPT_PACK_IN_CORE_OPEN` flag for the kept-pack cache. In `add_object_entry_from_pack()`, move the `want_object_in_pack()` check to *after* `add_pending_oid()`. This is necessary so that commits from excluded-open packs are added as traversal tips even though their objects won't appear in the output. As a consequence, the caller `for_each_object_in_pack()` will always provide a non-NULL 'p', hence we are able to drop the "if (p)" conditional. The `include_check` and `include_check_obj` callbacks on `rev_info` are used to halt the walk at closed-excluded packs, since objects behind a '^' boundary are guaranteed to have closure and need not be rescued. The following commit will make use of this new functionality within the repack layer to resolve the test failure demonstrated in the previous commit. Signed-off-by: Taylor Blau Signed-off-by: Junio C Hamano --- Documentation/git-pack-objects.adoc | 25 ++++-- builtin/pack-objects.c | 116 ++++++++++++++++++++++------ packfile.c | 3 +- packfile.h | 2 + t/t5331-pack-objects-stdin.sh | 105 +++++++++++++++++++++++++ 5 files changed, 218 insertions(+), 33 deletions(-) diff --git a/Documentation/git-pack-objects.adoc b/Documentation/git-pack-objects.adoc index 71b9682485c38b..b78175fbe1b97b 100644 --- a/Documentation/git-pack-objects.adoc +++ b/Documentation/git-pack-objects.adoc @@ -94,13 +94,24 @@ base-name:: included packs (those not beginning with `^`), excluding any objects listed in the excluded packs (beginning with `^`). + -When `mode` is "follow", objects from packs not listed on stdin receive -special treatment. Objects within unlisted packs will be included if -those objects are (1) reachable from the included packs, and (2) not -found in any excluded packs. This mode is useful, for example, to -resurrect once-unreachable objects found in cruft packs to generate -packs which are closed under reachability up to the boundary set by the -excluded packs. +When `mode` is "follow" packs may additionally be prefixed with `!`, +indicating that they are excluded but not necessarily closed under +reachability. In addition to objects in included packs, the resulting +pack may include additional objects based on the following: ++ +-- +* If any packs are marked with `!`, then objects reachable from such + packs or included ones via objects outside of excluded-closed packs + will be included. In this case, all `^` packs are treated as closed + under reachability. +* Otherwise (if there are no `!` packs), objects within unlisted packs + will be included if those objects are (1) reachable from the + included packs, and (2) not found in any excluded packs. +-- ++ +This mode is useful, for example, to resurrect once-unreachable +objects found in cruft packs to generate packs which are closed under +reachability up to the boundary set by the excluded packs. + Incompatible with `--revs`, or options that imply `--revs` (such as `--all`), with the exception of `--unpacked`, which is compatible. diff --git a/builtin/pack-objects.c b/builtin/pack-objects.c index 945100b405c536..7b97784d6c5de8 100644 --- a/builtin/pack-objects.c +++ b/builtin/pack-objects.c @@ -217,6 +217,7 @@ static int have_non_local_packs; static int incremental; static int ignore_packed_keep_on_disk; static int ignore_packed_keep_in_core; +static int ignore_packed_keep_in_core_open; static int ignore_packed_keep_in_core_has_cruft; static int allow_ofs_delta; static struct pack_idx_option pack_idx_opts; @@ -1618,7 +1619,8 @@ static int want_found_object(const struct object_id *oid, int exclude, /* * Then handle .keep first, as we have a fast(er) path there. */ - if (ignore_packed_keep_on_disk || ignore_packed_keep_in_core) { + if (ignore_packed_keep_on_disk || ignore_packed_keep_in_core || + ignore_packed_keep_in_core_open) { /* * Set the flags for the kept-pack cache to be the ones we want * to ignore. @@ -1632,6 +1634,8 @@ static int want_found_object(const struct object_id *oid, int exclude, flags |= KEPT_PACK_ON_DISK; if (ignore_packed_keep_in_core) flags |= KEPT_PACK_IN_CORE; + if (ignore_packed_keep_in_core_open) + flags |= KEPT_PACK_IN_CORE_OPEN; /* * If the object is in a pack that we want to ignore, *and* we @@ -1643,6 +1647,8 @@ static int want_found_object(const struct object_id *oid, int exclude, return 0; if (ignore_packed_keep_in_core && p->pack_keep_in_core) return 0; + if (ignore_packed_keep_in_core_open && p->pack_keep_in_core_open) + return 0; if (has_object_kept_pack(p->repo, oid, flags)) return 0; } else { @@ -3742,6 +3748,7 @@ static int add_object_entry_from_pack(const struct object_id *oid, void *_data) { off_t ofs; + struct object_info oi = OBJECT_INFO_INIT; enum object_type type = OBJ_NONE; display_progress(progress_state, ++nr_seen); @@ -3749,29 +3756,34 @@ static int add_object_entry_from_pack(const struct object_id *oid, if (have_duplicate_entry(oid, 0)) return 0; - ofs = nth_packed_object_offset(p, pos); - if (!want_object_in_pack(oid, 0, &p, &ofs)) - return 0; + stdin_packs_found_nr++; - if (p) { - struct object_info oi = OBJECT_INFO_INIT; - - oi.typep = &type; - if (packed_object_info(p, ofs, &oi) < 0) { - die(_("could not get type of object %s in pack %s"), - oid_to_hex(oid), p->pack_name); - } else if (type == OBJ_COMMIT) { - struct rev_info *revs = _data; - /* - * commits in included packs are used as starting points for the - * subsequent revision walk - */ - add_pending_oid(revs, NULL, oid, 0); - } + ofs = nth_packed_object_offset(p, pos); - stdin_packs_found_nr++; + oi.typep = &type; + if (packed_object_info(p, ofs, &oi) < 0) { + die(_("could not get type of object %s in pack %s"), + oid_to_hex(oid), p->pack_name); + } else if (type == OBJ_COMMIT) { + struct rev_info *revs = _data; + /* + * commits in included packs are used as starting points + * for the subsequent revision walk + * + * Note that we do want to walk through commits that are + * present in excluded-open ('!') packs to pick up any + * objects reachable from them not present in the + * excluded-closed ('^') packs. + * + * However, we'll only add those objects to the packing + * list after checking `want_object_in_pack()` below. + */ + add_pending_oid(revs, NULL, oid, 0); } + if (!want_object_in_pack(oid, 0, &p, &ofs)) + return 0; + create_object_entry(oid, type, 0, 0, 0, p, ofs); return 0; @@ -3832,12 +3844,18 @@ static void show_commit_pack_hint(struct commit *commit, void *data) * - STDIN_PACK_EXCLUDE_CLOSED: objects in any packs with this flag * bit set should be excluded from the output pack. * - * Objects in packs whose 'kind' bits include STDIN_PACK_INCLUDE are - * used as traversal tips when invoked with --stdin-packs=follow. + * - STDIN_PACK_EXCLUDE_OPEN: objects in any packs with this flag + * bit set should be excluded from the output pack, but are not + * guaranteed to be closed under reachability. + * + * Objects in packs whose 'kind' bits include STDIN_PACK_INCLUDE or + * STDIN_PACK_EXCLUDE_OPEN are used as traversal tips when invoked + * with --stdin-packs=follow. */ enum stdin_pack_info_kind { STDIN_PACK_INCLUDE = (1<<0), STDIN_PACK_EXCLUDE_CLOSED = (1<<1), + STDIN_PACK_EXCLUDE_OPEN = (1<<2), }; struct stdin_pack_info { @@ -3862,6 +3880,17 @@ static int pack_mtime_cmp(const void *_a, const void *_b) return 0; } +static int stdin_packs_include_check_obj(struct object *obj, void *data UNUSED) +{ + return !has_object_kept_pack(to_pack.repo, &obj->oid, + KEPT_PACK_IN_CORE); +} + +static int stdin_packs_include_check(struct commit *commit, void *data) +{ + return stdin_packs_include_check_obj((struct object *)commit, data); +} + static void stdin_packs_add_pack_entries(struct strmap *packs, struct rev_info *revs) { @@ -3888,7 +3917,19 @@ static void stdin_packs_add_pack_entries(struct strmap *packs, for_each_string_list_item(item, &keys) { struct stdin_pack_info *info = item->util; - if (info->kind & STDIN_PACK_INCLUDE) + if (info->kind & STDIN_PACK_EXCLUDE_OPEN) { + /* + * When open-excluded packs ("!") are present, stop + * the parent walk at closed-excluded ("^") packs. + * Objects behind a "^" boundary are guaranteed to + * have closure and should not be rescued. + */ + revs->include_check = stdin_packs_include_check; + revs->include_check_obj = stdin_packs_include_check_obj; + } + + if ((info->kind & STDIN_PACK_INCLUDE) || + (info->kind & STDIN_PACK_EXCLUDE_OPEN)) for_each_object_in_pack(info->p, add_object_entry_from_pack, revs, @@ -3898,7 +3939,8 @@ static void stdin_packs_add_pack_entries(struct strmap *packs, string_list_clear(&keys, 0); } -static void stdin_packs_read_input(struct rev_info *revs) +static void stdin_packs_read_input(struct rev_info *revs, + enum stdin_packs_mode mode) { struct strbuf buf = STRBUF_INIT; struct strmap packs = STRMAP_INIT; @@ -3913,6 +3955,8 @@ static void stdin_packs_read_input(struct rev_info *revs) continue; else if (*key == '^') kind = STDIN_PACK_EXCLUDE_CLOSED; + else if (*key == '!' && mode == STDIN_PACKS_MODE_FOLLOW) + kind = STDIN_PACK_EXCLUDE_OPEN; if (kind != STDIN_PACK_INCLUDE) key++; @@ -3959,6 +4003,20 @@ static void stdin_packs_read_input(struct rev_info *revs) p->pack_keep_in_core = 1; } + if (info->kind & STDIN_PACK_EXCLUDE_OPEN) { + /* + * Marking excluded open packs as kept in-core + * (open) for the same reason as we marked + * exclude closed packs as kept in-core. + * + * Use a separate flag here to ensure we don't + * halt our traversal at these packs, since they + * are not guaranteed to have closure. + * + */ + p->pack_keep_in_core_open = 1; + } + info->p = p; } @@ -4002,7 +4060,15 @@ static void read_stdin_packs(enum stdin_packs_mode mode, int rev_list_unpacked) /* avoids adding objects in excluded packs */ ignore_packed_keep_in_core = 1; - stdin_packs_read_input(&revs); + if (mode == STDIN_PACKS_MODE_FOLLOW) { + /* + * In '--stdin-packs=follow' mode, additionally ignore + * objects in excluded-open packs to prevent them from + * appearing in the resulting pack. + */ + ignore_packed_keep_in_core_open = 1; + } + stdin_packs_read_input(&revs, mode); if (rev_list_unpacked) add_unreachable_loose_objects(&revs); diff --git a/packfile.c b/packfile.c index 215a23e42be0fe..076e444e32aa86 100644 --- a/packfile.c +++ b/packfile.c @@ -2246,7 +2246,8 @@ struct packed_git **packfile_store_get_kept_pack_cache(struct packfile_store *st struct packed_git *p = e->pack; if ((p->pack_keep && (flags & KEPT_PACK_ON_DISK)) || - (p->pack_keep_in_core && (flags & KEPT_PACK_IN_CORE))) { + (p->pack_keep_in_core && (flags & KEPT_PACK_IN_CORE)) || + (p->pack_keep_in_core_open && (flags & KEPT_PACK_IN_CORE_OPEN))) { ALLOC_GROW(packs, nr + 1, alloc); packs[nr++] = p; } diff --git a/packfile.h b/packfile.h index 8b04a258a7b9d5..b7735c1977d8a5 100644 --- a/packfile.h +++ b/packfile.h @@ -28,6 +28,7 @@ struct packed_git { unsigned pack_local:1, pack_keep:1, pack_keep_in_core:1, + pack_keep_in_core_open:1, freshened:1, do_not_close:1, pack_promisor:1, @@ -266,6 +267,7 @@ int packfile_store_freshen_object(struct packfile_store *store, enum kept_pack_type { KEPT_PACK_ON_DISK = (1 << 0), KEPT_PACK_IN_CORE = (1 << 1), + KEPT_PACK_IN_CORE_OPEN = (1 << 2), }; /* diff --git a/t/t5331-pack-objects-stdin.sh b/t/t5331-pack-objects-stdin.sh index 7eb79bc2cdb5b5..c74b5861af322f 100755 --- a/t/t5331-pack-objects-stdin.sh +++ b/t/t5331-pack-objects-stdin.sh @@ -415,4 +415,109 @@ test_expect_success '--stdin-packs=follow tolerates missing commits' ' stdin_packs__follow_with_only HEAD HEAD^{tree} ' +test_expect_success '--stdin-packs=follow with open-excluded packs' ' + test_when_finished "rm -fr repo" && + + git init repo && + ( + cd repo && + git config set maintenance.auto false && + + git branch -M main && + + # Create the following commit structure: + # + # A <-- B <-- D (main) + # ^ + # \ + # C (other) + test_commit A && + test_commit B && + git checkout -B other && + test_commit C && + git checkout main && + test_commit D && + + A="$(echo A | git pack-objects --revs $packdir/pack)" && + B="$(echo A..B | git pack-objects --revs $packdir/pack)" && + C="$(echo B..C | git pack-objects --revs $packdir/pack)" && + D="$(echo B..D | git pack-objects --revs $packdir/pack)" && + + C_ONLY="$(git rev-parse other | git pack-objects $packdir/pack)" && + + git prune-packed && + + # Create a pack using --stdin-packs=follow where: + # + # - pack D is included, + # - pack C_ONLY is excluded, but open, + # - pack B is excluded, but closed, and + # - packs A and C are unknown + # + # The resulting pack should therefore contain: + # + # - objects from the included pack D, + # - A.t (rescued via D^{tree}), and + # - C^{tree} and C.t (rescued via pack C_ONLY) + # + # , but should omit: + # + # - C (excluded via C_ONLY), + # - objects from pack B (trivially excluded-closed) + # - A and A^{tree} (ancestors of B) + P=$(git pack-objects --stdin-packs=follow $packdir/pack <<-EOF + pack-$D.pack + !pack-$C_ONLY.pack + ^pack-$B.pack + EOF + ) && + + { + objects_in_packs $D && + git rev-parse A:A.t "C^{tree}" C:C.t + } >expect.raw && + sort expect.raw >expect && + + objects_in_packs $P >actual && + test_cmp expect actual + ) +' + +test_expect_success '--stdin-packs with !-delimited pack without follow' ' + test_when_finished "rm -fr repo" && + + git init repo && + ( + test_commit A && + test_commit B && + test_commit C && + + A="$(echo A | git pack-objects --revs $packdir/pack)" && + B="$(echo A..B | git pack-objects --revs $packdir/pack)" && + C="$(echo B..C | git pack-objects --revs $packdir/pack)" && + + cat >in <<-EOF && + !pack-$A.pack + pack-$B.pack + pack-$C.pack + EOF + + # Without --stdin-packs=follow, we treat the first + # line of input as a literal packfile name, and thus + # expect pack-objects to complain of a missing pack + test_must_fail git pack-objects --stdin-packs --stdout \ + >/dev/null err && + test_grep "could not find pack .!pack-$A.pack." err && + + # With --stdin-packs=follow, we treat the second line + # of input as indicating pack-$A.pack is an excluded + # open pack, and thus expect pack-objects to succeed + P=$(git pack-objects --stdin-packs=follow $packdir/pack expect && + objects_in_packs $P >actual && + test_cmp expect actual + ) +' + test_done From 9ad29df36d7c762677b5a4ecc6a6dc229c818b2a Mon Sep 17 00:00:00 2001 From: Taylor Blau Date: Fri, 27 Mar 2026 16:06:54 -0400 Subject: [PATCH 27/34] repack: mark non-MIDX packs above the split as excluded-open In 5ee86c273bf (repack: exclude cruft pack(s) from the MIDX where possible, 2025-06-23), geometric repacking learned to exclude cruft packs from the MIDX when 'repack.midxMustContainCruft' is set to 'false'. This works because packs generated with '--stdin-packs=follow' rescue any once-unreachable objects that later become reachable, making the resulting packs closed under reachability without needing the cruft pack in the MIDX. However, packs above the geometric split that were not part of the previous MIDX may not have full object closure. When such packs are marked as excluded-closed ('^'), pack-objects treats them as a reachability boundary and does not traverse through them during the follow pass, potentially leaving the resulting pack without full closure. Fix this by marking packs above the geometric split that were not in the previous MIDX as excluded-open ('!') instead of excluded-closed ('^'). This causes pack-objects to walk through their commits during the follow pass, rescuing any reachable objects not present in the closed-excluded packs. Note that MIDXs which were generated prior to this change and are unlucky enough to not be closed under reachability may still exhibit this bug, as we treat all MIDX'd packs as closed. That is true in an overwhelming number of cases, since in order to have a non-closed MIDX you would have to: - Generate a pack via an earlier geometric repack that is not closed under reachability. - Store that pack in the MIDX. - Avoid picking any commits to receive reachability bitmaps which happen to reach objects from which the missing objects are reachable. In the extremely rare chance that all of the above should happen, an all-into-one repack will resolve the issue. Unfortunately, there is no perfect way to determine whether a MIDX'd pack is closed outside of ensuring that there is a '1' bit in at least one bitmap for every bit position corresponding to objects in that pack. While this is possible to do, this approach would treat MIDX'd packs as open in cases where there is at least one object that is not reachable from the subset of commits selected for bitmapping. Signed-off-by: Taylor Blau Signed-off-by: Junio C Hamano --- builtin/repack.c | 19 +++++++++++++++++-- t/t7704-repack-cruft.sh | 2 +- 2 files changed, 18 insertions(+), 3 deletions(-) diff --git a/builtin/repack.c b/builtin/repack.c index f6bb04bef7264e..4c5a82c2c8d7de 100644 --- a/builtin/repack.c +++ b/builtin/repack.c @@ -369,8 +369,23 @@ int cmd_repack(int argc, */ for (i = 0; i < geometry.split; i++) fprintf(in, "%s\n", pack_basename(geometry.pack[i])); - for (i = geometry.split; i < geometry.pack_nr; i++) - fprintf(in, "^%s\n", pack_basename(geometry.pack[i])); + for (i = geometry.split; i < geometry.pack_nr; i++) { + const char *basename = pack_basename(geometry.pack[i]); + char marker = '^'; + + if (!midx_must_contain_cruft && + !string_list_has_string(&existing.midx_packs, + basename)) { + /* + * Assume non-MIDX'd packs are not + * necessarily closed under + * reachability. + */ + marker = '!'; + } + + fprintf(in, "%c%s\n", marker, basename); + } fclose(in); } diff --git a/t/t7704-repack-cruft.sh b/t/t7704-repack-cruft.sh index 77133395b5d0cc..9e03b04315db76 100755 --- a/t/t7704-repack-cruft.sh +++ b/t/t7704-repack-cruft.sh @@ -869,7 +869,7 @@ test_expect_success 'repack --write-midx includes cruft when already geometric' ) ' -test_expect_failure 'repack rescues once-cruft objects above geometric split' ' +test_expect_success 'repack rescues once-cruft objects above geometric split' ' git config repack.midxMustContainCruft false && test_commit reachable && From d8e34f971b31ae6583e796626c7280732fca68e1 Mon Sep 17 00:00:00 2001 From: Zakariyah Ali Date: Sat, 28 Mar 2026 00:40:19 +0100 Subject: [PATCH 28/34] t2000: modernise overall structure This test script that dates back to 2005 certainly shows its age and both its style and the way the tests are laid out do not match the modern standard. * Executables that prepare the data used to test the command should be inside the test_expect_success block in modern tests. * In modern tests, running a command that is being tested, making sure it succeeds, and inspecting other side effects that are expected, are all done in a single test_expect_success block. * A test_expect_success block in modern tests are laid out as test_expect_success 'title of the test' ' body of the test && ... body of the test ' not as test_expect_success \ 'title of the test' \ 'body of the test && ... body of the test' which is in a prehistoric style. * In modern tests, each &&-chained statement in the body of the test_expect_success block are indented with a horizontal tab, unlike prehistoric style that used 4-space indent. Signed-off-by: Zakariyah Ali Signed-off-by: Junio C Hamano --- t/t2000-conflict-when-checking-files-out.sh | 122 +++++++++++--------- 1 file changed, 66 insertions(+), 56 deletions(-) diff --git a/t/t2000-conflict-when-checking-files-out.sh b/t/t2000-conflict-when-checking-files-out.sh index f18616ad2be3ea..af199d81913f1e 100755 --- a/t/t2000-conflict-when-checking-files-out.sh +++ b/t/t2000-conflict-when-checking-files-out.sh @@ -35,30 +35,30 @@ show_files() { sed -e 's/^\([0-9]*\) [^ ]* [0-9a-f]* /tr: \1 /' } -date >path0 -mkdir path1 -date >path1/file1 - -test_expect_success \ - 'git update-index --add various paths.' \ - 'git update-index --add path0 path1/file1' - -rm -fr path0 path1 -mkdir path0 -date >path0/file0 -date >path1 +test_expect_success 'prepare files path0 and path1/file1' ' + date >path0 && + mkdir path1 && + date >path1/file1 && + git update-index --add path0 path1/file1 +' -test_expect_success \ - 'git checkout-index without -f should fail on conflicting work tree.' \ - 'test_must_fail git checkout-index -a' +test_expect_success 'prepare working tree files with D/F conflicts' ' + rm -fr path0 path1 && + mkdir path0 && + date >path0/file0 && + date >path1 +' -test_expect_success \ - 'git checkout-index with -f should succeed.' \ - 'git checkout-index -f -a' +test_expect_success 'git checkout-index without -f should fail on conflicting work tree.' ' + test_must_fail git checkout-index -a +' -test_expect_success \ - 'git checkout-index conflicting paths.' \ - 'test -f path0 && test -d path1 && test -f path1/file1' +test_expect_success 'git checkout-index with -f should succeed.' ' + git checkout-index -f -a && + test_path_is_file path0 && + test_path_is_dir path1 && + test_path_is_file path1/file1 +' test_expect_success SYMLINKS 'checkout-index -f twice with --prefix' ' mkdir -p tar/get && @@ -83,53 +83,63 @@ test_expect_success SYMLINKS 'checkout-index -f twice with --prefix' ' # path path3 is occupied by a non-directory. With "-f" it should remove # the symlink path3 and create directory path3 and file path3/file1. -mkdir path2 -date >path2/file0 -test_expect_success \ - 'git update-index --add path2/file0' \ - 'git update-index --add path2/file0' -test_expect_success \ - 'writing tree out with git write-tree' \ - 'tree1=$(git write-tree)' +test_expect_success 'prepare path2/file0 and index' ' + mkdir path2 && + date >path2/file0 && + git update-index --add path2/file0 +' + +test_expect_success 'write tree with path2/file0' ' + tree1=$(git write-tree) +' + test_debug 'show_files $tree1' -mkdir path3 -date >path3/file1 -test_expect_success \ - 'git update-index --add path3/file1' \ - 'git update-index --add path3/file1' -test_expect_success \ - 'writing tree out with git write-tree' \ - 'tree2=$(git write-tree)' +test_expect_success 'prepare path3/file1 and index' ' + mkdir path3 && + date >path3/file1 && + git update-index --add path3/file1 +' + +test_expect_success 'write tree with path3/file1' ' + tree2=$(git write-tree) +' + test_debug 'show_files $tree2' -rm -fr path3 -test_expect_success \ - 'read previously written tree and checkout.' \ - 'git read-tree -m $tree1 && git checkout-index -f -a' +test_expect_success 'read previously written tree and checkout.' ' + rm -fr path3 && + git read-tree -m $tree1 && + git checkout-index -f -a +' + test_debug 'show_files $tree1' -test_expect_success \ - 'add a symlink' \ - 'test_ln_s_add path2 path3' -test_expect_success \ - 'writing tree out with git write-tree' \ - 'tree3=$(git write-tree)' +test_expect_success 'add a symlink' ' + test_ln_s_add path2 path3 +' + +test_expect_success 'write tree with symlink path3' ' + tree3=$(git write-tree) +' + test_debug 'show_files $tree3' # Morten says "Got that?" here. # Test begins. -test_expect_success \ - 'read previously written tree and checkout.' \ - 'git read-tree $tree2 && git checkout-index -f -a' +test_expect_success 'read previously written tree and checkout.' ' + git read-tree $tree2 && + git checkout-index -f -a +' + test_debug 'show_files $tree2' -test_expect_success \ - 'checking out conflicting path with -f' \ - 'test ! -h path2 && test -d path2 && - test ! -h path3 && test -d path3 && - test ! -h path2/file0 && test -f path2/file0 && - test ! -h path3/file1 && test -f path3/file1' +test_expect_success 'checking out conflicting path with -f' ' + test_path_is_dir_not_symlink path2 && + test_path_is_dir_not_symlink path3 && + test_path_is_file_not_symlink path2/file0 && + test_path_is_file_not_symlink path3/file1 +' test_done From 849988bc7499d11f127305a8f20a3a054eb0b0c0 Mon Sep 17 00:00:00 2001 From: Trieu Huynh Date: Sat, 28 Mar 2026 22:59:35 +0900 Subject: [PATCH 29/34] t6101: avoid suppressing git's exit code Update t6101-rev-parse-parents.sh to redirect git-rev-parse output to a temporary file instead of piping it directly to not hide the exit code of git commands behind pipes, as a crash in git might go unnoticed. Signed-off-by: Trieu Huynh Signed-off-by: Junio C Hamano --- t/t6101-rev-parse-parents.sh | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/t/t6101-rev-parse-parents.sh b/t/t6101-rev-parse-parents.sh index 5f55ab98d352fc..728188971744c4 100755 --- a/t/t6101-rev-parse-parents.sh +++ b/t/t6101-rev-parse-parents.sh @@ -39,7 +39,8 @@ test_expect_success 'setup' ' ' test_expect_success 'start is valid' ' - git rev-parse start | grep "^$OID_REGEX$" + git rev-parse start >actual && + test_grep "^$OID_REGEX$" actual ' test_expect_success 'start^0' ' From 0f0ce0762503cb8f58a3ce07052a639e36e07ed5 Mon Sep 17 00:00:00 2001 From: Shreyansh Paliwal Date: Sat, 28 Mar 2026 20:51:58 +0530 Subject: [PATCH 30/34] doc: gitignore: clarify pattern base for info/exclude and core.excludesFile The pattern format section describes how patterns are interpreted relative to the location of a .gitignore file, but does not mention the behavior for exclude sources outside the working tree. Clarify that patterns from $GIT_DIR/info/exclude and core.excludesFile are treated as if they are specified at the root of the working tree, so a leading '/' anchors matches at the repository root. Reported-by: Dan Drake Signed-off-by: Shreyansh Paliwal Signed-off-by: Junio C Hamano --- Documentation/gitignore.adoc | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/Documentation/gitignore.adoc b/Documentation/gitignore.adoc index 9fccab4ae8df23..a3d24e5c34aabc 100644 --- a/Documentation/gitignore.adoc +++ b/Documentation/gitignore.adoc @@ -96,6 +96,11 @@ PATTERN FORMAT particular `.gitignore` file itself. Otherwise the pattern may also match at any level below the `.gitignore` level. + - Patterns read from exclude sources that are outside the working tree, + such as $GIT_DIR/info/exclude and core.excludesFile, are treated as if + they are specified at the root of the working tree, i.e. a leading "/" + in such patterns anchors the match at the root of the repository. + - If there is a separator at the end of the pattern then the pattern will only match directories, otherwise the pattern can match both files and directories. From 3402850ee16f5a7a05d68cb29271a0e558659aaa Mon Sep 17 00:00:00 2001 From: Quentin Bernet Date: Mon, 30 Mar 2026 13:24:35 +0000 Subject: [PATCH 31/34] docs: fix "git stash [push]" documentation Both the synopsis and explanation are incorrect and contradict each other. The synopsis claims "push" can only be omitted when you do not give any options and arguments. The explanation correctly claims that non-option arguments are not allowed, except pathspec elements preceded by double hyphens. But it also adds "-p" to the list of exceptions, even though it is an option argument. Signed-off-by: Quentin Bernet Signed-off-by: Junio C Hamano --- Documentation/git-stash.adoc | 10 ++++------ builtin/stash.c | 4 ++-- 2 files changed, 6 insertions(+), 8 deletions(-) diff --git a/Documentation/git-stash.adoc b/Documentation/git-stash.adoc index 235d57ddd8f5d1..b05c990ecd8759 100644 --- a/Documentation/git-stash.adoc +++ b/Documentation/git-stash.adoc @@ -14,10 +14,10 @@ git stash drop [-q | --quiet] [] git stash pop [--index] [-q | --quiet] [] git stash apply [--index] [-q | --quiet] [] git stash branch [] -git stash [push [-p | --patch] [-S | --staged] [-k | --[no-]keep-index] [-q | --quiet] +git stash [push] [-p | --patch] [-S | --staged] [-k | --[no-]keep-index] [-q | --quiet] [-u | --include-untracked] [-a | --all] [(-m | --message) ] [--pathspec-from-file= [--pathspec-file-nul]] - [--] [...]] + [--] [...] git stash save [-p | --patch] [-S | --staged] [-k | --[no-]keep-index] [-q | --quiet] [-u | --include-untracked] [-a | --all] [] git stash clear @@ -60,10 +60,8 @@ COMMANDS the description along with the stashed state. + For quickly making a snapshot, you can omit "push". In this mode, -non-option arguments are not allowed to prevent a misspelled -subcommand from making an unwanted stash entry. The two exceptions to this -are `stash -p` which acts as alias for `stash push -p` and pathspec elements, -which are allowed after a double hyphen `--` for disambiguation. +pathspec elements are only allowed after a double hyphen `--` +to prevent a misspelled subcommand from making an unwanted stash entry. `save [-p | --patch] [-S | --staged] [-k | --[no-]keep-index] [-u | --include-untracked] [-a | --all] [-q | --quiet] []`:: diff --git a/builtin/stash.c b/builtin/stash.c index 95c5005b0b2197..0d27b2fb1fcb67 100644 --- a/builtin/stash.c +++ b/builtin/stash.c @@ -50,10 +50,10 @@ #define BUILTIN_STASH_STORE_USAGE \ N_("git stash store [(-m | --message) ] [-q | --quiet] ") #define BUILTIN_STASH_PUSH_USAGE \ - N_("git stash [push [-p | --patch] [-S | --staged] [-k | --[no-]keep-index] [-q | --quiet]\n" \ + N_("git stash [push] [-p | --patch] [-S | --staged] [-k | --[no-]keep-index] [-q | --quiet]\n" \ " [-u | --include-untracked] [-a | --all] [(-m | --message) ]\n" \ " [--pathspec-from-file= [--pathspec-file-nul]]\n" \ - " [--] [...]]") + " [--] [...]") #define BUILTIN_STASH_SAVE_USAGE \ N_("git stash save [-p | --patch] [-S | --staged] [-k | --[no-]keep-index] [-q | --quiet]\n" \ " [-u | --include-untracked] [-a | --all] []") From 66dd13f3f72e8cad1327b6b134dec079936c4b07 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Matthias=20A=C3=9Fhauer?= Date: Mon, 6 Apr 2026 05:45:29 +0000 Subject: [PATCH 32/34] unify and bump _WIN32_WINNT definition to Windows 8.1 MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Git for Windows doesn't support anything prior to Windows 8.1 since 2.47.0 and Git followed along with commits like ce6ccba (mingw: drop Windows 7-specific work-around, 2025-08-04). There is no need to pretend to the compiler that we still support Windows Vista, just to lock us out of easy access to newer APIs. There is also no need to have conflicting and unused definitions claiming we support some versions of Windows XP or even Windows NT 4.0. Bump all definitions of _WIN32_WINNT to a realistic value of Windows 8.1. This will also simplify code for a followup commit that will improve cpu core detection on multi-socket systems. Signed-off-by: Matthias Aßhauer Signed-off-by: Johannes Schindelin Signed-off-by: Junio C Hamano --- compat/mingw.c | 2 +- compat/nedmalloc/malloc.c.h | 2 +- compat/poll/poll.c | 4 ++-- compat/posix.h | 2 +- compat/win32/flush.c | 2 ++ 5 files changed, 7 insertions(+), 5 deletions(-) diff --git a/compat/mingw.c b/compat/mingw.c index 338ec3535edf41..2023c16db65742 100644 --- a/compat/mingw.c +++ b/compat/mingw.c @@ -2464,7 +2464,7 @@ int mingw_rename(const char *pold, const char *pnew) if (supports_file_rename_info_ex) { /* * Our minimum required Windows version is still set to Windows - * Vista. We thus have to declare required infrastructure for + * 8.1. We thus have to declare required infrastructure for * FileRenameInfoEx ourselves until we bump _WIN32_WINNT to * 0x0A00. Furthermore, we have to handle cases where the * FileRenameInfoEx call isn't supported yet. diff --git a/compat/nedmalloc/malloc.c.h b/compat/nedmalloc/malloc.c.h index 814845d4b33fc8..e0c567586c878c 100644 --- a/compat/nedmalloc/malloc.c.h +++ b/compat/nedmalloc/malloc.c.h @@ -500,7 +500,7 @@ MAX_RELEASE_CHECK_RATE default: 4095 unless not HAVE_MMAP #ifdef WIN32 #define WIN32_LEAN_AND_MEAN #ifndef _WIN32_WINNT -#define _WIN32_WINNT 0x403 +#define _WIN32_WINNT 0x603 #endif #include #define HAVE_MMAP 1 diff --git a/compat/poll/poll.c b/compat/poll/poll.c index a2becd16cd0bd0..ea362b4a8e2340 100644 --- a/compat/poll/poll.c +++ b/compat/poll/poll.c @@ -20,7 +20,7 @@ #define DISABLE_SIGN_COMPARE_WARNINGS -/* To bump the minimum Windows version to Windows Vista */ +/* To bump the minimum Windows version to Windows 8.1 */ #include "git-compat-util.h" /* Tell gcc not to warn about the (nfd < 0) tests, below. */ @@ -41,7 +41,7 @@ #if (defined _WIN32 || defined __WIN32__) && ! defined __CYGWIN__ # define WIN32_NATIVE # if defined (_MSC_VER) && !defined(_WIN32_WINNT) -# define _WIN32_WINNT 0x0502 +# define _WIN32_WINNT 0x0603 # endif # include # include diff --git a/compat/posix.h b/compat/posix.h index 3c611d2736c47a..94699a03fa5d69 100644 --- a/compat/posix.h +++ b/compat/posix.h @@ -76,7 +76,7 @@ #if defined(WIN32) && !defined(__CYGWIN__) /* Both MinGW and MSVC */ # if !defined(_WIN32_WINNT) -# define _WIN32_WINNT 0x0600 +# define _WIN32_WINNT 0x0603 # endif #define WIN32_LEAN_AND_MEAN /* stops windows.h including winsock.h */ #include diff --git a/compat/win32/flush.c b/compat/win32/flush.c index 291f90ea9406ba..7244ff69ac08b7 100644 --- a/compat/win32/flush.c +++ b/compat/win32/flush.c @@ -6,7 +6,9 @@ int win32_fsync_no_flush(int fd) { IO_STATUS_BLOCK io_status; +#ifndef FLUSH_FLAGS_FILE_DATA_ONLY #define FLUSH_FLAGS_FILE_DATA_ONLY 1 +#endif DECLARE_PROC_ADDR(ntdll.dll, NTSTATUS, NTAPI, NtFlushBuffersFileEx, HANDLE FileHandle, ULONG Flags, PVOID Parameters, ULONG ParameterSize, From 2f8c3f6a5a6d6a3de205be709e1a598b9d4b0b3e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Matthias=20A=C3=9Fhauer?= Date: Mon, 6 Apr 2026 05:45:30 +0000 Subject: [PATCH 33/34] compat/winansi: drop pre-Vista workaround MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit 1edeb9a (Win32: warn if the console font doesn't support Unicode, 2014-06-10) introduced both code to detect the current console font on Windows Vista and newer and a fallback for older systems to detect the default console font and issue a warning if that font doesn't support unicode. Since we haven't supported any Windows older than Vista in almost a decade, we don't need to keep the workaround. Signed-off-by: Matthias Aßhauer Signed-off-by: Johannes Schindelin Signed-off-by: Junio C Hamano --- compat/winansi.c | 37 ++++--------------------------------- 1 file changed, 4 insertions(+), 33 deletions(-) diff --git a/compat/winansi.c b/compat/winansi.c index ac2ffb78691a7d..3ce190093901b4 100644 --- a/compat/winansi.c +++ b/compat/winansi.c @@ -32,47 +32,18 @@ static int non_ascii_used = 0; static HANDLE hthread, hread, hwrite; static HANDLE hconsole1, hconsole2; -#ifdef __MINGW32__ -#if !defined(__MINGW64_VERSION_MAJOR) || __MINGW64_VERSION_MAJOR < 5 -typedef struct _CONSOLE_FONT_INFOEX { - ULONG cbSize; - DWORD nFont; - COORD dwFontSize; - UINT FontFamily; - UINT FontWeight; - WCHAR FaceName[LF_FACESIZE]; -} CONSOLE_FONT_INFOEX, *PCONSOLE_FONT_INFOEX; -#endif -#endif - static void warn_if_raster_font(void) { DWORD fontFamily = 0; - DECLARE_PROC_ADDR(kernel32.dll, BOOL, WINAPI, - GetCurrentConsoleFontEx, HANDLE, BOOL, - PCONSOLE_FONT_INFOEX); + CONSOLE_FONT_INFOEX cfi; /* don't bother if output was ascii only */ if (!non_ascii_used) return; - /* GetCurrentConsoleFontEx is available since Vista */ - if (INIT_PROC_ADDR(GetCurrentConsoleFontEx)) { - CONSOLE_FONT_INFOEX cfi; - cfi.cbSize = sizeof(cfi); - if (GetCurrentConsoleFontEx(console, 0, &cfi)) - fontFamily = cfi.FontFamily; - } else { - /* pre-Vista: check default console font in registry */ - HKEY hkey; - if (ERROR_SUCCESS == RegOpenKeyExA(HKEY_CURRENT_USER, "Console", - 0, KEY_READ, &hkey)) { - DWORD size = sizeof(fontFamily); - RegQueryValueExA(hkey, "FontFamily", NULL, NULL, - (LPVOID) &fontFamily, &size); - RegCloseKey(hkey); - } - } + cfi.cbSize = sizeof(cfi); + if (GetCurrentConsoleFontEx(console, 0, &cfi)) + fontFamily = cfi.FontFamily; if (!(fontFamily & TMPF_TRUETYPE)) { const wchar_t *msg = L"\nWarning: Your console font probably " From 1adf5bca8c3cf778103548b9355777cf2d12efdd Mon Sep 17 00:00:00 2001 From: Junio C Hamano Date: Mon, 6 Apr 2026 15:42:30 -0700 Subject: [PATCH 34/34] A handful before -rc1 Signed-off-by: Junio C Hamano --- Documentation/RelNotes/2.54.0.adoc | 20 ++++++++++++++++++++ 1 file changed, 20 insertions(+) diff --git a/Documentation/RelNotes/2.54.0.adoc b/Documentation/RelNotes/2.54.0.adoc index 04c038f035ae76..c692dddb4a1062 100644 --- a/Documentation/RelNotes/2.54.0.adoc +++ b/Documentation/RelNotes/2.54.0.adoc @@ -119,6 +119,8 @@ UI, Workflows & Features * "git replay" (experimental) learns, in addition to "pick" and "replay", a new operating mode "revert". + * git replay now supports replaying down to the root commit. + Performance, Internal Implementation, Development Support etc. -------------------------------------------------------------- @@ -283,6 +285,21 @@ Performance, Internal Implementation, Development Support etc. is buggy and breaks our existing tests, which unfortunately have been rewritten to avoid triggering the bug. + * Object name handling (disambiguation and abbreviation) has been + refactored to be backend-generic, moving logic into the respective + object database backends. + + * pack-objects's --stdin-packs=follow mode learns to handle + excluded-but-open packs. + + * A few code paths that spawned child processes for network + connection weren't wait(2)ing for their children and letting "init" + reap them instead; they have been tightened. + + * Adjust the codebase for C23 that changes functions like strchr() + that discarded constness when they return a pointer into a const + string to preserve constness. + Fixes since v2.53 ----------------- @@ -512,3 +529,6 @@ Fixes since v2.53 (merge 37182267a0 kh/doc-interpret-trailers-1 later to maint). (merge f64c50e768 jc/rerere-modern-strbuf-handling later to maint). (merge 699248d89e th/t8003-unhide-git-failures later to maint). + (merge d8e34f971b za/t2000-modernise later to maint). + (merge 849988bc74 th/t6101-unhide-git-failures later to maint). + (merge 0f0ce07625 sp/doc-gitignore-oowt later to maint).