@@ -91,54 +91,271 @@ static void replace_dir(struct strbuf *buf, int len, const char *newdir)
9191 buf -> buf [newlen ] = '/' ;
9292}
9393
94- static const char * common_list [] = {
95- "/branches" , "/hooks" , "/info" , "!/logs" , "/lost-found" ,
96- "/objects" , "/refs" , "/remotes" , "/worktrees" , "/rr-cache" , "/svn" ,
97- "config" , "!gc.pid" , "packed-refs" , "shallow" ,
98- NULL
94+ struct common_dir {
95+ /* Not considered garbage for report_linked_checkout_garbage */
96+ unsigned ignore_garbage :1 ;
97+ unsigned is_dir :1 ;
98+ /* Not common even though its parent is */
99+ unsigned exclude :1 ;
100+ const char * dirname ;
99101};
100102
101- static void update_common_dir (struct strbuf * buf , int git_dir_len )
103+ static struct common_dir common_list [] = {
104+ { 0 , 1 , 0 , "branches" },
105+ { 0 , 1 , 0 , "hooks" },
106+ { 0 , 1 , 0 , "info" },
107+ { 0 , 0 , 1 , "info/sparse-checkout" },
108+ { 1 , 1 , 0 , "logs" },
109+ { 1 , 1 , 1 , "logs/HEAD" },
110+ { 0 , 1 , 1 , "logs/refs/bisect" },
111+ { 0 , 1 , 0 , "lost-found" },
112+ { 0 , 1 , 0 , "objects" },
113+ { 0 , 1 , 0 , "refs" },
114+ { 0 , 1 , 1 , "refs/bisect" },
115+ { 0 , 1 , 0 , "remotes" },
116+ { 0 , 1 , 0 , "worktrees" },
117+ { 0 , 1 , 0 , "rr-cache" },
118+ { 0 , 1 , 0 , "svn" },
119+ { 0 , 0 , 0 , "config" },
120+ { 1 , 0 , 0 , "gc.pid" },
121+ { 0 , 0 , 0 , "packed-refs" },
122+ { 0 , 0 , 0 , "shallow" },
123+ { 0 , 0 , 0 , NULL }
124+ };
125+
126+ /*
127+ * A compressed trie. A trie node consists of zero or more characters that
128+ * are common to all elements with this prefix, optionally followed by some
129+ * children. If value is not NULL, the trie node is a terminal node.
130+ *
131+ * For example, consider the following set of strings:
132+ * abc
133+ * def
134+ * definite
135+ * definition
136+ *
137+ * The trie would look look like:
138+ * root: len = 0, children a and d non-NULL, value = NULL.
139+ * a: len = 2, contents = bc, value = (data for "abc")
140+ * d: len = 2, contents = ef, children i non-NULL, value = (data for "def")
141+ * i: len = 3, contents = nit, children e and i non-NULL, value = NULL
142+ * e: len = 0, children all NULL, value = (data for "definite")
143+ * i: len = 2, contents = on, children all NULL,
144+ * value = (data for "definition")
145+ */
146+ struct trie {
147+ struct trie * children [256 ];
148+ int len ;
149+ char * contents ;
150+ void * value ;
151+ };
152+
153+ static struct trie * make_trie_node (const char * key , void * value )
102154{
103- char * base = buf -> buf + git_dir_len ;
104- const char * * p ;
105-
106- if (is_dir_file (base , "logs" , "HEAD" ) ||
107- is_dir_file (base , "info" , "sparse-checkout" ))
108- return ; /* keep this in $GIT_DIR */
109- for (p = common_list ; * p ; p ++ ) {
110- const char * path = * p ;
111- int is_dir = 0 ;
112- if (* path == '!' )
113- path ++ ;
114- if (* path == '/' ) {
115- path ++ ;
116- is_dir = 1 ;
155+ struct trie * new_node = xcalloc (1 , sizeof (* new_node ));
156+ new_node -> len = strlen (key );
157+ if (new_node -> len ) {
158+ new_node -> contents = xmalloc (new_node -> len );
159+ memcpy (new_node -> contents , key , new_node -> len );
160+ }
161+ new_node -> value = value ;
162+ return new_node ;
163+ }
164+
165+ /*
166+ * Add a key/value pair to a trie. The key is assumed to be \0-terminated.
167+ * If there was an existing value for this key, return it.
168+ */
169+ static void * add_to_trie (struct trie * root , const char * key , void * value )
170+ {
171+ struct trie * child ;
172+ void * old ;
173+ int i ;
174+
175+ if (!* key ) {
176+ /* we have reached the end of the key */
177+ old = root -> value ;
178+ root -> value = value ;
179+ return old ;
180+ }
181+
182+ for (i = 0 ; i < root -> len ; i ++ ) {
183+ if (root -> contents [i ] == key [i ])
184+ continue ;
185+
186+ /*
187+ * Split this node: child will contain this node's
188+ * existing children.
189+ */
190+ child = malloc (sizeof (* child ));
191+ memcpy (child -> children , root -> children , sizeof (root -> children ));
192+
193+ child -> len = root -> len - i - 1 ;
194+ if (child -> len ) {
195+ child -> contents = xstrndup (root -> contents + i + 1 ,
196+ child -> len );
117197 }
118- if (is_dir && dir_prefix (base , path )) {
119- replace_dir (buf , git_dir_len , get_git_common_dir ());
120- return ;
198+ child -> value = root -> value ;
199+ root -> value = NULL ;
200+ root -> len = i ;
201+
202+ memset (root -> children , 0 , sizeof (root -> children ));
203+ root -> children [(unsigned char )root -> contents [i ]] = child ;
204+
205+ /* This is the newly-added child. */
206+ root -> children [(unsigned char )key [i ]] =
207+ make_trie_node (key + i + 1 , value );
208+ return NULL ;
209+ }
210+
211+ /* We have matched the entire compressed section */
212+ if (key [i ]) {
213+ child = root -> children [(unsigned char )key [root -> len ]];
214+ if (child ) {
215+ return add_to_trie (child , key + root -> len + 1 , value );
216+ } else {
217+ child = make_trie_node (key + root -> len + 1 , value );
218+ root -> children [(unsigned char )key [root -> len ]] = child ;
219+ return NULL ;
121220 }
122- if (!is_dir && !strcmp (base , path )) {
123- replace_dir (buf , git_dir_len , get_git_common_dir ());
124- return ;
221+ }
222+
223+ old = root -> value ;
224+ root -> value = value ;
225+ return old ;
226+ }
227+
228+ typedef int (* match_fn )(const char * unmatched , void * data , void * baton );
229+
230+ /*
231+ * Search a trie for some key. Find the longest /-or-\0-terminated
232+ * prefix of the key for which the trie contains a value. Call fn
233+ * with the unmatched portion of the key and the found value, and
234+ * return its return value. If there is no such prefix, return -1.
235+ *
236+ * The key is partially normalized: consecutive slashes are skipped.
237+ *
238+ * For example, consider the trie containing only [refs,
239+ * refs/worktree] (both with values).
240+ *
241+ * | key | unmatched | val from node | return value |
242+ * |-----------------|------------|---------------|--------------|
243+ * | a | not called | n/a | -1 |
244+ * | refs | \0 | refs | as per fn |
245+ * | refs/ | / | refs | as per fn |
246+ * | refs/w | /w | refs | as per fn |
247+ * | refs/worktree | \0 | refs/worktree | as per fn |
248+ * | refs/worktree/ | / | refs/worktree | as per fn |
249+ * | refs/worktree/a | /a | refs/worktree | as per fn |
250+ * |-----------------|------------|---------------|--------------|
251+ *
252+ */
253+ static int trie_find (struct trie * root , const char * key , match_fn fn ,
254+ void * baton )
255+ {
256+ int i ;
257+ int result ;
258+ struct trie * child ;
259+
260+ if (!* key ) {
261+ /* we have reached the end of the key */
262+ if (root -> value && !root -> len )
263+ return fn (key , root -> value , baton );
264+ else
265+ return -1 ;
266+ }
267+
268+ for (i = 0 ; i < root -> len ; i ++ ) {
269+ /* Partial path normalization: skip consecutive slashes. */
270+ if (key [i ] == '/' && key [i + 1 ] == '/' ) {
271+ key ++ ;
272+ continue ;
125273 }
274+ if (root -> contents [i ] != key [i ])
275+ return -1 ;
126276 }
277+
278+ /* Matched the entire compressed section */
279+ key += i ;
280+ if (!* key )
281+ /* End of key */
282+ return fn (key , root -> value , baton );
283+
284+ /* Partial path normalization: skip consecutive slashes */
285+ while (key [0 ] == '/' && key [1 ] == '/' )
286+ key ++ ;
287+
288+ child = root -> children [(unsigned char )* key ];
289+ if (child )
290+ result = trie_find (child , key + 1 , fn , baton );
291+ else
292+ result = -1 ;
293+
294+ if (result >= 0 || (* key != '/' && * key != 0 ))
295+ return result ;
296+ if (root -> value )
297+ return fn (key , root -> value , baton );
298+ else
299+ return -1 ;
300+ }
301+
302+ static struct trie common_trie ;
303+ static int common_trie_done_setup ;
304+
305+ static void init_common_trie (void )
306+ {
307+ struct common_dir * p ;
308+
309+ if (common_trie_done_setup )
310+ return ;
311+
312+ for (p = common_list ; p -> dirname ; p ++ )
313+ add_to_trie (& common_trie , p -> dirname , p );
314+
315+ common_trie_done_setup = 1 ;
316+ }
317+
318+ /*
319+ * Helper function for update_common_dir: returns 1 if the dir
320+ * prefix is common.
321+ */
322+ static int check_common (const char * unmatched , void * value , void * baton )
323+ {
324+ struct common_dir * dir = value ;
325+
326+ if (!dir )
327+ return 0 ;
328+
329+ if (dir -> is_dir && (unmatched [0 ] == 0 || unmatched [0 ] == '/' ))
330+ return !dir -> exclude ;
331+
332+ if (!dir -> is_dir && unmatched [0 ] == 0 )
333+ return !dir -> exclude ;
334+
335+ return 0 ;
336+ }
337+
338+ static void update_common_dir (struct strbuf * buf , int git_dir_len )
339+ {
340+ char * base = buf -> buf + git_dir_len ;
341+ init_common_trie ();
342+ if (trie_find (& common_trie , base , check_common , NULL ) > 0 )
343+ replace_dir (buf , git_dir_len , get_git_common_dir ());
127344}
128345
129346void report_linked_checkout_garbage (void )
130347{
131348 struct strbuf sb = STRBUF_INIT ;
132- const char * * p ;
349+ const struct common_dir * p ;
133350 int len ;
134351
135352 if (!git_common_dir_env )
136353 return ;
137354 strbuf_addf (& sb , "%s/" , get_git_dir ());
138355 len = sb .len ;
139- for (p = common_list ; * p ; p ++ ) {
140- const char * path = * p ;
141- if (* path == '!' )
356+ for (p = common_list ; p -> dirname ; p ++ ) {
357+ const char * path = p -> dirname ;
358+ if (p -> ignore_garbage )
142359 continue ;
143360 strbuf_setlen (& sb , len );
144361 strbuf_addstr (& sb , path );
0 commit comments