Skip to content

Commit 4e01d1d

Browse files
mjcheetham authored and dscho committed
maintenance: add new cache-local-objects maintenance task (#720)
Introduce a new maintenance task, `cache-local-objects`, that operates on Scalar or VFS for Git repositories with a per-volume, shared object cache (specified by `gvfs.sharedCache`) to migrate packfiles and loose objects from the repository object directory to the shared cache. Older versions of `microsoft/git` incorrectly placed packfiles in the repository object directory instead of the shared cache; this task will help clean up existing clones impacted by that issue. Fixes #716
2 parents 2e65b96 + 5de8f7c commit 4e01d1d

File tree

4 files changed

+330
-0
lines changed

4 files changed

+330
-0
lines changed

Documentation/git-maintenance.adoc

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -70,6 +70,7 @@ task:
7070
* `prefetch`: hourly.
7171
* `loose-objects`: daily.
7272
* `incremental-repack`: daily.
73+
* `cache-local-objects`: weekly.
7374
--
7475
+
7576
`git maintenance register` will also disable foreground maintenance by
@@ -185,6 +186,13 @@ worktree-prune::
185186
The `worktree-prune` task deletes stale or broken worktrees. See
186187
linkgit:git-worktree[1] for more information.
187188

189+
cache-local-objects::
190+
The `cache-local-objects` task only operates on Scalar or VFS for Git
191+
repositories (cloned with either `scalar clone` or `gvfs clone`) that
192+
have the `gvfs.sharedCache` configuration setting present. This task
193+
migrates pack files and loose objects from the repository's object
194+
directory into the shared volume cache.
195+
188196
OPTIONS
189197
-------
190198
--auto::

builtin/gc.c

Lines changed: 191 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -13,8 +13,10 @@
1313
#define USE_THE_REPOSITORY_VARIABLE
1414
#define DISABLE_SIGN_COMPARE_WARNINGS
1515

16+
#include "git-compat-util.h"
1617
#include "builtin.h"
1718
#include "abspath.h"
19+
#include "copy.h"
1820
#include "date.h"
1921
#include "dir.h"
2022
#include "environment.h"
@@ -264,6 +266,7 @@ enum maintenance_task_label {
264266
TASK_REFLOG_EXPIRE,
265267
TASK_WORKTREE_PRUNE,
266268
TASK_RERERE_GC,
269+
TASK_CACHE_LOCAL_OBJS,
267270

268271
/* Leave as final value */
269272
TASK__COUNT
@@ -1707,6 +1710,186 @@ static int geometric_repack_auto_condition(struct gc_config *cfg UNUSED)
17071710
return ret;
17081711
}
17091712

1713+
/*
 * Make `dst` available with the contents of `src`, leaving `src` in place.
 *
 * A hard link is attempted first. If that fails, the file is copied
 * instead (created with mode 0444). A link failure caused by EXDEV
 * (src and dst on different file systems) is expected and stays silent;
 * any other link failure is reported as a warning before the copy is
 * attempted anyway. Dies if the fallback copy fails.
 */
static void link_or_copy_or_die(const char *src, const char *dst)
{
	if (link(src, dst)) {
		/* Crossing file systems is the one failure we expect here. */
		if (errno != EXDEV)
			warning_errno(_("failed to link '%s' to '%s'"), src, dst);

		/* Note the argument order: destination first, then source. */
		if (copy_file(dst, src, 0444))
			die_errno(_("failed to copy '%s' to '%s'"), src, dst);
	}
}
1725+
1726+
/*
 * Move `src` to `dst`.
 *
 * A plain rename() is attempted first. If that fails, the file is copied
 * to `dst` (created with mode 0444) and `src` is then unlinked. A rename
 * failure caused by EXDEV (src and dst on different file systems) is
 * expected and stays silent; any other rename failure is reported as a
 * warning before the copy-and-delete fallback is attempted anyway.
 * Dies if either the copy or the removal of `src` fails.
 */
static void rename_or_copy_or_die(const char *src, const char *dst)
{
	if (rename(src, dst)) {
		/* Crossing file systems is the one failure we expect here. */
		if (errno != EXDEV)
			warning_errno(_("failed to move '%s' to '%s'"), src, dst);

		/* Fall back to copy + delete; destination is the first argument. */
		if (copy_file(dst, src, 0444))
			die_errno(_("failed to copy '%s' to '%s'"), src, dst);

		if (unlink(src))
			die_errno(_("failed to delete '%s'"), src);
	}
}
1741+
1742+
static void migrate_pack(const char *srcdir, const char *dstdir,
1743+
const char *pack_filename)
1744+
{
1745+
size_t basenamelen, srclen, dstlen;
1746+
struct strbuf src = STRBUF_INIT, dst = STRBUF_INIT;
1747+
struct {
1748+
const char *ext;
1749+
unsigned move:1;
1750+
} files[] = {
1751+
{".pack", 0},
1752+
{".keep", 0},
1753+
{".rev", 0},
1754+
{".idx", 1}, /* The index file must be atomically moved last. */
1755+
};
1756+
1757+
trace2_region_enter("maintenance", "migrate_pack", the_repository);
1758+
1759+
basenamelen = strlen(pack_filename) - 5; /* .pack */
1760+
strbuf_addstr(&src, srcdir);
1761+
strbuf_addch(&src, '/');
1762+
strbuf_add(&src, pack_filename, basenamelen);
1763+
strbuf_addstr(&src, ".idx");
1764+
1765+
/* A pack without an index file is not yet ready to be migrated. */
1766+
if (!file_exists(src.buf))
1767+
goto cleanup;
1768+
1769+
strbuf_setlen(&src, src.len - 4 /* .idx */);
1770+
strbuf_addstr(&dst, dstdir);
1771+
strbuf_addch(&dst, '/');
1772+
strbuf_add(&dst, pack_filename, basenamelen);
1773+
1774+
srclen = src.len;
1775+
dstlen = dst.len;
1776+
1777+
/* Move or copy files from the source directory to the destination. */
1778+
for (size_t i = 0; i < ARRAY_SIZE(files); i++) {
1779+
strbuf_setlen(&src, srclen);
1780+
strbuf_addstr(&src, files[i].ext);
1781+
1782+
if (!file_exists(src.buf))
1783+
continue;
1784+
1785+
strbuf_setlen(&dst, dstlen);
1786+
strbuf_addstr(&dst, files[i].ext);
1787+
1788+
if (files[i].move)
1789+
rename_or_copy_or_die(src.buf, dst.buf);
1790+
else
1791+
link_or_copy_or_die(src.buf, dst.buf);
1792+
}
1793+
1794+
/*
1795+
* Now the pack and all associated files exist at the destination we can
1796+
* now clean up the files in the source directory.
1797+
*/
1798+
for (size_t i = 0; i < ARRAY_SIZE(files); i++) {
1799+
/* Files that were moved rather than copied have no clean up. */
1800+
if (files[i].move)
1801+
continue;
1802+
1803+
strbuf_setlen(&src, srclen);
1804+
strbuf_addstr(&src, files[i].ext);
1805+
1806+
/* Files that never existed in originally have no clean up.*/
1807+
if (!file_exists(src.buf))
1808+
continue;
1809+
1810+
if (unlink(src.buf))
1811+
warning_errno(_("failed to delete '%s'"), src.buf);
1812+
}
1813+
1814+
cleanup:
1815+
strbuf_release(&src);
1816+
strbuf_release(&dst);
1817+
1818+
trace2_region_leave("maintenance", "migrate_pack", the_repository);
1819+
}
1820+
1821+
/*
 * Callback passed to for_each_file_in_pack_dir(): migrate one pack into
 * the shared cache's pack directory.
 *
 * `full_path` (of length `full_path_len`) is the path of the file being
 * visited, `file_name` is its final component, and `data` carries the
 * destination directory as a NUL-terminated string.
 */
static void move_pack_to_shared_cache(const char *full_path, size_t full_path_len,
				      const char *file_name, void *data)
{
	const char *dstdir = (const char *)data;
	size_t dirlen;
	char *srcdir;

	/*
	 * We only care about the actual pack files here.
	 * The associated .idx, .keep, .rev files will be copied in tandem
	 * with the pack file, with the index file being moved last.
	 * The original locations of the non-index files will only be
	 * deleted once all other files have been copied/moved.
	 */
	if (!ends_with(file_name, ".pack"))
		return;

	/* Strip "/<file_name>" from the full path to get the directory. */
	dirlen = full_path_len - strlen(file_name) - 1;
	srcdir = xstrndup(full_path, dirlen);

	migrate_pack(srcdir, dstdir, file_name);

	free(srcdir);
}
1842+
1843+
static int move_loose_object_to_shared_cache(const struct object_id *oid,
1844+
const char *path,
1845+
UNUSED void *data)
1846+
{
1847+
struct stat st;
1848+
struct strbuf dst = STRBUF_INIT;
1849+
char *hex = oid_to_hex(oid);
1850+
1851+
strbuf_addf(&dst, "%s/%.2s/", shared_object_dir, hex);
1852+
1853+
if (stat(dst.buf, &st)) {
1854+
if (mkdir(dst.buf, 0777))
1855+
die_errno(_("failed to create directory '%s'"), dst.buf);
1856+
} else if (!S_ISDIR(st.st_mode))
1857+
die(_("expected '%s' to be a directory"), dst.buf);
1858+
1859+
strbuf_addstr(&dst, hex+2);
1860+
rename_or_copy_or_die(path, dst.buf);
1861+
1862+
strbuf_release(&dst);
1863+
return 0;
1864+
}
1865+
1866+
/*
 * The `cache-local-objects` maintenance task: migrate pack files and
 * loose objects from the repository's object directory into the shared
 * object cache.
 *
 * Does nothing when no shared object directory is set, or when it is the
 * same path as the repository's own object directory. Both parameters
 * are unused. Always returns 0.
 */
static int maintenance_task_cache_local_objs(UNUSED struct maintenance_run_opts *opts,
					     UNUSED struct gc_config *cfg)
{
	struct repository *repo = the_repository;
	struct strbuf shared_pack_dir = STRBUF_INIT;
	const char *local_odb;

	/* This task is only applicable with a VFS/Scalar shared cache. */
	if (!shared_object_dir)
		return 0;

	/* If the dest is the same as the local odb path then we do nothing. */
	local_odb = repo->objects->sources->path;
	if (!fspathcmp(local_odb, shared_object_dir))
		return 0;

	/* Packs first, into "<shared cache>/pack" ... */
	strbuf_addf(&shared_pack_dir, "%s/pack", shared_object_dir);
	for_each_file_in_pack_dir(local_odb, move_pack_to_shared_cache,
				  shared_pack_dir.buf);

	/* ... then every loose object that lives in the local odb. */
	for_each_loose_object(repo->objects, move_loose_object_to_shared_cache,
			      NULL, FOR_EACH_OBJECT_LOCAL_ONLY);

	strbuf_release(&shared_pack_dir);
	return 0;
}
1892+
17101893
typedef int (*maintenance_task_fn)(struct maintenance_run_opts *opts,
17111894
struct gc_config *cfg);
17121895
typedef int (*maintenance_auto_fn)(struct gc_config *cfg);
@@ -1785,6 +1968,10 @@ static const struct maintenance_task tasks[] = {
17851968
.background = maintenance_task_rerere_gc,
17861969
.auto_condition = rerere_gc_condition,
17871970
},
1971+
[TASK_CACHE_LOCAL_OBJS] = {
1972+
"cache-local-objects",
1973+
maintenance_task_cache_local_objs,
1974+
},
17881975
};
17891976

17901977
enum task_phase {
@@ -1911,6 +2098,10 @@ static const struct maintenance_strategy incremental_strategy = {
19112098
.type = MAINTENANCE_TYPE_SCHEDULED,
19122099
.schedule = SCHEDULE_WEEKLY,
19132100
},
2101+
[TASK_CACHE_LOCAL_OBJS] = {
2102+
.type = MAINTENANCE_TYPE_SCHEDULED,
2103+
.schedule = SCHEDULE_WEEKLY,
2104+
},
19142105
/*
19152106
* Historically, the "incremental" strategy was only available
19162107
* in the context of scheduled maintenance when set up via

scalar.c

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1254,6 +1254,7 @@ static int cmd_run(int argc, const char **argv)
12541254
{ "fetch", "prefetch" },
12551255
{ "loose-objects", "loose-objects" },
12561256
{ "pack-files", "incremental-repack" },
1257+
{ "cache-local-objects", "cache-local-objects" },
12571258
{ NULL, NULL }
12581259
};
12591260
struct strbuf buf = STRBUF_INIT;

0 commit comments

Comments
 (0)