Skip to content

Commit 1881e02

Browse files
committed
gc: clean target directories
1 parent 029cbc0 commit 1881e02

5 files changed

Lines changed: 225 additions & 2 deletions

File tree

src/bin/cargo/commands/clean.rs

Lines changed: 20 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -127,6 +127,24 @@ pub fn cli() -> Command {
127127
)
128128
.value_name("SIZE")
129129
.value_parser(parse_human_size),
130+
)
131+
.arg(
132+
opt(
133+
"max-target-dir-age",
134+
"Deletes target directories that have not been used \
135+
since the given age (unstable)",
136+
)
137+
.value_name("DURATION")
138+
.value_parser(parse_time_span),
139+
)
140+
.arg(
141+
opt(
142+
"max-target-dir-size",
143+
"Deletes target directories until the total size is under \
144+
the given size (unstable)",
145+
)
146+
.value_name("SIZE")
147+
.value_parser(parse_human_size),
130148
),
131149
)
132150
.after_help(color_print::cstr!(
@@ -190,6 +208,8 @@ fn gc(gctx: &GlobalContext, args: &ArgMatches) -> CliResult {
190208
max_crate_size: size_opt("max-crate-size"),
191209
max_git_size: size_opt("max-git-size"),
192210
max_download_size: size_opt("max-download-size"),
211+
max_target_dir_age: duration_opt("max-target-dir-age"),
212+
max_target_dir_size: size_opt("max-target-dir-size"),
193213
};
194214
if let Some(age) = duration_opt("max-download-age") {
195215
gc_opts.set_max_download_age(age);

src/cargo/core/gc.rs

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -130,6 +130,10 @@ pub struct GcOpts {
130130
pub max_git_size: Option<u64>,
131131
/// The `--max-download-size` CLI option.
132132
pub max_download_size: Option<u64>,
133+
/// The `--max-target-dir-age` CLI option.
134+
pub max_target_dir_age: Option<Duration>,
135+
/// The `--max-target-dir-size` CLI option.
136+
pub max_target_dir_size: Option<u64>,
133137
}
134138

135139
impl GcOpts {

src/cargo/core/global_cache_tracker.rs

Lines changed: 199 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -115,6 +115,7 @@
115115
//! There are checks for read-only filesystems, which is generally ignored.
116116
117117
use crate::core::gc::GcOpts;
118+
use crate::ops::cargo_clean::validate_target_dir_tag;
118119
use crate::ops::CleanContext;
119120
use crate::util::cache_lock::CacheLockMode;
120121
use crate::util::interning::InternedString;
@@ -587,6 +588,7 @@ impl GlobalCacheTracker {
587588
trace!(target: "gc", "cleaning {gc_opts:?}");
588589
let tx = self.conn.transaction()?;
589590
let mut delete_paths = Vec::new();
591+
let mut target_dir_delete_groups = Vec::new();
590592
// This can be an expensive operation, so only perform it if necessary.
591593
if gc_opts.is_download_cache_opt_set() {
592594
// TODO: Investigate how slow this might be.
@@ -632,6 +634,15 @@ impl GlobalCacheTracker {
632634
let max_age = now - max_age.as_secs();
633635
Self::get_git_co_items_to_clean(&tx, max_age, &base.git_co, &mut delete_paths)?;
634636
}
637+
if let Some(max_age) = gc_opts.max_target_dir_age {
638+
if max_age == Duration::ZERO {
639+
// Special case: max_age=0 means delete all entries
640+
Self::get_target_dirs_to_clean_age(&tx, i64::MAX as Timestamp, &mut target_dir_delete_groups)?;
641+
} else {
642+
let max_age = now - max_age.as_secs();
643+
Self::get_target_dirs_to_clean_age(&tx, max_age, &mut target_dir_delete_groups)?;
644+
}
645+
}
635646
// Size collection must happen after date collection so that dates
636647
// have precedence, since size constraints are a more blunt
637648
// instrument.
@@ -669,6 +680,18 @@ impl GlobalCacheTracker {
669680
if let Some(max_size) = gc_opts.max_download_size {
670681
Self::get_registry_items_to_clean_size_both(&tx, max_size, &base, &mut delete_paths)?;
671682
}
683+
if let Some(max_size) = gc_opts.max_target_dir_size {
684+
Self::get_target_dirs_to_clean_size(&tx, max_size, &mut target_dir_delete_groups)?;
685+
}
686+
687+
for grouped in target_dir_delete_groups {
688+
// Match `cargo clean` behavior for non-explicit target dirs: unsafe target
689+
// directories are skipped instead of aborting the whole GC operation.
690+
if validate_target_dir_tag(&grouped.path).is_ok() {
691+
Self::delete_grouped_target_directory_rows(&tx, &grouped)?;
692+
delete_paths.push(grouped.path);
693+
}
694+
}
672695

673696
clean_ctx.remove_paths(&delete_paths)?;
674697

@@ -1400,6 +1423,182 @@ impl GlobalCacheTracker {
14001423
rows.collect::<Result<Vec<_>, _>>().map_err(Into::into)
14011424
}
14021425

1426+
/// Loads all target-directory association rows.
1427+
fn target_directory_rows(
1428+
conn: &Connection,
1429+
) -> CargoResult<Vec<(String, String, Timestamp)>> {
1430+
let mut stmt = conn.prepare_cached(
1431+
"SELECT workspace_manifest, target_dir, timestamp FROM target_directory",
1432+
)?;
1433+
stmt.query_map([], |row| {
1434+
let workspace_manifest: String = row.get_unwrap(0);
1435+
let target_dir: String = row.get_unwrap(1);
1436+
let timestamp: Timestamp = row.get_unwrap(2);
1437+
Ok((workspace_manifest, target_dir, timestamp))
1438+
})?
1439+
.collect::<Result<Vec<_>, _>>()
1440+
.map_err(Into::into)
1441+
}
1442+
1443+
/// Removes the `target_directory` row identified by the given workspace
/// manifest and target directory.
///
/// Both paths are converted with `to_string_lossy` before being bound to the
/// statement. The number of affected rows is ignored, so a missing row is
/// not reported as an error.
fn delete_target_directory_row(
    conn: &Connection,
    workspace_manifest: &Path,
    target_dir: &Path,
) -> CargoResult<()> {
    let manifest_key = workspace_manifest.to_string_lossy().to_string();
    let target_dir_key = target_dir.to_string_lossy().to_string();
    conn.execute(
        "DELETE FROM target_directory WHERE workspace_manifest = ?1 AND target_dir = ?2",
        [manifest_key, target_dir_key],
    )?;
    Ok(())
}
1457+
1458+
fn delete_grouped_target_directory_rows(
1459+
conn: &Connection,
1460+
grouped: &GroupedTargetDirectory,
1461+
) -> CargoResult<()> {
1462+
for assoc in &grouped.associations {
1463+
Self::delete_target_directory_row(conn, &assoc.workspace_manifest, &assoc.raw_target_dir)?;
1464+
}
1465+
Ok(())
1466+
}
1467+
1468+
/// Groups target-directory association rows by physical target dir path.
1469+
fn grouped_target_directories(
1470+
conn: &Connection,
1471+
) -> CargoResult<Vec<GroupedTargetDirectory>> {
1472+
let mut grouped = HashMap::<PathBuf, Vec<TargetDirectoryAssociation>>::new();
1473+
for (workspace_manifest, target_dir, timestamp) in Self::target_directory_rows(conn)? {
1474+
let raw_target_dir = PathBuf::from(target_dir);
1475+
let normalized_target_dir = paths::normalize_path(&raw_target_dir);
1476+
grouped
1477+
.entry(normalized_target_dir)
1478+
.or_default()
1479+
.push(TargetDirectoryAssociation {
1480+
workspace_manifest: PathBuf::from(workspace_manifest),
1481+
raw_target_dir,
1482+
timestamp,
1483+
});
1484+
}
1485+
Ok(grouped
1486+
.into_iter()
1487+
.map(|(path, associations)| GroupedTargetDirectory { path, associations })
1488+
.collect())
1489+
}
1490+
1491+
/// Adds paths to delete from `target_directory` whose effective last use is
1492+
/// older than the given timestamp, while preserving a shared target dir if
1493+
/// any valid recent association remains.
1494+
fn get_target_dirs_to_clean_age(
1495+
conn: &Connection,
1496+
max_age: Timestamp,
1497+
delete_groups: &mut Vec<GroupedTargetDirectory>,
1498+
) -> CargoResult<()> {
1499+
debug!(target: "gc", "cleaning target_directory since {max_age:?}");
1500+
for grouped in Self::grouped_target_directories(conn)? {
1501+
let (valid, leaked): (Vec<_>, Vec<_>) = grouped
1502+
.associations
1503+
.iter()
1504+
.cloned()
1505+
.partition(|assoc| assoc.workspace_manifest.exists());
1506+
1507+
let effective_timestamp = valid
1508+
.iter()
1509+
.map(|assoc| assoc.timestamp)
1510+
.max()
1511+
.or_else(|| leaked.iter().map(|assoc| assoc.timestamp).max())
1512+
.unwrap();
1513+
1514+
if effective_timestamp < max_age {
1515+
delete_groups.push(grouped);
1516+
continue;
1517+
}
1518+
1519+
for assoc in leaked {
1520+
Self::delete_target_directory_row(conn, &assoc.workspace_manifest, &assoc.raw_target_dir)?;
1521+
}
1522+
}
1523+
Ok(())
1524+
}
1525+
1526+
/// Adds paths to delete from target_directory to keep total size under max_size.
1527+
fn get_target_dirs_to_clean_size(
1528+
conn: &Connection,
1529+
max_size: u64,
1530+
delete_groups: &mut Vec<GroupedTargetDirectory>,
1531+
) -> CargoResult<()> {
1532+
debug!(target: "gc", "cleaning target_directory till under {max_size:?}");
1533+
1534+
let mut grouped = Vec::new();
1535+
for grouped_target in Self::grouped_target_directories(conn)? {
1536+
let (valid, leaked): (Vec<_>, Vec<_>) = grouped_target
1537+
.associations
1538+
.iter()
1539+
.cloned()
1540+
.partition(|assoc| assoc.workspace_manifest.exists());
1541+
1542+
let effective_timestamp = valid
1543+
.iter()
1544+
.map(|assoc| assoc.timestamp)
1545+
.max()
1546+
.or_else(|| leaked.iter().map(|assoc| assoc.timestamp).max())
1547+
.unwrap();
1548+
1549+
if !valid.is_empty() {
1550+
for assoc in leaked {
1551+
Self::delete_target_directory_row(conn, &assoc.workspace_manifest, &assoc.raw_target_dir)?;
1552+
}
1553+
}
1554+
1555+
let size = cargo_util::du(&grouped_target.path, &[]).unwrap_or(0);
1556+
grouped.push(TargetDirectorySizeEntry {
1557+
grouped: grouped_target,
1558+
effective_timestamp,
1559+
size,
1560+
});
1561+
}
1562+
1563+
grouped.sort_by(|a, b| a.effective_timestamp.cmp(&b.effective_timestamp));
1564+
1565+
let mut total_size: u64 = grouped.iter().map(|entry| entry.size).sum();
1566+
debug!(target: "gc", "total target_directory size appears to be {total_size}");
1567+
1568+
if total_size <= max_size {
1569+
return Ok(());
1570+
}
1571+
1572+
for entry in grouped {
1573+
if total_size <= max_size {
1574+
break;
1575+
}
1576+
delete_groups.push(entry.grouped);
1577+
total_size = total_size.saturating_sub(entry.size);
1578+
}
1579+
1580+
Ok(())
1581+
}
1582+
}
1583+
1584+
#[derive(Clone, Debug)]
1585+
struct TargetDirectoryAssociation {
1586+
workspace_manifest: PathBuf,
1587+
raw_target_dir: PathBuf,
1588+
timestamp: Timestamp,
1589+
}
1590+
1591+
#[derive(Debug)]
1592+
struct GroupedTargetDirectory {
1593+
path: PathBuf,
1594+
associations: Vec<TargetDirectoryAssociation>,
1595+
}
1596+
1597+
#[derive(Debug)]
1598+
struct TargetDirectorySizeEntry {
1599+
grouped: GroupedTargetDirectory,
1600+
effective_timestamp: Timestamp,
1601+
size: u64,
14031602
}
14041603

14051604
/// Helper to generate the upsert for the parent tables.

src/cargo/ops/cargo_clean.rs

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -141,7 +141,7 @@ pub fn clean(ws: &Workspace<'_>, opts: &CleanOptions<'_>) -> CargoResult<()> {
141141
Ok(())
142142
}
143143

144-
fn validate_target_dir_tag(target_dir_path: &Path) -> CargoResult<()> {
144+
pub(crate) fn validate_target_dir_tag(target_dir_path: &Path) -> CargoResult<()> {
145145
const TAG_SIGNATURE: &[u8] = b"Signature: 8a477f597d28d172789f06886806bc55";
146146

147147
let tag_path = target_dir_path.join("CACHEDIR.TAG");

src/cargo/ops/mod.rs

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -55,7 +55,7 @@ pub use self::resolve::{
5555
pub use self::vendor::{VendorOptions, vendor};
5656

5757
pub mod cargo_add;
58-
mod cargo_clean;
58+
pub(crate) mod cargo_clean;
5959
pub(crate) mod cargo_compile;
6060
pub mod cargo_config;
6161
mod cargo_doc;

0 commit comments

Comments
 (0)