Skip to content

Commit ea71f12

Browse files
committed
Integrate linecount analysis into tarball processing
This enhances the tarball processing pipeline to include source lines of code (SLOC) analysis by adding the `crates_io_linecount` dependency to the tarball processing crate and extending the `TarballInfo` struct with a `linecount_stats` field. The analysis runs during tarball file processing: each qualifying source file is read and its statistics are accumulated into the result. All tarball processing test snapshots are updated to include linecount data, demonstrating that the feature works correctly across various crate structures. The integration preserves existing functionality while adding minimal overhead to the tarball validation and processing pipeline.
1 parent f2a7274 commit ea71f12

13 files changed

+92
-1
lines changed

crates/crates_io_tarball/Cargo.toml

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -13,6 +13,7 @@ builder = ["dep:flate2", "dep:tar"]
1313
[dependencies]
1414
astral-tokio-tar = "=0.5.2"
1515
cargo-manifest = "=0.19.1"
16+
crates_io_linecount = { path = "../crates_io_linecount" }
1617
flate2 = { version = "=1.1.2", optional = true }
1718
serde = { version = "=1.0.219", features = ["derive"] }
1819
serde_json = "=1.0.140"

crates/crates_io_tarball/src/lib.rs

Lines changed: 18 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -30,6 +30,7 @@ const DEFAULT_BUF_SIZE: usize = 128 * 1024;
3030
pub struct TarballInfo {
3131
pub manifest: Manifest,
3232
pub vcs_info: Option<CargoVcsInfo>,
33+
pub linecount_stats: crates_io_linecount::LinecountStats,
3334
}
3435

3536
#[derive(Debug, thiserror::Error)]
@@ -74,6 +75,7 @@ pub async fn process_tarball<R: tokio::io::AsyncRead + Unpin>(
7475
let mut vcs_info = None;
7576
let mut paths = Vec::new();
7677
let mut manifests = BTreeMap::new();
78+
let mut linecount_stats = crates_io_linecount::LinecountStats::new();
7779
let mut entries = archive.entries()?;
7880

7981
while let Some(entry) = entries.next().await {
@@ -103,6 +105,12 @@ pub async fn process_tarball<R: tokio::io::AsyncRead + Unpin>(
103105

104106
paths.push(in_pkg_path.to_path_buf());
105107

108+
// Check if this file should be counted for line statistics
109+
let is_file = entry_type.is_file();
110+
let language_type_for_counting = is_file
111+
.then(|| crates_io_linecount::should_count_path(in_pkg_path))
112+
.flatten();
113+
106114
// Let's go hunting for the VCS info and crate manifest. The only valid place for these is
107115
// in the package root in the tarball.
108116
let in_pkg_path_str = in_pkg_path.to_string_lossy();
@@ -121,6 +129,11 @@ pub async fn process_tarball<R: tokio::io::AsyncRead + Unpin>(
121129
validate_manifest(&manifest)?;
122130

123131
manifests.insert(owned_entry_path, manifest);
132+
} else if let Some(language_type) = language_type_for_counting {
133+
// If this is a file that we want to count, read it and update the line count stats.
134+
let mut contents = Vec::new();
135+
entry.read_to_end(&mut contents).await?;
136+
linecount_stats.add_file(language_type, &contents);
124137
}
125138
}
126139

@@ -146,7 +159,11 @@ pub async fn process_tarball<R: tokio::io::AsyncRead + Unpin>(
146159

147160
manifest.complete_from_abstract_filesystem(&PathsFileSystem(paths))?;
148161

149-
Ok(TarballInfo { manifest, vcs_info })
162+
Ok(TarballInfo {
163+
manifest,
164+
vcs_info,
165+
linecount_stats,
166+
})
150167
}
151168

152169
struct PathsFileSystem(Vec<PathBuf>);

crates/crates_io_tarball/src/snapshots/crates_io_tarball__tests__app.snap

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -76,4 +76,15 @@ TarballInfo {
7676
badges: None,
7777
},
7878
vcs_info: None,
79+
linecount_stats: LinecountStats {
80+
languages: {
81+
Rust: LanguageStats {
82+
code_lines: 1,
83+
comment_lines: 0,
84+
files: 1,
85+
},
86+
},
87+
total_code_lines: 1,
88+
total_comment_lines: 0,
89+
},
7990
}

crates/crates_io_tarball/src/snapshots/crates_io_tarball__tests__lib.snap

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -80,4 +80,15 @@ TarballInfo {
8080
badges: None,
8181
},
8282
vcs_info: None,
83+
linecount_stats: LinecountStats {
84+
languages: {
85+
Rust: LanguageStats {
86+
code_lines: 1,
87+
comment_lines: 0,
88+
files: 1,
89+
},
90+
},
91+
total_code_lines: 1,
92+
total_comment_lines: 0,
93+
},
8394
}

crates/crates_io_tarball/src/snapshots/crates_io_tarball__tests__lib_with_bins_and_example.snap

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -140,4 +140,15 @@ TarballInfo {
140140
badges: None,
141141
},
142142
vcs_info: None,
143+
linecount_stats: LinecountStats {
144+
languages: {
145+
Rust: LanguageStats {
146+
code_lines: 3,
147+
comment_lines: 0,
148+
files: 3,
149+
},
150+
},
151+
total_code_lines: 3,
152+
total_comment_lines: 0,
153+
},
143154
}

crates/crates_io_tarball/src/snapshots/crates_io_tarball__tests__process_tarball_test.snap

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -57,4 +57,9 @@ TarballInfo {
5757
badges: None,
5858
},
5959
vcs_info: None,
60+
linecount_stats: LinecountStats {
61+
languages: {},
62+
total_code_lines: 0,
63+
total_comment_lines: 0,
64+
},
6065
}

crates/crates_io_tarball/src/snapshots/crates_io_tarball__tests__process_tarball_test_incomplete_vcs_info.snap

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -61,4 +61,9 @@ TarballInfo {
6161
path_in_vcs: "",
6262
},
6363
),
64+
linecount_stats: LinecountStats {
65+
languages: {},
66+
total_code_lines: 0,
67+
total_comment_lines: 0,
68+
},
6469
}

crates/crates_io_tarball/src/snapshots/crates_io_tarball__tests__process_tarball_test_lowercase_manifest.snap

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -61,4 +61,9 @@ TarballInfo {
6161
badges: None,
6262
},
6363
vcs_info: None,
64+
linecount_stats: LinecountStats {
65+
languages: {},
66+
total_code_lines: 0,
67+
total_comment_lines: 0,
68+
},
6469
}

crates/crates_io_tarball/src/snapshots/crates_io_tarball__tests__process_tarball_test_manifest.snap

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -71,4 +71,9 @@ TarballInfo {
7171
badges: None,
7272
},
7373
vcs_info: None,
74+
linecount_stats: LinecountStats {
75+
languages: {},
76+
total_code_lines: 0,
77+
total_comment_lines: 0,
78+
},
7479
}

crates/crates_io_tarball/src/snapshots/crates_io_tarball__tests__process_tarball_test_manifest_with_boolean_readme.snap

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -63,4 +63,9 @@ TarballInfo {
6363
badges: None,
6464
},
6565
vcs_info: None,
66+
linecount_stats: LinecountStats {
67+
languages: {},
68+
total_code_lines: 0,
69+
total_comment_lines: 0,
70+
},
6671
}

crates/crates_io_tarball/src/snapshots/crates_io_tarball__tests__process_tarball_test_manifest_with_default_readme.snap

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -57,4 +57,9 @@ TarballInfo {
5757
badges: None,
5858
},
5959
vcs_info: None,
60+
linecount_stats: LinecountStats {
61+
languages: {},
62+
total_code_lines: 0,
63+
total_comment_lines: 0,
64+
},
6065
}

crates/crates_io_tarball/src/snapshots/crates_io_tarball__tests__process_tarball_test_manifest_with_project.snap

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -61,4 +61,9 @@ TarballInfo {
6161
badges: None,
6262
},
6363
vcs_info: None,
64+
linecount_stats: LinecountStats {
65+
languages: {},
66+
total_code_lines: 0,
67+
total_comment_lines: 0,
68+
},
6469
}

crates/crates_io_tarball/src/snapshots/crates_io_tarball__tests__process_tarball_test_vcs_info.snap

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -61,4 +61,9 @@ TarballInfo {
6161
path_in_vcs: "path/in/vcs",
6262
},
6363
),
64+
linecount_stats: LinecountStats {
65+
languages: {},
66+
total_code_lines: 0,
67+
total_comment_lines: 0,
68+
},
6469
}

0 commit comments

Comments
 (0)