Skip to content

Commit 57b8f65

Browse files
sokraclaude
authored and committed
next-core: deduplicate output assets and detect content conflicts on emit (#92292)
Adds deduplication and conflict detection to the asset emission stage in `crates/next-core/src/emit.rs`, and a new `IssueStage::Emit` variant in `turbopack-core`.

Before emitting, assets are grouped by their output path. If multiple assets map to the same path:
- If their content is identical, one is silently chosen (deduplication).
- If their content differs, both versions are written to `<node_root>/<content_hash>.<ext>` and an `EmitConflictIssue` is raised for each conflict. Every output path is still emitted (using the first asset collected for that path) — conflicts do not abort the build.

Previously, duplicate output assets for the same path were emitted unconditionally — whichever write happened last silently won. This masked build graph bugs where two different modules produced conflicting output files. Reporting conflicts as issues (rather than silently overwriting) makes them visible and easy to diagnose without breaking the build.

- Collect all assets with their resolved paths via `try_flat_join`.
- Bucket them into two `FxIndexMap<FileSystemPath, SmallVec<[ResolvedVc<Box<dyn OutputAsset>>; 1]>>` — one for node-root assets and one for client assets.
- For each bucket entry, call `check_duplicates`: compare every asset against the first using `assets_diff`. If content differs, emit an `EmitConflictIssue` as a turbo-tasks collectible — but still return the first asset so emission continues.
- `assets_diff` is a `#[turbo_tasks::function]` that takes only `(asset1, asset2, extension, node_root)` — the `asset_path` stays out of the task key to avoid unnecessary task cardinality. When file content differs, it hashes each version with xxh3, writes them to `<node_root>/<hash>.<ext>`, and returns the paths in the detail message so the user can diff them.
- `EmitConflictIssue` implements the `Issue` trait with `IssueStage::Emit` (new variant added to `turbopack-core`), `IssueSeverity::Error`, a descriptive title, and a detail message explaining the type of conflict.
- Node-root and client assets are emitted in parallel via `futures::join!` (not `try_join!`) to ensure deterministic error reporting — both branches always run to completion so errors are reported in a consistent order.

---------

Co-authored-by: Tobias Koppers <sokra@users.noreply.github.com>
Co-authored-by: Claude <noreply@anthropic.com>
1 parent f158df1 commit 57b8f65

File tree

2 files changed

+255
-32
lines changed
  • crates/next-core/src
  • turbopack/crates/turbopack-core/src/issue

2 files changed

+255
-32
lines changed

crates/next-core/src/emit.rs

Lines changed: 253 additions & 32 deletions
Original file line numberDiff line numberDiff line change
@@ -1,9 +1,16 @@
1-
use anyhow::Result;
1+
use anyhow::{Ok, Result};
2+
use futures::join;
3+
use smallvec::{SmallVec, smallvec};
24
use tracing::Instrument;
3-
use turbo_tasks::{TryFlatJoinIterExt, Vc};
4-
use turbo_tasks_fs::{FileSystemPath, rebase};
5+
use turbo_rcstr::RcStr;
6+
use turbo_tasks::{
7+
FxIndexMap, ResolvedVc, TryFlatJoinIterExt, TryJoinIterExt, ValueToStringRef, Vc,
8+
};
9+
use turbo_tasks_fs::{FileContent, FileSystemPath, rebase};
10+
use turbo_tasks_hash::{encode_hex, hash_xxh3_hash64};
511
use turbopack_core::{
6-
asset::Asset,
12+
asset::{Asset, AssetContent},
13+
issue::{Issue, IssueExt, IssueSeverity, IssueStage, OptionStyledString, StyledString},
714
output::{ExpandedOutputAssets, OutputAsset, OutputAssets},
815
reference::all_assets_from_entries,
916
};
@@ -43,40 +50,124 @@ pub async fn emit_assets(
4350
client_relative_path: FileSystemPath,
4451
client_output_path: FileSystemPath,
4552
) -> Result<()> {
46-
let _: Vec<()> = assets
53+
enum Location {
54+
Node,
55+
Client,
56+
}
57+
let assets = assets
4758
.await?
4859
.iter()
4960
.copied()
50-
.map(|asset| {
51-
let node_root = node_root.clone();
52-
let client_relative_path = client_relative_path.clone();
53-
let client_output_path = client_output_path.clone();
54-
55-
async move {
56-
let path = asset.path().owned().await?;
57-
let span = tracing::info_span!("emit asset", name = %path.value_to_string().await?);
58-
async move {
59-
Ok(if path.is_inside_ref(&node_root) {
60-
Some(emit(*asset).as_side_effect().await?)
61-
} else if path.is_inside_ref(&client_relative_path) {
62-
// Client assets are emitted to the client output path, which is prefixed
63-
// with _next. We need to rebase them to remove that
64-
// prefix.
65-
Some(
66-
emit_rebase(*asset, client_relative_path, client_output_path)
67-
.as_side_effect()
68-
.await?,
69-
)
70-
} else {
71-
None
72-
})
73-
}
74-
.instrument(span)
75-
.await
76-
}
61+
.map(async |asset| {
62+
let path = asset.path().owned().await?;
63+
let location = if path.is_inside_ref(&node_root) {
64+
Location::Node
65+
} else if path.is_inside_ref(&client_relative_path) {
66+
Location::Client
67+
} else {
68+
return Ok(None);
69+
};
70+
Ok(Some((location, path, asset)))
7771
})
7872
.try_flat_join()
7973
.await?;
74+
75+
type AssetVec = SmallVec<[ResolvedVc<Box<dyn OutputAsset>>; 1]>;
76+
let mut node_assets_by_path: FxIndexMap<FileSystemPath, AssetVec> = FxIndexMap::default();
77+
let mut client_assets_by_path: FxIndexMap<FileSystemPath, AssetVec> = FxIndexMap::default();
78+
for (location, path, asset) in assets {
79+
match location {
80+
Location::Node => {
81+
node_assets_by_path
82+
.entry(path)
83+
.or_insert_with(|| smallvec![])
84+
.push(asset);
85+
}
86+
Location::Client => {
87+
client_assets_by_path
88+
.entry(path)
89+
.or_insert_with(|| smallvec![])
90+
.push(asset);
91+
}
92+
}
93+
}
94+
95+
/// Checks for duplicate assets at the same path. If duplicates with
96+
/// different content are found, emits an `EmitConflictIssue` for each
97+
/// conflict but still returns the first asset so emission can continue.
98+
async fn check_duplicates(
99+
path: &FileSystemPath,
100+
assets: AssetVec,
101+
node_root: &FileSystemPath,
102+
) -> Result<ResolvedVc<Box<dyn OutputAsset>>> {
103+
let mut iter = assets.into_iter();
104+
let first = iter.next().unwrap();
105+
for next in iter {
106+
let ext: RcStr = path.extension().into();
107+
if let Some(detail) = assets_diff(*next, *first, ext, node_root.clone())
108+
.owned()
109+
.await?
110+
{
111+
EmitConflictIssue {
112+
asset_path: path.clone(),
113+
detail,
114+
}
115+
.resolved_cell()
116+
.emit();
117+
}
118+
}
119+
Ok(first)
120+
}
121+
122+
// Use join! instead of try_join! to collect all errors deterministically
123+
// rather than returning whichever branch fails first non-deterministically.
124+
let (node_result, client_result) = join!(
125+
node_assets_by_path
126+
.into_iter()
127+
.map(|(path, assets)| {
128+
let node_root = node_root.clone();
129+
130+
async move {
131+
let asset = check_duplicates(&path, assets, &node_root).await?;
132+
let span = tracing::info_span!(
133+
"emit asset",
134+
name = %path.to_string_ref().await?
135+
);
136+
async move { emit(*asset).as_side_effect().await }
137+
.instrument(span)
138+
.await
139+
}
140+
})
141+
.try_join(),
142+
client_assets_by_path
143+
.into_iter()
144+
.map(|(path, assets)| {
145+
let node_root = node_root.clone();
146+
let client_relative_path = client_relative_path.clone();
147+
let client_output_path = client_output_path.clone();
148+
149+
async move {
150+
let asset = check_duplicates(&path, assets, &node_root).await?;
151+
let span = tracing::info_span!(
152+
"emit asset",
153+
name = %path.to_string_ref().await?
154+
);
155+
async move {
156+
// Client assets are emitted to the client output path, which is
157+
// prefixed with _next. We need to rebase them to
158+
// remove that prefix.
159+
emit_rebase(*asset, client_relative_path, client_output_path)
160+
.as_side_effect()
161+
.await
162+
}
163+
.instrument(span)
164+
.await
165+
}
166+
})
167+
.try_join(),
168+
);
169+
node_result?;
170+
client_result?;
80171
Ok(())
81172
}
82173

@@ -110,3 +201,133 @@ async fn emit_rebase(
110201
.await?;
111202
Ok(())
112203
}
204+
205+
/// Compares two assets that target the same output path. If their content
206+
/// differs, writes both versions under `node_root` as `<hash>.<ext>` and
207+
/// returns a description of the difference.
208+
#[turbo_tasks::function]
209+
async fn assets_diff(
210+
asset1: Vc<Box<dyn OutputAsset>>,
211+
asset2: Vc<Box<dyn OutputAsset>>,
212+
extension: RcStr,
213+
node_root: FileSystemPath,
214+
) -> Result<Vc<Option<RcStr>>> {
215+
let content1 = asset1.content().await?;
216+
let content2 = asset2.content().await?;
217+
218+
let detail = match (&*content1, &*content2) {
219+
(AssetContent::File(content1), AssetContent::File(content2)) => {
220+
let content1 = content1.await?;
221+
let content2 = content2.await?;
222+
223+
match (&*content1, &*content2) {
224+
(FileContent::NotFound, FileContent::NotFound) => None,
225+
(FileContent::Content(file1), FileContent::Content(file2)) => {
226+
if file1 == file2 {
227+
None
228+
} else {
229+
// Write both versions under node_root as <hash>.<ext> so the
230+
// user can diff them.
231+
let ext = &*extension;
232+
let hash1 = encode_hex(hash_xxh3_hash64(file1.content().content_hash()));
233+
let hash2 = encode_hex(hash_xxh3_hash64(file2.content().content_hash()));
234+
let name1 = if ext.is_empty() {
235+
hash1
236+
} else {
237+
format!("{hash1}.{ext}")
238+
};
239+
let name2 = if ext.is_empty() {
240+
hash2
241+
} else {
242+
format!("{hash2}.{ext}")
243+
};
244+
let path1 = node_root.join(&name1)?;
245+
let path2 = node_root.join(&name2)?;
246+
path1
247+
.write(FileContent::Content(file1.clone()).cell())
248+
.as_side_effect()
249+
.await?;
250+
path2
251+
.write(FileContent::Content(file2.clone()).cell())
252+
.as_side_effect()
253+
.await?;
254+
Some(format!(
255+
"file content differs, written to:\n {}\n {}",
256+
path1.to_string_ref().await?,
257+
path2.to_string_ref().await?,
258+
))
259+
}
260+
}
261+
_ => Some(
262+
"assets at the same path have mismatched file content types (one task wants \
263+
to write the file, another wants to delete it)"
264+
.into(),
265+
),
266+
}
267+
}
268+
(
269+
AssetContent::Redirect {
270+
target: target1,
271+
link_type: link_type1,
272+
},
273+
AssetContent::Redirect {
274+
target: target2,
275+
link_type: link_type2,
276+
},
277+
) => {
278+
if target1 == target2 && link_type1 == link_type2 {
279+
None
280+
} else {
281+
Some(format!(
282+
"assets at the same path are both redirects but point to different targets: \
283+
{target1} vs {target2}"
284+
))
285+
}
286+
}
287+
_ => Some(
288+
"assets at the same path have different content types (one is a file, the other is a \
289+
redirect)"
290+
.into(),
291+
),
292+
};
293+
294+
Ok(Vc::cell(detail.map(|d| d.into())))
295+
}
296+
297+
#[turbo_tasks::value]
298+
struct EmitConflictIssue {
299+
asset_path: FileSystemPath,
300+
detail: RcStr,
301+
}
302+
303+
#[turbo_tasks::value_impl]
304+
impl Issue for EmitConflictIssue {
305+
#[turbo_tasks::function]
306+
fn file_path(&self) -> Vc<FileSystemPath> {
307+
self.asset_path.clone().cell()
308+
}
309+
310+
#[turbo_tasks::function]
311+
fn stage(&self) -> Vc<IssueStage> {
312+
IssueStage::Emit.cell()
313+
}
314+
315+
fn severity(&self) -> IssueSeverity {
316+
IssueSeverity::Error
317+
}
318+
319+
#[turbo_tasks::function]
320+
fn title(&self) -> Vc<StyledString> {
321+
StyledString::Text(
322+
"Two or more assets with different content were emitted to the same output path".into(),
323+
)
324+
.cell()
325+
}
326+
327+
#[turbo_tasks::function]
328+
fn description(&self) -> Vc<OptionStyledString> {
329+
Vc::cell(Some(
330+
StyledString::Text(self.detail.clone()).resolved_cell(),
331+
))
332+
}
333+
}

turbopack/crates/turbopack-core/src/issue/mod.rs

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -875,6 +875,7 @@ pub enum IssueStage {
875875
Resolve,
876876
Bindings,
877877
CodeGen,
878+
Emit,
878879
Unsupported,
879880
Misc,
880881
Other(RcStr),
@@ -893,6 +894,7 @@ impl Display for IssueStage {
893894
IssueStage::Analysis => write!(f, "analysis"),
894895
IssueStage::Bindings => write!(f, "bindings"),
895896
IssueStage::CodeGen => write!(f, "code gen"),
897+
IssueStage::Emit => write!(f, "emit"),
896898
IssueStage::Unsupported => write!(f, "unsupported"),
897899
IssueStage::AppStructure => write!(f, "app structure"),
898900
IssueStage::Misc => write!(f, "misc"),

0 commit comments

Comments
 (0)