Skip to content

Commit 0c0994c

Browse files
Fix DbUpdateException handling in catalog import to prevent cascading failures (#412)
* Initial plan * Fix DbUpdateException handling in catalog import - Handle all DbUpdateException types, not just duplicate key violations - Detach entities on any constraint violation to prevent cascading failures - Add NOT NULL violation detection for better diagnostics - Improve validation: check for whitespace-only PackageId values - Add defensive validation for PackageIdLowered after setting - All 18 tests pass Co-authored-by: bruno-garcia <1633368+bruno-garcia@users.noreply.github.com> * Add validation tests for null/whitespace PackageId - Add 7 theory-based tests covering null, empty, and whitespace cases - Verify both batch and individual processing paths throw InvalidOperationException - All 25 tests pass Co-authored-by: bruno-garcia <1633368+bruno-garcia@users.noreply.github.com> * Fix duplicate logging placeholders - Rename duplicate {PackageId} placeholder to {PackageIdValue}/{PackageVersionValue} - Ensures all values are properly logged without ambiguity - Addresses code review feedback Co-authored-by: bruno-garcia <1633368+bruno-garcia@users.noreply.github.com> * Address code review feedback - Add IsConstraintViolationException() to detect all 23xxx constraint violations - Rethrow non-constraint DbUpdateExceptions (timeouts, deadlocks) after detaching - Remove unreachable defensive checks for PackageIdLowered (ToLowerInvariant never returns null) - Simplify logging: remove redundant {PackageIdValue}/{PackageVersionValue} placeholders - All 25 tests pass Co-authored-by: bruno-garcia <1633368+bruno-garcia@users.noreply.github.com> * Simplify null-conditional operator in IsConstraintViolationException Use ?? false pattern for better clarity instead of == true Co-authored-by: bruno-garcia <1633368+bruno-garcia@users.noreply.github.com> --------- Co-authored-by: copilot-swe-agent[bot] <198982749+Copilot@users.noreply.github.com> Co-authored-by: bruno-garcia <1633368+bruno-garcia@users.noreply.github.com>
1 parent 8708296 commit 0c0994c

2 files changed

Lines changed: 149 additions & 19 deletions

File tree

src/NuGetTrends.Scheduler.Tests/CatalogLeafProcessorTests.cs

Lines changed: 55 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -468,6 +468,61 @@ public async Task ProcessPackageDetailsAsync_NewPackage_PopulatesPackageIdLowere
468468
"PackageIdLowered should be populated in individual processing path");
469469
}
470470

471+
/// <summary>
472+
/// Tests that ProcessPackageDetailsBatchAsync throws InvalidOperationException
473+
/// when PackageId is null or whitespace.
474+
/// </summary>
475+
[Theory]
476+
[InlineData(null)]
477+
[InlineData("")]
478+
[InlineData(" ")]
479+
[InlineData("\t")]
480+
public async Task ProcessPackageDetailsBatchAsync_NullOrWhitespacePackageId_ThrowsInvalidOperationException(string? packageId)
481+
{
482+
// Arrange
483+
var (provider, processor) = CreateProcessor();
484+
using var _ = provider;
485+
486+
var leaves = new List<PackageDetailsCatalogLeaf>
487+
{
488+
new() { PackageId = packageId, PackageVersion = "1.0.0", CommitTimestamp = DateTimeOffset.UtcNow },
489+
};
490+
491+
// Act & Assert
492+
var ex = await Assert.ThrowsAsync<InvalidOperationException>(() =>
493+
processor.ProcessPackageDetailsBatchAsync(leaves, CancellationToken.None));
494+
495+
ex.Message.Should().Contain("PackageId must be set and non-empty");
496+
}
497+
498+
/// <summary>
499+
/// Tests that ProcessPackageDetailsAsync (individual) throws InvalidOperationException
500+
/// when PackageId is null or whitespace.
501+
/// </summary>
502+
[Theory]
503+
[InlineData(null)]
504+
[InlineData("")]
505+
[InlineData(" ")]
506+
public async Task ProcessPackageDetailsAsync_NullOrWhitespacePackageId_ThrowsInvalidOperationException(string? packageId)
507+
{
508+
// Arrange
509+
var (provider, processor) = CreateProcessor();
510+
using var _ = provider;
511+
512+
var leaf = new PackageDetailsCatalogLeaf
513+
{
514+
PackageId = packageId,
515+
PackageVersion = "1.0.0",
516+
CommitTimestamp = DateTimeOffset.UtcNow,
517+
};
518+
519+
// Act & Assert
520+
var ex = await Assert.ThrowsAsync<InvalidOperationException>(() =>
521+
processor.ProcessPackageDetailsAsync(leaf, CancellationToken.None));
522+
523+
ex.Message.Should().Contain("PackageId must be set and non-empty");
524+
}
525+
471526
}
472527

473528
/// <summary>

src/NuGetTrends.Scheduler/CatalogLeafProcessor.cs

Lines changed: 94 additions & 19 deletions
Original file line numberDiff line numberDiff line change
@@ -13,6 +13,20 @@ public class CatalogLeafProcessor : ICatalogLeafProcessor
1313
/// See: https://www.postgresql.org/docs/current/errcodes-appendix.html
1414
/// </summary>
1515
private const string PostgresUniqueViolationCode = "23505";
16+
17+
/// <summary>
18+
/// PostgreSQL error code for not_null_violation.
19+
/// See: https://www.postgresql.org/docs/current/errcodes-appendix.html
20+
/// </summary>
21+
private const string PostgresNotNullViolationCode = "23502";
22+
23+
/// <summary>
24+
/// PostgreSQL error code prefix for integrity constraint violations (class 23).
25+
/// Includes: unique_violation (23505), not_null_violation (23502), foreign_key_violation (23503),
26+
/// check_violation (23514), exclusion_violation (23P01), etc.
27+
/// See: https://www.postgresql.org/docs/current/errcodes-appendix.html
28+
/// </summary>
29+
private const string PostgresConstraintViolationPrefix = "23";
1630

1731
private readonly IServiceProvider _provider;
1832
private readonly ILogger<CatalogLeafProcessor> _logger;
@@ -114,11 +128,11 @@ public async Task ProcessPackageDetailsBatchAsync(IReadOnlyList<PackageDetailsCa
114128

115129
foreach (var leaf in newLeaves)
116130
{
117-
if (string.IsNullOrEmpty(leaf.PackageId))
131+
if (string.IsNullOrWhiteSpace(leaf.PackageId))
118132
{
119133
throw new InvalidOperationException(
120-
"PackageId must be set before inserting a PackageDetailsCatalogLeaf. " +
121-
"The NuGet catalog leaf should always provide a PackageId.");
134+
"PackageId must be set and non-empty before inserting a PackageDetailsCatalogLeaf. " +
135+
"The NuGet catalog leaf should always provide a valid PackageId.");
122136
}
123137

124138
leaf.PackageIdLowered = leaf.PackageId.ToLowerInvariant();
@@ -130,13 +144,38 @@ public async Task ProcessPackageDetailsBatchAsync(IReadOnlyList<PackageDetailsCa
130144
{
131145
await Save(token);
132146
}
133-
catch (DbUpdateException ex) when (IsDuplicateKeyException(ex))
147+
catch (DbUpdateException ex)
134148
{
135-
// Race condition: another process inserted one or more packages between our query and save.
136-
// Fall back to individual processing for this batch to handle partial success.
137-
_logger.LogDebug("Concurrent insert detected in batch, falling back to individual processing.");
149+
// Only handle constraint violations (23xxx codes).
150+
// Non-constraint errors (timeouts, connection errors, deadlocks) should fail the job.
151+
if (!IsConstraintViolationException(ex))
152+
{
153+
// Detach entities to prevent cascading failures, then rethrow
154+
foreach (var leaf in newLeaves)
155+
{
156+
Context.Entry(leaf).State = EntityState.Detached;
157+
}
158+
throw;
159+
}
160+
161+
// Constraint violation - fall back to individual processing for partial success
162+
var isDuplicateKey = IsDuplicateKeyException(ex);
163+
var isNotNull = IsNotNullViolationException(ex);
164+
165+
if (isDuplicateKey)
166+
{
167+
_logger.LogDebug("Concurrent insert detected in batch, falling back to individual processing.");
168+
}
169+
else if (isNotNull)
170+
{
171+
_logger.LogWarning(ex, "NOT NULL constraint violation in batch, falling back to individual processing.");
172+
}
173+
else
174+
{
175+
_logger.LogWarning(ex, "Database constraint violation in batch, falling back to individual processing.");
176+
}
138177

139-
// Detach all entities we tried to add
178+
// Detach all entities we tried to add to prevent cascading failures
140179
foreach (var leaf in newLeaves)
141180
{
142181
Context.Entry(leaf).State = EntityState.Detached;
@@ -157,11 +196,11 @@ private async Task ProcessPackageDetailsIndividualAsync(PackageDetailsCatalogLea
157196

158197
if (!exists)
159198
{
160-
if (string.IsNullOrEmpty(leaf.PackageId))
199+
if (string.IsNullOrWhiteSpace(leaf.PackageId))
161200
{
162201
throw new InvalidOperationException(
163-
"PackageId must be set before inserting a PackageDetailsCatalogLeaf. " +
164-
"The NuGet catalog leaf should always provide a PackageId.");
202+
"PackageId must be set and non-empty before inserting a PackageDetailsCatalogLeaf. " +
203+
"The NuGet catalog leaf should always provide a valid PackageId.");
165204
}
166205

167206
leaf.PackageIdLowered = leaf.PackageId.ToLowerInvariant();
@@ -171,17 +210,42 @@ private async Task ProcessPackageDetailsIndividualAsync(PackageDetailsCatalogLea
171210
{
172211
await Save(token);
173212
}
174-
catch (DbUpdateException ex) when (IsDuplicateKeyException(ex))
213+
catch (DbUpdateException ex)
175214
{
176-
// Race condition: another process inserted the same package between our AnyAsync check
177-
// and SaveChangesAsync. This is harmless - the package exists, which is what we wanted.
178-
// Detach the entity to prevent cascading failures on subsequent saves.
215+
// Detach entity to prevent cascading failures
179216
Context.Entry(leaf).State = EntityState.Detached;
180217

181-
_logger.LogDebug(
182-
"Package {PackageId} v{PackageVersion} already exists (concurrent insert), skipping.",
183-
leaf.PackageId,
184-
leaf.PackageVersion);
218+
if (IsDuplicateKeyException(ex))
219+
{
220+
// Race condition: another process inserted the same package between our AnyAsync check
221+
// and SaveChangesAsync. This is harmless - the package exists, which is what we wanted.
222+
_logger.LogDebug(
223+
"Package {PackageId} v{PackageVersion} already exists (concurrent insert), skipping.",
224+
leaf.PackageId,
225+
leaf.PackageVersion);
226+
}
227+
else if (IsNotNullViolationException(ex))
228+
{
229+
_logger.LogError(ex,
230+
"NOT NULL constraint violation for package {PackageId} v{PackageVersion}. " +
231+
"PackageIdLowered={PackageIdLowered}",
232+
leaf.PackageId,
233+
leaf.PackageVersion,
234+
leaf.PackageIdLowered);
235+
}
236+
else if (IsConstraintViolationException(ex))
237+
{
238+
// Other constraint violations (foreign key, check constraint, etc.)
239+
_logger.LogError(ex,
240+
"Database constraint violation for package {PackageId} v{PackageVersion}",
241+
leaf.PackageId,
242+
leaf.PackageVersion);
243+
}
244+
else
245+
{
246+
// Non-constraint errors (timeouts, connection errors, deadlocks) should fail the job
247+
throw;
248+
}
185249
}
186250
}
187251
}
@@ -191,6 +255,17 @@ private static bool IsDuplicateKeyException(DbUpdateException ex)
191255
return ex.InnerException is PostgresException { SqlState: PostgresUniqueViolationCode };
192256
}
193257

258+
private static bool IsNotNullViolationException(DbUpdateException ex)
259+
{
260+
return ex.InnerException is PostgresException { SqlState: PostgresNotNullViolationCode };
261+
}
262+
263+
private static bool IsConstraintViolationException(DbUpdateException ex)
264+
{
265+
return ex.InnerException is PostgresException pgEx &&
266+
(pgEx.SqlState?.StartsWith(PostgresConstraintViolationPrefix, StringComparison.Ordinal) ?? false);
267+
}
268+
194269
/// <summary>
195270
/// Case-insensitive package key for efficient lookup in HashSet.
196271
/// </summary>

0 commit comments

Comments
 (0)