Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
11 changes: 0 additions & 11 deletions documentation/en/configuration/metrics-reference.md
Original file line number Diff line number Diff line change
Expand Up @@ -7,17 +7,6 @@
This document lists all Prometheus metrics exported by Curio. All metrics use the `curio_` namespace prefix.

> **Note**: This file is auto-generated from source code. Run `make docsgen-metrics` to update.
## Database Metrics (HarmonyDB)

| Metric | Type | Description |
|--------|------|-------------|
| `curio_db_errors` | gauge/counter | Total error count. |
| `curio_db_hits` | gauge/counter | Total number of uses. |
| `curio_db_open_connections` | gauge/counter | Total connection count. |
| `curio_db_total_wait` | gauge/counter | Total delay. A numerator over hits to get average wait. |
| `curio_db_waits` | histogram | The histogram of waits for query completions. |
| `curio_db_which_host` | histogram | The index of the hostname being used |

## Task Metrics (HarmonyTask)

| Metric | Type | Description |
Expand Down
18 changes: 18 additions & 0 deletions documentation/en/design/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -116,3 +116,21 @@ To address these issues in Curio, we have implemented a GPU picker library calle
<figure><img src="../.gitbook/assets/2024-06-04-040735_1470x522_scrot (1).png" alt=""><figcaption><p>Curio FFISelect in action</p></figcaption></figure>

This approach ensures efficient and conflict-free GPU usage, with each task being handled by a dedicated GPU, thus resolving the historical issues observed with the `lotus-miner` scheduler.

# Security Boundary

This is what Curio expects a storage provider (SP) to secure in order to have a safe experience.
Curio is cluster software that coordinates both directly and through the database. It also communicates with the public through chain providers (Lotus) and the market node. To secure this properly, ensure that only trusted people and services have access to:
- logs (these include inputs to failing processes),
- physical machines,
- virtual machine access (ssh) for Curio, Lotus, or Yugabyte,
- Curio's, Lotus's, or Yugabyte's open ports (with exceptions noted by Lotus, and the Curio market node).
  - This includes the admin web UI for Curio, which exposes numerous capabilities beyond viewing.
Comment thread
snadrus marked this conversation as resolved.

Safe to share with untrusted parties (they will not receive private information):
- Prometheus output
- alerts can be sent to untrusted receivers
- CuView (at your own risk) has modes for light investigation.

The Curio team recommends placing all of these pieces inside a private network (VPN) with limited access.
Logs are mostly clean, except for errors, which try to be as specific as possible, so partial redaction may be best if sharing logs with untrusted parties.
3 changes: 2 additions & 1 deletion market/mk12/mk12_utils.go
Original file line number Diff line number Diff line change
Expand Up @@ -262,7 +262,8 @@ func GetDealStatus(ctx context.Context, db *harmonydb.DB, req DealStatusRequest,
uuid = $1;`, req.DealUUID)

if err != nil {
return errResp(fmt.Sprintf("failed to query the db for deal status: %s", err))
reqLog.Errorw("failed to query deal status", "err", err)
return errResp("failed to query the db for deal status")
}

if len(pdeals) > 1 {
Expand Down
4 changes: 2 additions & 2 deletions market/mk20/http/http.go
Original file line number Diff line number Diff line change
Expand Up @@ -72,7 +72,7 @@ func AuthMiddleware(db *harmonydb.DB, cfg *config.CurioConfig) func(http.Handler
allowed, client, err := mk20.Auth(authHeader, db, cfg)
if err != nil {
log.Errorw("failed to authenticate request", "err", err)
http.Error(w, "Error during authentication: "+err.Error(), http.StatusInternalServerError)
http.Error(w, "Error during authentication", http.StatusInternalServerError)
return
}

Expand All @@ -86,7 +86,7 @@ func AuthMiddleware(db *harmonydb.DB, cfg *config.CurioConfig) func(http.Handler
allowed, err := mk20.AuthenticateClient(db, idStr, client)
if err != nil {
log.Errorw("failed to authenticate client", "err", err)
http.Error(w, err.Error(), http.StatusUnauthorized)
http.Error(w, "Error during authentication", http.StatusInternalServerError)
return
}
if !allowed {
Expand Down
65 changes: 43 additions & 22 deletions pdp/handlers.go
Original file line number Diff line number Diff line change
Expand Up @@ -191,7 +191,8 @@ func (p *PDPService) handleCreateProofSet(w http.ResponseWriter, r *http.Request
// Step 3: Get the sender address from 'eth_keys' table where role = 'pdp' limit 1
fromAddress, err := p.getSenderAddress(ctx)
if err != nil {
http.Error(w, "Failed to get sender address: "+err.Error(), http.StatusInternalServerError)
log.Errorf("Failed to get sender address: %v", err)
http.Error(w, "Failed to get sender address", http.StatusInternalServerError)
return
}

Expand Down Expand Up @@ -364,7 +365,8 @@ func (p *PDPService) handleGetProofSetCreationStatus(w http.ResponseWriter, r *h
http.Error(w, "Proof set creation not found for given txHash", http.StatusNotFound)
return
}
http.Error(w, "Failed to query proof set creation: "+err.Error(), http.StatusInternalServerError)
log.Errorf("Failed to query proof set creation: %v", err)
http.Error(w, "Failed to query proof set creation", http.StatusInternalServerError)
return
}

Expand Down Expand Up @@ -402,7 +404,8 @@ func (p *PDPService) handleGetProofSetCreationStatus(w http.ResponseWriter, r *h
http.Error(w, "Message status not found for given txHash", http.StatusInternalServerError)
return
}
http.Error(w, "Failed to query message status: "+err.Error(), http.StatusInternalServerError)
log.Errorf("Failed to query message status: %v", err)
http.Error(w, "Failed to query message status", http.StatusInternalServerError)
return
}

Expand All @@ -422,7 +425,8 @@ func (p *PDPService) handleGetProofSetCreationStatus(w http.ResponseWriter, r *h
http.Error(w, "Proof set not found despite proofset_created = true", http.StatusInternalServerError)
return
}
http.Error(w, "Failed to query proof set: "+err.Error(), http.StatusInternalServerError)
log.Errorf("Failed to query proof set: %v", err)
http.Error(w, "Failed to query proof set", http.StatusInternalServerError)
return
}
response.ProofSetId = &proofSetId
Expand Down Expand Up @@ -485,7 +489,8 @@ func (p *PDPService) handleGetProofSet(w http.ResponseWriter, r *http.Request) {
http.Error(w, "Proof set not found", http.StatusNotFound)
return
}
http.Error(w, "Failed to retrieve proof set: "+err.Error(), http.StatusInternalServerError)
log.Errorf("Failed to retrieve proof set: %v", err)
http.Error(w, "Failed to retrieve proof set", http.StatusInternalServerError)
return
}

Expand All @@ -510,7 +515,8 @@ func (p *PDPService) handleGetProofSet(w http.ResponseWriter, r *http.Request) {
ORDER BY root_id, subroot_offset
`, proofSetId)
if err != nil {
http.Error(w, "Failed to retrieve proof set roots: "+err.Error(), http.StatusInternalServerError)
log.Errorf("Failed to retrieve proof set roots: %v", err)
http.Error(w, "Failed to retrieve proof set roots", http.StatusInternalServerError)
return
}

Expand All @@ -522,7 +528,8 @@ func (p *PDPService) handleGetProofSet(w http.ResponseWriter, r *http.Request) {
WHERE id = $1
`, proofSetId).Scan(&nextChallengeEpoch)
if err != nil {
http.Error(w, "Failed to retrieve next challenge epoch: "+err.Error(), http.StatusInternalServerError)
log.Errorf("Failed to retrieve next challenge epoch: %v", err)
http.Error(w, "Failed to retrieve next challenge epoch", http.StatusInternalServerError)
return
}

Expand Down Expand Up @@ -615,7 +622,8 @@ func (p *PDPService) handleAddRootToProofSet(w http.ResponseWriter, r *http.Requ
http.Error(w, "Proof set not found", http.StatusNotFound)
return
}
http.Error(w, "Failed to retrieve proof set: "+err.Error(), http.StatusInternalServerError)
log.Errorf("Failed to retrieve proof set: %v", err)
http.Error(w, "Failed to retrieve proof set", http.StatusInternalServerError)
return
}

Expand Down Expand Up @@ -725,7 +733,8 @@ func (p *PDPService) handleAddRootToProofSet(w http.ResponseWriter, r *http.Requ
WHERE ppr.service = $1 AND ppr.piece_cid = ANY($2)
`, serviceLabel, subrootCIDsList)
if err != nil {
return false, err
log.Errorf("Failed to retrieve pdp_piecerefs: %v", err)
return false, fmt.Errorf("failed to retrieve pdp_piecerefs")
}
defer rows.Close()

Expand Down Expand Up @@ -811,7 +820,8 @@ func (p *PDPService) handleAddRootToProofSet(w http.ResponseWriter, r *http.Requ
return true, nil
}, harmonydb.OptionRetry())
if err != nil {
http.Error(w, "Failed to validate subroots: "+err.Error(), http.StatusBadRequest)
log.Errorf("Failed to validate subroots: %v", err)
http.Error(w, "Failed to validate subroots", http.StatusBadRequest)
return
}

Expand Down Expand Up @@ -876,7 +886,8 @@ func (p *PDPService) handleAddRootToProofSet(w http.ResponseWriter, r *http.Requ
// Step 7: Get the sender address from 'eth_keys' table where role = 'pdp' limit 1
fromAddress, err := p.getSenderAddress(ctx)
if err != nil {
http.Error(w, "Failed to get sender address: "+err.Error(), http.StatusInternalServerError)
log.Errorf("Failed to get sender address: %v", err)
http.Error(w, "Failed to get sender address", http.StatusInternalServerError)
return
}

Expand Down Expand Up @@ -919,7 +930,7 @@ func (p *PDPService) handleAddRootToProofSet(w http.ResponseWriter, r *http.Requ
log.Errorw("Failed to insert AddRoots into message_waits_eth",
"txHash", txHashLower,
"error", err)
return false, err // Return false to rollback the transaction
return false, errors.New("failed to insert AddRoots into message_waits_eth") // Return false to rollback the transaction
}

// Update proof set for initialization upon first add
Expand All @@ -928,7 +939,8 @@ func (p *PDPService) handleAddRootToProofSet(w http.ResponseWriter, r *http.Requ
WHERE id = $1 AND prev_challenge_request_epoch IS NULL AND challenge_request_msg_hash IS NULL AND prove_at_epoch IS NULL
`, proofSetIDUint64)
if err != nil {
return false, err
log.Errorf("Failed to update proof set for initialization upon first add: %v", err)
return false, errors.New("failed to update proof set for initialization upon first add")
}

// Insert into pdp_proofset_roots
Expand Down Expand Up @@ -961,7 +973,8 @@ func (p *PDPService) handleAddRootToProofSet(w http.ResponseWriter, r *http.Requ
subrootInfo.PDPPieceRefID,
)
if err != nil {
return false, err
log.Errorf("Failed to insert into pdp_proofset_roots: %v", err)
return false, errors.New("failed to insert into pdp_proofset_roots")
}
}
}
Expand Down Expand Up @@ -1039,7 +1052,8 @@ func (p *PDPService) handleGetRootAdditionStatus(w http.ResponseWriter, r *http.
http.Error(w, "Proof set not found", http.StatusNotFound)
return
}
http.Error(w, "Failed to retrieve proof set: "+err.Error(), http.StatusInternalServerError)
log.Errorf("Failed to retrieve proof set: %v", err)
http.Error(w, "Failed to retrieve proof set", http.StatusInternalServerError)
return
}

Expand Down Expand Up @@ -1069,7 +1083,8 @@ func (p *PDPService) handleGetRootAdditionStatus(w http.ResponseWriter, r *http.
ORDER BY add_message_index, subroot_offset
`, proofSetID, txHash)
if err != nil {
http.Error(w, "Failed to query root additions: "+err.Error(), http.StatusInternalServerError)
log.Errorf("Failed to query root additions: %v", err)
http.Error(w, "Failed to query root additions", http.StatusInternalServerError)
return
}

Expand All @@ -1088,7 +1103,8 @@ func (p *PDPService) handleGetRootAdditionStatus(w http.ResponseWriter, r *http.
http.Error(w, "Transaction status not found", http.StatusNotFound)
return
}
http.Error(w, "Failed to query transaction status: "+err.Error(), http.StatusInternalServerError)
log.Errorf("Failed to query transaction status: %v", err)
http.Error(w, "Failed to query transaction status", http.StatusInternalServerError)
return
}

Expand Down Expand Up @@ -1216,7 +1232,8 @@ func (p *PDPService) handleDeleteProofSetRoot(w http.ResponseWriter, r *http.Req
http.Error(w, "Proof set not found", http.StatusNotFound)
return
}
http.Error(w, "Failed to retrieve proof set: "+err.Error(), http.StatusInternalServerError)
log.Errorf("Failed to retrieve proof set: %v", err)
http.Error(w, "Failed to retrieve proof set", http.StatusInternalServerError)
return
}

Expand Down Expand Up @@ -1247,7 +1264,8 @@ func (p *PDPService) handleDeleteProofSetRoot(w http.ResponseWriter, r *http.Req
// Get the sender address
fromAddress, err := p.getSenderAddress(ctx)
if err != nil {
http.Error(w, "Failed to get sender address: "+err.Error(), http.StatusInternalServerError)
log.Errorf("Failed to get sender address: %v", err)
http.Error(w, "Failed to get sender address", http.StatusInternalServerError)
return
}

Expand Down Expand Up @@ -1285,7 +1303,8 @@ func (p *PDPService) handleDeleteProofSetRoot(w http.ResponseWriter, r *http.Req
return true, nil
}, harmonydb.OptionRetry())
if err != nil {
http.Error(w, "Failed to schedule delete root: "+err.Error(), http.StatusInternalServerError)
log.Errorf("Failed to schedule delete root: %v", err)
http.Error(w, "Failed to schedule delete root", http.StatusInternalServerError)
return
}

Expand Down Expand Up @@ -1341,7 +1360,8 @@ func (p *PDPService) handleGetProofSetRoot(w http.ResponseWriter, r *http.Reques
http.Error(w, "Root not found", http.StatusNotFound)
return
}
http.Error(w, "Failed to retrieve root: "+err.Error(), http.StatusInternalServerError)
log.Errorf("Failed to retrieve root: %v", err)
http.Error(w, "Failed to retrieve root", http.StatusInternalServerError)
return
}

Expand All @@ -1359,7 +1379,8 @@ func (p *PDPService) handleGetProofSetRoot(w http.ResponseWriter, r *http.Reques
ORDER BY subroot_offset
`, proofSetID, rootID)
if err != nil {
http.Error(w, "Failed to retrieve subroots: "+err.Error(), http.StatusInternalServerError)
log.Errorf("Failed to retrieve subroots: %v", err)
http.Error(w, "Failed to retrieve subroots", http.StatusInternalServerError)
return
}

Expand Down
Loading