Skip to content
Open
Show file tree
Hide file tree
Changes from 7 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
13 changes: 9 additions & 4 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -358,19 +358,23 @@ For more information on Cross Fork Object References, please [read our blog post

## 17. Scan Hugging Face

### Scan a Hugging Face Model, Dataset or Space
### Scan a Hugging Face Model, Dataset, Space or Bucket

```bash
trufflehog huggingface --model <model_id> --space <space_id> --dataset <dataset_id>
trufflehog huggingface \
--model <model_id> \
--dataset <dataset_id> \
--space <space_id> \
--bucket <bucket_id>
```

### Scan all Models, Datasets and Spaces belonging to a Hugging Face Organization or User
### Scan all Models, Datasets, Spaces and Buckets belonging to a Hugging Face Organization or User

```bash
trufflehog huggingface --org <orgname> --user <username>
```

(Optionally) When scanning an organization or user, you can skip an entire class of resources with `--skip-models`, `--skip-datasets`, `--skip-spaces` OR a particular resource with `--ignore-models <model_id>`, `--ignore-datasets <dataset_id>`, `--ignore-spaces <space_id>`.
(Optional) When scanning an organization or user, you can skip an entire class of resources with `--skip-all-models`, `--skip-all-datasets`, `--skip-all-spaces`, or `--skip-all-buckets`, or skip a particular resource with `--ignore-models <model_id>`, `--ignore-datasets <dataset_id>`, `--ignore-spaces <space_id>`, or `--ignore-buckets <bucket_id>`.

### Scan Discussion and PR Comments

Expand Down Expand Up @@ -422,6 +426,7 @@ TruffleHog has a sub-command for each source of data that you may want to scan:
- git
- github
- gitlab
- huggingface
- docker
- s3
- filesystem (files and directories)
Expand Down
14 changes: 13 additions & 1 deletion docs/man/trufflehog.1
Original file line number Diff line number Diff line change
Expand Up @@ -487,7 +487,7 @@ Jenkins password
Skip TLS verification
.SS
\fBhuggingface [<flags>]\fR
Find credentials in HuggingFace datasets, models and spaces.
Find credentials in HuggingFace datasets, models, spaces and buckets.
.TP
\fB--endpoint="https://huggingface.co"\fR
HuggingFace endpoint.
Expand All @@ -501,6 +501,9 @@ HuggingFace space to scan. You can repeat this flag. Example: 'username/space'
\fB--dataset=DATASET\fR
HuggingFace dataset to scan. You can repeat this flag. Example: 'username/dataset'
.TP
\fB--bucket=BUCKET\fR
HuggingFace bucket to scan. You can repeat this flag. Example: 'username/bucket'
.TP
\fB--org=ORG\fR
HuggingFace organization to scan. You can repeat this flag. Example: "trufflesecurity"
.TP
Expand Down Expand Up @@ -528,6 +531,12 @@ Spaces to ignore in scan. You can repeat this flag. Must use HuggingFace space f
\fB--ignore-datasets=IGNORE-DATASETS\fR
Datasets to ignore in scan. You can repeat this flag. Must use HuggingFace dataset full name. Example: 'username/dataset' (Only used with --user or --org)
.TP
\fB--include-buckets=INCLUDE-BUCKETS\fR
Buckets to include in scan. You can repeat this flag. Must use HuggingFace bucket full name. Example: 'username/bucket' (Only used with --user or --org)
.TP
\fB--ignore-buckets=IGNORE-BUCKETS\fR
Buckets to ignore in scan. You can repeat this flag. Must use HuggingFace bucket full name. Example: 'username/bucket' (Only used with --user or --org)
.TP
\fB--skip-all-models\fR
Skip all model scans. (Only used with --user or --org)
.TP
Expand All @@ -537,6 +546,9 @@ Skip all space scans. (Only used with --user or --org)
\fB--skip-all-datasets\fR
Skip all dataset scans. (Only used with --user or --org)
.TP
\fB--skip-all-buckets\fR
Skip all bucket scans. (Only used with --user or --org)
.TP
\fB--include-discussions\fR
Include discussions in scan.
.TP
Expand Down
14 changes: 11 additions & 3 deletions main.go
Original file line number Diff line number Diff line change
Expand Up @@ -249,11 +249,12 @@ var (
jenkinsPassword = jenkinsScan.Flag("password", "Jenkins password").Envar("JENKINS_PASSWORD").String()
jenkinsInsecureSkipVerifyTLS = jenkinsScan.Flag("insecure-skip-verify-tls", "Skip TLS verification").Envar("JENKINS_INSECURE_SKIP_VERIFY_TLS").Bool()

huggingfaceScan = cli.Command("huggingface", "Find credentials in HuggingFace datasets, models and spaces.")
huggingfaceScan = cli.Command("huggingface", "Find credentials in HuggingFace datasets, models, spaces and buckets.")
huggingfaceEndpoint = huggingfaceScan.Flag("endpoint", "HuggingFace endpoint.").Default("https://huggingface.co").String()
huggingfaceModels = huggingfaceScan.Flag("model", "HuggingFace model to scan. You can repeat this flag. Example: 'username/model'").Strings()
huggingfaceSpaces = huggingfaceScan.Flag("space", "HuggingFace space to scan. You can repeat this flag. Example: 'username/space'").Strings()
huggingfaceDatasets = huggingfaceScan.Flag("dataset", "HuggingFace dataset to scan. You can repeat this flag. Example: 'username/dataset'").Strings()
huggingfaceBuckets = huggingfaceScan.Flag("bucket", "HuggingFace bucket to scan. You can repeat this flag. Example: 'username/bucket'").Strings()
huggingfaceOrgs = huggingfaceScan.Flag("org", `HuggingFace organization to scan. You can repeat this flag. Example: "trufflesecurity"`).Strings()
huggingfaceUsers = huggingfaceScan.Flag("user", `HuggingFace user to scan. You can repeat this flag. Example: "trufflesecurity"`).Strings()
huggingfaceToken = huggingfaceScan.Flag("token", "HuggingFace token. Can be provided with environment variable HUGGINGFACE_TOKEN.").Envar("HUGGINGFACE_TOKEN").String()
Expand All @@ -264,9 +265,12 @@ var (
huggingfaceIgnoreModels = huggingfaceScan.Flag("ignore-models", "Models to ignore in scan. You can repeat this flag. Must use HuggingFace model full name. Example: 'username/model' (Only used with --user or --org)").Strings()
huggingfaceIgnoreSpaces = huggingfaceScan.Flag("ignore-spaces", "Spaces to ignore in scan. You can repeat this flag. Must use HuggingFace space full name. Example: 'username/space' (Only used with --user or --org)").Strings()
huggingfaceIgnoreDatasets = huggingfaceScan.Flag("ignore-datasets", "Datasets to ignore in scan. You can repeat this flag. Must use HuggingFace dataset full name. Example: 'username/dataset' (Only used with --user or --org)").Strings()
huggingfaceIncludeBuckets = huggingfaceScan.Flag("include-buckets", "Buckets to include in scan. You can repeat this flag. Must use HuggingFace bucket full name. Example: 'username/bucket' (Only used with --user or --org)").Strings()
huggingfaceIgnoreBuckets = huggingfaceScan.Flag("ignore-buckets", "Buckets to ignore in scan. You can repeat this flag. Must use HuggingFace bucket full name. Example: 'username/bucket' (Only used with --user or --org)").Strings()
huggingfaceSkipAllModels = huggingfaceScan.Flag("skip-all-models", "Skip all model scans. (Only used with --user or --org)").Bool()
huggingfaceSkipAllSpaces = huggingfaceScan.Flag("skip-all-spaces", "Skip all space scans. (Only used with --user or --org)").Bool()
huggingfaceSkipAllDatasets = huggingfaceScan.Flag("skip-all-datasets", "Skip all dataset scans. (Only used with --user or --org)").Bool()
huggingfaceSkipAllBuckets = huggingfaceScan.Flag("skip-all-buckets", "Skip all bucket scans. (Only used with --user or --org)").Bool()
huggingfaceIncludeDiscussions = huggingfaceScan.Flag("include-discussions", "Include discussions in scan.").Bool()
huggingfaceIncludePrs = huggingfaceScan.Flag("include-prs", "Include pull requests in scan.").Bool()

Expand Down Expand Up @@ -1118,27 +1122,31 @@ func runSingleScan(ctx context.Context, cmd string, cfg engine.Config) (metrics,
*huggingfaceEndpoint = strings.TrimRight(*huggingfaceEndpoint, "/")
}

if len(*huggingfaceModels) == 0 && len(*huggingfaceSpaces) == 0 && len(*huggingfaceDatasets) == 0 && len(*huggingfaceOrgs) == 0 && len(*huggingfaceUsers) == 0 {
return scanMetrics, fmt.Errorf("invalid config: you must specify at least one organization, user, model, space or dataset")
if len(*huggingfaceModels) == 0 && len(*huggingfaceSpaces) == 0 && len(*huggingfaceDatasets) == 0 && len(*huggingfaceBuckets) == 0 && len(*huggingfaceOrgs) == 0 && len(*huggingfaceUsers) == 0 {
return scanMetrics, fmt.Errorf("invalid config: you must specify at least one organization, user, model, space, dataset or bucket")
}

cfg := engine.HuggingfaceConfig{
Endpoint: *huggingfaceEndpoint,
Models: *huggingfaceModels,
Spaces: *huggingfaceSpaces,
Datasets: *huggingfaceDatasets,
Buckets: *huggingfaceBuckets,
Organizations: *huggingfaceOrgs,
Users: *huggingfaceUsers,
Token: *huggingfaceToken,
IncludeModels: *huggingfaceIncludeModels,
IncludeSpaces: *huggingfaceIncludeSpaces,
IncludeDatasets: *huggingfaceIncludeDatasets,
IncludeBuckets: *huggingfaceIncludeBuckets,
IgnoreModels: *huggingfaceIgnoreModels,
IgnoreSpaces: *huggingfaceIgnoreSpaces,
IgnoreDatasets: *huggingfaceIgnoreDatasets,
IgnoreBuckets: *huggingfaceIgnoreBuckets,
SkipAllModels: *huggingfaceSkipAllModels,
SkipAllSpaces: *huggingfaceSkipAllSpaces,
SkipAllDatasets: *huggingfaceSkipAllDatasets,
SkipAllBuckets: *huggingfaceSkipAllBuckets,
IncludeDiscussions: *huggingfaceIncludeDiscussions,
IncludePrs: *huggingfaceIncludePrs,
Concurrency: *concurrency,
Expand Down
12 changes: 10 additions & 2 deletions pkg/engine/huggingface.go
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@ import (
"github.com/trufflesecurity/trufflehog/v3/pkg/sources/huggingface"
)

// HuggingFaceConfig represents the configuration for HuggingFace.
// HuggingfaceConfig represents the configuration for HuggingFace.
type HuggingfaceConfig struct {
Endpoint string
Models []string
Expand All @@ -24,16 +24,20 @@ type HuggingfaceConfig struct {
IgnoreSpaces []string
IncludeDatasets []string
IgnoreDatasets []string
Buckets []string
IncludeBuckets []string
IgnoreBuckets []string
SkipAllModels bool
SkipAllSpaces bool
SkipAllDatasets bool
SkipAllBuckets bool
IncludeDiscussions bool
IncludePrs bool
Token string
Concurrency int
}

// ScanGitHub scans HuggingFace with the provided options.
// ScanHuggingface scans HuggingFace with the provided options.
func (e *Engine) ScanHuggingface(ctx context.Context, c HuggingfaceConfig) (sources.JobProgressRef, error) {
connection := sourcespb.Huggingface{
Endpoint: c.Endpoint,
Expand All @@ -48,9 +52,13 @@ func (e *Engine) ScanHuggingface(ctx context.Context, c HuggingfaceConfig) (sour
IgnoreSpaces: c.IgnoreSpaces,
IncludeDatasets: c.IncludeDatasets,
IgnoreDatasets: c.IgnoreDatasets,
Buckets: c.Buckets,
IncludeBuckets: c.IncludeBuckets,
IgnoreBuckets: c.IgnoreBuckets,
SkipAllModels: c.SkipAllModels,
SkipAllSpaces: c.SkipAllSpaces,
SkipAllDatasets: c.SkipAllDatasets,
SkipAllBuckets: c.SkipAllBuckets,
IncludeDiscussions: c.IncludeDiscussions,
IncludePrs: c.IncludePrs,
}
Expand Down
Loading