Skip to content

Commit a450534

Browse files
committed
fix: support missing OpenAI env vars
1 parent cc1064d commit a450534

4 files changed

Lines changed: 195 additions & 5 deletions

File tree

README.md

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -334,11 +334,13 @@ skill-validator score evaluate --provider claude-cli <path>
334334
| Provider | Env var | Default model | Covers |
335335
|---|---|---|---|
336336
| `anthropic` (default) | `ANTHROPIC_API_KEY` | `claude-sonnet-4-5-20250929` | Anthropic |
337-
| `openai` | `OPENAI_API_KEY` | `gpt-5.2` | OpenAI, Ollama, Together, Groq, Azure, etc. |
337+
| `openai` | `OPENAI_API_KEY` | `gpt-5.2` | OpenAI, Ollama, Together, Groq, Azure, etc. \*\* |
338338
| `claude-cli` | _(none)_ | `sonnet` | Claude CLI (uses locally authenticated `claude` binary) \* |
339339
340340
\* **Accuracy note:** The `claude-cli` provider shells out to the `claude` CLI, which loads local context (CLAUDE.md files, project memory, rules) into each scoring call. This extra context may influence scores, making them less reproducible across environments compared to the API-based providers. For the most consistent results, use the `anthropic` or `openai` providers with an API key.
341341
342+
\*\* The `openai` provider also reads these optional environment variables: `OPENAI_BASE_URL` (API base URL, overridden by `--base-url`), `OPENAI_ORG_ID` (sent as `OpenAI-Organization` header), and `OPENAI_PROJECT_ID` (sent as `OpenAI-Project` header). The org/project headers are only sent when targeting an OpenAI endpoint (e.g. `api.openai.com`, `us.api.openai.com`), not when using third-party compatible APIs.
343+
342344
Use `--model` to override the default model and `--base-url` to point at any OpenAI-compatible endpoint (e.g. `http://localhost:11434/v1` for Ollama). If the endpoint requires a specific token limit parameter, use `--max-tokens-style` to override auto-detection:
343345
344346
| Value | Behavior |

cmd/score_evaluate.go

Lines changed: 21 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -38,8 +38,13 @@ The path can be:
3838
- A specific .md file — scores just that reference file
3939
4040
Requires an API key via environment variable:
41-
ANTHROPIC_API_KEY (for --provider anthropic, the default)
42-
OPENAI_API_KEY (for --provider openai)
41+
ANTHROPIC_API_KEY (for --provider anthropic, the default)
42+
OPENAI_API_KEY (for --provider openai)
43+
44+
Optional OpenAI environment variables:
45+
OPENAI_BASE_URL (API base URL; overridden by --base-url flag)
46+
OPENAI_ORG_ID (organization ID; sent as OpenAI-Organization header)
47+
OPENAI_PROJECT_ID (project ID; sent as OpenAI-Project header)
4348
4449
The claude-cli provider uses the locally installed "claude" CLI and does not
4550
require an API key. This is useful when the CLI is already authenticated
@@ -88,12 +93,25 @@ func runScoreEvaluate(cmd *cobra.Command, args []string) error {
8893
}
8994
}
9095

96+
// For the openai provider, read optional env vars for base URL, org, and project.
97+
baseURL := evalBaseURL
98+
var orgID, projectID string
99+
if strings.ToLower(evalProvider) == "openai" {
100+
if baseURL == "" {
101+
baseURL = os.Getenv("OPENAI_BASE_URL")
102+
}
103+
orgID = os.Getenv("OPENAI_ORG_ID")
104+
projectID = os.Getenv("OPENAI_PROJECT_ID")
105+
}
106+
91107
client, err := judge.NewClient(judge.ClientOptions{
92108
Provider: evalProvider,
93109
APIKey: apiKey,
94-
BaseURL: evalBaseURL,
110+
BaseURL: baseURL,
95111
Model: evalModel,
96112
MaxTokensStyle: evalMaxTokensStyle,
113+
OrgID: orgID,
114+
ProjectID: projectID,
97115
})
98116
if err != nil {
99117
return err

judge/client.go

Lines changed: 29 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -7,6 +7,7 @@ import (
77
"fmt"
88
"io"
99
"net/http"
10+
"net/url"
1011
"os/exec"
1112
"strings"
1213
"time"
@@ -38,6 +39,8 @@ type ClientOptions struct {
3839
Model string // Optional; defaults per provider
3940
MaxTokensStyle string // "auto", "max_tokens", or "max_completion_tokens"
4041
MaxResponseTokens int // Maximum tokens in the LLM response; 0 defaults to 500
42+
OrgID string // Optional OpenAI organization ID; sent as OpenAI-Organization header
43+
ProjectID string // Optional OpenAI project ID; sent as OpenAI-Project header
4144
}
4245

4346
// NewClient creates an LLMClient for the given options.
@@ -84,7 +87,7 @@ func NewClient(opts ClientOptions) (LLMClient, error) {
8487
baseURL = "https://api.openai.com/v1"
8588
}
8689
baseURL = strings.TrimRight(baseURL, "/")
87-
return &openaiClient{apiKey: opts.APIKey, baseURL: baseURL, model: model, maxTokensStyle: opts.MaxTokensStyle, maxTokens: maxResp}, nil
90+
return &openaiClient{apiKey: opts.APIKey, baseURL: baseURL, model: model, maxTokensStyle: opts.MaxTokensStyle, maxTokens: maxResp, orgID: opts.OrgID, projectID: opts.ProjectID}, nil
8891
default:
8992
return nil, fmt.Errorf("unsupported provider %q (use \"anthropic\", \"openai\", or \"claude-cli\")", opts.Provider)
9093
}
@@ -186,11 +189,25 @@ type openaiClient struct {
186189
model string
187190
maxTokensStyle string
188191
maxTokens int
192+
orgID string
193+
projectID string
189194
}
190195

191196
func (c *openaiClient) Provider() string { return "openai" }
192197
func (c *openaiClient) ModelName() string { return c.model }
193198

199+
// isOpenAIHost reports whether the given base URL points to an OpenAI endpoint
200+
// (api.openai.com, us.api.openai.com, etc.) as opposed to a third-party
201+
// compatible API like Ollama or vLLM.
202+
func isOpenAIHost(baseURL string) bool {
203+
u, err := url.Parse(baseURL)
204+
if err != nil {
205+
return false
206+
}
207+
host := u.Hostname()
208+
return strings.HasSuffix(host, ".openai.com")
209+
}
210+
194211
type openaiRequest struct {
195212
Model string `json:"model"`
196213
MaxTokens int `json:"max_tokens,omitempty"`
@@ -266,6 +283,17 @@ func (c *openaiClient) Complete(ctx context.Context, systemPrompt, userContent s
266283
req.Header.Set("Content-Type", "application/json")
267284
req.Header.Set("Authorization", "Bearer "+c.apiKey)
268285

286+
// Only send OpenAI-specific org/project headers when targeting an
287+
// OpenAI endpoint, so they don't leak to Ollama or other compatible APIs.
288+
if isOpenAIHost(c.baseURL) {
289+
if c.orgID != "" {
290+
req.Header.Set("OpenAI-Organization", c.orgID)
291+
}
292+
if c.projectID != "" {
293+
req.Header.Set("OpenAI-Project", c.projectID)
294+
}
295+
}
296+
269297
resp, err := defaultHTTPClient.Do(req)
270298
if err != nil {
271299
return "", fmt.Errorf("API request failed: %w", err)

judge/client_test.go

Lines changed: 142 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -5,8 +5,10 @@ import (
55
"fmt"
66
"net/http"
77
"net/http/httptest"
8+
"net/url"
89
"strings"
910
"testing"
11+
"time"
1012
)
1113

1214
// stubLookPath replaces the lookPath variable for the duration of a test,
@@ -140,6 +142,146 @@ func TestUseMaxCompletionTokens(t *testing.T) {
140142
}
141143
}
142144

145+
func TestIsOpenAIHost(t *testing.T) {
146+
tests := []struct {
147+
baseURL string
148+
want bool
149+
}{
150+
{"https://api.openai.com/v1", true},
151+
{"https://us.api.openai.com/v1", true},
152+
{"https://eu.api.openai.com/v1", true},
153+
{"http://localhost:11434/v1", false},
154+
{"https://my-proxy.example.com/v1", false},
155+
{"https://notopenai.com/v1", false},
156+
{"not a url", false},
157+
}
158+
159+
for _, tt := range tests {
160+
t.Run(tt.baseURL, func(t *testing.T) {
161+
got := isOpenAIHost(tt.baseURL)
162+
if got != tt.want {
163+
t.Errorf("isOpenAIHost(%q) = %v, want %v", tt.baseURL, got, tt.want)
164+
}
165+
})
166+
}
167+
}
168+
169+
func TestOpenAIClient_OrgProjectHeaders(t *testing.T) {
170+
t.Run("headers sent for openai.com", func(t *testing.T) {
171+
server := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
172+
if got := r.Header.Get("OpenAI-Organization"); got != "org-123" {
173+
t.Errorf("OpenAI-Organization = %q, want %q", got, "org-123")
174+
}
175+
if got := r.Header.Get("OpenAI-Project"); got != "proj-456" {
176+
t.Errorf("OpenAI-Project = %q, want %q", got, "proj-456")
177+
}
178+
w.Header().Set("Content-Type", "application/json")
179+
_, _ = fmt.Fprint(w, `{"choices": [{"message": {"content": "ok"}}]}`)
180+
}))
181+
defer server.Close()
182+
183+
// The test server isn't on openai.com, so going through NewClient alone
184+
// can't exercise the host-based header logic. Instead, construct the
185+
// openaiClient directly with an api.openai.com baseURL, then use a
186+
// rewriting transport (below) so requests actually reach the test server.
187+
c := &openaiClient{
188+
apiKey: "test-key",
189+
baseURL: "https://api.openai.com/v1",
190+
model: "gpt-4o",
191+
maxTokens: 500,
192+
orgID: "org-123",
193+
projectID: "proj-456",
194+
}
195+
196+
// Override defaultHTTPClient to proxy to test server
197+
origClient := defaultHTTPClient
198+
defer func() { defaultHTTPClient = origClient }()
199+
200+
// Use a custom transport that rewrites the host to the test server
201+
testURL := server.URL
202+
defaultHTTPClient = &http.Client{
203+
Timeout: 5 * time.Second,
204+
Transport: &rewriteTransport{target: testURL},
205+
}
206+
207+
_, err := c.Complete(t.Context(), "system", "user")
208+
if err != nil {
209+
t.Fatalf("Complete failed: %v", err)
210+
}
211+
})
212+
213+
t.Run("headers not sent for custom base URL", func(t *testing.T) {
214+
server := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
215+
if got := r.Header.Get("OpenAI-Organization"); got != "" {
216+
t.Errorf("OpenAI-Organization should be empty for non-OpenAI host, got %q", got)
217+
}
218+
if got := r.Header.Get("OpenAI-Project"); got != "" {
219+
t.Errorf("OpenAI-Project should be empty for non-OpenAI host, got %q", got)
220+
}
221+
w.Header().Set("Content-Type", "application/json")
222+
_, _ = fmt.Fprint(w, `{"choices": [{"message": {"content": "ok"}}]}`)
223+
}))
224+
defer server.Close()
225+
226+
client, err := NewClient(ClientOptions{
227+
Provider: "openai",
228+
APIKey: "test-key",
229+
BaseURL: server.URL,
230+
Model: "llama3",
231+
OrgID: "org-123",
232+
ProjectID: "proj-456",
233+
})
234+
if err != nil {
235+
t.Fatalf("NewClient: %v", err)
236+
}
237+
_, err = client.Complete(t.Context(), "system", "user")
238+
if err != nil {
239+
t.Fatalf("Complete failed: %v", err)
240+
}
241+
})
242+
243+
t.Run("empty org and project not sent", func(t *testing.T) {
244+
server := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
245+
if got := r.Header.Get("OpenAI-Organization"); got != "" {
246+
t.Errorf("OpenAI-Organization should be empty, got %q", got)
247+
}
248+
if got := r.Header.Get("OpenAI-Project"); got != "" {
249+
t.Errorf("OpenAI-Project should be empty, got %q", got)
250+
}
251+
w.Header().Set("Content-Type", "application/json")
252+
_, _ = fmt.Fprint(w, `{"choices": [{"message": {"content": "ok"}}]}`)
253+
}))
254+
defer server.Close()
255+
256+
client, err := NewClient(ClientOptions{
257+
Provider: "openai",
258+
APIKey: "test-key",
259+
BaseURL: server.URL,
260+
Model: "gpt-4o",
261+
})
262+
if err != nil {
263+
t.Fatalf("NewClient: %v", err)
264+
}
265+
_, err = client.Complete(t.Context(), "system", "user")
266+
if err != nil {
267+
t.Fatalf("Complete failed: %v", err)
268+
}
269+
})
270+
}
271+
272+
// rewriteTransport redirects each request to the test server by rewriting
273+
// the request URL's scheme and host; the client's baseURL stays untouched.
274+
type rewriteTransport struct {
275+
target string
276+
}
277+
278+
func (t *rewriteTransport) RoundTrip(req *http.Request) (*http.Response, error) {
279+
targetURL, _ := url.Parse(t.target)
280+
req.URL.Scheme = targetURL.Scheme
281+
req.URL.Host = targetURL.Host
282+
return http.DefaultTransport.RoundTrip(req)
283+
}
284+
143285
func TestMaxTokensStyleOverride(t *testing.T) {
144286
tests := []struct {
145287
name string

0 commit comments

Comments
 (0)