Skip to content

Commit 8f38b3d

Browse files
authored
fix(internal/godocfx): don't save timestamps until modules are successfully processed (#11563)
1 parent 72046f5 commit 8f38b3d

3 files changed

Lines changed: 99 additions & 67 deletions

File tree

internal/godocfx/index.go

Lines changed: 33 additions & 31 deletions
Original file line numberDiff line numberDiff line change
@@ -20,19 +20,23 @@ import (
2020
"encoding/json"
2121
"log"
2222
"net/http"
23+
"net/url"
2324
"strings"
2425
"time"
2526
)
2627

27-
// indexer gets a limited list of entries from index.golang.org.
28-
type indexer interface {
29-
get(prefixes []string, since time.Time) (entries []indexEntry, last time.Time, err error)
28+
func mustParse(s string) *url.URL {
29+
u, err := url.Parse(s)
30+
if err != nil {
31+
log.Fatalf("Failed to parse URL: %q", s)
32+
}
33+
return u
3034
}
3135

3236
// indexClient is used to access index.golang.org.
33-
type indexClient struct{}
34-
35-
var _ indexer = indexClient{}
37+
type indexClient struct {
38+
indexURL string
39+
}
3640

3741
// indexEntry represents a line in the output of index.golang.org/index.
3842
type indexEntry struct {
@@ -41,47 +45,45 @@ type indexEntry struct {
4145
Timestamp time.Time
4246
}
4347

44-
// newModules returns the new modules with the given prefix.
48+
// get returns modules with the given prefix that were published since the given timestamp.
4549
//
46-
// newModules uses index.golang.org/index?since=timestamp to find new module
47-
// versions since the given timestamp.
50+
// get uses index.golang.org/index?since=timestamp to find new module versions.
4851
//
49-
// newModules stores the timestamp of the last successful run with tSaver.
50-
func newModules(ctx context.Context, i indexer, tSaver timeSaver, prefixes []string) ([]indexEntry, error) {
51-
since, err := tSaver.get(ctx)
52-
if err != nil {
53-
return nil, err
54-
}
52+
// get returns the entries and the timestamp of the last seen entry, which may be later
53+
// than the timestamp of the last returned entry.
54+
func (ic indexClient) get(ctx context.Context, prefixes []string, since time.Time) ([]indexEntry, time.Time, error) {
5555
fiveMinAgo := time.Now().Add(-5 * time.Minute).UTC() // When to stop processing.
5656
entries := []indexEntry{}
5757
log.Printf("Fetching index.golang.org entries since %s", since.Format(time.RFC3339))
58-
count := 0
59-
for {
60-
count++
58+
pages := 0
59+
lastSeen := since
60+
for !lastSeen.After(fiveMinAgo) {
61+
log.Printf("last seen: %v", lastSeen.Format(time.RFC3339))
62+
pages++
6163
var cur []indexEntry
62-
cur, since, err = i.get(prefixes, since)
64+
var err error
65+
cur, lastSeen, err = ic.getPage(prefixes, lastSeen)
6366
if err != nil {
64-
return nil, err
67+
return nil, time.Time{}, err
6568
}
6669
entries = append(entries, cur...)
67-
if since.After(fiveMinAgo) {
68-
break
69-
}
70-
}
71-
log.Printf("Parsed %d index.golang.org pages up to %s", count, since.Format(time.RFC3339))
72-
if err := tSaver.put(ctx, since); err != nil {
73-
return nil, err
7470
}
71+
log.Printf("Parsed %d index.golang.org pages from %s to %s", pages, since.Format(time.RFC3339), lastSeen.Format(time.RFC3339))
7572

76-
return entries, nil
73+
return entries, lastSeen, nil
7774
}
7875

7976
// get fetches a single chronological page of modules from
8077
// index.golang.org/index.
81-
func (indexClient) get(prefixes []string, since time.Time) ([]indexEntry, time.Time, error) {
78+
func (ic indexClient) getPage(prefixes []string, since time.Time) ([]indexEntry, time.Time, error) {
8279
entries := []indexEntry{}
83-
sinceString := since.Format(time.RFC3339)
84-
resp, err := http.Get("https://index.golang.org/index?since=" + sinceString)
80+
81+
u := mustParse(ic.indexURL)
82+
q := u.Query()
83+
q.Set("since", since.Format(time.RFC3339))
84+
u.RawQuery = q.Encode()
85+
log.Printf("Fetching %s", u.String())
86+
resp, err := http.Get(u.String())
8587
if err != nil {
8688
return nil, time.Time{}, err
8789
}

internal/godocfx/index_test.go

Lines changed: 43 additions & 27 deletions
Original file line numberDiff line numberDiff line change
@@ -16,48 +16,64 @@ package main
1616

1717
import (
1818
"context"
19+
"fmt"
20+
"net/http"
21+
"net/http/httptest"
1922
"testing"
2023
"time"
2124
)
2225

2326
const wantEntries = 5
2427

25-
type fakeIC struct{}
28+
// responses is used for faking HTTP responses. Format each value with a timestamp based on whether get() should
29+
// fetch another page.
30+
var responses = []string{
31+
`{"Path":"golang.org/x/net","Version":"v0.0.0-20180627171509-e514e69ffb8b","Timestamp":"2019-04-10T20:30:42.413493Z"}
32+
{"Path":"golang.org/x/exp/notary","Version":"v0.0.0-20190409044807-56b785ea58b2","Timestamp":"2019-04-10T20:37:01.409908Z"}
33+
{"Path":"golang.org/x/crypto","Version":"v0.0.0-20181025213731-e84da0312774","Timestamp":"2019-04-10T20:40:43.408586Z"}
34+
{"Path":"golang.org/x/net","Version":"v0.0.0-20181213202711-891ebc4b82d6","Timestamp":"2019-04-10T20:40:54.12906Z"}
35+
{"Path":"cloud.google.com/go","Version":"v1.0.0","Timestamp":"%s"}`,
2636

27-
func (f fakeIC) get(prefixes []string, since time.Time) (entries []indexEntry, last time.Time, err error) {
28-
e := indexEntry{Timestamp: since.Add(24 * time.Hour)}
29-
return []indexEntry{e}, e.Timestamp, nil
37+
`{"Path":"cloud.google.com/go/storage","Version":"v1.0.0","Timestamp":"2020-04-10T20:40:43.408586Z"}
38+
{"Path":"cloud.google.com/go/bigquery","Version":"v1.0.0","Timestamp":"2020-04-10T20:40:43.408586Z"}
39+
{"Path":"golang.org/x/crypto","Version":"v0.0.0-20181025213731-e84da0312774","Timestamp":"2020-04-10T20:40:43.408586Z"}
40+
{"Path":"cloud.google.com/go/spanner","Version":"v1.0.0","Timestamp":"2020-04-10T20:40:43.408586Z"}
41+
{"Path":"cloud.google.com/go","Version":"v1.0.0","Timestamp":"%s"}`,
3042
}
3143

32-
type fakeTS struct {
33-
getCalled, putCalled bool
34-
}
35-
36-
func (f *fakeTS) get(context.Context) (time.Time, error) {
37-
f.getCalled = true
38-
t := time.Now().Add(-wantEntries * 24 * time.Hour).UTC()
39-
return t, nil
40-
}
44+
func TestGet(t *testing.T) {
45+
numCalls := 0
46+
lastTime := time.Now().Add(-time.Minute).Format(time.RFC3339)
47+
s := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
48+
numCalls++
49+
if numCalls == 1 {
50+
resp := []byte(fmt.Sprintf(responses[0], time.Now().Add(-time.Hour).Format(time.RFC3339)))
51+
w.Write(resp)
52+
return
53+
}
54+
if numCalls == 2 {
55+
resp := []byte(fmt.Sprintf(responses[1], lastTime))
56+
w.Write(resp)
57+
return
58+
}
59+
t.Fatalf("Unexpected call #%d to server", numCalls)
60+
}))
4161

42-
func (f *fakeTS) put(context.Context, time.Time) error {
43-
f.putCalled = true
44-
return nil
45-
}
62+
ic := indexClient{indexURL: s.URL}
4663

47-
func TestNewModules(t *testing.T) {
48-
ic := fakeIC{}
49-
ts := &fakeTS{}
50-
entries, err := newModules(context.Background(), ic, ts, []string{"cloud.google.com"})
64+
entries, last, err := ic.get(context.Background(), []string{"cloud.google.com"}, time.Time{})
5165
if err != nil {
52-
t.Fatalf("newModules got err: %v", err)
66+
t.Fatalf("get got err: %v", err)
5367
}
5468
if got, want := len(entries), wantEntries; got != want {
55-
t.Errorf("newModules got %d entries, want %d", got, want)
69+
t.Errorf("get got %d entries, want %d", got, want)
5670
}
57-
if !ts.getCalled {
58-
t.Errorf("fakeTS.get was never called")
71+
72+
want, err := time.Parse(time.RFC3339, lastTime)
73+
if err != nil {
74+
t.Fatalf("Failed to parse time: %v", err)
5975
}
60-
if !ts.putCalled {
61-
t.Errorf("fakeTS.put was never called")
76+
if !last.Equal(want) {
77+
t.Errorf("get got last time %v, want %v", last, lastTime)
6278
}
6379
}

internal/godocfx/main.go

Lines changed: 23 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -40,7 +40,6 @@ import (
4040
"flag"
4141
"fmt"
4242
"io"
43-
"io/ioutil"
4443
"log"
4544
"os"
4645
"os/exec"
@@ -67,16 +66,25 @@ func main() {
6766
log.Fatalf("%s missing required argument: module path/prefix", os.Args[0])
6867
}
6968

69+
tSaver := &dsTimeSaver{projectID: *projectID}
70+
var lastSeen time.Time
71+
72+
ctx := context.Background()
7073
modNames := flag.Args()
7174
var mods []indexEntry
7275
if *newMods {
7376
if *projectID == "" {
7477
log.Fatal("Must set -project when using -new-modules")
7578
}
7679
var err error
77-
mods, err = newModules(context.Background(), indexClient{}, &dsTimeSaver{projectID: *projectID}, modNames)
80+
lastSeen, err = tSaver.get(ctx)
81+
if err != nil {
82+
log.Fatalf("Failed to get latest timestamp: %v", err)
83+
}
84+
ic := indexClient{indexURL: "https://index.golang.org/index"}
85+
mods, lastSeen, err = ic.get(ctx, modNames, lastSeen)
7886
if err != nil {
79-
log.Fatal(err)
87+
log.Fatalf("Failed to get from index: %v", err)
8088
}
8189
} else {
8290
for _, mod := range modNames {
@@ -108,10 +116,16 @@ func main() {
108116
namer := &friendlyAPINamer{
109117
metaURL: "https://raw.githubusercontent.com/googleapis/google-cloud-go/main/internal/.repo-metadata-full.json",
110118
}
111-
optionalExtraFiles := []string{}
112-
if ok := processMods(mods, *outDir, namer, optionalExtraFiles, *print); !ok {
119+
if ok := processMods(mods, *outDir, namer, nil, *print); !ok {
113120
os.Exit(1)
114121
}
122+
123+
// Only save the latest timestamp if we successfully processed all modules.
124+
if *newMods {
125+
if err := tSaver.put(ctx, lastSeen); err != nil {
126+
log.Fatalf("Failed to save latest timestamp: %v", err)
127+
}
128+
}
115129
}
116130

117131
func runCmd(dir, name string, args ...string) error {
@@ -121,19 +135,19 @@ func runCmd(dir, name string, args ...string) error {
121135
cmd.Stdout = os.Stdout
122136
cmd.Stderr = os.Stderr
123137
if err := cmd.Start(); err != nil {
124-
return fmt.Errorf("Start: %v", err)
138+
return fmt.Errorf("cmd.Start: %v", err)
125139
}
126140
if err := cmd.Wait(); err != nil {
127-
return fmt.Errorf("Wait: %s", err)
141+
return fmt.Errorf("cmd.Wait: %s", err)
128142
}
129143
return nil
130144
}
131145

132146
func processMods(mods []indexEntry, outDir string, namer *friendlyAPINamer, optionalExtraFiles []string, print bool) bool {
133147
// Create a temp module so we can get the exact version asked for.
134-
workingDir, err := ioutil.TempDir("", "godocfx-*")
148+
workingDir, err := os.MkdirTemp("", "godocfx-*")
135149
if err != nil {
136-
log.Fatalf("ioutil.TempDir: %v", err)
150+
log.Fatalf("os.MkdirTemp: %v", err)
137151
}
138152
// Use a fake module that doesn't start with cloud.google.com/go.
139153
runCmd(workingDir, "go", "mod", "init", "cloud.google.com/lets-build-some-docs")

0 commit comments

Comments
 (0)