Skip to content

Commit dffa884

Browse files
authored
Store indices to disk (#279)
1 parent eda7f03 commit dffa884

22 files changed

+968
-503
lines changed

include/Compiler/CompilationUnit.h

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,7 @@
11
#pragma once
22

3+
#include <chrono>
4+
35
#include "Directive.h"
46
#include "Compiler/Diagnostic.h"
57
#include "AST/SymbolID.h"
@@ -166,6 +168,10 @@ class CompilationUnit {
166168

167169
auto top_level_decls() -> llvm::ArrayRef<clang::Decl*>;
168170

171+
std::chrono::milliseconds build_at();
172+
173+
std::chrono::milliseconds build_duration();
174+
169175
clang::LangOptions& lang_options();
170176

171177
clang::ASTContext& context();

include/Index/IncludeGraph.h

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -12,13 +12,15 @@ namespace clice::index {
1212

1313
struct IncludeLocation {
1414
/// The file path of the include directive.
15-
std::uint32_t path = -1;
15+
std::uint32_t path_id = -1;
1616

1717
/// The line number of the include directive, 1-based.
1818
std::uint32_t line = -1;
1919

2020
/// The include location that introduces this file.
2121
std::uint32_t include = -1;
22+
23+
friend bool operator== (const IncludeLocation&, const IncludeLocation&) = default;
2224
};
2325

2426
struct IncludeGraph {
@@ -51,7 +53,7 @@ struct IncludeGraph {
5153
std::uint32_t path_id(clang::FileID fid) {
5254
auto include = include_location_id(fid);
5355
if(include != -1) {
54-
return locations[include].path;
56+
return locations[include].path_id;
5557
} else {
5658
return paths.size() - 1;
5759
}

include/Index/MergedIndex.h

Lines changed: 52 additions & 125 deletions
Original file line numberDiff line numberDiff line change
@@ -1,156 +1,83 @@
11
#pragma once
22

33
#include "TUIndex.h"
4-
#include "Support/Bitmap.h"
54
#include "llvm/Support/Allocator.h"
5+
#include "llvm/Support/MemoryBuffer.h"
66

7-
namespace llvm {
8-
9-
template <typename... Ts>
10-
unsigned dense_hash(const Ts&... ts) {
11-
return llvm::DenseMapInfo<std::tuple<Ts...>>::getHashValue(std::tuple{ts...});
12-
}
13-
14-
template <>
15-
struct DenseMapInfo<clice::index::Occurrence> {
16-
using R = clice::LocalSourceRange;
17-
using V = clice::index::Occurrence;
18-
19-
inline static V getEmptyKey() {
20-
return V(R(-1, 0), 0);
21-
}
22-
23-
inline static V getTombstoneKey() {
24-
return V(R(-2, 0), 0);
25-
}
26-
27-
static auto getHashValue(const V& v) {
28-
return dense_hash(v.range.begin, v.range.end, v.target);
29-
}
30-
31-
static bool isEqual(const V& lhs, const V& rhs) {
32-
return lhs.range == rhs.range && lhs.target == rhs.target;
33-
}
34-
};
7+
namespace clice::index {
358

36-
template <>
37-
struct DenseMapInfo<clice::index::Relation> {
38-
using R = clice::index::Relation;
9+
class MergedIndex {
10+
private:
11+
struct Impl;
3912

40-
inline static R getEmptyKey() {
41-
return R{
42-
.kind = clice::RelationKind(),
43-
.range = clice::LocalSourceRange(-1, 0),
44-
.target_symbol = 0,
45-
};
46-
}
13+
using Self = MergedIndex;
4714

48-
inline static R getTombstoneKey() {
49-
return R{
50-
.kind = clice::RelationKind(),
51-
.range = clice::LocalSourceRange(-2, 0),
52-
.target_symbol = 0,
53-
};
54-
}
15+
MergedIndex(std::unique_ptr<llvm::MemoryBuffer> buffer, std::unique_ptr<Impl> impl);
5516

56-
/// Contextual doesn't take part in hashing and equality.
57-
static auto getHashValue(const R& relation) {
58-
return dense_hash(relation.kind.value(),
59-
relation.range.begin,
60-
relation.range.end,
61-
relation.target_symbol);
62-
}
17+
void load_in_memory(this Self& self);
6318

64-
static bool isEqual(const R& lhs, const R& rhs) {
65-
return lhs.kind == rhs.kind && lhs.range == rhs.range &&
66-
lhs.target_symbol == rhs.target_symbol;
67-
}
68-
};
19+
public:
20+
MergedIndex();
6921

70-
} // namespace llvm
22+
MergedIndex(llvm::StringRef data);
7123

72-
namespace clice::index {
73-
74-
/// struct CompilationContext {
75-
/// /// The target of this compilation.
76-
/// llvm::StringRef target;
77-
///
78-
/// /// The canonical compilation command.
79-
/// llvm::StringRef command;
80-
///
81-
/// /// A version field for verification.
82-
/// std::uint32_t version;
83-
/// };
84-
///
85-
/// struct HeaderContext : CompilationContext {
86-
/// /// The include location in the include graph.
87-
/// std::uint32_t include;
88-
///
89-
/// /// The path of the file includes this header.
90-
/// llvm::StringRef path;
91-
/// };
92-
93-
struct HeaderContexts {
94-
std::uint32_t version = 0;
95-
96-
struct Context {
97-
std::uint32_t include;
98-
std::uint32_t canonical_id;
99-
100-
friend bool operator== (const Context&, const Context&) = default;
101-
};
102-
103-
/// A array of include location and its context id.
104-
llvm::SmallVector<Context> includes;
105-
106-
friend bool operator== (const HeaderContexts&, const HeaderContexts&) = default;
107-
};
24+
MergedIndex(const MergedIndex&) = delete;
10825

109-
struct MergedIndex {
110-
/// For each merged index, we will give it a canonical id.
111-
/// The max canonical id.
112-
std::uint32_t max_canonical_id = 0;
26+
MergedIndex(MergedIndex&& other);
11327

114-
/// We use the value of SHA256 to judge whether two indices are same.
115-
/// Index with same content will be given same canonical id.
116-
llvm::StringMap<std::uint32_t> canonical_cache;
28+
MergedIndex& operator= (const MergedIndex&) = delete;
11729

118-
/// The reference count of each canonical id.
119-
std::vector<std::uint32_t> canonical_ref_counts;
30+
MergedIndex& operator= (MergedIndex&& other);
12031

121-
/// The canonical id set of removed index.
122-
roaring::Roaring removed;
32+
~MergedIndex();
12333

124-
/// A map between source file path and its header contexts.
125-
llvm::StringMap<HeaderContexts> contexts;
34+
/// Load merged index from disk
35+
static MergedIndex load(llvm::StringRef path);
12636

127-
/// All merged symbol occurrences.
128-
llvm::DenseMap<Occurrence, roaring::Roaring> occurrences;
37+
/// Serialize it to binary format.
38+
void serialize(this const Self& self, llvm::raw_ostream& out);
12939

130-
/// All merged symbol relations.
131-
llvm::DenseMap<SymbolHash, llvm::DenseMap<Relation, roaring::Roaring>> relations;
40+
/// Lookup the occurrence in corresponding offset.
41+
void lookup(this const Self& self,
42+
std::uint32_t offset,
43+
llvm::function_ref<bool(const Occurrence&)> callback);
13244

133-
/// FIXME: The content of this file.
134-
/// std::string content;
45+
/// Lookup the relations of given symbol.
46+
void lookup(this const Self& self,
47+
SymbolHash symbol,
48+
RelationKind kind,
49+
llvm::function_ref<bool(const Relation&)> callback);
13550

136-
/// Sorted occurrences cache for fast lookup.
137-
std::vector<Occurrence> cache_occurrences;
51+
/// Whether this index needs rebuilding.
52+
bool need_update(this const Self& self, llvm::ArrayRef<llvm::StringRef> path_mapping);
13853

139-
void remove(llvm::StringRef path);
54+
bool need_rewrite() {
55+
return impl != nullptr;
56+
}
14057

141-
void merge(llvm::StringRef path, std::uint32_t include, FileIndex& index);
58+
/// Remove the index of specific path id.
59+
void remove(this Self& self, std::uint32_t path_id);
14260

143-
std::vector<Occurrence> lookup(std::uint32_t offset);
61+
/// Merge the index with given compilation context.
62+
void merge(this Self& self,
63+
std::uint32_t path_id,
64+
std::chrono::milliseconds build_at,
65+
std::vector<IncludeLocation> include_locations,
66+
FileIndex& index);
14467

145-
void serialize(this MergedIndex& self, llvm::raw_ostream& out);
68+
/// Merge the index with given header context.
69+
void merge(this Self& self, std::uint32_t path_id, std::uint32_t include_id, FileIndex& index);
14670

147-
friend bool operator== (const MergedIndex&, const MergedIndex&) = default;
148-
};
71+
friend bool operator== (MergedIndex& lhs, MergedIndex& rhs);
14972

150-
struct MergedIndexView {
151-
const void* data;
73+
private:
74+
/// The binary serialization data of index. If you load merged index
75+
/// from disk, we directly access the data without deserialization
76+
/// unless you want to modify it.
77+
std::unique_ptr<llvm::MemoryBuffer> buffer;
15278

153-
MergedIndex deserialize();
79+
/// The in memory data of the index.
80+
std::unique_ptr<Impl> impl;
15481
};
15582

15683
} // namespace clice::index

include/Index/ProjectIndex.h

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -47,7 +47,7 @@ struct ProjectIndex {
4747

4848
SymbolTable symbols;
4949

50-
void merge(this ProjectIndex& self, TUIndex& index);
50+
llvm::SmallVector<std::uint32_t> merge(this ProjectIndex& self, TUIndex& index);
5151

5252
void serialize(this ProjectIndex& self, llvm::raw_ostream& os);
5353

include/Index/TUIndex.h

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,6 @@
11
#pragma once
22

3+
#include <chrono>
34
#include "IncludeGraph.h"
45
#include "AST/SourceCode.h"
56
#include "AST/SymbolKind.h"
@@ -43,6 +44,8 @@ struct FileIndex {
4344
llvm::DenseMap<SymbolHash, std::vector<Relation>> relations;
4445

4546
std::vector<Occurrence> occurrences;
47+
48+
std::array<std::uint8_t, 32> hash();
4649
};
4750

4851
struct Symbol {
@@ -59,12 +62,18 @@ struct Symbol {
5962
using SymbolTable = llvm::DenseMap<SymbolHash, Symbol>;
6063

6164
struct TUIndex {
65+
/// The building timestamp of this file.
66+
std::chrono::milliseconds built_at;
67+
68+
/// The include information of this file.
6269
IncludeGraph graph;
6370

6471
SymbolTable symbols;
6572

6673
llvm::DenseMap<clang::FileID, FileIndex> file_indices;
6774

75+
FileIndex main_file_index;
76+
6877
static TUIndex build(CompilationUnit& unit);
6978
};
7079

include/Index/schema.fbs

Lines changed: 20 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -22,19 +22,29 @@ table CacheEntry {
2222
canonical_id: uint;
2323
}
2424

25-
struct Context {
26-
include_: uint;
25+
struct IncludeContext {
26+
include_id: uint;
2727
canonical_id: uint;
2828
}
2929

30-
table HeaderContexts {
30+
table HeaderContextEntry {
31+
path_id: uint;
3132
version: uint;
32-
includes: [Context];
33+
includes: [IncludeContext];
3334
}
3435

35-
table HeaderContextsEntry {
36-
path: string;
37-
contexts: HeaderContexts;
36+
struct IncludeLocation {
37+
path_id: uint;
38+
line: uint;
39+
include_id: uint;
40+
}
41+
42+
table CompilationContextEntry {
43+
path_id: uint;
44+
version: uint;
45+
canonical_id: uint;
46+
build_at: ulong;
47+
include_locations: [IncludeLocation];
3848
}
3949

4050
table OccurrenceEntry {
@@ -67,7 +77,9 @@ table MergedIndex {
6777

6878
canonical_cache: [CacheEntry];
6979

70-
contexts: [HeaderContextsEntry];
80+
header_contexts: [HeaderContextEntry];
81+
82+
compilation_contexts: [CompilationContextEntry];
7183

7284
occurrences: [OccurrenceEntry];
7385

0 commit comments

Comments
 (0)