Skip to content

Commit 2e91f88

Browse files
mergify[bot], yihuang, tac0turtle
authored
feat: more compact exported snapshot (backport #703) (#743)
Co-authored-by: yihuang <huang@crypto.com> Co-authored-by: Marko <marbar3778@yahoo.com>
1 parent 6553fd6 commit 2e91f88

3 files changed

Lines changed: 237 additions & 37 deletions

File tree

CHANGELOG.md

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -5,6 +5,7 @@
55
### Improvements
66

77
- [#726](https://github.com/cosmos/iavl/pull/726) Make `KVPair` and `ChangeSet` serializable with protobuf.
8+
- [#703](https://github.com/cosmos/iavl/pull/703) New APIs `NewCompressExporter`/`NewCompressImporter` to support more compact snapshot format.
89

910
### Breaking Changes
1011

compress.go

Lines changed: 144 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,144 @@
1+
package iavl
2+
3+
import (
4+
"encoding/binary"
5+
"fmt"
6+
)
7+
8+
type NodeExporter interface {
9+
Next() (*ExportNode, error)
10+
}
11+
12+
type NodeImporter interface {
13+
Add(*ExportNode) error
14+
}
15+
16+
// CompressExporter wraps the normal exporter to apply some compressions on `ExportNode`:
17+
// - branch keys are skipped
18+
// - leaf keys are encoded with delta compared with the previous leaf
19+
// - branch node's version are encoded with delta compared with the max version in it's children
20+
type CompressExporter struct {
21+
inner NodeExporter
22+
lastKey []byte
23+
versionStack []int64
24+
}
25+
26+
var _ NodeExporter = (*CompressExporter)(nil)
27+
28+
func NewCompressExporter(exporter NodeExporter) NodeExporter {
29+
return &CompressExporter{inner: exporter}
30+
}
31+
32+
func (e *CompressExporter) Next() (*ExportNode, error) {
33+
n, err := e.inner.Next()
34+
if err != nil {
35+
return nil, err
36+
}
37+
38+
if n.Height == 0 {
39+
// apply delta encoding to leaf keys
40+
n.Key, e.lastKey = deltaEncode(n.Key, e.lastKey), n.Key
41+
42+
e.versionStack = append(e.versionStack, n.Version)
43+
} else {
44+
// branch keys can be derived on the fly when import, safe to skip
45+
n.Key = nil
46+
47+
// delta encode the version
48+
maxVersion := maxInt64(e.versionStack[len(e.versionStack)-1], e.versionStack[len(e.versionStack)-2])
49+
e.versionStack = e.versionStack[:len(e.versionStack)-1]
50+
e.versionStack[len(e.versionStack)-1] = n.Version
51+
n.Version -= maxVersion
52+
}
53+
54+
return n, nil
55+
}
56+
57+
// CompressImporter wraps the normal importer to do de-compressions before hand.
58+
type CompressImporter struct {
59+
inner NodeImporter
60+
lastKey []byte
61+
minKeyStack [][]byte
62+
versionStack []int64
63+
}
64+
65+
var _ NodeImporter = (*CompressImporter)(nil)
66+
67+
func NewCompressImporter(importer NodeImporter) NodeImporter {
68+
return &CompressImporter{inner: importer}
69+
}
70+
71+
func (i *CompressImporter) Add(node *ExportNode) error {
72+
if node.Height == 0 {
73+
key, err := deltaDecode(node.Key, i.lastKey)
74+
if err != nil {
75+
return err
76+
}
77+
node.Key = key
78+
i.lastKey = key
79+
80+
i.minKeyStack = append(i.minKeyStack, key)
81+
i.versionStack = append(i.versionStack, node.Version)
82+
} else {
83+
// use the min-key in right branch as the node key
84+
node.Key = i.minKeyStack[len(i.minKeyStack)-1]
85+
// leave the min-key in left branch in the stack
86+
i.minKeyStack = i.minKeyStack[:len(i.minKeyStack)-1]
87+
88+
// decode branch node version
89+
maxVersion := maxInt64(i.versionStack[len(i.versionStack)-1], i.versionStack[len(i.versionStack)-2])
90+
node.Version += maxVersion
91+
i.versionStack = i.versionStack[:len(i.versionStack)-1]
92+
i.versionStack[len(i.versionStack)-1] = node.Version
93+
}
94+
95+
return i.inner.Add(node)
96+
}
97+
98+
func deltaEncode(key, lastKey []byte) []byte {
99+
var sizeBuf [binary.MaxVarintLen64]byte
100+
shared := diffOffset(lastKey, key)
101+
n := binary.PutUvarint(sizeBuf[:], uint64(shared))
102+
return append(sizeBuf[:n], key[shared:]...)
103+
}
104+
105+
// deltaDecode reverses deltaEncode: key starts with a uvarint giving the
// number of leading bytes shared with lastKey, followed by the remaining
// bytes of the original key.
//
// It returns an error when the uvarint header cannot be parsed or when the
// shared length is larger than lastKey, which would otherwise slice out of
// range and panic on malformed input. When the shared length is zero the
// returned slice aliases the input key (no copy is made).
func deltaDecode(key, lastKey []byte) ([]byte, error) {
	shared, n := binary.Uvarint(key)
	if n <= 0 {
		return nil, fmt.Errorf("uvarint parse failed %d", n)
	}
	// The shared prefix is copied out of lastKey, so it cannot be longer.
	if shared > uint64(len(lastKey)) {
		return nil, fmt.Errorf("shared prefix length %d exceeds previous key length %d", shared, len(lastKey))
	}

	suffix := key[n:]
	if shared == 0 {
		return suffix, nil
	}

	// Safe conversion: shared is bounded by len(lastKey) above.
	prefixLen := int(shared)
	newKey := make([]byte, prefixLen+len(suffix))
	copy(newKey, lastKey[:prefixLen])
	copy(newKey[prefixLen:], suffix)
	return newKey, nil
}
121+
122+
// diffOffset returns the index of the first byte at which a and b differ. If
// one slice is a prefix of the other (or they are equal), it returns the
// length of the shorter slice.
func diffOffset(a, b []byte) int {
	limit := len(b)
	if len(a) < limit {
		limit = len(a)
	}
	for i := 0; i < limit; i++ {
		if a[i] != b[i] {
			return i
		}
	}
	return limit
}
138+
139+
// maxInt64 returns the larger of a and b.
func maxInt64(a, b int64) int64 {
	if b >= a {
		return b
	}
	return a
}

export_test.go

Lines changed: 92 additions & 37 deletions
Original file line numberDiff line numberDiff line change
@@ -49,6 +49,8 @@ func setupExportTreeBasic(t require.TestingT) *ImmutableTree {
4949
require.NoError(t, err)
5050
_, _, err = tree.Remove([]byte("z"))
5151
require.NoError(t, err)
52+
_, err = tree.Set([]byte("abc"), []byte{6})
53+
require.NoError(t, err)
5254
_, version, err := tree.SaveVersion()
5355
require.NoError(t, err)
5456

@@ -162,10 +164,12 @@ func TestExporter(t *testing.T) {
162164

163165
expect := []*ExportNode{
164166
{Key: []byte("a"), Value: []byte{1}, Version: 1, Height: 0},
167+
{Key: []byte("abc"), Value: []byte{6}, Version: 3, Height: 0},
168+
{Key: []byte("abc"), Value: nil, Version: 3, Height: 1},
165169
{Key: []byte("b"), Value: []byte{2}, Version: 3, Height: 0},
166-
{Key: []byte("b"), Value: nil, Version: 3, Height: 1},
167170
{Key: []byte("c"), Value: []byte{3}, Version: 3, Height: 0},
168-
{Key: []byte("c"), Value: nil, Version: 3, Height: 2},
171+
{Key: []byte("c"), Value: nil, Version: 3, Height: 1},
172+
{Key: []byte("b"), Value: nil, Version: 3, Height: 2},
169173
{Key: []byte("d"), Value: []byte{4}, Version: 2, Height: 0},
170174
{Key: []byte("e"), Value: []byte{5}, Version: 3, Height: 0},
171175
{Key: []byte("e"), Value: nil, Version: 3, Height: 1},
@@ -188,6 +192,41 @@ func TestExporter(t *testing.T) {
188192
assert.Equal(t, expect, actual)
189193
}
190194

195+
func TestExporterCompress(t *testing.T) {
196+
tree := setupExportTreeBasic(t)
197+
198+
expect := []*ExportNode{
199+
{Key: []byte{0, 'a'}, Value: []byte{1}, Version: 1, Height: 0},
200+
{Key: []byte{1, 'b', 'c'}, Value: []byte{6}, Version: 3, Height: 0},
201+
{Key: nil, Value: nil, Version: 0, Height: 1},
202+
{Key: []byte{0, 'b'}, Value: []byte{2}, Version: 3, Height: 0},
203+
{Key: []byte{0, 'c'}, Value: []byte{3}, Version: 3, Height: 0},
204+
{Key: nil, Value: nil, Version: 0, Height: 1},
205+
{Key: nil, Value: nil, Version: 0, Height: 2},
206+
{Key: []byte{0, 'd'}, Value: []byte{4}, Version: 2, Height: 0},
207+
{Key: []byte{0, 'e'}, Value: []byte{5}, Version: 3, Height: 0},
208+
{Key: nil, Value: nil, Version: 0, Height: 1},
209+
{Key: nil, Value: nil, Version: 0, Height: 3},
210+
}
211+
212+
actual := make([]*ExportNode, 0, len(expect))
213+
innerExporter, err := tree.Export()
214+
require.NoError(t, err)
215+
defer innerExporter.Close()
216+
217+
exporter := NewCompressExporter(innerExporter)
218+
for {
219+
node, err := exporter.Next()
220+
if err == ErrorExportDone {
221+
break
222+
}
223+
require.NoError(t, err)
224+
actual = append(actual, node)
225+
}
226+
227+
assert.Equal(t, expect, actual)
228+
}
229+
191230
func TestExporter_Import(t *testing.T) {
192231
testcases := map[string]*ImmutableTree{
193232
"empty tree": NewImmutableTree(db.NewMemDB(), 0, false),
@@ -200,50 +239,66 @@ func TestExporter_Import(t *testing.T) {
200239

201240
for desc, tree := range testcases {
202241
tree := tree
203-
t.Run(desc, func(t *testing.T) {
204-
t.Parallel()
205-
206-
exporter, err := tree.Export()
207-
require.NoError(t, err)
208-
defer exporter.Close()
209-
210-
newTree, err := NewMutableTree(db.NewMemDB(), 0, false)
211-
require.NoError(t, err)
212-
importer, err := newTree.Import(tree.Version())
213-
require.NoError(t, err)
214-
defer importer.Close()
215-
216-
for {
217-
item, err := exporter.Next()
218-
if err == ErrorExportDone {
219-
err = importer.Commit()
220-
require.NoError(t, err)
221-
break
242+
for _, compress := range []bool{false, true} {
243+
if compress {
244+
desc += "-compress"
245+
}
246+
compress := compress
247+
t.Run(desc, func(t *testing.T) {
248+
t.Parallel()
249+
250+
innerExporter, err := tree.Export()
251+
require.NoError(t, err)
252+
defer innerExporter.Close()
253+
254+
exporter := NodeExporter(innerExporter)
255+
if compress {
256+
exporter = NewCompressExporter(innerExporter)
222257
}
258+
259+
newTree, err := NewMutableTree(db.NewMemDB(), 0, false)
223260
require.NoError(t, err)
224-
err = importer.Add(item)
261+
innerImporter, err := newTree.Import(tree.Version())
225262
require.NoError(t, err)
226-
}
263+
defer innerImporter.Close()
227264

228-
treeHash, err := tree.Hash()
229-
require.NoError(t, err)
230-
newTreeHash, err := newTree.Hash()
231-
require.NoError(t, err)
265+
importer := NodeImporter(innerImporter)
266+
if compress {
267+
importer = NewCompressImporter(innerImporter)
268+
}
232269

233-
require.Equal(t, treeHash, newTreeHash, "Tree hash mismatch")
234-
require.Equal(t, tree.Size(), newTree.Size(), "Tree size mismatch")
235-
require.Equal(t, tree.Version(), newTree.Version(), "Tree version mismatch")
270+
for {
271+
item, err := exporter.Next()
272+
if err == ErrorExportDone {
273+
err = innerImporter.Commit()
274+
require.NoError(t, err)
275+
break
276+
}
277+
require.NoError(t, err)
278+
err = importer.Add(item)
279+
require.NoError(t, err)
280+
}
236281

237-
tree.Iterate(func(key, value []byte) bool { //nolint:errcheck
238-
index, _, err := tree.GetWithIndex(key)
282+
treeHash, err := tree.Hash()
239283
require.NoError(t, err)
240-
newIndex, newValue, err := newTree.GetWithIndex(key)
284+
newTreeHash, err := newTree.Hash()
241285
require.NoError(t, err)
242-
require.Equal(t, index, newIndex, "Index mismatch for key %v", key)
243-
require.Equal(t, value, newValue, "Value mismatch for key %v", key)
244-
return false
286+
287+
require.Equal(t, treeHash, newTreeHash, "Tree hash mismatch")
288+
require.Equal(t, tree.Size(), newTree.Size(), "Tree size mismatch")
289+
require.Equal(t, tree.Version(), newTree.Version(), "Tree version mismatch")
290+
291+
tree.Iterate(func(key, value []byte) bool { //nolint:errcheck
292+
index, _, err := tree.GetWithIndex(key)
293+
require.NoError(t, err)
294+
newIndex, newValue, err := newTree.GetWithIndex(key)
295+
require.NoError(t, err)
296+
require.Equal(t, index, newIndex, "Index mismatch for key %v", key)
297+
require.Equal(t, value, newValue, "Value mismatch for key %v", key)
298+
return false
299+
})
245300
})
246-
})
301+
}
247302
}
248303
}
249304

0 commit comments

Comments
 (0)