Skip to content

Commit bede4c2

Browse files
authored
Test and fix for bug in selectMapimpl (#726)
### Rationale for this change Maps of Nullables produce corrupted parquet. ### What changes are included in this PR? The existing tests for `TakeKernelMap` are made runnable by correcting the test json to be suitable input for `array.FromJSON()`. Added a condition in `assertTakeArrays()` that checks that the data type has been preserved. Using the C++ implementation as a reference. ### Are these changes tested? Yes, but not deeply. ### Are there any user-facing changes? The api is unchanged, however parquet output will change slightly for those files containing maps with optional values.
1 parent 0ff2395 commit bede4c2

2 files changed

Lines changed: 36 additions & 28 deletions

File tree

arrow/compute/selection.go

Lines changed: 9 additions & 28 deletions
Original file line numberDiff line numberDiff line change
@@ -26,7 +26,6 @@ import (
2626
"github.com/apache/arrow-go/v18/arrow/array"
2727
"github.com/apache/arrow-go/v18/arrow/compute/exec"
2828
"github.com/apache/arrow-go/v18/arrow/compute/internal/kernels"
29-
"github.com/apache/arrow-go/v18/arrow/memory"
3029
"golang.org/x/sync/errgroup"
3130
)
3231

@@ -375,37 +374,19 @@ func selectMapImpl(fn exec.ArrayKernelExec) exec.ArrayKernelExec {
375374
childIndices := out.Children[0].MakeArray()
376375
defer childIndices.Release()
377376

378-
// Maps have a single struct child containing the keys and items
379-
// We need to take from both the keys and items arrays
380-
takenKeys, err := TakeArrayOpts(ctx.Ctx, values.Keys(), childIndices, kernels.TakeOptions{BoundsCheck: false})
381-
if err != nil {
382-
return err
383-
}
384-
defer takenKeys.Release()
377+
// Take the entire struct child as a unit, preserving all field metadata
378+
// (including Nullable flags and validity bitmaps). This matches the C++
379+
// reference implementation which calls Take on typed_values.values() whole.
380+
structChild := array.MakeFromData(values.Data().(*array.Data).Children()[0])
381+
defer structChild.Release()
385382

386-
takenItems, err := TakeArrayOpts(ctx.Ctx, values.Items(), childIndices, kernels.TakeOptions{BoundsCheck: false})
383+
takenStruct, err := TakeArrayOpts(ctx.Ctx, structChild, childIndices, kernels.TakeOptions{BoundsCheck: false})
387384
if err != nil {
388385
return err
389386
}
390-
defer takenItems.Release()
391-
392-
// Build the struct child array with the taken keys and items
393-
// Maps have a single struct child with "key" and "value" fields
394-
structType := arrow.StructOf(
395-
arrow.Field{Name: "key", Type: values.Keys().DataType()},
396-
arrow.Field{Name: "value", Type: values.Items().DataType()},
397-
)
398-
399-
// Create struct data with taken keys and items as children
400-
structData := array.NewData(
401-
structType,
402-
int(childIndices.Len()),
403-
[]*memory.Buffer{nil},
404-
[]arrow.ArrayData{takenKeys.Data(), takenItems.Data()},
405-
0, 0)
406-
defer structData.Release()
407-
408-
out.Children[0].TakeOwnership(structData)
387+
defer takenStruct.Release()
388+
389+
out.Children[0].TakeOwnership(takenStruct.Data())
409390
return nil
410391
}
411392
}

arrow/compute/vector_selection_test.go

Lines changed: 27 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1769,6 +1769,33 @@ func (tk *TakeKernelTestTable) TestTakeTable() {
17691769
tk.assertChunkedTake(schm, tblJSON, []string{`[0, 1]`, `[2, 3]`}, tblJSON)
17701770
}
17711771

1772+
// TestMapPreservesValueNullable verifies that Take preserves the Map's struct
1773+
// child type exactly. arrow.MapOf marks the value field Nullable: true; a
1774+
// buggy selectMapImpl reconstructs the struct child via arrow.StructOf field
1775+
// literals whose Nullable defaults to false, silently dropping the flag.
1776+
// The outer MapType is always preserved by the kernel framework, so the
1777+
// mismatch must be checked on the struct child directly.
1778+
func TestMapPreservesValueNullable(t *testing.T) {
1779+
mem := memory.NewCheckedAllocator(memory.DefaultAllocator)
1780+
defer mem.AssertSize(t, 0)
1781+
ctx := compute.WithAllocator(context.TODO(), mem)
1782+
1783+
dt := arrow.MapOf(arrow.BinaryTypes.String, arrow.PrimitiveTypes.Int32)
1784+
values, _, _ := array.FromJSON(mem, dt, strings.NewReader(`[[{"key": "a", "value": 1}]]`), array.WithUseNumber())
1785+
defer values.Release()
1786+
indices, _, _ := array.FromJSON(mem, arrow.PrimitiveTypes.Int8, strings.NewReader(`[0]`))
1787+
defer indices.Release()
1788+
1789+
actual, err := compute.TakeArray(ctx, values, indices)
1790+
require.NoError(t, err)
1791+
defer actual.Release()
1792+
1793+
wantChildType := values.Data().Children()[0].DataType()
1794+
gotChildType := actual.Data().Children()[0].DataType()
1795+
assert.Truef(t, arrow.TypeEqual(wantChildType, gotChildType),
1796+
"Map struct child type not preserved:\nwant: %s\n got: %s", wantChildType, gotChildType)
1797+
}
1798+
17721799
func TestTakeKernels(t *testing.T) {
17731800
suite.Run(t, new(TakeKernelTest))
17741801
for _, dt := range numericTypes {

0 commit comments

Comments
 (0)