Skip to content

Commit fc20f37

Browse files
Willem-J-anWillem Jan Noort
andauthored
fix(avro): correctly set nullability for ListType (#709)
### Rationale for this change Nullability of a list field itself is lost when converting from avro to arrow schema; only the nullability of the list members is applied. ### What changes are included in this PR? Use buildArrowField function for listfield to leverage arrow field creation; which applies nullability as expected. Also use buildArrowField for float,double,boolean case for consistency. ### Are these changes tested? Added testcase for a nullable list. ### Are there any user-facing changes? Debatable --------- Co-authored-by: Willem Jan Noort <WillemJan.Noort@essent.nl>
1 parent 6317dba commit fc20f37

5 files changed

Lines changed: 35 additions & 12 deletions

File tree

arrow/avro/reader_test.go

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -80,6 +80,11 @@ func TestReader(t *testing.T) {
8080
Type: arrow.BinaryTypes.Binary,
8181
Nullable: true,
8282
},
83+
{
84+
Name: "nullable_remote_ips",
85+
Type: arrow.ListOfNonNullable(arrow.BinaryTypes.Binary),
86+
Nullable: true,
87+
},
8388
{
8489
Name: "person",
8590
Type: arrow.StructOf(

arrow/avro/schema.go

Lines changed: 6 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -106,7 +106,7 @@ func arrowSchemafromAvro(n *schemaNode) {
106106
k := strconv.FormatInt(int64(index), 10)
107107
symbols[k] = symbol
108108
}
109-
var dt = arrow.DictionaryType{IndexType: arrow.PrimitiveTypes.Uint64, ValueType: arrow.BinaryTypes.String, Ordered: false}
109+
dt := arrow.DictionaryType{IndexType: arrow.PrimitiveTypes.Uint64, ValueType: arrow.BinaryTypes.String, Ordered: false}
110110
sl := int64(len(symbols))
111111
switch {
112112
case sl <= math.MaxUint8:
@@ -125,12 +125,14 @@ func arrowSchemafromAvro(n *schemaNode) {
125125
} else {
126126
arrowSchemafromAvro(c)
127127
}
128+
var typ *arrow.ListType
128129
switch c.arrowField.Nullable {
129130
case true:
130-
n.arrowField = arrow.Field{Name: n.name, Type: arrow.ListOfField(c.arrowField), Metadata: c.arrowField.Metadata}
131+
typ = arrow.ListOfField(c.arrowField)
131132
case false:
132-
n.arrowField = arrow.Field{Name: n.name, Type: arrow.ListOfNonNullable(c.arrowField.Type), Metadata: c.arrowField.Metadata}
133+
typ = arrow.ListOfNonNullable(c.arrowField.Type)
133134
}
135+
n.arrowField = buildArrowField(n, typ, c.arrowField.Metadata)
134136
case "map":
135137
n.schemaCache.Add(n.schema.(*avro.MapSchema).Values().(avro.NamedSchema).Name(), n.schema.(*avro.MapSchema).Values())
136138
c := n.newChild(n.name, n.schema.(*avro.MapSchema).Values())
@@ -160,7 +162,7 @@ func arrowSchemafromAvro(n *schemaNode) {
160162
n.arrowField = buildArrowField(n, avroPrimitiveToArrowType(string(st)), arrow.Metadata{})
161163
}
162164
case "float", "double", "boolean":
163-
n.arrowField = arrow.Field{Name: n.name, Type: avroPrimitiveToArrowType(string(st)), Nullable: n.nullable}
165+
n.arrowField = buildArrowField(n, avroPrimitiveToArrowType(string(st)), arrow.Metadata{})
164166
case "<ref>":
165167
refSchema := n.schemaCache.Get(string(n.schema.(*avro.RefSchema).Schema().Name()))
166168
if refSchema == nil {

arrow/avro/schema_test.go

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -79,6 +79,11 @@ func TestSchemaStringEqual(t *testing.T) {
7979
Type: arrow.BinaryTypes.Binary,
8080
Nullable: true,
8181
},
82+
{
83+
Name: "nullable_remote_ips",
84+
Type: arrow.ListOfNonNullable(arrow.BinaryTypes.Binary),
85+
Nullable: true,
86+
},
8287
{
8388
Name: "person",
8489
Type: arrow.StructOf(

arrow/avro/testdata/alltypes.avsc

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -85,6 +85,16 @@
8585
"bytes"
8686
]
8787
},
88+
{
89+
"name": "nullable_remote_ips",
90+
"type": [
91+
"null",
92+
{
93+
"type": "array",
94+
"items": "bytes"
95+
}
96+
]
97+
},
8898
{
8999
"name": "person",
90100
"type": {

arrow/avro/testdata/testdata.go

Lines changed: 9 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -126,6 +126,7 @@ type Example struct {
126126
Fraction *float64 `avro:"fraction" json:"fraction"`
127127
IsEmergency bool `avro:"is_emergency" json:"is_emergency"`
128128
RemoteIP *ByteArray `avro:"remote_ip" json:"remote_ip"`
129+
NullableRemoteIPS *[]ByteArray `avro:"nullable_remote_ips" json:"nullable_remote_ips"`
129130
Person PersonData `avro:"person" json:"person"`
130131
DecimalField DecimalType `avro:"decimalField" json:"decimalField"`
131132
Decimal256Field DecimalType `avro:"decimal256Field" json:"decimal256Field"`
@@ -215,12 +216,12 @@ func sampleData() Example {
215216
InheritNamespace: "d",
216217
Md5: MD5{0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15},
217218
},
218-
ID: 42,
219-
BigID: 42000000000,
220-
Temperature: func() *float32 { v := float32(36.6); return &v }(),
221-
Fraction: func() *float64 { v := float64(0.75); return &v }(),
222-
IsEmergency: true,
223-
RemoteIP: func() *ByteArray { v := ByteArray{192, 168, 1, 1}; return &v }(),
219+
ID: 42,
220+
BigID: 42000000000,
221+
Temperature: func() *float32 { v := float32(36.6); return &v }(),
222+
Fraction: func() *float64 { v := float64(0.75); return &v }(),
223+
IsEmergency: true,
224+
RemoteIP: func() *ByteArray { v := ByteArray{192, 168, 1, 1}; return &v }(),
224225
Person: PersonData{
225226
Lastname: "Doe",
226227
Address: AddressUSRecord{
@@ -248,7 +249,7 @@ func sampleData() Example {
248249

249250
func writeOCFSampleData(td string, data Example) string {
250251
path := filepath.Join(td, sampleAvroFileName)
251-
ocfFile, err := os.OpenFile(path, os.O_CREATE|os.O_WRONLY|os.O_TRUNC, 0644)
252+
ocfFile, err := os.OpenFile(path, os.O_CREATE|os.O_WRONLY|os.O_TRUNC, 0o644)
252253
if err != nil {
253254
log.Fatal(err)
254255
}
@@ -272,7 +273,7 @@ func writeOCFSampleData(td string, data Example) string {
272273

273274
func writeJSONSampleData(td string, data Example) string {
274275
path := filepath.Join(td, sampleJSONFileName)
275-
jsonFile, err := os.OpenFile(path, os.O_CREATE|os.O_WRONLY|os.O_TRUNC, 0644)
276+
jsonFile, err := os.OpenFile(path, os.O_CREATE|os.O_WRONLY|os.O_TRUNC, 0o644)
276277
if err != nil {
277278
log.Fatal(err)
278279
}

0 commit comments

Comments
 (0)