@@ -60,8 +60,7 @@ use crate::arrow::converter::{
6060 DecimalConverter , FixedLenBinaryConverter , FixedSizeArrayConverter ,
6161 Int96ArrayConverter , Int96Converter , IntervalDayTimeArrayConverter ,
6262 IntervalDayTimeConverter , IntervalYearMonthArrayConverter ,
63- IntervalYearMonthConverter , LargeBinaryArrayConverter , LargeBinaryConverter ,
64- LargeUtf8ArrayConverter , LargeUtf8Converter ,
63+ IntervalYearMonthConverter , Utf8ArrayConverter , Utf8Converter ,
6564} ;
6665use crate :: arrow:: record_reader:: buffer:: { ScalarValue , ValuesBuffer } ;
6766use crate :: arrow:: record_reader:: { GenericRecordReader , RecordReader } ;
@@ -81,6 +80,11 @@ use crate::schema::types::{
8180} ;
8281use crate :: schema:: visitor:: TypeVisitor ;
8382
83+ mod byte_array;
84+ mod offset_buffer;
85+
86+ pub use byte_array:: make_byte_array_reader;
87+
8488/// Array reader reads parquet data into arrow array.
8589pub trait ArrayReader {
8690 fn as_any ( & self ) -> & dyn Any ;
@@ -1778,57 +1782,43 @@ impl<'a> ArrayReaderBuilder {
17781782 null_mask_only,
17791783 ) ?,
17801784 ) ) ,
1781- PhysicalType :: BYTE_ARRAY => {
1782- if cur_type. get_basic_info ( ) . converted_type ( ) == ConvertedType :: UTF8 {
1783- if let Some ( ArrowType :: LargeUtf8 ) = arrow_type {
1784- let converter =
1785- LargeUtf8Converter :: new ( LargeUtf8ArrayConverter { } ) ;
1786- Ok ( Box :: new ( ComplexObjectArrayReader :: <
1787- ByteArrayType ,
1788- LargeUtf8Converter ,
1789- > :: new (
1790- page_iterator,
1791- column_desc,
1792- converter,
1793- arrow_type,
1794- ) ?) )
1795- } else {
1796- use crate :: arrow:: arrow_array_reader:: {
1797- ArrowArrayReader , StringArrayConverter ,
1798- } ;
1799- let converter = StringArrayConverter :: new ( ) ;
1800- Ok ( Box :: new ( ArrowArrayReader :: try_new (
1801- * page_iterator,
1802- column_desc,
1803- converter,
1804- arrow_type,
1805- ) ?) )
1785+ PhysicalType :: BYTE_ARRAY => match arrow_type {
1786+ // TODO: Replace with optimised dictionary reader (#171)
1787+ Some ( ArrowType :: Dictionary ( _, _) ) => {
1788+ match cur_type. get_basic_info ( ) . converted_type ( ) {
1789+ ConvertedType :: UTF8 => {
1790+ let converter = Utf8Converter :: new ( Utf8ArrayConverter { } ) ;
1791+ Ok ( Box :: new ( ComplexObjectArrayReader :: <
1792+ ByteArrayType ,
1793+ Utf8Converter ,
1794+ > :: new (
1795+ page_iterator,
1796+ column_desc,
1797+ converter,
1798+ arrow_type,
1799+ ) ?) )
1800+ }
1801+ _ => {
1802+ let converter = BinaryConverter :: new ( BinaryArrayConverter { } ) ;
1803+ Ok ( Box :: new ( ComplexObjectArrayReader :: <
1804+ ByteArrayType ,
1805+ BinaryConverter ,
1806+ > :: new (
1807+ page_iterator,
1808+ column_desc,
1809+ converter,
1810+ arrow_type,
1811+ ) ?) )
1812+ }
18061813 }
1807- } else if let Some ( ArrowType :: LargeBinary ) = arrow_type {
1808- let converter =
1809- LargeBinaryConverter :: new ( LargeBinaryArrayConverter { } ) ;
1810- Ok ( Box :: new ( ComplexObjectArrayReader :: <
1811- ByteArrayType ,
1812- LargeBinaryConverter ,
1813- > :: new (
1814- page_iterator,
1815- column_desc,
1816- converter,
1817- arrow_type,
1818- ) ?) )
1819- } else {
1820- let converter = BinaryConverter :: new ( BinaryArrayConverter { } ) ;
1821- Ok ( Box :: new ( ComplexObjectArrayReader :: <
1822- ByteArrayType ,
1823- BinaryConverter ,
1824- > :: new (
1825- page_iterator,
1826- column_desc,
1827- converter,
1828- arrow_type,
1829- ) ?) )
18301814 }
1831- }
1815+ _ => make_byte_array_reader (
1816+ page_iterator,
1817+ column_desc,
1818+ arrow_type,
1819+ null_mask_only,
1820+ ) ,
1821+ } ,
18321822 PhysicalType :: FIXED_LEN_BYTE_ARRAY
18331823 if cur_type. get_basic_info ( ) . converted_type ( )
18341824 == ConvertedType :: DECIMAL =>
0 commit comments