1212// See the License for the specific language governing permissions and
1313// limitations under the License.
1414
15- use std:: collections:: HashMap ;
16-
1715use common_exception:: Result ;
1816use common_expression:: types:: nullable:: NullableDomain ;
1917use common_expression:: types:: number:: SimpleDomain ;
@@ -26,7 +24,6 @@ use common_expression::types::StringType;
2624use common_expression:: types:: TimestampType ;
2725use common_expression:: types:: ValueType ;
2826use common_expression:: with_number_mapped_type;
29- use common_expression:: ColumnId ;
3027use common_expression:: ConstantFolder ;
3128use common_expression:: Domain ;
3229use common_expression:: Expr ;
@@ -43,7 +40,7 @@ use crate::Index;
// Diff view of `RangeIndex`: a line marked `-` is removed, a line marked `+` is added,
// and unmarked lines are unchanged context.
// The struct keeps an expression (`expr`) and a function context (`func_ctx`). This change
// drops the `column_ids` map (column name -> `ColumnId`) and stores the table schema
// (`TableSchemaRef`) in its place.
4340pub struct RangeIndex {
4441 expr : Expr < String > ,
4542 func_ctx : FunctionContext ,
46- column_ids : HashMap < String , ColumnId > ,
43+ schema : TableSchemaRef ,
4744}
4845
4946impl RangeIndex {
@@ -52,19 +49,10 @@ impl RangeIndex {
5249 expr : & Expr < String > ,
5350 schema : TableSchemaRef ,
5451 ) -> Result < Self > {
55- let leaf_fields = schema. leaf_fields ( ) ;
56- let column_ids = leaf_fields. iter ( ) . fold (
57- HashMap :: with_capacity ( leaf_fields. len ( ) ) ,
58- |mut acc, field| {
59- acc. insert ( field. name ( ) . clone ( ) , field. column_id ( ) ) ;
60- acc
61- } ,
62- ) ;
63-
6452 Ok ( Self {
6553 expr : expr. clone ( ) ,
6654 func_ctx,
67- column_ids ,
55+ schema ,
6856 } )
6957 }
7058
@@ -83,11 +71,13 @@ impl RangeIndex {
8371 . column_refs ( )
8472 . into_iter ( )
8573 . map ( |( name, ty) | {
86- let stat = match self . column_ids . get ( & name) {
87- Some ( column_id) => stats. get ( column_id) ,
88- None => None ,
89- } ;
90- let domain = statistics_to_domain ( stat, & ty) ;
74+ let column_ids = self . schema . inner_column_ids_with_name ( & name) ;
75+ let stats = column_ids
76+ . iter ( )
77+ . filter_map ( |column_id| stats. get ( column_id) )
78+ . collect :: < _ > ( ) ;
79+
80+ let domain = statistics_to_domain ( stats, & ty) ;
9181 Ok ( ( name, domain) )
9282 } )
9383 . collect :: < Result < _ > > ( ) ?;
@@ -107,52 +97,92 @@ impl RangeIndex {
10797 }
10898}
10999
// Diff view of `statistics_to_domain`: `-` lines are the previous implementation, `+` lines
// are the new one, and unmarked lines are unchanged context.
//
// New signature: takes a vector of column statistics (`stats`) plus the column's
// `data_type` and returns a `Domain`.
//   * If `stats.len()` differs from `data_type.n_columns()`, returns `Domain::full(data_type)`.
//   * `Nullable`: with exactly one statistic whose `min` or `max` is null, returns a nullable
//     domain with `has_null: true` and no value domain. Otherwise it recurses into the inner
//     type; `has_null` is `null_count > 0` for a single statistic and `true` when there are
//     several.
//   * `Tuple`: for each field, drains `n_columns()` statistics from the front of `stats`
//     and recurses with them.
//   * `Array` / `Map`: recurse into the inner type. `Map` wraps elements 0 and 1 of the
//     resulting tuple domain into `Domain::Map`.
//   * Any other type: builds a number, string, timestamp or date domain from `stats[0]`,
//     falling back to `Domain::full(data_type)` for unsupported types.
//
// NOTE(review): the removed code checked `stat.min.is_null() || stat.max.is_null()` before
// any downcast. The new code only checks this inside the `Nullable` branch when there is a
// single statistic. The leaf branch calls `try_downcast_scalar(..).unwrap()` directly, so a
// null `min`/`max` reaching it (e.g. a leaf of a nested type) presumably panics -- confirm,
// and consider returning `Domain::full(data_type)` in that case.
// NOTE(review): `as_tuple().unwrap()` in the `Map` branch assumes the inner type of a map
// always produces a tuple domain -- confirm against the definition of `DataType::Map`.
110- pub fn statistics_to_domain ( stat : Option < & ColumnStatistics > , data_type : & DataType ) -> Domain {
111- if stat . is_none ( ) {
100+ pub fn statistics_to_domain ( mut stats : Vec < & ColumnStatistics > , data_type : & DataType ) -> Domain {
101+ if stats . len ( ) != data_type . n_columns ( ) {
112102 return Domain :: full ( data_type) ;
113103 }
114- let stat = stat. unwrap ( ) ;
115- if stat. min . is_null ( ) || stat. max . is_null ( ) {
116- return Domain :: Nullable ( NullableDomain {
117- has_null : true ,
118- value : None ,
119- } ) ;
120- }
121- with_number_mapped_type ! ( |NUM_TYPE | match data_type {
122- DataType :: Number ( NumberDataType :: NUM_TYPE ) => {
123- NumberType :: <NUM_TYPE >:: upcast_domain( SimpleDomain {
124- min: NumberType :: <NUM_TYPE >:: try_downcast_scalar( & stat. min. as_ref( ) ) . unwrap( ) ,
125- max: NumberType :: <NUM_TYPE >:: try_downcast_scalar( & stat. max. as_ref( ) ) . unwrap( ) ,
126- } )
127- }
128- DataType :: String => Domain :: String ( StringDomain {
129- min: StringType :: try_downcast_scalar( & stat. min. as_ref( ) )
130- . unwrap( )
131- . to_vec( ) ,
132- max: Some (
133- StringType :: try_downcast_scalar( & stat. max. as_ref( ) )
134- . unwrap( )
135- . to_vec( )
136- ) ,
137- } ) ,
138- DataType :: Timestamp => TimestampType :: upcast_domain( SimpleDomain {
139- min: TimestampType :: try_downcast_scalar( & stat. min. as_ref( ) ) . unwrap( ) ,
140- max: TimestampType :: try_downcast_scalar( & stat. max. as_ref( ) ) . unwrap( ) ,
141- } ) ,
142- DataType :: Date => DateType :: upcast_domain( SimpleDomain {
143- min: DateType :: try_downcast_scalar( & stat. min. as_ref( ) ) . unwrap( ) ,
144- max: DateType :: try_downcast_scalar( & stat. max. as_ref( ) ) . unwrap( ) ,
145- } ) ,
146- DataType :: Nullable ( ty) => {
147- let domain = statistics_to_domain( Some ( stat) , ty) ;
104+ match data_type {
105+ DataType :: Nullable ( box inner_ty) => {
106+ if stats. len ( ) == 1 && ( stats[ 0 ] . min . is_null ( ) || stats[ 0 ] . max . is_null ( ) ) {
107+ return Domain :: Nullable ( NullableDomain {
108+ has_null : true ,
109+ value : None ,
110+ } ) ;
111+ }
112+ let has_null = if stats. len ( ) == 1 {
113+ stats[ 0 ] . null_count > 0
114+ } else {
115+ // Only leaf columns have statistics,
116+ // nested columns are treated as having nullable values
117+ true
118+ } ;
119+ let domain = statistics_to_domain ( stats, inner_ty) ;
148120 Domain :: Nullable ( NullableDomain {
149- has_null: stat . null_count > 0 ,
121+ has_null,
150122 value : Some ( Box :: new ( domain) ) ,
151123 } )
152124 }
153- // Unsupported data type
154- _ => Domain :: full( data_type) ,
155- } )
125+ DataType :: Tuple ( inner_tys) => {
126+ let inner_domains = inner_tys
127+ . iter ( )
128+ . map ( |inner_ty| {
129+ let n = inner_ty. n_columns ( ) ;
130+ let stats = stats. drain ( ..n) . collect ( ) ;
131+ statistics_to_domain ( stats, inner_ty)
132+ } )
133+ . collect :: < Vec < _ > > ( ) ;
134+ Domain :: Tuple ( inner_domains)
135+ }
136+ DataType :: Array ( box inner_ty) => {
137+ let n = inner_ty. n_columns ( ) ;
138+ let stats = stats. drain ( ..n) . collect ( ) ;
139+ let inner_domain = statistics_to_domain ( stats, inner_ty) ;
140+ Domain :: Array ( Some ( Box :: new ( inner_domain) ) )
141+ }
142+ DataType :: Map ( box inner_ty) => {
143+ let n = inner_ty. n_columns ( ) ;
144+ let stats = stats. drain ( ..n) . collect ( ) ;
145+ let inner_domain = statistics_to_domain ( stats, inner_ty) ;
146+ let kv_domain = inner_domain. as_tuple ( ) . unwrap ( ) ;
147+ Domain :: Map ( Some ( (
148+ Box :: new ( kv_domain[ 0 ] . clone ( ) ) ,
149+ Box :: new ( kv_domain[ 1 ] . clone ( ) ) ,
150+ ) ) )
151+ }
152+ _ => {
// Presumably `n_columns()` is 1 for every type that reaches this arm, so the length check
// at the top guarantees `stats[0]` exists -- TODO confirm against `DataType::n_columns`.
153+ let stat = stats[ 0 ] ;
154+ with_number_mapped_type ! ( |NUM_TYPE | match data_type {
155+ DataType :: Number ( NumberDataType :: NUM_TYPE ) => {
156+ NumberType :: <NUM_TYPE >:: upcast_domain( SimpleDomain {
157+ min: NumberType :: <NUM_TYPE >:: try_downcast_scalar( & stat. min. as_ref( ) )
158+ . unwrap( ) ,
159+ max: NumberType :: <NUM_TYPE >:: try_downcast_scalar( & stat. max. as_ref( ) )
160+ . unwrap( ) ,
161+ } )
162+ }
163+ DataType :: String => Domain :: String ( StringDomain {
164+ min: StringType :: try_downcast_scalar( & stat. min. as_ref( ) )
165+ . unwrap( )
166+ . to_vec( ) ,
167+ max: Some (
168+ StringType :: try_downcast_scalar( & stat. max. as_ref( ) )
169+ . unwrap( )
170+ . to_vec( )
171+ ) ,
172+ } ) ,
173+ DataType :: Timestamp => TimestampType :: upcast_domain( SimpleDomain {
174+ min: TimestampType :: try_downcast_scalar( & stat. min. as_ref( ) ) . unwrap( ) ,
175+ max: TimestampType :: try_downcast_scalar( & stat. max. as_ref( ) ) . unwrap( ) ,
176+ } ) ,
177+ DataType :: Date => DateType :: upcast_domain( SimpleDomain {
178+ min: DateType :: try_downcast_scalar( & stat. min. as_ref( ) ) . unwrap( ) ,
179+ max: DateType :: try_downcast_scalar( & stat. max. as_ref( ) ) . unwrap( ) ,
180+ } ) ,
181+ // Unsupported data type
182+ _ => Domain :: full( data_type) ,
183+ } )
184+ }
185+ }
156186}
157187
// Implements the `Index` trait for `RangeIndex`; the impl body is empty, so no items are
// defined here.
158188impl Index for RangeIndex { }
0 commit comments