@@ -38,20 +38,22 @@ CREATE TABLE src_table (
38
38
bigint_col BIGINT,
39
39
date_col DATE,
40
40
overlapping_col INT,
41
- constant_col INT
41
+ constant_col INT,
42
+ nulls_first_col INT,
43
+ nulls_last_col INT
42
44
) AS VALUES
43
45
-- first file
44
- (1, 3, 'aaa', 100, 1, 0, 0),
45
- (2, 2, 'bbb', 200, 2, 1, 0),
46
- (3, 1, 'ccc', 300, 3, 2, 0),
46
+ (1, 3, 'aaa', 100, 1, 0, 0, NULL, 1 ),
47
+ (2, 2, 'bbb', 200, 2, 1, 0, NULL, 2 ),
48
+ (3, 1, 'ccc', 300, 3, 2, 0, 1, 3 ),
47
49
-- second file
48
- (4, 6, 'ddd', 400, 4, 0, 0),
49
- (5, 5, 'eee', 500, 5, 1, 0),
50
- (6, 4, 'fff', 600, 6, 2, 0),
50
+ (4, 6, 'ddd', 400, 4, 0, 0, 2, 4 ),
51
+ (5, 5, 'eee', 500, 5, 1, 0, 3, 5 ),
52
+ (6, 4, 'fff', 600, 6, 2, 0, 4, 6 ),
51
53
-- third file
52
- (7, 9, 'ggg', 700, 7, 3, 0),
53
- (8, 8, 'hhh', 800, 8, 4, 0),
54
- (9, 7, 'iii', 900, 9, 5, 0);
54
+ (7, 9, 'ggg', 700, 7, 3, 0, 5, 7 ),
55
+ (8, 8, 'hhh', 800, 8, 4, 0, 6, NULL ),
56
+ (9, 7, 'iii', 900, 9, 5, 0, 7, NULL );
55
57
56
58
# Set up 3 files; in particular, more files than there are partitions
57
59
@@ -90,45 +92,52 @@ CREATE EXTERNAL TABLE test_table (
90
92
bigint_col BIGINT NOT NULL,
91
93
date_col DATE NOT NULL,
92
94
overlapping_col INT NOT NULL,
93
- constant_col INT NOT NULL
95
+ constant_col INT NOT NULL,
96
+ nulls_first_col INT,
97
+ nulls_last_col INT
94
98
)
95
99
STORED AS PARQUET
96
100
PARTITIONED BY (partition_col)
97
- WITH ORDER (int_col ASC NULLS LAST, bigint_col ASC NULLS LAST)
101
+ WITH ORDER (
102
+ int_col ASC NULLS LAST,
103
+ bigint_col ASC NULLS LAST,
104
+ nulls_first_col ASC NULLS FIRST,
105
+ nulls_last_col ASC NULLS LAST
106
+ )
98
107
LOCATION 'test_files/scratch/parquet_sorted_statistics/test_table';
99
108
100
109
# Order by numeric columns
101
110
# This is to exercise file group sorting, which uses file-level statistics
102
111
# DataFusion doesn't currently support string column statistics
103
112
# This should not require a sort.
104
113
# The ORDER BY repeats the table's declared WITH ORDER, including the explicit
# NULLS FIRST / NULLS LAST columns, so the expected physical plan only merges
# the pre-sorted file groups (SortPreservingMergeExec, no SortExec).
query TT
105
- EXPLAIN SELECT int_col, bigint_col
114
+ EXPLAIN SELECT int_col, bigint_col, nulls_first_col, nulls_last_col
106
115
FROM test_table
107
- ORDER BY int_col, bigint_col;
116
+ ORDER BY int_col, bigint_col, nulls_first_col NULLS FIRST, nulls_last_col NULLS LAST;
108
117
----
109
118
logical_plan
110
- 01)Sort: test_table.int_col ASC NULLS LAST, test_table.bigint_col ASC NULLS LAST
111
- 02)--TableScan: test_table projection=[int_col, bigint_col]
119
+ 01)Sort: test_table.int_col ASC NULLS LAST, test_table.bigint_col ASC NULLS LAST, test_table.nulls_first_col ASC NULLS FIRST, test_table.nulls_last_col ASC NULLS LAST
120
+ 02)--TableScan: test_table projection=[int_col, bigint_col, nulls_first_col, nulls_last_col]
112
121
physical_plan
113
- 01)SortPreservingMergeExec: [int_col@0 ASC NULLS LAST, bigint_col@1 ASC NULLS LAST]
114
- 02)--DataSourceExec: file_groups={2 groups: [[WORKSPACE_ROOT/datafusion/sqllogictest/test_files/scratch/parquet_sorted_statistics/test_table/partition_col=A/0.parquet, WORKSPACE_ROOT/datafusion/sqllogictest/test_files/scratch/parquet_sorted_statistics/test_table/partition_col=C/2.parquet], [WORKSPACE_ROOT/datafusion/sqllogictest/test_files/scratch/parquet_sorted_statistics/test_table/partition_col=B/1.parquet]]}, projection=[int_col, bigint_col], output_ordering=[int_col@0 ASC NULLS LAST, bigint_col@1 ASC NULLS LAST], file_type=parquet
122
+ 01)SortPreservingMergeExec: [int_col@0 ASC NULLS LAST, bigint_col@1 ASC NULLS LAST, nulls_first_col@2 ASC, nulls_last_col@3 ASC NULLS LAST]
123
+ 02)--DataSourceExec: file_groups={2 groups: [[WORKSPACE_ROOT/datafusion/sqllogictest/test_files/scratch/parquet_sorted_statistics/test_table/partition_col=A/0.parquet, WORKSPACE_ROOT/datafusion/sqllogictest/test_files/scratch/parquet_sorted_statistics/test_table/partition_col=C/2.parquet], [WORKSPACE_ROOT/datafusion/sqllogictest/test_files/scratch/parquet_sorted_statistics/test_table/partition_col=B/1.parquet]]}, projection=[int_col, bigint_col, nulls_first_col, nulls_last_col], output_ordering=[int_col@0 ASC NULLS LAST, bigint_col@1 ASC NULLS LAST, nulls_first_col@2 ASC, nulls_last_col@3 ASC NULLS LAST], file_type=parquet
115
124
116
125
# Another planning test, but project on a column with unsupported statistics
117
126
# We should be able to ignore this and look at only the relevant statistics
118
127
# Only string_col is returned; the sort-key columns are still read by the scan
# so the declared ordering can be used, then dropped by the final projection.
query TT
119
128
EXPLAIN SELECT string_col
120
129
FROM test_table
121
- ORDER BY int_col, bigint_col;
130
+ ORDER BY int_col, bigint_col, nulls_first_col NULLS FIRST, nulls_last_col NULLS LAST;
122
131
----
123
132
logical_plan
124
133
01)Projection: test_table.string_col
125
- 02)--Sort: test_table.int_col ASC NULLS LAST, test_table.bigint_col ASC NULLS LAST
126
- 03)----Projection: test_table.string_col, test_table.int_col, test_table.bigint_col
127
- 04)------TableScan: test_table projection=[int_col, string_col, bigint_col]
134
+ 02)--Sort: test_table.int_col ASC NULLS LAST, test_table.bigint_col ASC NULLS LAST, test_table.nulls_first_col ASC NULLS FIRST, test_table.nulls_last_col ASC NULLS LAST
135
+ 03)----Projection: test_table.string_col, test_table.int_col, test_table.bigint_col, test_table.nulls_first_col, test_table.nulls_last_col
136
+ 04)------TableScan: test_table projection=[int_col, string_col, bigint_col, nulls_first_col, nulls_last_col]
128
137
physical_plan
129
138
01)ProjectionExec: expr=[string_col@0 as string_col]
130
- 02)--SortPreservingMergeExec: [int_col@1 ASC NULLS LAST, bigint_col@2 ASC NULLS LAST]
131
- 03)----DataSourceExec: file_groups={2 groups: [[WORKSPACE_ROOT/datafusion/sqllogictest/test_files/scratch/parquet_sorted_statistics/test_table/partition_col=A/0.parquet, WORKSPACE_ROOT/datafusion/sqllogictest/test_files/scratch/parquet_sorted_statistics/test_table/partition_col=C/2.parquet], [WORKSPACE_ROOT/datafusion/sqllogictest/test_files/scratch/parquet_sorted_statistics/test_table/partition_col=B/1.parquet]]}, projection=[string_col, int_col, bigint_col], output_ordering=[int_col@1 ASC NULLS LAST, bigint_col@2 ASC NULLS LAST], file_type=parquet
139
+ 02)--SortPreservingMergeExec: [int_col@1 ASC NULLS LAST, bigint_col@2 ASC NULLS LAST, nulls_first_col@3 ASC, nulls_last_col@4 ASC NULLS LAST]
140
+ 03)----DataSourceExec: file_groups={2 groups: [[WORKSPACE_ROOT/datafusion/sqllogictest/test_files/scratch/parquet_sorted_statistics/test_table/partition_col=A/0.parquet, WORKSPACE_ROOT/datafusion/sqllogictest/test_files/scratch/parquet_sorted_statistics/test_table/partition_col=C/2.parquet], [WORKSPACE_ROOT/datafusion/sqllogictest/test_files/scratch/parquet_sorted_statistics/test_table/partition_col=B/1.parquet]]}, projection=[string_col, int_col, bigint_col, nulls_first_col, nulls_last_col], output_ordering=[int_col@1 ASC NULLS LAST, bigint_col@2 ASC NULLS LAST, nulls_first_col@3 ASC, nulls_last_col@4 ASC NULLS LAST], file_type=parquet
132
141
133
142
# Clean up & recreate, but sort on a descending column
134
143
statement ok
0 commit comments