Skip to content

Commit 96c2f82

Browse files
authored
chore: revert "fix: fix memory_size of sliced string view. (#19014)" (#19051)
* Revert "fix: fix memory_size of sliced string view. (#19014)" This reverts commit 8b4a558. * simplify tests. * simplify tests. * simplify tests.
1 parent ebfa25e commit 96c2f82

File tree

16 files changed

+40
-56
lines changed

16 files changed

+40
-56
lines changed

src/common/column/src/binview/mod.rs

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -297,8 +297,7 @@ impl<T: ViewType + ?Sized> BinaryViewColumnGeneric<T> {
297297
}
298298

299299
pub fn memory_size(&self) -> usize {
300-
// when read back from parquet, the buffer is a bytes array without views
301-
self.total_bytes_len + self.len() * 20
300+
self.total_buffer_len + self.len() * 16
302301
}
303302

304303
fn total_unshared_buffer_len(&self) -> usize {

src/common/column/tests/it/binview/mod.rs

Lines changed: 4 additions & 19 deletions
Original file line numberDiff line numberDiff line change
@@ -151,30 +151,15 @@ fn test_slice() {
151151
];
152152

153153
let array: Utf8ViewColumn = data.into_iter().collect();
154-
assert_eq!(array.total_bytes_len(), 78);
155-
assert_eq!(array.total_buffer_len(), 60);
156-
assert_eq!(array.memory_size(), 198);
154+
assert_eq!(array.memory_size(), 156);
157155

158-
let a0 = array.clone().sliced(0, 2);
159-
assert_eq!(a0.into_iter().collect::<Vec<_>>(), vec!["hello", "world",]);
160-
assert_eq!(a0.memory_size(), 50);
161-
assert_eq!(a0.total_bytes_len(), 10);
162-
163-
let a1 = array.clone().sliced(2, 3);
164-
assert_eq!(a1.into_iter().collect::<Vec<_>>(), vec![
156+
let a3 = array.sliced(2, 3);
157+
assert_eq!(a3.into_iter().collect::<Vec<_>>(), vec![
165158
"databend",
166159
"yyyyyyyyyyyyyyyyyyyyy",
167160
"zzzzzzzzzzzzzzzzzzzzz",
168161
]);
169-
assert_eq!(a1.memory_size(), 110);
170-
assert_eq!(a1.total_bytes_len(), 50);
171-
172-
let a2 = array.sliced(5, 1);
173-
assert_eq!(a2.into_iter().collect::<Vec<_>>(), vec![
174-
"abcabcabcabcabcabc",
175-
]);
176-
assert_eq!(a2.memory_size(), 38);
177-
assert_eq!(a2.total_bytes_len(), 18);
162+
assert_eq!(a3.memory_size(), 108);
178163
}
179164

180165
#[test]

src/query/expression/src/values.rs

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -892,7 +892,7 @@ impl ScalarRef<'_> {
892892
ScalarRef::Decimal(_) => n * self.memory_size(),
893893
ScalarRef::Boolean(_) => n.div_ceil(8),
894894
ScalarRef::Binary(s) => s.len() * n + (n + 1) * 8,
895-
ScalarRef::String(s) => n * (20 + s.len()),
895+
ScalarRef::String(s) => n * 16 + if s.len() > 12 && n > 0 { s.len() } else { 0 },
896896
ScalarRef::Timestamp(_) => n * 8,
897897
ScalarRef::TimestampTz(_) => n * 16,
898898
ScalarRef::Date(_) => n * 4,

src/query/expression/tests/it/block.rs

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -90,7 +90,7 @@ fn test_block_entry_memory_size() {
9090
assert_eq!(3, entry.memory_size());
9191

9292
let col = StringType::from_data((0..10).map(|x| x.to_string()).collect::<Vec<_>>());
93-
assert_eq!(col.memory_size(), 210);
93+
assert_eq!(col.memory_size(), 10 * 16);
9494

9595
let array = ArrayColumn::<Int64Type>::new(
9696
Buffer::from_iter(0..10i64),

tests/nox/python_client/test_local.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -61,7 +61,7 @@ def test_stream_load():
6161
]
6262
progress = conn.stream_load("INSERT INTO test VALUES", values)
6363
assert progress.write_rows == 3, f"progress.write_rows: {progress.write_rows}"
64-
# assert progress.write_bytes >= 242, f"progress.write_bytes: {progress.write_bytes}"
64+
# assert progress.write_bytes == 211, f"progress.write_bytes: {progress.write_bytes}"
6565

6666
rows = conn.query_iter("SELECT * FROM test")
6767
ret = [row.values() for row in rows]

tests/sqllogictests/suites/base/09_fuse_engine/09_0020_analyze.test

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -161,7 +161,7 @@ query T
161161
select * from fuse_statistic('db_09_0020', 't_string') order by column_name asc;
162162
----
163163
id 10 0 4 [bucket id: 0, min: "1", max: "1", ndv: 1.0, count: 1.0], [bucket id: 1, min: "2", max: "2", ndv: 1.0, count: 1.0], [bucket id: 2, min: "3", max: "3", ndv: 1.0, count: 1.0], [bucket id: 3, min: "4", max: "4", ndv: 1.0, count: 1.0], [bucket id: 4, min: "5", max: "5", ndv: 1.0, count: 1.0], [bucket id: 5, min: "6", max: "6", ndv: 1.0, count: 1.0], [bucket id: 6, min: "7", max: "7", ndv: 1.0, count: 1.0], [bucket id: 7, min: "8", max: "8", ndv: 1.0, count: 1.0], [bucket id: 8, min: "9", max: "9", ndv: 1.0, count: 1.0], [bucket id: 9, min: "10", max: "10", ndv: 1.0, count: 1.0]
164-
str_val 10 0 23 [bucket id: 0, min: "1.0", max: "1.0", ndv: 1.0, count: 1.0], [bucket id: 1, min: "10.0", max: "10.0", ndv: 1.0, count: 1.0], [bucket id: 2, min: "2.0", max: "2.0", ndv: 1.0, count: 1.0], [bucket id: 3, min: "3.0", max: "3.0", ndv: 1.0, count: 1.0], [bucket id: 4, min: "4.0", max: "4.0", ndv: 1.0, count: 1.0], [bucket id: 5, min: "5.0", max: "5.0", ndv: 1.0, count: 1.0], [bucket id: 6, min: "6.0", max: "6.0", ndv: 1.0, count: 1.0], [bucket id: 7, min: "7.0", max: "7.0", ndv: 1.0, count: 1.0], [bucket id: 8, min: "8.0", max: "8.0", ndv: 1.0, count: 1.0], [bucket id: 9, min: "9.0", max: "9.0", ndv: 1.0, count: 1.0]
164+
str_val 10 0 16 [bucket id: 0, min: "1.0", max: "1.0", ndv: 1.0, count: 1.0], [bucket id: 1, min: "10.0", max: "10.0", ndv: 1.0, count: 1.0], [bucket id: 2, min: "2.0", max: "2.0", ndv: 1.0, count: 1.0], [bucket id: 3, min: "3.0", max: "3.0", ndv: 1.0, count: 1.0], [bucket id: 4, min: "4.0", max: "4.0", ndv: 1.0, count: 1.0], [bucket id: 5, min: "5.0", max: "5.0", ndv: 1.0, count: 1.0], [bucket id: 6, min: "6.0", max: "6.0", ndv: 1.0, count: 1.0], [bucket id: 7, min: "7.0", max: "7.0", ndv: 1.0, count: 1.0], [bucket id: 8, min: "8.0", max: "8.0", ndv: 1.0, count: 1.0], [bucket id: 9, min: "9.0", max: "9.0", ndv: 1.0, count: 1.0]
165165

166166
# Test string comparison with histogram
167167
query I

tests/sqllogictests/suites/base/09_fuse_engine/09_0044_issue_17314.test

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -37,8 +37,8 @@ analyze table t1;
3737
query IIT
3838
select * from fuse_statistic('issue_17314', 't1') order by column_name;
3939
----
40-
a 3 0 22 [bucket id: 0, min: "1", max: "1", ndv: 1.0, count: 1.0], [bucket id: 1, min: "2", max: "2", ndv: 1.0, count: 1.0], [bucket id: 2, min: "3", max: "3", ndv: 1.0, count: 1.0]
41-
b 3 0 23 [bucket id: 0, min: "11", max: "11", ndv: 1.0, count: 1.0], [bucket id: 1, min: "22", max: "22", ndv: 1.0, count: 1.0], [bucket id: 2, min: "33", max: "33", ndv: 1.0, count: 1.0]
40+
a 3 0 17 [bucket id: 0, min: "1", max: "1", ndv: 1.0, count: 1.0], [bucket id: 1, min: "2", max: "2", ndv: 1.0, count: 1.0], [bucket id: 2, min: "3", max: "3", ndv: 1.0, count: 1.0]
41+
b 3 0 17 [bucket id: 0, min: "11", max: "11", ndv: 1.0, count: 1.0], [bucket id: 1, min: "22", max: "22", ndv: 1.0, count: 1.0], [bucket id: 2, min: "33", max: "33", ndv: 1.0, count: 1.0]
4242

4343
statement ok
4444
drop table t1 all;

tests/sqllogictests/suites/base/20+_others/20_0013_query_result_cache.test

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -89,7 +89,7 @@ EXPLAIN SELECT * FROM t1, t2 ORDER BY a, b;
8989
ReadQueryResultCache
9090
├── SQL: SELECT * FROM t1, t2 ORDER BY a, b
9191
├── Number of rows: 9
92-
โ””โ”€โ”€ Result size: 225
92+
└── Result size: 180
9393

9494

9595

tests/sqllogictests/suites/ee/01_ee_system/01_0002_virtual_column.test

Lines changed: 12 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -326,26 +326,26 @@ query TIIIT
326326
select * from fuse_statistic('test_virtual_column', 'tweets');
327327
----
328328
id 10 0 4 (empty)
329-
data['create'] 10 0 24 (empty)
329+
data['create'] 10 0 16 (empty)
330330
data['id'] 10 0 8 (empty)
331331
data['replies'] 7 3 8 (empty)
332-
data['text'] 7 0 21 (empty)
332+
data['text'] 7 0 16 (empty)
333333
data['user']['id'] 6 0 8 (empty)
334334
data['likes'] 2 0 1 (empty)
335-
data['tags'][0] 2 0 4 (empty)
336-
data['tags'][1] 2 0 5 (empty)
335+
data['tags'][0] 2 0 3 (empty)
336+
data['tags'][1] 2 0 3 (empty)
337337

338338
query TTTIIIITTIT
339339
SHOW STATISTICS FROM TABLE test_virtual_column.tweets;
340340
----
341341
test_virtual_column tweets data 10 10 NULL NULL NULL NULL NULL (empty)
342-
test_virtual_column tweets data['create'] 10 10 10 0 1/08 6/07 24 (empty)
342+
test_virtual_column tweets data['create'] 10 10 10 0 1/08 6/07 16 (empty)
343343
test_virtual_column tweets data['id'] 10 10 10 0 1 10 8 (empty)
344344
test_virtual_column tweets data['likes'] 10 10 2 0 10 25 1 (empty)
345345
test_virtual_column tweets data['replies'] 10 10 7 3 0 9 8 (empty)
346-
test_virtual_column tweets data['tags'][0] 10 10 2 0 good new 4 (empty)
347-
test_virtual_column tweets data['tags'][1] 10 10 2 0 interesting popular 5 (empty)
348-
test_virtual_column tweets data['text'] 10 10 7 0 a z 21 (empty)
346+
test_virtual_column tweets data['tags'][0] 10 10 2 0 good new 3 (empty)
347+
test_virtual_column tweets data['tags'][1] 10 10 2 0 interesting popular 3 (empty)
348+
test_virtual_column tweets data['text'] 10 10 7 0 a z 16 (empty)
349349
test_virtual_column tweets data['user']['id'] 10 10 6 0 1 7 8 (empty)
350350
test_virtual_column tweets id 10 10 10 0 1 10 4 (empty)
351351

@@ -357,20 +357,20 @@ query TIIIT
357357
select * from fuse_statistic('test_virtual_column', 'tweets');
358358
----
359359
id 10 0 4 (empty)
360-
data['create'] 10 0 24 (empty)
360+
data['create'] 10 0 16 (empty)
361361
data['id'] 10 0 8 (empty)
362362
data['replies'] 7 3 8 (empty)
363-
data['text'] 8 0 21 (empty)
363+
data['text'] 8 0 16 (empty)
364364
data['user']['id'] 4 0 8 (empty)
365365

366366
query TTTIIIITTIT
367367
SHOW STATISTICS FROM TABLE test_virtual_column.tweets;
368368
----
369369
test_virtual_column tweets data 10 10 NULL NULL NULL NULL NULL (empty)
370-
test_virtual_column tweets data['create'] 10 10 10 0 1/08 6/07 24 (empty)
370+
test_virtual_column tweets data['create'] 10 10 10 0 1/08 6/07 16 (empty)
371371
test_virtual_column tweets data['id'] 10 10 10 0 1 10 8 (empty)
372372
test_virtual_column tweets data['replies'] 10 10 7 3 0 10 8 (empty)
373-
test_virtual_column tweets data['text'] 10 10 8 0 a z 21 (empty)
373+
test_virtual_column tweets data['text'] 10 10 8 0 a z 16 (empty)
374374
test_virtual_column tweets data['user']['id'] 10 10 4 0 1 7 8 (empty)
375375
test_virtual_column tweets id 10 10 10 0 1 10 4 (empty)
376376

tests/sqllogictests/suites/mode/standalone/explain/index/explain_ngram_index.test

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -319,8 +319,8 @@ INSERT INTO t3 VALUES
319319
query II
320320
select block_size, bloom_filter_size, ngram_index_size from fuse_block('test_ngram_index_db', 't3');
321321
----
322-
225 894 NULL
323-
256 894 NULL
322+
209 894 NULL
323+
240 894 NULL
324324

325325
statement ok
326326
CREATE NGRAM INDEX idx1 ON t3(content1, content2) gram_size = 5 bloom_size = 1048576
@@ -331,8 +331,8 @@ REFRESH NGRAM INDEX idx1 ON t3
331331
query III
332332
select block_size, bloom_filter_size, ngram_index_size from fuse_block('test_ngram_index_db', 't3');
333333
----
334-
225 2098570 2097234
335-
256 2098570 2097234
334+
209 2098570 2097234
335+
240 2098570 2097234
336336

337337
query ITT
338338
SELECT * FROM t3 WHERE content1 like '%speak%'

0 commit comments

Comments
(0)