Skip to content

Commit 7ac02fa

Browse files
authored
refactor: Optimize timestamp/timestamptz function by DayLUT (#19031)
1 parent a4896a1 commit 7ac02fa

File tree

5 files changed

+240
-136
lines changed

5 files changed

+240
-136
lines changed

src/query/expression/src/utils/date_helper.rs

Lines changed: 45 additions & 48 deletions
Original file line number | Diff line number | Diff line change
@@ -14,6 +14,7 @@
1414

1515
use std::sync::LazyLock;
1616

17+
use databend_common_column::types::timestamp_tz;
1718
use databend_common_exception::Result;
1819
use databend_common_timezone::fast_components_from_timestamp;
1920
use databend_common_timezone::fast_utc_from_local;
@@ -260,6 +261,18 @@ fn date_from_components(c: &DateTimeComponents) -> Option<Date> {
260261
Date::new(c.year as i16, c.month as i8, c.day as i8).ok()
261262
}
262263

264+
/// Adds the value's own offset (`micros_offset()`) to its stored timestamp
/// (`timestamp()`).
///
/// Returns `None` when `micros_offset()` yields `None` or when the checked
/// addition overflows `i64`.
// NOTE(review): presumably the result is the local wall-clock time expressed
// as microseconds since the epoch; confirm against the `timestamp_tz` type.
#[inline]
265+
pub fn timestamp_tz_local_micros(value: timestamp_tz) -> Option<i64> {
266+
let offset = value.micros_offset()?;
267+
value.timestamp().checked_add(offset)
268+
}
269+
270+
/// Breaks a `timestamp_tz` value into `DateTimeComponents`.
///
/// The offset-adjusted value from `timestamp_tz_local_micros` is handed to
/// `fast_components_from_timestamp` together with `TimeZone::UTC`.
///
/// Returns `None` when the offset-adjusted value cannot be computed or when
/// `fast_components_from_timestamp` returns `None`.
#[inline]
271+
pub fn timestamp_tz_components_via_lut(value: timestamp_tz) -> Option<DateTimeComponents> {
272+
let local = timestamp_tz_local_micros(value)?;
273+
// NOTE(review): UTC is presumably used because the offset has already been
// added above, so no further zone shift is wanted; confirm.
fast_components_from_timestamp(local, &TimeZone::UTC)
274+
}
275+
263276
fn datetime_from_components(c: &DateTimeComponents) -> Option<DateTime> {
264277
let date = date_from_components(c)?;
265278
Some(date.at(
@@ -1025,55 +1038,39 @@ pub fn today_date(now: &Zoned, tz: &TimeZone) -> i32 {
10251038
// The working hours of all departments of The State Council are from 8 a.m. to 12 p.m. and from 1:30 p.m. to 5:30 p.m. The winter working hours will be implemented after September 17th.
10261039
/// Converts the day number `val` into a microsecond timestamp for that date
/// in the time zone `tz`.
///
/// The added implementation tries, in order:
/// 1. `fast_utc_from_local` for 00:00:00 of the local date;
/// 2. converting local midnight with `to_zoned`;
/// 3. the first local time within the following 1440 minutes that `to_zoned`
///    accepts;
/// 4. `val` scaled to microseconds plus the timestamp `tz` assigns to
///    1970-01-01 00:00:00.
///
/// Every path of the added implementation returns `Ok`; the final fallback
/// unwraps the 1970-01-01 conversion and panics if that conversion fails.
// NOTE(review): `val` is presumably a count of days since 1970-01-01; confirm
// against the callers.
pub fn calc_date_to_timestamp(val: i32, tz: &TimeZone) -> std::result::Result<i64, String> {
10271040
let ts = (val as i64) * 24 * 3600 * MICROS_PER_SEC;
1028-
let z = ts.to_timestamp(tz);
1029-
1030-
let tomorrow = z.date().tomorrow();
1031-
let yesterday = z.date().yesterday();
1032-
1033-
// If there were no yesterday or tomorrow, it might be the limit value.
1034-
// e.g. 9999-12-31
1035-
if tomorrow.is_err() || yesterday.is_err() {
1036-
let tz_offset_micros = tz
1037-
.to_timestamp(date(1970, 1, 1).at(0, 0, 0, 0))
1038-
.unwrap()
1039-
.as_microsecond();
1040-
return Ok(ts + tz_offset_micros);
1041-
}
1042-
1043-
// tomorrow midnight
1044-
let tomorrow_date = tomorrow.map_err(|e| format!("Calc tomorrow midnight with error {}", e))?;
1045-
1046-
let tomorrow_zoned = tomorrow_date.to_zoned(tz.clone()).unwrap_or(z.clone());
1047-
let tomorrow_is_dst = tz.to_offset_info(tomorrow_zoned.timestamp()).dst().is_dst();
1048-
1049-
// yesterday midnight
1050-
let yesterday_date =
1051-
yesterday.map_err(|e| format!("Calc yesterday midnight with error {}", e))?;
1052-
let yesterday_zoned = yesterday_date.to_zoned(tz.clone()).unwrap_or(z.clone());
1053-
let yesterday_is_std = tz
1054-
.to_offset_info(yesterday_zoned.timestamp())
1055-
.dst()
1056-
.is_std();
1057-
1058-
// today midnight
1059-
let today_datetime_midnight = z.date().to_datetime(Time::midnight());
1060-
let today_zoned = today_datetime_midnight
1061-
.to_zoned(tz.clone())
1062-
.map_err(|e| format!("Calc today midnight with error {}", e))?;
1063-
let today_is_dst = tz.to_offset_info(today_zoned.timestamp()).dst().is_dst();
1064-
1065-
let tz_offset_micros = tz
1066-
.to_timestamp(date(1970, 1, 1).at(0, 0, 0, 0))
1067-
.unwrap()
1068-
.as_microsecond();
1069-
1070-
let base_res = ts + tz_offset_micros;
1041+
let local_date = val.to_date(tz);
1042+
let year = i32::from(local_date.year());
1043+
let month = local_date.month() as u8;
1044+
let day = local_date.day() as u8;
1045+
1046+
// Fast path: ask the lookup helper for 00:00:00.000000 of the local date.
if let Some(micros) = fast_utc_from_local(tz, year, month, day, 0, 0, 0, 0) {
1047+
return Ok(micros);
1048+
}
1049+
1050+
let midnight = local_date.to_datetime(Time::midnight());
1051+
// The fast path returned `None`: convert local midnight with `to_zoned`.
match midnight.to_zoned(tz.clone()) {
1052+
Ok(zoned) => Ok(zoned.timestamp().as_microsecond()),
1053+
Err(_err) => {
1054+
// Midnight itself could not be converted: probe later local times in
// one-minute steps (up to 1440 minutes) and use the first that converts.
for minutes in 1..=1440 {
1055+
let delta = SignedDuration::from_secs((minutes * 60) as i64);
1056+
if let Ok(adj) = midnight.checked_add(delta) {
1057+
if let Ok(zoned) = adj.to_zoned(tz.clone()) {
1058+
return Ok(zoned.timestamp().as_microsecond());
1059+
}
1060+
} else {
1061+
// Adding the delta failed, so stop probing.
break;
1062+
}
1063+
}
10711064

1072-
// Origin:(today_is_dst && tomorrow_is_dst && !yesterday_is_std) || (today_is_dst && !tomorrow_is_dst && yesterday_is_std)
1073-
if today_is_dst && (tomorrow_is_dst != yesterday_is_std) {
1074-
Ok(base_res - 3600 * MICROS_PER_SEC)
1075-
} else {
1076-
Ok(base_res)
1065+
// The timezone database might not have explicit rules for extremely
1066+
// old/new dates, so fall back to the legacy behavior that applies the
1067+
// canonical offset we use for 1970-01-01.
1068+
let tz_offset_micros = tz
1069+
.to_timestamp(date(1970, 1, 1).at(0, 0, 0, 0))
1070+
.unwrap()
1071+
.as_microsecond();
1072+
Ok(ts + tz_offset_micros)
1073+
}
10771074
}
10781075
}
10791076

src/query/functions/benches/bench.rs

Lines changed: 20 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -213,6 +213,26 @@ mod datetime_fast_path {
213213
});
214214
}
215215

216+
/// Benchmarks evaluation of `convert_timezone('America/Los_Angeles', ts)`
/// over the timestamp column of `SAMPLES`, with the function context's
/// time zone set to `Asia/Shanghai`.
#[divan::bench]
217+
fn convert_timezone(bencher: divan::Bencher) {
218+
let expr = build_expr("convert_timezone('America/Los_Angeles', ts)", &[(
219+
"ts",
220+
DataType::Timestamp,
221+
)]);
222+
let data = &*SAMPLES;
223+
let block = DataBlock::new(vec![data.timestamp_entry()], data.rows());
224+
let func_ctx = FunctionContext {
225+
tz: TimeZone::get("Asia/Shanghai").unwrap(),
226+
..Default::default()
227+
};
228+
// The expression, block and evaluator are built once, outside the timed closure.
let evaluator = Evaluator::new(&block, &func_ctx, &BUILTIN_FUNCTIONS);
229+
230+
bencher.bench(|| {
231+
let value = evaluator.run(&expr).unwrap();
232+
// Hand the result to `black_box` so the evaluation is not optimized away.
divan::black_box(value);
233+
});
234+
}
235+
216236
fn build_expr(sql: &str, columns: &[(&str, DataType)]) -> Expr {
217237
let raw_expr = parser::parse_raw_expr(sql, columns);
218238
type_check::check(&raw_expr, &BUILTIN_FUNCTIONS).unwrap()

src/query/functions/src/scalars/timestamp/src/datetime.rs

Lines changed: 49 additions & 22 deletions
Original file line number | Diff line number | Diff line change
@@ -235,10 +235,7 @@ fn register_convert_timezone(registry: &mut FunctionRegistry) {
235235
return;
236236
}
237237
}
238-
// Convert source timestamp from source timezone to target timezone
239-
let p_src_timestamp = src_timestamp.to_timestamp(&ctx.func_ctx.tz);
240-
let src_dst_from_utc = p_src_timestamp.offset().seconds();
241-
238+
let source_tz = &ctx.func_ctx.tz;
242239
let t_tz = match TimeZone::get(target_tz) {
243240
Ok(tz) => tz,
244241
Err(e) => {
@@ -251,17 +248,33 @@ fn register_convert_timezone(registry: &mut FunctionRegistry) {
251248
}
252249
};
253250

254-
let result_timestamp = p_src_timestamp
255-
.with_time_zone(t_tz.clone())
256-
.timestamp()
257-
.as_microsecond();
258-
let target_dst_from_utc = p_src_timestamp
259-
.with_time_zone(t_tz.clone())
260-
.offset()
261-
.seconds();
251+
let source_components = fast_components_from_timestamp(src_timestamp, source_tz);
252+
let target_components = fast_components_from_timestamp(src_timestamp, &t_tz);
253+
254+
let (instant_micros, src_dst_from_utc, target_dst_from_utc) =
255+
if let (Some(src_comp), Some(target_comp)) =
256+
(source_components, target_components)
257+
{
258+
(
259+
src_timestamp,
260+
src_comp.offset_seconds,
261+
target_comp.offset_seconds,
262+
)
263+
} else {
264+
// Fall back to the slower Jiff conversion for timestamps
265+
// outside the LUT coverage (e.g. <1900 or >2299).
266+
let src_zoned = src_timestamp.to_timestamp(source_tz);
267+
let target_zoned = src_zoned.with_time_zone(t_tz.clone());
268+
(
269+
target_zoned.timestamp().as_microsecond(),
270+
src_zoned.offset().seconds(),
271+
target_zoned.offset().seconds(),
272+
)
273+
};
274+
262275
let offset_as_micros_sec = (target_dst_from_utc - src_dst_from_utc) as i64;
263276
match offset_as_micros_sec.checked_mul(MICROS_PER_SEC) {
264-
Some(offset) => match result_timestamp.checked_add(offset) {
277+
Some(offset) => match instant_micros.checked_add(offset) {
265278
Some(res) => output.push(res),
266279
None => {
267280
ctx.set_error(output.len(), "calc final time error".to_string());
@@ -759,6 +772,13 @@ fn register_timestamp_to_timestamp_tz(registry: &mut FunctionRegistry) {
759772
ctx: &mut EvalContext,
760773
) -> Value<TimestampTzType> {
761774
vectorize_with_builder_1_arg::<TimestampType, TimestampTzType>(|val, output, ctx| {
775+
if let Some(components) = fast_components_from_timestamp(val, &ctx.func_ctx.tz) {
776+
let offset = components.offset_seconds;
777+
let ts_tz = timestamp_tz::new(val - (offset as i64 * MICROS_PER_SEC), offset);
778+
output.push(ts_tz);
779+
return;
780+
}
781+
762782
let ts = match Timestamp::from_microsecond(val) {
763783
Ok(ts) => ts,
764784
Err(err) => {
@@ -1053,15 +1073,22 @@ fn register_timestamp_tz_to_date(registry: &mut FunctionRegistry) {
10531073
}
10541074

10551075
// Converts a `timestamp_tz` value into a day count (`i32`).
//
// The added implementation first derives year/month/day through
// `timestamp_tz_components_via_lut` and turns them into days with
// `days_from_components`. If either step yields `None`, it falls back to
// interpreting the timestamp in a fixed-offset zone built from
// `seconds_offset()` and counting the days since 1970-01-01.
//
// Returns `Err` with the error text when `Offset::from_seconds` rejects the
// offset; the fallback unwraps the result of `since` and panics if it fails.
fn calc_timestamp_tz_to_date(val: timestamp_tz) -> Result<i32, String> {
1056-
let offset = Offset::from_seconds(val.seconds_offset()).map_err(|err| err.to_string())?;
1057-
1058-
Ok(val
1059-
.timestamp()
1060-
.to_timestamp(&TimeZone::fixed(offset))
1061-
.date()
1062-
.since((Unit::Day, Date::new(1970, 1, 1).unwrap()))
1063-
.unwrap()
1064-
.get_days())
1076+
// Fast path: lookup-based components converted straight to a day number.
if let Some(days) = timestamp_tz_components_via_lut(val)
1077+
.and_then(|c| days_from_components(c.year, c.month, c.day))
1078+
{
1079+
Ok(days)
1080+
} else {
1081+
// Slow path: fixed-offset conversion, then days since 1970-01-01.
let offset =
1082+
Offset::from_seconds(val.seconds_offset()).map_err(|err| err.to_string())?;
1083+
1084+
Ok(val
1085+
.timestamp()
1086+
.to_timestamp(&TimeZone::fixed(offset))
1087+
.date()
1088+
.since((Unit::Day, Date::new(1970, 1, 1).unwrap()))
1089+
.unwrap()
1090+
.get_days())
1091+
}
10651092
}
10661093
}
10671094

0 commit comments

Comments
 (0)