diff --git a/tpcds/README.md b/tpcds/README.md index 024c4b1..f00b953 100644 --- a/tpcds/README.md +++ b/tpcds/README.md @@ -9,6 +9,8 @@ This tpcds-kit is ported from [gregrahn/tpcds-kit](https://github.com/gregrahn/t * https://github.com/gregrahn/tpcds-kit/issues/31 * https://github.com/gregrahn/tpcds-kit/issues/33 +* Adjust some query templates to TiDB supported MySQL dialect, adjustments are ported from [FdeFabricio/tpcds-mysql](https://github.com/FdeFabricio/tpcds-mysql), thanks to [FdeFabricio](https://github.com/FdeFabricio/) + ## 1. Setup ### 1.1 install required development tools @@ -56,7 +58,7 @@ done Query generation is done via `dsqgen` with query templetes, here we use a pre-written shell script file [genquery.sh](./genquery.sh), after running this script, queries are located in directory "queries": ```sh -./genquery.sh +./genquery.sh 1 ``` All supported TPC-DS queries for TiDB are generated in `tools/queries` diff --git a/tpcds/genquery.sh b/tpcds/genquery.sh index 5b13697..3ad9222 100755 --- a/tpcds/genquery.sh +++ b/tpcds/genquery.sh @@ -1,7 +1,7 @@ #!/bin/bash set -eu -SCALE="1" +SCALE="${1:-1}" TEMPLATE_DIR="../query_templates" OUTPUT_DIR="queries" QUERY_ID="" @@ -18,8 +18,23 @@ function generate_query() mv "$OUTPUT_DIR/query_0.sql" "$OUTPUT_DIR/query_$QUERY_ID.sql" } -#unsupported="87 17 38 8 18 22 27 21 16 32 37 40 82 92 94 12 20 36 49 44 53 63 67 70 86 89 98 1 2 4 5 11 14 23 24 30 31 33 39 47 51 54 56 57 58 59 60 64 74 75 77 78 80 81 83 95 97 13 6" -unsupported="87 17 38 8 18 22 27 21 16 32 37 40 82 92 94 12 20 36 49 44 53 63 67 70 86 89 98 1 2 4 5 11 14 23 24 30 31 33 39 47 51 54 56 57 58 59 60 64 74 75 77 78 80 81 83 95 97" +function split_sql() +{ + cd $OUTPUT_DIR + + count=`grep ';' query_$QUERY_ID.sql | wc -l` + if [[ $count -eq 1 ]]; then + cd - + return + fi + + csplit --quiet --prefix=query_$QUERY_ID -k --suppress-matched --suffix-format="_%d.sql" query_$QUERY_ID.sql "/;/+1" + rm query_$QUERY_ID.sql + cd - +} + +unsupported="14 18 22 27 36 5 67 
70 77 80 86" # rollup function +unsupported=$unsupported" 2 11 23 39 4 59" # tidb bug cd tools rm -rf $OUTPUT_DIR @@ -37,6 +52,7 @@ for i in {1..99}; do fi QUERY_ID="$i" generate_query + split_sql done mv $OUTPUT_DIR .. -cd - +cd .. diff --git a/tpcds/query_templates/query12.tpl b/tpcds/query_templates/query12.tpl index a6dee6a..c18a05a 100644 --- a/tpcds/query_templates/query12.tpl +++ b/tpcds/query_templates/query12.tpl @@ -55,7 +55,7 @@ where and i_category in ('[CATEGORY.1]', '[CATEGORY.2]', '[CATEGORY.3]') and ws_sold_date_sk = d_date_sk and d_date between cast('[SDATE]' as date) - and (cast('[SDATE]' as date) + 30 days) + and date_add(cast('[SDATE]' as date), interval 30 day) group by i_item_id ,i_item_desc diff --git a/tpcds/query_templates/query16.tpl b/tpcds/query_templates/query16.tpl index 40720c5..d25e1e1 100644 --- a/tpcds/query_templates/query16.tpl +++ b/tpcds/query_templates/query16.tpl @@ -55,7 +55,7 @@ from ,call_center where d_date between '[YEAR]-[MONTH]-01' and - (cast('[YEAR]-[MONTH]-01' as date) + 60 days) + date_add(cast('[YEAR]-[MONTH]-01' as date), interval 60 day) and cs1.cs_ship_date_sk = d_date_sk and cs1.cs_ship_addr_sk = ca_address_sk and ca_state = '[STATE]' diff --git a/tpcds/query_templates/query2.tpl b/tpcds/query_templates/query2.tpl index b273bb4..5bf3c90 100644 --- a/tpcds/query_templates/query2.tpl +++ b/tpcds/query_templates/query2.tpl @@ -37,13 +37,13 @@ with wscs as (select sold_date_sk ,sales_price - from select ws_sold_date_sk sold_date_sk + from (select ws_sold_date_sk sold_date_sk ,ws_ext_sales_price sales_price from web_sales union all select cs_sold_date_sk sold_date_sk ,cs_ext_sales_price sales_price - from catalog_sales), + from catalog_sales) x), wswscs as (select d_week_seq, sum(case when (d_day_name='Sunday') then sales_price else null end) sun_sales, diff --git a/tpcds/query_templates/query20.tpl b/tpcds/query_templates/query20.tpl index 1497c50..59ebb50 100644 --- a/tpcds/query_templates/query20.tpl +++ 
b/tpcds/query_templates/query20.tpl @@ -52,7 +52,7 @@ and i_category in ('[CATEGORY.1]', '[CATEGORY.2]', '[CATEGORY.3]') and cs_sold_date_sk = d_date_sk and d_date between cast('[SDATE]' as date) - and (cast('[SDATE]' as date) + 30 days) + and date_add(cast('[SDATE]' as date), interval 30 day) group by i_item_id ,i_item_desc ,i_category diff --git a/tpcds/query_templates/query21.tpl b/tpcds/query_templates/query21.tpl index 486ffc6..1c86fc0 100644 --- a/tpcds/query_templates/query21.tpl +++ b/tpcds/query_templates/query21.tpl @@ -39,10 +39,10 @@ [_LIMITA] select [_LIMITB] * from(select w_warehouse_name ,i_item_id - ,sum(case when (cast(d_date as date) < cast ('[SALES_DATE]' as date)) + ,sum(case when (cast(d_date as date) < cast('[SALES_DATE]' as date)) then inv_quantity_on_hand else 0 end) as inv_before - ,sum(case when (cast(d_date as date) >= cast ('[SALES_DATE]' as date)) + ,sum(case when (cast(d_date as date) >= cast('[SALES_DATE]' as date)) then inv_quantity_on_hand else 0 end) as inv_after from inventory @@ -53,8 +53,8 @@ and i_item_sk = inv_item_sk and inv_warehouse_sk = w_warehouse_sk and inv_date_sk = d_date_sk - and d_date between (cast ('[SALES_DATE]' as date) - 30 days) - and (cast ('[SALES_DATE]' as date) + 30 days) + and d_date between date_sub(cast('[SALES_DATE]' as date), interval 30 day) + and date_add(cast('[SALES_DATE]' as date), interval 30 day) group by w_warehouse_name, i_item_id) x where (case when inv_before > 0 then inv_after / inv_before diff --git a/tpcds/query_templates/query23.tpl b/tpcds/query_templates/query23.tpl index 7cb2f15..27a3e45 100644 --- a/tpcds/query_templates/query23.tpl +++ b/tpcds/query_templates/query23.tpl @@ -56,7 +56,7 @@ where ss_customer_sk = c_customer_sk and ss_sold_date_sk = d_date_sk and d_year in ([YEAR],[YEAR]+1,[YEAR]+2,[YEAR]+3) - group by c_customer_sk)), + group by c_customer_sk) temp1), best_ss_customer as (select c_customer_sk,sum(ss_quantity*ss_sales_price) ssales from store_sales @@ -84,7 +84,7 @@ 
from and d_moy = [MONTH] and ws_sold_date_sk = d_date_sk and ws_item_sk in (select item_sk from frequent_ss_items) - and ws_bill_customer_sk in (select c_customer_sk from best_ss_customer)) + and ws_bill_customer_sk in (select c_customer_sk from best_ss_customer)) temp2 [_LIMITC]; with frequent_ss_items as @@ -106,7 +106,7 @@ from where ss_customer_sk = c_customer_sk and ss_sold_date_sk = d_date_sk and d_year in ([YEAR],[YEAR]+1,[YEAR]+2,[YEAR]+3) - group by c_customer_sk)), + group by c_customer_sk) temp3), best_ss_customer as (select c_customer_sk,sum(ss_quantity*ss_sales_price) ssales from store_sales @@ -139,6 +139,6 @@ from and ws_item_sk in (select item_sk from frequent_ss_items) and ws_bill_customer_sk in (select c_customer_sk from best_ss_customer) and ws_bill_customer_sk = c_customer_sk - group by c_last_name,c_first_name) + group by c_last_name,c_first_name) temp4 order by c_last_name,c_first_name,sales [_LIMITC]; diff --git a/tpcds/query_templates/query32.tpl b/tpcds/query_templates/query32.tpl index b2b6fe1..2b1a0ba 100644 --- a/tpcds/query_templates/query32.tpl +++ b/tpcds/query_templates/query32.tpl @@ -46,7 +46,7 @@ where i_manufact_id = [IMID] and i_item_sk = cs_item_sk and d_date between '[CSDATE]' and - (cast('[CSDATE]' as date) + 90 days) + date_add(cast('[CSDATE]' as date), interval 90 day) and d_date_sk = cs_sold_date_sk and cs_ext_discount_amt > ( @@ -58,7 +58,7 @@ and cs_ext_discount_amt where cs_item_sk = i_item_sk and d_date between '[CSDATE]' and - (cast('[CSDATE]' as date) + 90 days) + date_add(cast('[CSDATE]' as date), interval 90 day) and d_date_sk = cs_sold_date_sk ) [_LIMITC]; diff --git a/tpcds/query_templates/query37.tpl b/tpcds/query_templates/query37.tpl index cee765e..c47ee25 100644 --- a/tpcds/query_templates/query37.tpl +++ b/tpcds/query_templates/query37.tpl @@ -45,7 +45,7 @@ where i_current_price between [PRICE] and [PRICE] + 30 and inv_item_sk = i_item_sk and d_date_sk=inv_date_sk - and d_date between cast('[INVDATE]' as 
date) and (cast('[INVDATE]' as date) + 60 days) + and d_date between cast('[INVDATE]' as date) and date_add(cast('[INVDATE]' as date), interval 60 day) and i_manufact_id in ([MANUFACT_ID.1],[MANUFACT_ID.2],[MANUFACT_ID.3],[MANUFACT_ID.4]) and inv_quantity_on_hand between 100 and 500 and cs_item_sk = i_item_sk diff --git a/tpcds/query_templates/query40.tpl b/tpcds/query_templates/query40.tpl index b00dd1e..0e6f2a5 100644 --- a/tpcds/query_templates/query40.tpl +++ b/tpcds/query_templates/query40.tpl @@ -39,9 +39,9 @@ [_LIMITA] select [_LIMITB] w_state ,i_item_id - ,sum(case when (cast(d_date as date) < cast ('[SALES_DATE]' as date)) + ,sum(case when (cast(d_date as date) < cast('[SALES_DATE]' as date)) then cs_sales_price - coalesce(cr_refunded_cash,0) else 0 end) as sales_before - ,sum(case when (cast(d_date as date) >= cast ('[SALES_DATE]' as date)) + ,sum(case when (cast(d_date as date) >= cast('[SALES_DATE]' as date)) then cs_sales_price - coalesce(cr_refunded_cash,0) else 0 end) as sales_after from catalog_sales left outer join catalog_returns on @@ -55,8 +55,8 @@ and i_item_sk = cs_item_sk and cs_warehouse_sk = w_warehouse_sk and cs_sold_date_sk = d_date_sk - and d_date between (cast ('[SALES_DATE]' as date) - 30 days) - and (cast ('[SALES_DATE]' as date) + 30 days) + and d_date between date_sub(cast('[SALES_DATE]' as date), interval 30 day) + and date_add(cast('[SALES_DATE]' as date), interval 30 day) group by w_state,i_item_id order by w_state,i_item_id diff --git a/tpcds/query_templates/query51.tpl b/tpcds/query_templates/query51.tpl index 4fbf865..398c7d5 100644 --- a/tpcds/query_templates/query51.tpl +++ b/tpcds/query_templates/query51.tpl @@ -71,7 +71,14 @@ from (select item_sk ,case when web.d_date is not null then web.d_date else store.d_date end d_date ,web.cume_sales web_sales ,store.cume_sales store_sales - from web_v1 web full outer join store_v1 store on (web.item_sk = store.item_sk + from web_v1 web Left join store_v1 store on (web.item_sk = 
store.item_sk + and web.d_date = store.d_date) + UNION + select case when web.item_sk is not null then web.item_sk else store.item_sk end item_sk + ,case when web.d_date is not null then web.d_date else store.d_date end d_date + ,web.cume_sales web_sales + ,store.cume_sales store_sales + from web_v1 web Right join store_v1 store on (web.item_sk = store.item_sk and web.d_date = store.d_date) )x )y where web_cumulative > store_cumulative diff --git a/tpcds/query_templates/query54.tpl b/tpcds/query_templates/query54.tpl index d2181bb..06c23c1 100644 --- a/tpcds/query_templates/query54.tpl +++ b/tpcds/query_templates/query54.tpl @@ -84,7 +84,7 @@ group by c_customer_sk ) , segments as - (select cast((revenue/50) as int) as segment + (select cast((revenue/50) as signed) as segment from my_revenue ) [_LIMITA] select [_LIMITB] segment, count(*) as num_customers, segment*50 as segment_base diff --git a/tpcds/query_templates/query82.tpl b/tpcds/query_templates/query82.tpl index f322723..480a1aa 100644 --- a/tpcds/query_templates/query82.tpl +++ b/tpcds/query_templates/query82.tpl @@ -45,7 +45,7 @@ where i_current_price between [PRICE] and [PRICE]+30 and inv_item_sk = i_item_sk and d_date_sk=inv_date_sk - and d_date between cast('[INVDATE]' as date) and (cast('[INVDATE]' as date) + 60 days) + and d_date between cast('[INVDATE]' as date) and date_add(cast('[INVDATE]' as date), interval 60 day) and i_manufact_id in ([MANUFACT_ID.1],[MANUFACT_ID.2],[MANUFACT_ID.3],[MANUFACT_ID.4]) and inv_quantity_on_hand between 100 and 500 and ss_item_sk = i_item_sk diff --git a/tpcds/query_templates/query92.tpl b/tpcds/query_templates/query92.tpl index c05bcfb..aeb3208 100644 --- a/tpcds/query_templates/query92.tpl +++ b/tpcds/query_templates/query92.tpl @@ -48,7 +48,7 @@ where i_manufact_id = [IMID] and i_item_sk = ws_item_sk and d_date between '[WSDATE]' and - (cast('[WSDATE]' as date) + 90 days) + date_add(cast('[WSDATE]' as date), interval 90 day) and d_date_sk = ws_sold_date_sk and 
ws_ext_discount_amt > ( @@ -60,7 +60,7 @@ and ws_ext_discount_amt WHERE ws_item_sk = i_item_sk and d_date between '[WSDATE]' and - (cast('[WSDATE]' as date) + 90 days) + date_add(cast('[WSDATE]' as date), interval 90 day) and d_date_sk = ws_sold_date_sk ) order by sum(ws_ext_discount_amt) diff --git a/tpcds/query_templates/query94.tpl b/tpcds/query_templates/query94.tpl index 4081dfe..979c9b9 100644 --- a/tpcds/query_templates/query94.tpl +++ b/tpcds/query_templates/query94.tpl @@ -49,7 +49,7 @@ from ,web_site where d_date between '[YEAR]-[MONTH]-01' and - (cast('[YEAR]-[MONTH]-01' as date) + 60 days) + date_add(cast('[YEAR]-[MONTH]-01' as date), interval 60 day) and ws1.ws_ship_date_sk = d_date_sk and ws1.ws_ship_addr_sk = ca_address_sk and ca_state = '[STATE]' diff --git a/tpcds/query_templates/query95.tpl b/tpcds/query_templates/query95.tpl index 69e29f8..9711b16 100644 --- a/tpcds/query_templates/query95.tpl +++ b/tpcds/query_templates/query95.tpl @@ -54,7 +54,7 @@ from ,web_site where d_date between '[YEAR]-[MONTH]-01' and - (cast('[YEAR]-[MONTH]-01' as date) + 60 days) + date_add(cast('[YEAR]-[MONTH]-01' as date), interval 60 day) and ws1.ws_ship_date_sk = d_date_sk and ws1.ws_ship_addr_sk = ca_address_sk and ca_state = '[STATE]' diff --git a/tpcds/query_templates/query97.tpl b/tpcds/query_templates/query97.tpl index 4f38e17..c2c2583 100644 --- a/tpcds/query_templates/query97.tpl +++ b/tpcds/query_templates/query97.tpl @@ -57,6 +57,12 @@ group by cs_bill_customer_sk -[_LIMITA] select [_LIMITB] sum(case when ssci.customer_sk is not null and csci.customer_sk is null then 1 else 0 end) store_only - ,sum(case when ssci.customer_sk is null and csci.customer_sk is not null then 1 else 0 end) catalog_only - ,sum(case when ssci.customer_sk is not null and csci.customer_sk is not null then 1 else 0 end) store_and_catalog -from ssci full outer join csci on (ssci.customer_sk=csci.customer_sk +[_LIMITA] select [_LIMITB] sum(case when ss_customer_sk is not null and cs_customer_sk is null then 1 else 0 end) store_only + ,sum(case when ss_customer_sk is null and cs_customer_sk is not null then 1 else 0 end) catalog_only + ,sum(case when ss_customer_sk is not null and cs_customer_sk is not null then 1 else 0 end) store_and_catalog +from (select ssci.customer_sk ss_customer_sk, csci.customer_sk cs_customer_sk + from ssci left join csci on (ssci.customer_sk=csci.customer_sk + and ssci.item_sk = 
csci.item_sk) + union all + select ssci.customer_sk ss_customer_sk, csci.customer_sk cs_customer_sk + from ssci right join csci on (ssci.customer_sk=csci.customer_sk and ssci.item_sk = csci.item_sk) + where ssci.customer_sk is null) sc [_LIMITC]; diff --git a/tpcds/query_templates/query98.tpl b/tpcds/query_templates/query98.tpl index 08cfa2c..3320442 100644 --- a/tpcds/query_templates/query98.tpl +++ b/tpcds/query_templates/query98.tpl @@ -54,7 +54,7 @@ where and i_category in ('[CATEGORY.1]', '[CATEGORY.2]', '[CATEGORY.3]') and ss_sold_date_sk = d_date_sk and d_date between cast('[SDATE]' as date) - and (cast('[SDATE]' as date) + 30 days) + and date_add(cast('[SDATE]' as date), interval 30 day) group by i_item_id ,i_item_desc