Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 3 additions & 1 deletion tpcds/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,8 @@ This tpcds-kit is ported from [gregrahn/tpcds-kit](https://github.com/gregrahn/t
* https://github.com/gregrahn/tpcds-kit/issues/31
* https://github.com/gregrahn/tpcds-kit/issues/33

* Adjust some query templates to the MySQL dialect supported by TiDB; the adjustments are ported from [FdeFabricio/tpcds-mysql](https://github.com/FdeFabricio/tpcds-mysql), thanks to [FdeFabricio](https://github.com/FdeFabricio/).

## 1. Setup

### 1.1 install required development tools
Expand Down Expand Up @@ -56,7 +58,7 @@ done

Query generation is done via `dsqgen` with query templates. Here we use a pre-written shell script, [genquery.sh](./genquery.sh); after running this script, the queries are located in the directory "queries":
```sh
./genquery.sh
./genquery.sh 1
```

All supported TPC-DS queries for TiDB are generated in `tools/queries`
Expand Down
24 changes: 20 additions & 4 deletions tpcds/genquery.sh
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
#!/bin/bash
set -eu

SCALE="1"
SCALE=$1
TEMPLATE_DIR="../query_templates"
OUTPUT_DIR="queries"
QUERY_ID=""
Expand All @@ -18,8 +18,23 @@ function generate_query()
mv "$OUTPUT_DIR/query_0.sql" "$OUTPUT_DIR/query_$QUERY_ID.sql"
}

#unsupported="87 17 38 8 18 22 27 21 16 32 37 40 82 92 94 12 20 36 49 44 53 63 67 70 86 89 98 1 2 4 5 11 14 23 24 30 31 33 39 47 51 54 56 57 58 59 60 64 74 75 77 78 80 81 83 95 97 13 6"
unsupported="87 17 38 8 18 22 27 21 16 32 37 40 82 92 94 12 20 36 49 44 53 63 67 70 86 89 98 1 2 4 5 11 14 23 24 30 31 33 39 47 51 54 56 57 58 59 60 64 74 75 77 78 80 81 83 95 97"
# Split the generated file for the current query into one file per statement.
#
# Reads the globals OUTPUT_DIR (directory holding the generated queries) and
# QUERY_ID (id of the query just generated). When
# "$OUTPUT_DIR/query_$QUERY_ID.sql" has more than one line containing ';',
# it is cut after the first such line into query_<id>_0.sql and
# query_<id>_1.sql, and the combined file is removed. Files with at most one
# ';' line are left untouched.
function split_sql()
{
    local query_file="$OUTPUT_DIR/query_$QUERY_ID.sql"
    local statement_count

    # grep -c exits non-zero when nothing matches; "|| true" keeps that from
    # aborting a caller running under "set -e".
    statement_count=$(grep -c ';' "$query_file" || true)

    # Nothing to split. "-le" also covers a file with no ';' at all, where
    # csplit would otherwise fail with "match not found".
    if [[ $statement_count -le 1 ]]; then
        return
    fi

    # Work on explicit paths instead of cd-ing into OUTPUT_DIR, so the
    # caller's working directory is never changed and nothing is printed.
    csplit --quiet --prefix="$OUTPUT_DIR/query_$QUERY_ID" -k --suppress-matched --suffix-format="_%d.sql" "$query_file" "/;/+1"
    rm "$query_file"
}

unsupported="14 18 22 27 36 5 67 70 77 80 86" # rollup function
unsupported=$unsupported" 2 11 23 39 4 59" # tidb bug

cd tools
rm -rf $OUTPUT_DIR
Expand All @@ -37,6 +52,7 @@ for i in {1..99}; do
fi
QUERY_ID="$i"
generate_query
split_sql
done
mv $OUTPUT_DIR ..
cd -
cd ..
2 changes: 1 addition & 1 deletion tpcds/query_templates/query12.tpl
Original file line number Diff line number Diff line change
Expand Up @@ -55,7 +55,7 @@ where
and i_category in ('[CATEGORY.1]', '[CATEGORY.2]', '[CATEGORY.3]')
and ws_sold_date_sk = d_date_sk
and d_date between cast('[SDATE]' as date)
and (cast('[SDATE]' as date) + 30 days)
and date_add(cast('[SDATE]' as date), interval 30 day)
group by
i_item_id
,i_item_desc
Expand Down
2 changes: 1 addition & 1 deletion tpcds/query_templates/query16.tpl
Original file line number Diff line number Diff line change
Expand Up @@ -55,7 +55,7 @@ from
,call_center
where
d_date between '[YEAR]-[MONTH]-01' and
(cast('[YEAR]-[MONTH]-01' as date) + 60 days)
date_add(cast('[YEAR]-[MONTH]-01' as date), interval 60 day)
and cs1.cs_ship_date_sk = d_date_sk
and cs1.cs_ship_addr_sk = ca_address_sk
and ca_state = '[STATE]'
Expand Down
4 changes: 2 additions & 2 deletions tpcds/query_templates/query2.tpl
Original file line number Diff line number Diff line change
Expand Up @@ -37,13 +37,13 @@
with wscs as
(select sold_date_sk
,sales_price
from select ws_sold_date_sk sold_date_sk
from (select ws_sold_date_sk sold_date_sk
,ws_ext_sales_price sales_price
from web_sales
union all
select cs_sold_date_sk sold_date_sk
,cs_ext_sales_price sales_price
from catalog_sales),
from catalog_sales)),
wswscs as
(select d_week_seq,
sum(case when (d_day_name='Sunday') then sales_price else null end) sun_sales,
Expand Down
2 changes: 1 addition & 1 deletion tpcds/query_templates/query20.tpl
Original file line number Diff line number Diff line change
Expand Up @@ -52,7 +52,7 @@
and i_category in ('[CATEGORY.1]', '[CATEGORY.2]', '[CATEGORY.3]')
and cs_sold_date_sk = d_date_sk
and d_date between cast('[SDATE]' as date)
and (cast('[SDATE]' as date) + 30 days)
and date_add(cast('[SDATE]' as date), interval 30 day)
group by i_item_id
,i_item_desc
,i_category
Expand Down
8 changes: 4 additions & 4 deletions tpcds/query_templates/query21.tpl
Original file line number Diff line number Diff line change
Expand Up @@ -39,10 +39,10 @@
[_LIMITA] select [_LIMITB] *
from(select w_warehouse_name
,i_item_id
,sum(case when (cast(d_date as date) < cast ('[SALES_DATE]' as date))
,sum(case when (cast(d_date as date) < cast('[SALES_DATE]' as date))
then inv_quantity_on_hand
else 0 end) as inv_before
,sum(case when (cast(d_date as date) >= cast ('[SALES_DATE]' as date))
,sum(case when (cast(d_date as date) >= cast('[SALES_DATE]' as date))
then inv_quantity_on_hand
else 0 end) as inv_after
from inventory
Expand All @@ -53,8 +53,8 @@
and i_item_sk = inv_item_sk
and inv_warehouse_sk = w_warehouse_sk
and inv_date_sk = d_date_sk
and d_date between (cast ('[SALES_DATE]' as date) - 30 days)
and (cast ('[SALES_DATE]' as date) + 30 days)
and d_date between date_sub(cast('[SALES_DATE]' as date), interval 30 day)
and date_add(cast('[SALES_DATE]' as date), interval 30 day)
group by w_warehouse_name, i_item_id) x
where (case when inv_before > 0
then inv_after / inv_before
Expand Down
8 changes: 4 additions & 4 deletions tpcds/query_templates/query23.tpl
Original file line number Diff line number Diff line change
Expand Up @@ -56,7 +56,7 @@
where ss_customer_sk = c_customer_sk
and ss_sold_date_sk = d_date_sk
and d_year in ([YEAR],[YEAR]+1,[YEAR]+2,[YEAR]+3)
group by c_customer_sk)),
group by c_customer_sk) temp1),
best_ss_customer as
(select c_customer_sk,sum(ss_quantity*ss_sales_price) ssales
from store_sales
Expand Down Expand Up @@ -84,7 +84,7 @@ from
and d_moy = [MONTH]
and ws_sold_date_sk = d_date_sk
and ws_item_sk in (select item_sk from frequent_ss_items)
and ws_bill_customer_sk in (select c_customer_sk from best_ss_customer))
and ws_bill_customer_sk in (select c_customer_sk from best_ss_customer)) temp2
[_LIMITC];

with frequent_ss_items as
Expand All @@ -106,7 +106,7 @@ from
where ss_customer_sk = c_customer_sk
and ss_sold_date_sk = d_date_sk
and d_year in ([YEAR],[YEAR]+1,[YEAR]+2,[YEAR]+3)
group by c_customer_sk)),
group by c_customer_sk) temp3),
best_ss_customer as
(select c_customer_sk,sum(ss_quantity*ss_sales_price) ssales
from store_sales
Expand Down Expand Up @@ -139,6 +139,6 @@ from
and ws_item_sk in (select item_sk from frequent_ss_items)
and ws_bill_customer_sk in (select c_customer_sk from best_ss_customer)
and ws_bill_customer_sk = c_customer_sk
group by c_last_name,c_first_name)
group by c_last_name,c_first_name) temp4
order by c_last_name,c_first_name,sales
[_LIMITC];
4 changes: 2 additions & 2 deletions tpcds/query_templates/query32.tpl
Original file line number Diff line number Diff line change
Expand Up @@ -46,7 +46,7 @@ where
i_manufact_id = [IMID]
and i_item_sk = cs_item_sk
and d_date between '[CSDATE]' and
(cast('[CSDATE]' as date) + 90 days)
date_add(cast('[CSDATE]' as date), interval 90 day)
and d_date_sk = cs_sold_date_sk
and cs_ext_discount_amt
> (
Expand All @@ -58,7 +58,7 @@ and cs_ext_discount_amt
where
cs_item_sk = i_item_sk
and d_date between '[CSDATE]' and
(cast('[CSDATE]' as date) + 90 days)
date_add(cast('[CSDATE]' as date), interval 90 day)
and d_date_sk = cs_sold_date_sk
)
[_LIMITC];
Expand Down
2 changes: 1 addition & 1 deletion tpcds/query_templates/query37.tpl
Original file line number Diff line number Diff line change
Expand Up @@ -45,7 +45,7 @@
where i_current_price between [PRICE] and [PRICE] + 30
and inv_item_sk = i_item_sk
and d_date_sk=inv_date_sk
and d_date between cast('[INVDATE]' as date) and (cast('[INVDATE]' as date) + 60 days)
and d_date between cast('[INVDATE]' as date) and date_add(cast('[INVDATE]' as date), interval 60 day)
and i_manufact_id in ([MANUFACT_ID.1],[MANUFACT_ID.2],[MANUFACT_ID.3],[MANUFACT_ID.4])
and inv_quantity_on_hand between 100 and 500
and cs_item_sk = i_item_sk
Expand Down
8 changes: 4 additions & 4 deletions tpcds/query_templates/query40.tpl
Original file line number Diff line number Diff line change
Expand Up @@ -39,9 +39,9 @@
[_LIMITA] select [_LIMITB]
w_state
,i_item_id
,sum(case when (cast(d_date as date) < cast ('[SALES_DATE]' as date))
,sum(case when (cast(d_date as date) < cast('[SALES_DATE]' as date))
then cs_sales_price - coalesce(cr_refunded_cash,0) else 0 end) as sales_before
,sum(case when (cast(d_date as date) >= cast ('[SALES_DATE]' as date))
,sum(case when (cast(d_date as date) >= cast('[SALES_DATE]' as date))
then cs_sales_price - coalesce(cr_refunded_cash,0) else 0 end) as sales_after
from
catalog_sales left outer join catalog_returns on
Expand All @@ -55,8 +55,8 @@
and i_item_sk = cs_item_sk
and cs_warehouse_sk = w_warehouse_sk
and cs_sold_date_sk = d_date_sk
and d_date between (cast ('[SALES_DATE]' as date) - 30 days)
and (cast ('[SALES_DATE]' as date) + 30 days)
and d_date between date_sub(cast('[SALES_DATE]' as date), interval 30 day)
and date_add(cast('[SALES_DATE]' as date), interval 30 day)
group by
w_state,i_item_id
order by w_state,i_item_id
Expand Down
9 changes: 8 additions & 1 deletion tpcds/query_templates/query51.tpl
Original file line number Diff line number Diff line change
Expand Up @@ -71,7 +71,14 @@ from (select item_sk
,case when web.d_date is not null then web.d_date else store.d_date end d_date
,web.cume_sales web_sales
,store.cume_sales store_sales
from web_v1 web full outer join store_v1 store on (web.item_sk = store.item_sk
from web_v1 web Left join store_v1 store on (web.item_sk = store.item_sk
and web.d_date = store.d_date)
UNION
select case when web.item_sk is not null then web.item_sk else store.item_sk end item_sk
,case when web.d_date is not null then web.d_date else store.d_date end d_date
,web.cume_sales web_sales
,store.cume_sales store_sales
from web_v1 web Right join store_v1 store on (web.item_sk = store.item_sk
and web.d_date = store.d_date)
)x )y
where web_cumulative > store_cumulative
Expand Down
2 changes: 1 addition & 1 deletion tpcds/query_templates/query54.tpl
Original file line number Diff line number Diff line change
Expand Up @@ -84,7 +84,7 @@
group by c_customer_sk
)
, segments as
(select cast((revenue/50) as int) as segment
(select cast((revenue/50) as signed) as segment
from my_revenue
)
[_LIMITA] select [_LIMITB] segment, count(*) as num_customers, segment*50 as segment_base
Expand Down
2 changes: 1 addition & 1 deletion tpcds/query_templates/query82.tpl
Original file line number Diff line number Diff line change
Expand Up @@ -45,7 +45,7 @@
where i_current_price between [PRICE] and [PRICE]+30
and inv_item_sk = i_item_sk
and d_date_sk=inv_date_sk
and d_date between cast('[INVDATE]' as date) and (cast('[INVDATE]' as date) + 60 days)
and d_date between cast('[INVDATE]' as date) and date_add(cast('[INVDATE]' as date), interval 60 day)
and i_manufact_id in ([MANUFACT_ID.1],[MANUFACT_ID.2],[MANUFACT_ID.3],[MANUFACT_ID.4])
and inv_quantity_on_hand between 100 and 500
and ss_item_sk = i_item_sk
Expand Down
4 changes: 2 additions & 2 deletions tpcds/query_templates/query92.tpl
Original file line number Diff line number Diff line change
Expand Up @@ -48,7 +48,7 @@ where
i_manufact_id = [IMID]
and i_item_sk = ws_item_sk
and d_date between '[WSDATE]' and
(cast('[WSDATE]' as date) + 90 days)
date_add(cast('[WSDATE]' as date), interval 90 day)
and d_date_sk = ws_sold_date_sk
and ws_ext_discount_amt
> (
Expand All @@ -60,7 +60,7 @@ and ws_ext_discount_amt
WHERE
ws_item_sk = i_item_sk
and d_date between '[WSDATE]' and
(cast('[WSDATE]' as date) + 90 days)
date_add(cast('[WSDATE]' as date), interval 90 day)
and d_date_sk = ws_sold_date_sk
)
order by sum(ws_ext_discount_amt)
Expand Down
2 changes: 1 addition & 1 deletion tpcds/query_templates/query94.tpl
Original file line number Diff line number Diff line change
Expand Up @@ -49,7 +49,7 @@ from
,web_site
where
d_date between '[YEAR]-[MONTH]-01' and
(cast('[YEAR]-[MONTH]-01' as date) + 60 days)
date_add(cast('[YEAR]-[MONTH]-01' as date), interval 60 day)
and ws1.ws_ship_date_sk = d_date_sk
and ws1.ws_ship_addr_sk = ca_address_sk
and ca_state = '[STATE]'
Expand Down
2 changes: 1 addition & 1 deletion tpcds/query_templates/query95.tpl
Original file line number Diff line number Diff line change
Expand Up @@ -54,7 +54,7 @@ from
,web_site
where
d_date between '[YEAR]-[MONTH]-01' and
(cast('[YEAR]-[MONTH]-01' as date) + 60 days)
date_add(cast('[YEAR]-[MONTH]-01' as date), interval 60 day)
and ws1.ws_ship_date_sk = d_date_sk
and ws1.ws_ship_addr_sk = ca_address_sk
and ca_state = '[STATE]'
Expand Down
8 changes: 7 additions & 1 deletion tpcds/query_templates/query97.tpl
Original file line number Diff line number Diff line change
Expand Up @@ -57,6 +57,12 @@ group by cs_bill_customer_sk
[_LIMITA] select [_LIMITB] sum(case when ssci.customer_sk is not null and csci.customer_sk is null then 1 else 0 end) store_only
,sum(case when ssci.customer_sk is null and csci.customer_sk is not null then 1 else 0 end) catalog_only
,sum(case when ssci.customer_sk is not null and csci.customer_sk is not null then 1 else 0 end) store_and_catalog
from ssci full outer join csci on (ssci.customer_sk=csci.customer_sk
from ssci left join csci on (ssci.customer_sk=csci.customer_sk
and ssci.item_sk = csci.item_sk)
UNION
[_LIMITA] select [_LIMITB] sum(case when ssci.customer_sk is not null and csci.customer_sk is null then 1 else 0 end) store_only
,sum(case when ssci.customer_sk is null and csci.customer_sk is not null then 1 else 0 end) catalog_only
,sum(case when ssci.customer_sk is not null and csci.customer_sk is not null then 1 else 0 end) store_and_catalog
from ssci right join csci on (ssci.customer_sk=csci.customer_sk
and ssci.item_sk = csci.item_sk)
[_LIMITC];
2 changes: 1 addition & 1 deletion tpcds/query_templates/query98.tpl
Original file line number Diff line number Diff line change
Expand Up @@ -54,7 +54,7 @@ where
and i_category in ('[CATEGORY.1]', '[CATEGORY.2]', '[CATEGORY.3]')
and ss_sold_date_sk = d_date_sk
and d_date between cast('[SDATE]' as date)
and (cast('[SDATE]' as date) + 30 days)
and date_add(cast('[SDATE]' as date), interval 30 day)
group by
i_item_id
,i_item_desc
Expand Down