Skip to content

Commit f6db658

Browse files
authored
ci: add benchmark for tpch1000 (#19057)
1 parent 96c2f82 commit f6db658

File tree

4 files changed: +438 additions, -159 deletions

4 files changed: +438 additions, -159 deletions

.github/actions/benchmark_cloud/action.yml

Lines changed: 15 additions & 21 deletions
Original file line number | Diff line number | Diff line change
@@ -10,6 +10,9 @@ inputs:
1010
dataset:
1111
description: "hits/tpch"
1212
required: true
13+
database:
14+
description: "Database name for benchmark queries"
15+
required: false
1316
source:
1417
description: "pr/release"
1518
required: true
@@ -52,7 +55,11 @@ runs:
5255
echo "database=load_test_${{ inputs.run_id }}" >> $GITHUB_OUTPUT
5356
echo "tries=1" >> $GITHUB_OUTPUT
5457
else
55-
echo "database=clickbench" >> $GITHUB_OUTPUT
58+
database="${{ inputs.database }}"
59+
if [[ -z "$database" ]]; then
60+
database="clickbench"
61+
fi
62+
echo "database=$database" >> $GITHUB_OUTPUT
5663
echo "tries=3" >> $GITHUB_OUTPUT
5764
fi
5865
@@ -65,37 +72,24 @@ runs:
6572
BENCHMARK_VERSION: ${{ inputs.version }}
6673
BENCHMARK_DATABASE: ${{ steps.prepare.outputs.database }}
6774
BENCHMARK_TRIES: ${{ steps.prepare.outputs.tries }}
75+
BENCHMARK_SOURCE: ${{ inputs.source }}
76+
BENCHMARK_SOURCE_ID: ${{ inputs.source_id }}
77+
BENCHMARK_SHA: ${{ inputs.sha }}
6878
CLOUD_USER: ${{ inputs.cloud_user }}
6979
CLOUD_PASSWORD: ${{ inputs.cloud_password }}
7080
CLOUD_GATEWAY: ${{ inputs.cloud_gateway }}
7181
CLOUD_WAREHOUSE: benchmark-${{ inputs.run_id }}
7282
shell: bash
7383
run: |
74-
./benchmark_cloud.sh
75-
76-
- name: Prepare Metadata
77-
working-directory: benchmark/clickbench
78-
shell: bash
79-
run: |
80-
case ${{ inputs.source }} in
81-
pr)
82-
yq -i '.system = "Databend(PR#${{ inputs.source_id }})"' result.json
83-
;;
84-
release)
85-
yq -i '.system = "Databend(Release@${{ inputs.source_id }})"' result.json
86-
;;
87-
*)
88-
echo "Unsupported benchmark source: ${{ inputs.source }}"
89-
exit 1
90-
esac
91-
yq -i '.comment = "commit: ${{ inputs.sha }}"' result.json
92-
mv result.json result-${{ inputs.dataset }}-cloud-${{ inputs.size }}.json
84+
python3 benchmark_cloud.py
9385
9486
- name: Upload artifact
9587
uses: actions/upload-artifact@v4
9688
with:
9789
name: benchmark-${{ inputs.dataset }}-${{ inputs.size }}
98-
path: benchmark/clickbench/result-${{ inputs.dataset }}-cloud-${{ inputs.size }}.json
90+
path: |
91+
benchmark/clickbench/result-${{ inputs.dataset }}-cloud-${{ inputs.size }}.json
92+
benchmark/clickbench/result-${{ inputs.dataset }}-cloud-${{ inputs.size }}-*.ndjson
9993
10094
- name: Remove warehouse
10195
if: always()

.github/workflows/reuse.benchmark.yml

Lines changed: 39 additions & 10 deletions
Original file line number | Diff line number | Diff line change
@@ -119,6 +119,7 @@ jobs:
119119
cloud_user: ${{ secrets.BENCHMARK_CLOUD_USER }}
120120
cloud_password: ${{ secrets.BENCHMARK_CLOUD_PASSWORD }}
121121
cloud_gateway: ${{ secrets.BENCHMARK_CLOUD_GATEWAY }}
122+
database: load
122123
- name: clean
123124
if: always()
124125
continue-on-error: true
@@ -136,10 +137,11 @@ jobs:
136137
strategy:
137138
matrix:
138139
include:
139-
- { dataset: hits, size: Small }
140-
- { dataset: hits, size: Large }
141-
- { dataset: tpch, size: Small }
142-
- { dataset: tpch, size: Large }
140+
- { dataset: hits, size: Small, database: clickbench }
141+
- { dataset: hits, size: Large, database: clickbench }
142+
- { dataset: tpch, size: Small, database: tpch }
143+
- { dataset: tpch, size: Large, database: tpch }
144+
- { dataset: tpch1000, size: Large, database: tpch_1000 }
143145
fail-fast: true
144146
max-parallel: 1
145147
steps:
@@ -167,6 +169,7 @@ jobs:
167169
cloud_user: ${{ secrets.BENCHMARK_CLOUD_USER }}
168170
cloud_password: ${{ secrets.BENCHMARK_CLOUD_PASSWORD }}
169171
cloud_gateway: ${{ secrets.BENCHMARK_CLOUD_GATEWAY }}
172+
database: ${{ matrix.database }}
170173
- name: clean
171174
if: always()
172175
continue-on-error: true
@@ -233,6 +236,7 @@ jobs:
233236
- "tpch"
234237
- "hits"
235238
- "load"
239+
- "tpch1000"
236240
# - "internal"
237241
steps:
238242
- uses: actions/checkout@v4
@@ -245,6 +249,20 @@ jobs:
245249
path: benchmark/clickbench/results
246250
pattern: benchmark-${{ matrix.dataset }}-*
247251
merge-multiple: true
252+
- name: Prepare results directory
253+
working-directory: benchmark/clickbench
254+
run: |
255+
shopt -s nullglob
256+
for result in results/*.json; do
257+
dataset=$(echo $result | sed -E 's/.*result-(\w+)-.*\.json/\1/')
258+
mkdir -p results/${dataset}/
259+
mv $result results/${dataset}/$(basename $result)
260+
done
261+
for ndjson in results/*.ndjson; do
262+
dataset=$(echo $ndjson | sed -E 's/.*result-(\w+)-.*\.ndjson/\1/')
263+
mkdir -p results/${dataset}/
264+
mv $ndjson results/${dataset}/$(basename $ndjson)
265+
done
248266
- name: Generate report and upload to R2
249267
working-directory: benchmark/clickbench
250268
env:
@@ -253,12 +271,6 @@ jobs:
253271
AWS_DEFAULT_REGION: auto
254272
AWS_ENDPOINT_URL: ${{ secrets.R2_ENDPOINT_URL }}
255273
run: |
256-
for result in results/*.json; do
257-
dataset=$(echo $result | sed -E 's/.*result-(\w+)-.*\.json/\1/')
258-
mkdir -p results/${dataset}/
259-
mv $result results/${dataset}/$(basename $result)
260-
done
261-
262274
aws s3 sync s3://benchmark/clickbench/release/${{ matrix.dataset }}/$(date --date='-1 month' -u +%Y)/$(date --date='-1 month' -u +%m)/ ./results/${{ matrix.dataset }}/
263275
aws s3 sync s3://benchmark/clickbench/release/${{ matrix.dataset }}/$(date -u +%Y)/$(date -u +%m)/ ./results/${{ matrix.dataset }}/
264276
./update_results.py --dataset ${{ matrix.dataset }} --release ${{ inputs.source_id }}
@@ -271,3 +283,20 @@ jobs:
271283
done
272284
273285
aws s3 cp ./results/${{ matrix.dataset }}.html s3://benchmark/clickbench/release/${{ matrix.dataset }}.html --no-progress --checksum-algorithm=CRC32
286+
- name: Upload NDJSON archives to R2
287+
working-directory: benchmark/clickbench
288+
env:
289+
AWS_ACCESS_KEY_ID: ${{ secrets.R2_ACCESS_KEY_ID }}
290+
AWS_SECRET_ACCESS_KEY: ${{ secrets.R2_SECRET_ACCESS_KEY }}
291+
AWS_DEFAULT_REGION: auto
292+
AWS_ENDPOINT_URL: ${{ secrets.R2_ENDPOINT_URL }}
293+
DATASET: ${{ matrix.dataset }}
294+
run: |
295+
shopt -s nullglob
296+
MONTH=$(date -u +%Y-%m)
297+
ARCHIVE_PREFIX="s3://benchmark/results/${DATASET}/${MONTH}/"
298+
IMPORT_PREFIX="s3://benchmark/results/import/"
299+
for file in ./results/${DATASET}/*.ndjson; do
300+
aws s3 cp $file "${ARCHIVE_PREFIX}$(basename $file)" --no-progress --checksum-algorithm=CRC32
301+
aws s3 cp $file "${IMPORT_PREFIX}$(basename $file)" --no-progress --checksum-algorithm=CRC32
302+
done

0 commit comments

Comments (0)