diff --git a/ci/kpi/bootkit-log.sh b/ci/kpi/bootkit-log.sh
index f5a93be85..f88e867a3 100755
--- a/ci/kpi/bootkit-log.sh
+++ b/ci/kpi/bootkit-log.sh
@@ -16,6 +16,8 @@ user_name="$3"
 password="$4"
 duration=600
 
+# Maximum number of log entries per Loki request (Loki default hard limit is 5000)
+LOKI_BATCH_LIMIT=5000
 
 echo "Cluster FQDN: $cluster_fqdn"
 echo "Project Name: $project_name"
@@ -55,26 +57,114 @@ echo "Port forwarding enabled"
 sleep 3
 EN_LOKI_URL="localhost:8087"
 
-echo "Start uOS_bootkitLogs"
-curl -s -G "http://${EN_LOKI_URL}/loki/api/v1/query_range" \
-    -H 'Accept: application/json' \
-    -H "X-Scope-OrgID: ${projectID}" \
-    --data-urlencode "start=${start_time}" \
-    --data-urlencode "end=${end_time}" \
-    --data-urlencode "direction=forward" \
-    --data-urlencode 'query={file_type="uOS_bootkitLogs"}' \
-    | jq -r '.data.result[]?.values[]? | .[]' > uOS_bootkit.log
+# fetch_loki_logs: Fetches all log entries from Loki for a given log stream,
+# paginating through results if the response hits the batch size limit.
+#
+# Pagination resumes 1 ns after the newest timestamp seen in the batch. The
+# newest timestamp is taken across ALL returned streams, because Loki groups
+# the entries by stream and the last array element is not necessarily the
+# latest one. Entries sharing that exact nanosecond but cut off by the limit
+# are not re-fetched.
+#
+# Globals (read):
+#   EN_LOKI_URL      : host:port of the Loki endpoint
+#   projectID        : value sent in the X-Scope-OrgID header
+#   LOKI_BATCH_LIMIT : maximum number of entries requested per batch
+#
+# Arguments:
+#   $1 - log_type    : Loki label selector value for file_type (e.g. "uOS_bootkitLogs")
+#   $2 - output_file : path to the output file where logs will be written
+#   $3 - range_start : start timestamp in nanoseconds (epoch)
+#   $4 - range_end   : end timestamp in nanoseconds (epoch)
+#
+# Returns:
+#   0 once pagination finished, 1 if a request failed or returned unusable
+#   data (entries written before the failure stay in the output file).
+fetch_loki_logs() {
+    local log_type="$1"
+    local output_file="$2"
+    local range_start="$3"
+    local range_end="$4"
+
+    local current_start="$range_start"
+    local batch_num=0
+    local total_entries=0
+    local response summary batch_count last_ts
+
+    # Truncate/create output file
+    : > "$output_file"
+
+    echo "Fetching logs for log_type=${log_type} ..."
+
+    while true; do
+        batch_num=$((batch_num + 1))
+        echo " Fetching batch #${batch_num} (start=${current_start}, end=${range_end}, limit=${LOKI_BATCH_LIMIT})..."
+
+        # --fail makes curl exit non-zero on HTTP errors so a Loki error body is
+        # never mistaken for an empty result set.
+        if ! response=$(curl -sS --fail -G "http://${EN_LOKI_URL}/loki/api/v1/query_range" \
+            -H 'Accept: application/json' \
+            -H "X-Scope-OrgID: ${projectID}" \
+            --data-urlencode "start=${current_start}" \
+            --data-urlencode "end=${range_end}" \
+            --data-urlencode "direction=forward" \
+            --data-urlencode "limit=${LOKI_BATCH_LIMIT}" \
+            --data-urlencode "query={file_type=\"${log_type}\"}"); then
+            echo " ERROR: Loki request failed in batch #${batch_num} for log_type=${log_type}" >&2
+            return 1
+        fi
+
+        # One jq pass yields "<entry count> <newest timestamp>". Timestamps stay
+        # strings: jq numbers are doubles and would round 19-digit nanosecond
+        # values, while equal-length digit strings order the same as numbers.
+        if ! summary=$(printf '%s\n' "$response" \
+            | jq -r '[.data.result[]?.values[]? | .[0]] | "\(length) \(max // "")"'); then
+            echo " ERROR: Unparseable Loki response in batch #${batch_num} for log_type=${log_type}" >&2
+            return 1
+        fi
+        batch_count=${summary%% *}
+        last_ts=${summary#* }
+
+        # Extract log lines and append to output file
+        printf '%s\n' "$response" | jq -r '.data.result[]?.values[]? | .[]' >> "$output_file"
+
+        total_entries=$((total_entries + batch_count))
+        echo " Batch #${batch_num}: received ${batch_count} entries (total so far: ${total_entries})"
+
+        # If fewer entries than the limit were returned, we have fetched all available data
+        if [ "$batch_count" -lt "$LOKI_BATCH_LIMIT" ]; then
+            echo " All entries fetched for log_type=${log_type} (total: ${total_entries})"
+            break
+        fi
+
+        # A non-numeric timestamp cannot be advanced; stop rather than loop forever
+        case "$last_ts" in
+            '' | *[!0-9]*)
+                echo " WARNING: Could not determine last timestamp in batch #${batch_num}, stopping pagination." >&2
+                break
+                ;;
+        esac
+
+        # Advance start to newest timestamp + 1 nanosecond to avoid re-fetching
+        current_start=$((last_ts + 1))
+
+        # Safety check: if the new start exceeds or equals the range end, stop
+        if [ "$current_start" -ge "$range_end" ]; then
+            echo " Reached end of time range, stopping pagination."
+            break
+        fi
+    done
+}
+
+echo "Start uOS_bootkitLogs"
+fetch_loki_logs "uOS_bootkitLogs" "uOS_bootkit.log" "$start_time" "$end_time" \
+    || echo "WARNING: uOS_bootkitLogs fetch did not complete" >&2
 cat uOS_bootkit.log || true
+
 
 echo "Start uOS_caddyLogs"
-curl -s -G "http://${EN_LOKI_URL}/loki/api/v1/query_range" \
-    -H 'Accept: application/json' \
-    -H "X-Scope-OrgID: ${projectID}" \
-    --data-urlencode "start=${start_time}" \
-    --data-urlencode "end=${end_time}" \
-    --data-urlencode "direction=forward" \
-    --data-urlencode 'query={file_type="uOS_caddyLogs"}' \
-    | jq -r '.data.result[]?.values[]? | .[]' > uOS_caddy.log
+fetch_loki_logs "uOS_caddyLogs" "uOS_caddy.log" "$start_time" "$end_time" \
+    || echo "WARNING: uOS_caddyLogs fetch did not complete" >&2
 cat uOS_caddy.log || true
 
 kill $(lsof -t -i :8087) 2>/dev/null || true