diff --git a/Dockerfile b/Dockerfile
index fe45f7e6c91f0..5676140c7b2ad 100644
--- a/Dockerfile
+++ b/Dockerfile
@@ -1639,7 +1639,8 @@ COPY <<"EOF" /clean-logs.sh
 set -euo pipefail
 
 readonly DIRECTORY="${AIRFLOW_HOME:-/usr/local/airflow}"
-readonly RETENTION="${AIRFLOW__LOG_RETENTION_DAYS:-15}"
+readonly RETENTION_DAYS="${AIRFLOW__LOG_RETENTION_DAYS:-15}"
+readonly RETENTION_MINUTES="${AIRFLOW__LOG_RETENTION_MINUTES:-}"
 readonly FREQUENCY="${AIRFLOW__LOG_CLEANUP_FREQUENCY_MINUTES:-15}"
 
 trap "exit" INT TERM
@@ -1649,11 +1650,19 @@ readonly EVERY=$((FREQUENCY*60))
 echo "Cleaning logs every $EVERY seconds"
 
 while true; do
-  echo "Trimming airflow logs to ${RETENTION} days."
-  find "${DIRECTORY}"/logs \
-    -type d -name 'lost+found' -prune -o \
-    -type f -mtime +"${RETENTION}" -name '*.log' -print0 | \
-    xargs -0 rm -f || true
+  if [[ "${RETENTION_MINUTES}" =~ ^[1-9][0-9]*$ ]]; then
+    echo "Trimming airflow logs older than ${RETENTION_MINUTES} minutes."
+    find "${DIRECTORY}"/logs \
+      -type d -name 'lost+found' -prune -o \
+      -type f -mmin +"${RETENTION_MINUTES}" -name '*.log' -print0 | \
+      xargs -0 rm -f || true
+  else
+    echo "Trimming airflow logs to ${RETENTION_DAYS} days."
+    find "${DIRECTORY}"/logs \
+      -type d -name 'lost+found' -prune -o \
+      -type f -mtime +"${RETENTION_DAYS}" -name '*.log' -print0 | \
+      xargs -0 rm -f || true
+  fi
 
   find "${DIRECTORY}"/logs -type d -empty -delete || true
 
diff --git a/airflow-core/newsfragments/61814.feature.rst b/airflow-core/newsfragments/61814.feature.rst
new file mode 100644
index 0000000000000..337e7facd864d
--- /dev/null
+++ b/airflow-core/newsfragments/61814.feature.rst
@@ -0,0 +1 @@
+* Add ``AIRFLOW__LOG_RETENTION_MINUTES`` and Helm chart ``logGroomerSidecar.retentionMinutes`` for minute-level log retention (previously days only). Minutes take precedence over days.
diff --git a/airflow-core/newsfragments/99999.feature.rst b/airflow-core/newsfragments/99999.feature.rst
new file mode 100644
index 0000000000000..337e7facd864d
--- /dev/null
+++ b/airflow-core/newsfragments/99999.feature.rst
@@ -0,0 +1 @@
+* Add ``AIRFLOW__LOG_RETENTION_MINUTES`` and Helm chart ``logGroomerSidecar.retentionMinutes`` for minute-level log retention (previously days only). Minutes take precedence over days.
diff --git a/airflow-core/tests/unit/charts/log_groomer.py b/airflow-core/tests/unit/charts/log_groomer.py
index fa66aed10e935..2e162114f2565 100644
--- a/airflow-core/tests/unit/charts/log_groomer.py
+++ b/airflow-core/tests/unit/charts/log_groomer.py
@@ -190,6 +190,41 @@ def test_log_groomer_retention_days_overrides(self, retention_days, retention_re
         else:
             assert len(jmespath.search("spec.template.spec.containers[1].env", docs[0])) == 2
 
+    @pytest.mark.parametrize(("retention_minutes", "retention_result"), [(None, None), (60, "60")])
+    def test_log_groomer_retention_minutes_overrides(self, retention_minutes, retention_result):
+        """Minutes retention renders its env var and suppresses the days env var."""
+        if self.obj_name == "dag-processor":
+            values = {
+                "dagProcessor": {
+                    "enabled": True,
+                    "logGroomerSidecar": {"retentionMinutes": retention_minutes},
+                }
+            }
+        else:
+            values = {f"{self.folder}": {"logGroomerSidecar": {"retentionMinutes": retention_minutes}}}
+
+        docs = render_chart(
+            values=values,
+            show_only=[f"templates/{self.folder}/{self.obj_name}-deployment.yaml"],
+        )
+
+        # Compare by variable name rather than by env length: the number of entries depends
+        # on which other defaults (e.g. retentionDays) are still rendered.
+        env_names = jmespath.search("spec.template.spec.containers[1].env[*].name", docs[0]) or []
+
+        if retention_result:
+            assert (
+                jmespath.search(
+                    "spec.template.spec.containers[1].env[?name=='AIRFLOW__LOG_RETENTION_MINUTES'].value | [0]",
+                    docs[0],
+                )
+                == retention_result
+            )
+            # Minutes take precedence, so the days variable must not be rendered alongside it.
+            assert "AIRFLOW__LOG_RETENTION_DAYS" not in env_names
+        else:
+            assert "AIRFLOW__LOG_RETENTION_MINUTES" not in env_names
+
     @pytest.mark.parametrize(("frequency_minutes", "frequency_result"), [(None, None), (20, "20")])
     def test_log_groomer_frequency_minutes_overrides(self, frequency_minutes, frequency_result):
         if self.obj_name == "dag-processor":
diff --git 
a/chart/templates/dag-processor/dag-processor-deployment.yaml b/chart/templates/dag-processor/dag-processor-deployment.yaml
index d2a8254397fa3..da08452cc0d73 100644
--- a/chart/templates/dag-processor/dag-processor-deployment.yaml
+++ b/chart/templates/dag-processor/dag-processor-deployment.yaml
@@ -211,9 +211,14 @@ spec:
           args: {{- tpl (toYaml .Values.dagProcessor.logGroomerSidecar.args) . | nindent 12 }}
           {{- end }}
           env:
-          {{- if .Values.dagProcessor.logGroomerSidecar.retentionDays }}
+          {{- /* Retention: retentionMinutes takes precedence over retentionDays, */}}
+          {{- /* so at most one of the two variables below is rendered. */}}
+          {{- if .Values.dagProcessor.logGroomerSidecar.retentionMinutes }}
+            - name: AIRFLOW__LOG_RETENTION_MINUTES
+              value: "{{ .Values.dagProcessor.logGroomerSidecar.retentionMinutes }}"
+          {{- else if .Values.dagProcessor.logGroomerSidecar.retentionDays }}
             - name: AIRFLOW__LOG_RETENTION_DAYS
               value: "{{ .Values.dagProcessor.logGroomerSidecar.retentionDays }}"
           {{- end }}
           {{- if .Values.dagProcessor.logGroomerSidecar.frequencyMinutes }}
             - name: AIRFLOW__LOG_CLEANUP_FREQUENCY_MINUTES
diff --git a/chart/templates/scheduler/scheduler-deployment.yaml b/chart/templates/scheduler/scheduler-deployment.yaml
index 8fc73b93c428a..d1e625accf124 100644
--- a/chart/templates/scheduler/scheduler-deployment.yaml
+++ b/chart/templates/scheduler/scheduler-deployment.yaml
@@ -286,9 +286,14 @@ spec:
           args: {{- tpl (toYaml .Values.scheduler.logGroomerSidecar.args) . 
| nindent 12 }}
           {{- end }}
           env:
-          {{- if .Values.scheduler.logGroomerSidecar.retentionDays }}
+          {{- /* Retention: retentionMinutes takes precedence over retentionDays, */}}
+          {{- /* so at most one of the two variables below is rendered. */}}
+          {{- if .Values.scheduler.logGroomerSidecar.retentionMinutes }}
+            - name: AIRFLOW__LOG_RETENTION_MINUTES
+              value: "{{ .Values.scheduler.logGroomerSidecar.retentionMinutes }}"
+          {{- else if .Values.scheduler.logGroomerSidecar.retentionDays }}
             - name: AIRFLOW__LOG_RETENTION_DAYS
               value: "{{ .Values.scheduler.logGroomerSidecar.retentionDays }}"
           {{- end }}
           {{- if .Values.scheduler.logGroomerSidecar.frequencyMinutes }}
             - name: AIRFLOW__LOG_CLEANUP_FREQUENCY_MINUTES
diff --git a/chart/templates/triggerer/triggerer-deployment.yaml b/chart/templates/triggerer/triggerer-deployment.yaml
index b3287945058d9..cb5ed32b634c9 100644
--- a/chart/templates/triggerer/triggerer-deployment.yaml
+++ b/chart/templates/triggerer/triggerer-deployment.yaml
@@ -246,9 +246,14 @@ spec:
           args: {{- tpl (toYaml .Values.triggerer.logGroomerSidecar.args) . | nindent 12 }}
           {{- end }}
           env:
-          {{- if .Values.triggerer.logGroomerSidecar.retentionDays }}
+          {{- /* Retention: retentionMinutes takes precedence over retentionDays, */}}
+          {{- /* so at most one of the two variables below is rendered. */}}
+          {{- if .Values.triggerer.logGroomerSidecar.retentionMinutes }}
+            - name: AIRFLOW__LOG_RETENTION_MINUTES
+              value: "{{ .Values.triggerer.logGroomerSidecar.retentionMinutes }}"
+          {{- else if .Values.triggerer.logGroomerSidecar.retentionDays }}
             - name: AIRFLOW__LOG_RETENTION_DAYS
               value: "{{ .Values.triggerer.logGroomerSidecar.retentionDays }}"
           {{- end }}
           {{- if .Values.triggerer.logGroomerSidecar.frequencyMinutes }}
             - name: AIRFLOW__LOG_CLEANUP_FREQUENCY_MINUTES
diff --git a/chart/templates/workers/worker-deployment.yaml b/chart/templates/workers/worker-deployment.yaml
index 0b628f24cc5d0..6c09e786ec5a9 100644
--- a/chart/templates/workers/worker-deployment.yaml
+++ b/chart/templates/workers/worker-deployment.yaml
@@ -357,9 +357,14 @@ spec:
           args: {{ tpl (toYaml .Values.workers.logGroomerSidecar.args) . 
| nindent 12 }}
           {{- end }}
           env:
-          {{- if .Values.workers.logGroomerSidecar.retentionDays }}
+          {{- /* Retention: retentionMinutes takes precedence over retentionDays, */}}
+          {{- /* so at most one of the two variables below is rendered. */}}
+          {{- if .Values.workers.logGroomerSidecar.retentionMinutes }}
+            - name: AIRFLOW__LOG_RETENTION_MINUTES
+              value: "{{ .Values.workers.logGroomerSidecar.retentionMinutes }}"
+          {{- else if .Values.workers.logGroomerSidecar.retentionDays }}
             - name: AIRFLOW__LOG_RETENTION_DAYS
               value: "{{ .Values.workers.logGroomerSidecar.retentionDays }}"
           {{- end }}
           {{- if .Values.workers.logGroomerSidecar.frequencyMinutes }}
             - name: AIRFLOW__LOG_CLEANUP_FREQUENCY_MINUTES
diff --git a/chart/values.schema.json b/chart/values.schema.json
index 25fe563f24b2d..ca31ad8583cff 100644
--- a/chart/values.schema.json
+++ b/chart/values.schema.json
@@ -13727,6 +13727,11 @@
                     "type": "integer",
                     "default": 15
                 },
+                "retentionMinutes": {
+                    "description": "Number of minutes to retain the logs when running the Airflow log groomer sidecar. Overrides retentionDays when greater than 0; the default 0 keeps day-based retention.",
+                    "type": "integer",
+                    "default": 0
+                },
                 "frequencyMinutes": {
                     "description": "Number of minutes between attempts to groom the Airflow logs in log groomer sidecar.",
                     "type": "integer",
diff --git a/chart/values.yaml b/chart/values.yaml
index 350d352f9b44f..ddca2b695924b 100644
--- a/chart/values.yaml
+++ b/chart/values.yaml
@@ -981,6 +981,9 @@ workers:
     # Number of days to retain logs
     retentionDays: 15
 
+    # Number of minutes to retain logs. Overrides retentionDays when greater than 0.
+    retentionMinutes: 0
+
     # Frequency to attempt to groom logs (in minutes)
     frequencyMinutes: 15
 
diff --git a/helm-tests/tests/chart_utils/log_groomer.py b/helm-tests/tests/chart_utils/log_groomer.py
index a5aede282af6b..32d201d1dbb17 100644
--- a/helm-tests/tests/chart_utils/log_groomer.py
+++ b/helm-tests/tests/chart_utils/log_groomer.py
@@ -191,6 +191,41 @@ def test_log_groomer_retention_days_overrides(self, retention_days, retention_re
         else:
             assert len(jmespath.search("spec.template.spec.containers[1].env", docs[0])) == 2
 
+    @pytest.mark.parametrize(("retention_minutes", "retention_result"), [(None, None), (60, "60")])
+    def test_log_groomer_retention_minutes_overrides(self, retention_minutes, retention_result):
+        """Minutes retention renders its env var and suppresses the days env var."""
+        if self.obj_name == "dag-processor":
+            values = {
+                "dagProcessor": {
+                    "enabled": True,
+                    "logGroomerSidecar": {"retentionMinutes": retention_minutes},
+                }
+            }
+        else:
+            values = {f"{self.folder}": {"logGroomerSidecar": {"retentionMinutes": retention_minutes}}}
+
+        docs = render_chart(
+            values=values,
+            show_only=[f"templates/{self.folder}/{self.obj_name}-deployment.yaml"],
+        )
+
+        # Compare by variable name rather than by env length: the number of entries depends
+        # on which other defaults (e.g. retentionDays) are still rendered.
+        env_names = jmespath.search("spec.template.spec.containers[1].env[*].name", docs[0]) or []
+
+        if retention_result:
+            assert (
+                jmespath.search(
+                    "spec.template.spec.containers[1].env[?name=='AIRFLOW__LOG_RETENTION_MINUTES'].value | [0]",
+                    docs[0],
+                )
+                == retention_result
+            )
+            # Minutes take precedence, so the days variable must not be rendered alongside it.
+            assert "AIRFLOW__LOG_RETENTION_DAYS" not in env_names
+        else:
+            assert "AIRFLOW__LOG_RETENTION_MINUTES" not in env_names
+
     @pytest.mark.parametrize(("frequency_minutes", "frequency_result"), [(None, None), (20, "20")])
     def test_log_groomer_frequency_minutes_overrides(self, frequency_minutes, frequency_result):
         if self.obj_name == "dag-processor":
diff --git a/scripts/docker/clean-logs.sh b/scripts/docker/clean-logs.sh
index 063b0a985b685..6fd6f7a21341b 100755
--- a/scripts/docker/clean-logs.sh
+++ b/scripts/docker/clean-logs.sh
@@ -20,7 +20,10 @@
 set -euo pipefail
 
 readonly DIRECTORY="${AIRFLOW_HOME:-/usr/local/airflow}"
-readonly RETENTION="${AIRFLOW__LOG_RETENTION_DAYS:-15}"
+# Allow retention to be specified in either 
days (legacy) or minutes.
+# Minutes take precedence when set to a positive integer.
+readonly RETENTION_DAYS="${AIRFLOW__LOG_RETENTION_DAYS:-15}"
+readonly RETENTION_MINUTES="${AIRFLOW__LOG_RETENTION_MINUTES:-}"
 readonly FREQUENCY="${AIRFLOW__LOG_CLEANUP_FREQUENCY_MINUTES:-15}"
 
 trap "exit" INT TERM
@@ -30,11 +33,21 @@ readonly EVERY=$((FREQUENCY*60))
 echo "Cleaning logs every $EVERY seconds"
 
 while true; do
-  echo "Trimming airflow logs to ${RETENTION} days."
-  find "${DIRECTORY}"/logs \
-    -type d -name 'lost+found' -prune -o \
-    -type f -mtime +"${RETENTION}" -name '*.log' -print0 | \
-    xargs -0 rm -f || true
+  # Pattern match instead of `-ge`: empty, zero and non-numeric values never reach
+  # arithmetic evaluation (which can abort under `set -u`) and fall back to days.
+  if [[ "${RETENTION_MINUTES}" =~ ^[1-9][0-9]*$ ]]; then
+    echo "Trimming airflow logs older than ${RETENTION_MINUTES} minutes."
+    find "${DIRECTORY}"/logs \
+      -type d -name 'lost+found' -prune -o \
+      -type f -mmin +"${RETENTION_MINUTES}" -name '*.log' -print0 | \
+      xargs -0 rm -f || true
+  else
+    echo "Trimming airflow logs to ${RETENTION_DAYS} days."
+    find "${DIRECTORY}"/logs \
+      -type d -name 'lost+found' -prune -o \
+      -type f -mtime +"${RETENTION_DAYS}" -name '*.log' -print0 | \
+      xargs -0 rm -f || true
+  fi
 
   find "${DIRECTORY}"/logs -type d -empty -delete || true
 