Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
38 changes: 12 additions & 26 deletions dataloom-backend/app/api/endpoints/projects.py
Original file line number Diff line number Diff line change
Expand Up @@ -93,38 +93,24 @@ async def save_project(
commit_message: str,
db: Session = Depends(database.get_db),
):
"""Save project changes as a checkpoint.
"""Save the current working dataset as a checkpoint.

Replays all pending transformations from the change log onto the original
file and creates a checkpoint record marking the save point.
The working copy already reflects the user's latest accepted transforms, so
checkpoint creation should preserve that current dataset and only update the
checkpoint/log metadata for pending actions.
"""
project = get_project_or_404(project_id, db)

# Load original file for replaying transformations
original_path = get_original_path(project.file_path)
df = read_csv_safe(original_path)

# Get all unapplied logs for this project
logs = (
db.query(models.ProjectChangeLog)
.filter(
models.ProjectChangeLog.project_id == project_id,
models.ProjectChangeLog.applied == False, # noqa: E712
if project.file_path == str(original_path):
logger.error(
"Project working copy unexpectedly points at original file: id=%s path=%s",
project_id,
project.file_path,
)
.order_by(models.ProjectChangeLog.timestamp)
.all()
)

# Replay each logged transformation on the original
for log in logs:
df = apply_logged_transformation(df, log.action_type, log.action_details)

# Write transformations to the working copy (.copy.csv), not the original dataset.
# original_path must remain immutable as the baseline used for transformation replay.
assert project.file_path != str(original_path), (
"Invariant violation: attempted to write transformed data to original_path."
)
save_csv_safe(df, project.file_path)
raise HTTPException(status_code=500, detail="Project working copy is misconfigured")

df = read_csv_safe(project.file_path)

# Create checkpoint (marks logs as applied)
checkpoint = create_checkpoint(db, project_id, commit_message)
Expand Down
52 changes: 52 additions & 0 deletions dataloom-backend/tests/test_save_revert.py
Original file line number Diff line number Diff line change
@@ -1,10 +1,19 @@
"""Tests for save and revert logic in the project service."""

import asyncio
import shutil

import pandas as pd

from app import models
from app.api.endpoints.projects import save_project
from app.services.project_service import (
create_checkpoint,
create_project,
log_transformation,
)
from app.services.transformation_service import add_column, rename_column
from app.utils.pandas_helpers import read_csv_safe, save_csv_safe


class TestCheckpoint:
Expand Down Expand Up @@ -41,3 +50,46 @@ def test_checkpoint_message(self, db):

checkpoint = create_checkpoint(db, project.project_id, "My save message")
assert checkpoint.message == "My save message"


class TestSaveEndpointRegressions:
    def test_second_save_preserves_previously_checkpointed_transforms(self, db, tmp_path):
        """Regression: a second save must not discard transforms captured by the first checkpoint."""
        source_csv = tmp_path / "sample.csv"
        working_csv = tmp_path / "sample_copy.csv"

        # Seed an original dataset and clone it into the project's working copy.
        seed = {
            "name": ["Alice", "Bob"],
            "age": [30, 25],
            "city": ["New York", "Los Angeles"],
        }
        pd.DataFrame(seed).to_csv(source_csv, index=False)
        shutil.copy2(source_csv, working_csv)

        project = create_project(db, "Cumulative Save", str(working_csv), "Regression for repeated saves")

        # First transform: rename column 1 to "years", persist it, and log the action.
        df = read_csv_safe(project.file_path)
        save_csv_safe(rename_column(df, 1, "years"), project.file_path)
        log_transformation(
            db,
            project.project_id,
            "renameCol",
            {"rename_col_params": {"col_index": 1, "new_name": "years"}},
        )

        after_first = asyncio.run(save_project(project.project_id, "first checkpoint", db))
        assert after_first["columns"] == ["name", "years", "city"]

        # Second transform: append a "country" column on top of the checkpointed state.
        df = read_csv_safe(project.file_path)
        save_csv_safe(add_column(df, 3, "country"), project.file_path)
        log_transformation(
            db,
            project.project_id,
            "addCol",
            {"add_col_params": {"index": 3, "name": "country"}},
        )

        # The second save must keep the earlier rename AND the new column,
        # both in the endpoint response and in the working file on disk.
        after_second = asyncio.run(save_project(project.project_id, "second checkpoint", db))
        expected = ["name", "years", "city", "country"]
        assert after_second["columns"] == expected
        assert read_csv_safe(project.file_path).columns.tolist() == expected
Loading