Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
288 changes: 111 additions & 177 deletions .github/workflows/lint.yml

Large diffs are not rendered by default.

38 changes: 29 additions & 9 deletions dataloom-backend/app/api/endpoints/projects.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,10 +17,11 @@
create_project,
delete_project,
get_recent_projects,
update_project_name,
)
from app.services.transformation_service import apply_logged_transformation
from app.utils.logging import get_logger
from app.utils.pandas_helpers import dataframe_to_response, read_csv_safe, save_csv_safe
from app.utils.pandas_helpers import csv_file_stats, dataframe_to_response, read_csv_safe, save_csv_safe
from app.utils.security import validate_upload_file

logger = get_logger(__name__)
Expand Down Expand Up @@ -76,15 +77,22 @@ async def get_project_details(project_id: uuid.UUID, db: Session = Depends(datab
def recent_projects(db: Session = Depends(database.get_db)):
"""Get the most recently modified projects."""
projects = get_recent_projects(db, limit=10)
return [
schemas.LastResponse(
project_id=p.project_id,
name=p.name,
description=p.description,
last_modified=p.last_modified,
out: list[schemas.LastResponse] = []
for p in projects:
stats = csv_file_stats(p.file_path)
out.append(
schemas.LastResponse(
project_id=p.project_id,
name=p.name,
description=p.description,
last_modified=p.last_modified,
upload_date=p.upload_date,
file_size_bytes=stats["file_size_bytes"],
row_count=stats["row_count"],
column_count=stats["column_count"],
)
)
for p in projects
]
return out


@router.post("/{project_id}/save", response_model=schemas.ProjectResponse)
Expand Down Expand Up @@ -206,6 +214,18 @@ async def revert_to_checkpoint(
}


@router.patch("/{project_id}", response_model=schemas.ProjectRenameResponse)
def rename_project(
    project_id: uuid.UUID,
    body: schemas.ProjectRenameRequest,
    db: Session = Depends(database.get_db),
):
    """Change the display name of an existing project.

    Args:
        project_id: Identifier of the project to rename (path parameter).
        body: Request payload carrying the new ``name``.
        db: Database session injected by FastAPI.

    Returns:
        A ``ProjectRenameResponse`` with the project's id and its name after
        the update.
    """
    # NOTE(review): presumably responds with HTTP 404 when the id is unknown —
    # confirm against get_project_or_404.
    target = get_project_or_404(project_id, db)
    update_project_name(db=db, project=target, name=body.name)
    return schemas.ProjectRenameResponse(
        project_id=target.project_id,
        name=target.name,
    )


@router.get("/{project_id}/export")
async def export_project(project_id: uuid.UUID, db: Session = Depends(database.get_db)):
"""Download the current working copy of a project as a CSV file."""
Expand Down
2 changes: 1 addition & 1 deletion dataloom-backend/app/main.py
Original file line number Diff line number Diff line change
Expand Up @@ -44,7 +44,7 @@ async def lifespan(app):
CORSMiddleware,
allow_origins=get_settings().cors_origins,
allow_credentials=True,
allow_methods=["GET", "POST", "PUT", "DELETE"],
allow_methods=["GET", "POST", "PUT", "PATCH", "DELETE"],
allow_headers=["*"],
)

Expand Down
25 changes: 25 additions & 0 deletions dataloom-backend/app/schemas.py
Original file line number Diff line number Diff line change
Expand Up @@ -306,6 +306,31 @@ class LastResponse(BaseModel):
name: str
description: str | None
last_modified: datetime.datetime
upload_date: datetime.datetime | None = None
file_size_bytes: int = 0
row_count: int = 0
column_count: int = 0

class Config:
from_attributes = True


class ProjectRenameRequest(BaseModel):
    """Payload accepted by the project rename endpoint."""

    # New display name; surrounding whitespace is removed by the validator below.
    name: str

    @field_validator("name")
    @classmethod
    def name_not_blank(cls, v: str) -> str:
        """Trim the name and reject values that are empty after trimming.

        Returns:
            The name with leading and trailing whitespace removed.

        Raises:
            ValueError: If nothing remains once whitespace is stripped.
        """
        trimmed = v.strip()
        if trimmed:
            return trimmed
        raise ValueError("name must not be empty")


class ProjectRenameResponse(BaseModel):
    """Body returned once a project has been renamed."""

    # Identifier of the project that was renamed.
    project_id: uuid.UUID
    # Display name of the project after the rename.
    name: str
12 changes: 12 additions & 0 deletions dataloom-backend/app/services/project_service.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
"""Database operations for projects, logs, and checkpoints."""

import uuid
from datetime import UTC, datetime

from sqlmodel import Session

Expand Down Expand Up @@ -56,6 +57,17 @@ def get_recent_projects(db: Session, limit: int = 3) -> list[models.Project]:
return db.query(models.Project).order_by(models.Project.last_modified.desc()).limit(limit).all()


def update_project_name(db: Session, project: models.Project, name: str) -> models.Project:
    """Persist a new display name for a project and refresh its modification time.

    Args:
        db: Open database session used to commit the change.
        project: The project record to modify (mutated in place).
        name: Desired display name; surrounding whitespace is stripped.

    Returns:
        The same project instance, refreshed from the database after commit.
    """
    # Current UTC time with the tzinfo dropped, i.e. a naive datetime.
    modified_at = datetime.now(UTC).replace(tzinfo=None)

    project.name = name.strip()
    project.last_modified = modified_at

    db.add(project)
    db.commit()
    db.refresh(project)

    logger.info("Renamed project: id=%s, name=%s", project.project_id, project.name)
    return project


def delete_project(db: Session, project: models.Project) -> None:
"""Delete a project record from the database.

Expand Down
17 changes: 15 additions & 2 deletions dataloom-backend/app/services/transformation_service.py
Original file line number Diff line number Diff line change
Expand Up @@ -232,13 +232,26 @@ def rename_column(df: pd.DataFrame, col_index: int, new_name: str) -> pd.DataFra

Returns:
DataFrame with the column renamed.

Raises:
TransformationError: If new_name is empty or whitespace.
TransformationError: If col_index is out of range.
TransformationError: If new_name already exists in df.columns
(unless new_name equals the current column name).
"""
if col_index < 0 or col_index >= len(df.columns):
raise TransformationError(f"Column index {col_index} out of range (0-{len(df.columns) - 1})")
raise TransformationError(
f"Column index {col_index} is out of range (DataFrame has {len(df.columns)} columns)."
)
if not new_name or not new_name.strip():
raise TransformationError("New column name cannot be empty")
raise TransformationError("New column name cannot be empty or whitespace.")

old_name = df.columns[col_index]

# Check if new_name already exists and is different from current column name
if new_name in df.columns and new_name != old_name:
raise TransformationError(f"Column '{new_name}' already exists. Please choose a different name.")

return df.rename(columns={old_name: new_name})


Expand Down
23 changes: 23 additions & 0 deletions dataloom-backend/app/utils/pandas_helpers.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,29 @@
from fastapi import HTTPException


def csv_file_stats(path: str | Path) -> dict[str, int]:
"""Return row count, column count, and file size for a CSV path.

Used for lightweight project summaries (e.g. recent projects list).
On read errors, row/column counts default to 0; file size is still returned if the path exists.

Args:
path: Path to the CSV file (typically the project working copy).

Returns:
Dict with keys ``row_count``, ``column_count``, ``file_size_bytes``.
"""
p = Path(path)
if not p.exists():
return {"row_count": 0, "column_count": 0, "file_size_bytes": 0}
size = int(p.stat().st_size)
try:
df = pd.read_csv(p)
return {"row_count": len(df), "column_count": len(df.columns), "file_size_bytes": size}
except Exception:
return {"row_count": 0, "column_count": 0, "file_size_bytes": size}


def read_csv_safe(path: Path) -> pd.DataFrame:
"""Read a CSV file safely with error handling.

Expand Down
Binary file modified dataloom-backend/test.db
Binary file not shown.
59 changes: 59 additions & 0 deletions dataloom-backend/tests/test_new_features.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@
cast_data_type,
rename_column,
)
from app.utils.pandas_helpers import csv_file_stats


@pytest.fixture
Expand Down Expand Up @@ -311,3 +312,61 @@ def test_delete_column_invalid_params(
},
)
assert response.status_code == expected_status


class TestCsvFileStats:
    """Unit tests for the ``csv_file_stats`` helper."""

    def test_stats_match_csv(self, tmp_path):
        """A small CSV reports its data-row count, column count, and byte size."""
        csv_path = tmp_path / "t.csv"
        csv_path.write_text("a,b\n1,2\n3,4\n", encoding="utf-8")

        result = csv_file_stats(csv_path)

        assert result["row_count"] == 2
        assert result["column_count"] == 2
        assert result["file_size_bytes"] == csv_path.stat().st_size

    def test_missing_file_returns_zeros(self):
        """A path that does not exist yields zero for every statistic."""
        result = csv_file_stats("/nonexistent/path/file.csv")

        for key in ("row_count", "column_count", "file_size_bytes"):
            assert result[key] == 0


class TestRenameAndRecentSummary:
    """End-to-end tests for project renaming and the recent-projects summary fields."""

    def test_rename_project(self, client, sample_csv, db):
        """Renaming via PATCH returns the new name and it shows up in the recent list."""
        with open(sample_csv, "rb") as csv_handle:
            upload = client.post(
                "/projects/upload",
                files={"file": ("test.csv", csv_handle, "text/csv")},
                data={"projectName": "Rename Me", "projectDescription": "rename test"},
            )
        assert upload.status_code == 200
        project_id = upload.json()["project_id"]

        renamed = client.patch(f"/projects/{project_id}", json={"name": "Renamed Project"})
        assert renamed.status_code == 200
        assert renamed.json()["name"] == "Renamed Project"

        listing = client.get("/projects/recent")
        assert listing.status_code == 200
        assert any(entry["name"] == "Renamed Project" for entry in listing.json())

    def test_recent_includes_summary_fields(self, client, sample_csv, db):
        """Each recent-project entry carries size, row/column counts, and upload date."""
        with open(sample_csv, "rb") as csv_handle:
            upload = client.post(
                "/projects/upload",
                files={"file": ("test.csv", csv_handle, "text/csv")},
                data={"projectName": "Summary Test", "projectDescription": "summary"},
            )
        assert upload.status_code == 200

        listing = client.get("/projects/recent")
        assert listing.status_code == 200
        entries = listing.json()
        assert len(entries) >= 1

        summary = next(entry for entry in entries if entry["name"] == "Summary Test")
        for field in ("file_size_bytes", "row_count", "column_count", "upload_date"):
            assert field in summary
        assert summary["row_count"] >= 1
        assert summary["column_count"] >= 1
assert p["column_count"] >= 1
24 changes: 24 additions & 0 deletions dataloom-backend/tests/test_transformations.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,7 @@
drop_duplicates,
fill_empty,
pivot_table,
rename_column,
)


Expand Down Expand Up @@ -185,3 +186,26 @@ def test_simple_pivot(self):
result = pivot_table(df, "city", "sales", aggfunc="sum")
assert "city" in result.columns
assert "sales" in result.columns


class TestRenameColumn:
    """Tests for duplicate-name handling in ``rename_column``."""

    def test_rename_column_to_existing_name(self, sample_df):
        """Renaming "age" (index 1) to the already-used "name" is rejected."""
        with pytest.raises(TransformationError, match="already exists"):
            rename_column(sample_df, 1, "name")

    def test_rename_column_to_same_name(self, sample_df):
        """Renaming a column to its current name is a no-op that succeeds."""
        renamed = rename_column(sample_df, 0, "name")

        assert renamed.shape == sample_df.shape
        pd.testing.assert_frame_equal(renamed, sample_df)

    def test_rename_column_case_sensitive(self, sample_df):
        """Names differing only by case are distinct, so "age" -> "Age" works."""
        renamed = rename_column(sample_df, 1, "Age")

        assert list(renamed.columns) == ["name", "Age", "city"]
        assert renamed.iloc[0]["Age"] == 30

    def test_rename_column_with_preexisting_duplicate(self):
        """A frame that already has duplicate labels still rejects a colliding rename."""
        frame = pd.DataFrame([[1, 2, 3]], columns=["name", "name", "age"])

        with pytest.raises(TransformationError, match="already exists"):
            rename_column(frame, 2, "name")
Loading
Loading