Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
31 changes: 16 additions & 15 deletions .github/workflows/build.yml
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@ jobs:
runs-on: ubuntu-latest
strategy:
matrix:
python-version: ["3.9", "3.10", "3.11", "3.12", "3.13", "3.14"]
python-version: ["3.10", "3.11", "3.12", "3.13", "3.14"]
required-dependencies: ["minimum", "latest"]

name: py ${{ matrix.python-version }} with ${{ matrix.required-dependencies }} required deps
Expand All @@ -36,39 +36,40 @@ jobs:
- name: Maybe uninstall optional dependencies
# We uninstall pyarrow and vegafusion for one job to test that we have not
# accidentally introduced a hard dependency on these libraries.
# Uninstalling for Python 3.9 is an arbitrary choice.
# Uninstalling for Python 3.10 is an arbitrary choice.
# Also see https://github.com/vega/altair/pull/3114
if: ${{ matrix.python-version == '3.9' }}
if: ${{ matrix.python-version == '3.10' }}
run: |
uv pip uninstall pyarrow vegafusion vl-convert-python anywidget
- name: Maybe install lowest supported pandas version
# We install the lowest supported pandas version for one job to test that
# it still works. Downgrade to the oldest versions of pandas and numpy that include
# Python 3.9 wheels, so only run this job for Python 3.9
if: ${{ matrix.python-version == '3.9' }}
# Python 3.10 wheels, so only run this job for Python 3.10
if: ${{ matrix.python-version == '3.10' }}
run: |
uv pip install pandas==1.1.3 numpy==1.19.3
uv pip install pandas==1.3.4 numpy==1.21.2
- name: Test that schema generation has no effect
run: |
uv pip install vl-convert-python
python tools/generate_schema_wrapper.py
# This gets the paths of all files which were either deleted, modified
# or are not yet tracked by Git
files=`git ls-files --deleted --modified --others --exclude-standard`
files=$(git ls-files --deleted --modified --others --exclude-standard)
# Exclude dataset metadata that is regenerated by the script; parquet output
# can differ across platforms/Polars versions (binary non-determinism).
exclude_pattern='altair/datasets/_metadata/metadata\.parquet'
files_filtered=$(echo "$files" | grep -v -E "^${exclude_pattern}$" || true)
# Depending on the shell it can happen that 'files' contains empty
# lines which are filtered out in the for loop below
files_cleaned=()
for i in "${files[@]}"; do
while IFS= read -r i; do
# Skip empty items
if [ -z "$i" ]; then
continue
fi
# Add the rest of the elements to a new array
files_cleaned+=("${i}")
done
[ -z "$i" ] && continue
files_cleaned+=("$i")
done <<< "$files_filtered"
if [ ${#files_cleaned[@]} -gt 0 ]; then
echo "The code generation modified the following files:"
echo $files
printf '%s\n' "${files_cleaned[@]}"
git diff
exit 1
fi
Expand Down
2 changes: 1 addition & 1 deletion altair/utils/core.py
Original file line number Diff line number Diff line change
Expand Up @@ -392,7 +392,7 @@ def to_list_if_array(val):
# We can probably remove this part once we require pandas >= 1.0
col = df[col_name].astype(object)
df[col_name] = col.where(col.notnull(), None)
elif dtype_name == "string":
elif dtype_name in ("string", "str"):
# dedicated string datatype (since 1.0)
# https://pandas.pydata.org/pandas-docs/version/1.0.0/whatsnew/v1.0.0.html#dedicated-string-data-type
col = df[col_name].astype(object)
Expand Down
3 changes: 2 additions & 1 deletion pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -84,7 +84,8 @@ dev = [
"types-jsonschema",
"types-setuptools",
"geopandas>=0.14.3; python_version<\"3.14\"",
"polars>=0.20.3",
# Pin <1.38 until string_view/Utf8View breakage fixed (narwhals#3450, polars#26435)
"polars>=0.20.3,<1.38",
"taskipy>=1.14.1",
"tomli>=2.2.1",
]
Expand Down
5 changes: 2 additions & 3 deletions tests/test_datasets.py
Original file line number Diff line number Diff line change
Expand Up @@ -721,18 +721,17 @@ def test_pandas_date_parse(
if url.endswith(".json")
else {"parse_dates": date_columns}
)
kwds_empty: dict[str, Any] = {k: [] for k in kwds}
df_schema_derived: pd.DataFrame = load(name)
nw_schema = nw.from_native(df_schema_derived).schema
df_manually_specified: pd.DataFrame = load(name, **kwds)
df_dates_empty: pd.DataFrame = load(name, **kwds_empty)

assert set(date_columns).issubset(nw_schema)
for column in date_columns:
assert nw_schema[column] in {nw.Date, nw.Datetime}

assert nw_schema == nw.from_native(df_manually_specified).schema
assert nw_schema != nw.from_native(df_dates_empty).schema
# We do not assert that loading with parse_dates=[]/convert_dates=[] yields a
# different schema: backends may still infer date columns from the file.

# NOTE: Checking `polars` infers the same[1] as what `pandas` needs a hint for
# [1] Doesn't need to be exact, just recognize as *some kind* of date/datetime
Expand Down
9 changes: 6 additions & 3 deletions tests/utils/test_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -70,6 +70,10 @@ def test_sanitize_dataframe():
# Re-order the columns to match df
df2 = df2[df.columns]

# pandas doesn't properly recognize np.array(np.nan); use float64 so df matches read_json
df.iloc[0, df.columns.get_loc("o")] = np.nan
df["o"] = df["o"].astype(np.float64)

# Re-apply original types
for col in df:
if str(df[col].dtype).startswith("datetime"):
Expand All @@ -80,8 +84,6 @@ def test_sanitize_dataframe():
else:
df2[col] = df2[col].astype(df[col].dtype)

# pandas doesn't properly recognize np.array(np.nan), so change it here
df.iloc[0, df.columns.get_loc("o")] = np.nan
assert df.equals(df2)


Expand Down Expand Up @@ -263,7 +265,8 @@ def test_sanitize_string_dtype():
)

df_clean = sanitize_pandas_dataframe(df)
assert {col.dtype.name for _, col in df_clean.items()} == {"object"}
# pandas 3+ with pyarrow may leave .dtype.name as "str" in some cases
assert {col.dtype.name for _, col in df_clean.items()} <= {"object", "str"}

result_python = {col_name: list(col) for col_name, col in df_clean.items()}
assert result_python == {
Expand Down
8 changes: 6 additions & 2 deletions tests/vegalite/v6/test_api.py
Original file line number Diff line number Diff line change
Expand Up @@ -234,11 +234,15 @@ def Chart(data):
assert dct["data"] == {"name": "Foo"}


@pytest.mark.filterwarnings("ignore:'Y' is deprecated.*:FutureWarning")
def test_chart_infer_types():
try:
x_dates = pd.date_range("2012", periods=10, freq="YE")
except (ValueError, TypeError):
# Older pandas may not recognize "YE"; use "Y" (year-end) instead
x_dates = pd.date_range("2012", periods=10, freq="Y")
data = pd.DataFrame(
{
"x": pd.date_range("2012", periods=10, freq="Y"),
"x": x_dates,
"y": range(10),
"c": list("abcabcabca"),
"s": pd.Categorical([1, 2] * 5, categories=[2, 1], ordered=True),
Expand Down
2 changes: 1 addition & 1 deletion uv.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.