diff --git a/.github/workflows/cancel.yml b/.github/workflows/cancel.yml new file mode 100644 index 0000000000..88880a5306 --- /dev/null +++ b/.github/workflows/cancel.yml @@ -0,0 +1,13 @@ +name: Cancel Workflows on Push +on: + workflow_run: + workflows: ["Install and test"] + types: + - requested +jobs: + cancel: + runs-on: ubuntu-latest + steps: + - uses: styfle/cancel-workflow-action@0.9.1 + with: + workflow_id: ${{ github.event.workflow.id }} diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml new file mode 100644 index 0000000000..10740a5bfb --- /dev/null +++ b/.github/workflows/test.yml @@ -0,0 +1,182 @@ +name: Install and test + +on: + push: + branches: + - main + pull_request: + branches: + - main + +jobs: + code-quality: + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@v3 + - uses: actions/setup-python@v4 + - id: file_changes + uses: trilom/file-changes-action@v1.2.4 + with: + output: " " + - name: List changed files + run: echo '${{ steps.file_changes.outputs.files}}' + - uses: pre-commit/action@v3.0.0 + with: + extra_args: --files ${{ steps.file_changes.outputs.files}} + - name: Check for missing init files + run: build_tools/fail_on_missing_init_files.sh + shell: bash + + run-notebook-examples: + needs: code-quality + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@v3 + - name: Set up Python + uses: actions/setup-python@v4 + with: + python-version: 3.9 + - name: Install dependencies + run: | + python -m pip install --upgrade pip + python -m pip install .[all_extras,binder,dev,mlflow] + - name: Run example notebooks + run: build_tools/run_examples.sh + shell: bash + + test-nosoftdeps: + needs: code-quality + runs-on: ubuntu-20.04 + steps: + - uses: actions/checkout@v3 + - name: Set up Python + uses: actions/setup-python@v4 + with: + python-version: 3.9 + - name: Display Python version + run: python -c "import sys; print(sys.version)" + + - name: Install sktime and dependencies + run: | + python -m pip install 
.[dev] + + - name: Show dependencies + run: python -m pip list + + - name: Run tests + run: make PYTESTOPTIONS="--cov --cov-report=xml --timeout=600" test_softdeps + + test-nosoftdeps-full: + needs: test-nosoftdeps + runs-on: ubuntu-20.04 + steps: + - uses: actions/checkout@v3 + - name: Set up Python + uses: actions/setup-python@v4 + with: + python-version: 3.9 + - name: Display Python version + run: python -c "import sys; print(sys.version)" + + - name: Install sktime and dependencies + run: | + python -m pip install .[dev] + + - name: Show dependencies + run: python -m pip list + + - name: Run tests + run: make PYTESTOPTIONS="--cov --cov-report=xml --timeout=600" test_softdeps_full + + test-mlflow: + needs: test-nosoftdeps + runs-on: ubuntu-20.04 + steps: + - uses: actions/checkout@v3 + - name: Set up Python + uses: actions/setup-python@v4 + with: + python-version: 3.9 + - name: Display Python version + run: python -c "import sys; print(sys.version)" + + - name: Install sktime and dependencies + run: | + python -m pip install .[all_extras,dev,mlflow_tests] --no-cache-dir + + - name: Show dependencies + run: python -m pip list + + - name: Run tests + run: make PYTESTOPTIONS="--cov --cov-report=xml --timeout=600" test_mlflow + + test-windows: + needs: test-nosoftdeps + runs-on: windows-2019 + strategy: + matrix: + python-version: ['3.7', '3.8', '3.9', '3.10'] + + steps: + - uses: actions/checkout@v3 + + - uses: conda-incubator/setup-miniconda@v2 + with: + auto-update-conda: true + python-version: ${{ matrix.python-version }} + channels: anaconda, conda-forge, + + - run: conda --version + - run: which python + + - name: Fix windows paths + if: ${{ runner.os == 'Windows' }} + run: echo "C:\Program Files (x86)\Windows Kits\10\bin\10.0.18362.0\x64" | Out-File -FilePath $env:GITHUB_PATH -Encoding utf8 -Append + + - name: Install conda libpython + run: conda install -c anaconda libpython + + - name: Install sktime and dependencies + run: python -m pip install 
.[all_extras,dev] + + - name: Show dependencies + run: python -m pip list + + - name: Run tests + run: | + mkdir -p testdir/ + cp .coveragerc testdir/ + cp setup.cfg testdir/ + python -m pytest + + - name: Publish code coverage + uses: codecov/codecov-action@v3 + + test-unix: + needs: test-nosoftdeps + strategy: + matrix: + python-version: ['3.7', '3.8', '3.9', '3.10'] + os: [ubuntu-20.04, macOS-11] + runs-on: ${{ matrix.os }} + steps: + - uses: actions/checkout@v3 + - name: Set up Python + uses: actions/setup-python@v4 + with: + python-version: ${{ matrix.python-version }} + - name: Display Python version + run: python -c "import sys; print(sys.version)" + + - name: Install sktime and dependencies + run: | + python -m pip install .[all_extras,dev] --no-cache-dir + + - name: Show dependencies + run: python -m pip list + + - name: Run tests + run: make test + + - name: Publish code coverage + uses: codecov/codecov-action@v3 diff --git a/.github/workflows/update_contributors.yml b/.github/workflows/update_contributors.yml new file mode 100644 index 0000000000..629df68e1d --- /dev/null +++ b/.github/workflows/update_contributors.yml @@ -0,0 +1,33 @@ +name: Update Contributors + +on: + push: + branches: + - main + paths: + - '.all-contributorsrc' + +jobs: + generate-markdown-and-commit: + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@v3 + - name: Set up Node + uses: actions/setup-node@v3 + with: + node-version: 18 + - name: Set up tool + run: npm install -g all-contributors-cli@6.24.0 + - name: Generate file + id: generate + run: npx all-contributors generate + - name: commit-and-push + id: candp + uses: stefanzweifel/git-auto-commit-action@v4 + with: + commit_message: '[AUTOMATED] update CONTRIBUTORS.md' + file_pattern: 'CONTRIBUTORS.md' + commit_user_name: github-actions[bot] + - name: Echo Results + if: steps.candp.outputs.changes_detected == 'true' + run: echo "changes detected and committed." 
diff --git a/.github/workflows/wheels.yml b/.github/workflows/wheels.yml new file mode 100644 index 0000000000..91bc0ce21b --- /dev/null +++ b/.github/workflows/wheels.yml @@ -0,0 +1,151 @@ +name: Build wheels and publish to PyPI + +on: + release: + types: [published] + +jobs: + build_wheels: + name: Build wheels + runs-on: ubuntu-20.04 + + steps: + - uses: actions/checkout@v3 + + - uses: actions/setup-python@v4 + with: + python-version: '3.10' + + - name: Build wheel + run: | + python -m pip install build + python -m build --wheel --sdist --outdir wheelhouse + + - name: Store wheels + uses: actions/upload-artifact@v2 + with: + name: wheels + path: wheelhouse/* + + test_unix_wheels: + needs: build_wheels + name: Test wheels on ${{ matrix.os }} with ${{ matrix.python-version }} + runs-on: ${{ matrix.os }} + strategy: + matrix: + os: [ubuntu-20.04, macOS-11] + python-version: ['3.7', '3.8', '3.9', '3.10'] + + steps: + - uses: actions/checkout@v3 + - uses: actions/setup-python@v4 + with: + python-version: ${{ matrix.python-version }} + + - uses: actions/download-artifact@v2 + with: + name: wheels + path: wheelhouse + + - name: Get wheel filename + run: echo "WHEELNAME=$(ls ./wheelhouse/sktime-*none-any.whl)" >> $GITHUB_ENV + + - name: Install wheel and extras + run: python -m pip install "${{ env.WHEELNAME }}[all_extras,dev]" + + - name: Run tests + run: make test + + test_windows_wheels: + needs: build_wheels + name: Test wheels on ${{ matrix.os }} with ${{ matrix.python-version }} + runs-on: windows-2019 + strategy: + matrix: + include: + # Windows 64 bit + - os: windows-2019 + python: 37 + python-version: '3.7' + bitness: 64 + platform_id: win_amd64 + - os: windows-2019 + python: 38 + python-version: '3.8' + bitness: 64 + platform_id: win_amd64 + - os: windows-2019 + python: 39 + python-version: '3.9' + bitness: 64 + platform_id: win_amd64 + - os: windows-2019 + python: 310 + python-version: '3.10' + bitness: 64 + platform_id: win_amd64 + - os: windows-2019 +
python: 39 + python-version: 3.9 + bitness: 64 + platform_id: win_amd64 + + steps: + - uses: actions/checkout@v3 + - uses: conda-incubator/setup-miniconda@v2 + with: + activate-environment: test + auto-update-conda: true + python-version: ${{ matrix.python-version }} + channels: anaconda, conda-forge, + + - run: conda --version + - run: which python + + - uses: actions/download-artifact@v2 + with: + name: wheels + path: wheelhouse + + - name: Install conda libpython + run: conda install -c anaconda -n test -y libpython + + - name: Display downloaded artifacts + run: ls -l wheelhouse + + - name: Get wheel filename + run: echo "WHEELNAME=$(ls ./wheelhouse/sktime-*none-any.whl)" >> $env:GITHUB_ENV + + - name: Activate conda env + run: conda activate test + + - name: Install wheel and extras + run: python -m pip install "${env:WHEELNAME}[all_extras,dev]" + + - name: Show conda packages + run: conda list -n test + + - name: Run tests + run: | + mkdir -p testdir/ + cp .coveragerc testdir/ + cp setup.cfg testdir/ + python -m pytest + + upload_wheels: + name: Upload wheels to PyPI + runs-on: ubuntu-latest + needs: [build_wheels,test_unix_wheels,test_windows_wheels] + + steps: + - uses: actions/download-artifact@v2 + with: + name: wheels + path: wheelhouse + + - name: Publish package to PyPI + uses: pypa/gh-action-pypi-publish@release/v1 + with: + user: ${{ secrets.PYPI_USERNAME }} + password: ${{ secrets.PYPI_PASSWORD }} + packages_dir: wheelhouse/ diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index e3cb1276bd..1f0b195b06 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -59,11 +59,3 @@ repos: hooks: - id: pydocstyle args: ["--config=setup.cfg"] - - # We use the Python version instead of the original version which seems to require Docker - # https://github.com/koalaman/shellcheck-precommit - - repo: https://github.com/shellcheck-py/shellcheck-py - rev: v0.8.0.4 - hooks: - - id: shellcheck - name: shellcheck diff --git a/LICENSE 
b/LICENSE index 98de2cf3e2..595f47501e 100644 --- a/LICENSE +++ b/LICENSE @@ -1,6 +1,6 @@ BSD 3-Clause License -Copyright (c) 2019 - 2020 The sktime developers. +Copyright (c) 2023 The scikit-time developers. All rights reserved. diff --git a/README.md b/README.md index a6c2d488a3..bcec9c56fa 100644 --- a/README.md +++ b/README.md @@ -1,203 +1 @@ - - -# Welcome to sktime - -> A unified interface for machine learning with time series - -:rocket: **Version 0.16.0 out now!** [Check out the release notes here](https://www.sktime.org/en/latest/changelog.html). - -sktime is a library for time series analysis in Python. It provides a unified interface for multiple time series learning tasks. Currently, this includes time series classification, regression, clustering, annotation and forecasting. It comes with [time series algorithms](https://www.sktime.org/en/stable/estimator_overview.html) and [scikit-learn] compatible tools to build, tune and validate time series models. - -[scikit-learn]: https://scikit-learn.org/stable/ - -| Overview | | -|---|---| -| **CI/CD** | [![github-actions](https://img.shields.io/github/actions/workflow/status/sktime/sktime/wheels.yml?logo=github)](https://github.com/sktime/sktime/actions/workflows/wheels.yml) [![!codecov](https://img.shields.io/codecov/c/github/sktime/sktime?label=codecov&logo=codecov)](https://codecov.io/gh/sktime/sktime) [![readthedocs](https://img.shields.io/readthedocs/sktime?logo=readthedocs)](https://www.sktime.org/en/latest/?badge=latest) [![platform](https://img.shields.io/conda/pn/conda-forge/sktime)](https://github.com/sktime/sktime) | -| **Code** | [![!pypi](https://img.shields.io/pypi/v/sktime?color=orange)](https://pypi.org/project/sktime/) [![!conda](https://img.shields.io/conda/vn/conda-forge/sktime)](https://anaconda.org/conda-forge/sktime) [![!python-versions](https://img.shields.io/pypi/pyversions/sktime)](https://www.python.org/) 
[![!black](https://img.shields.io/badge/code%20style-black-000000.svg)](https://github.com/psf/black) [![Binder](https://mybinder.org/badge_logo.svg)](https://mybinder.org/v2/gh/sktime/sktime/main?filepath=examples) | -| **Downloads**| [![Downloads](https://static.pepy.tech/personalized-badge/sktime?period=week&units=international_system&left_color=grey&right_color=blue&left_text=weekly%20(pypi))](https://pepy.tech/project/sktime) [![Downloads](https://static.pepy.tech/personalized-badge/sktime?period=month&units=international_system&left_color=grey&right_color=blue&left_text=monthly%20(pypi))](https://pepy.tech/project/sktime) [![Downloads](https://static.pepy.tech/personalized-badge/sktime?period=total&units=international_system&left_color=grey&right_color=blue&left_text=cumulative%20(pypi))](https://pepy.tech/project/sktime) | -| **Community** | [![!slack](https://img.shields.io/static/v1?logo=slack&label=slack&message=chat&color=lightgreen)](https://join.slack.com/t/sktime-group/shared_invite/zt-1cghagwee-sqLJ~eHWGYgzWbqUX937ig) [![!discord](https://img.shields.io/static/v1?logo=discord&label=discord&message=chat&color=lightgreen)](https://discord.com/invite/gqSab2K) [![!slack](https://img.shields.io/static/v1?logo=linkedin&label=LinkedIn&message=news&color=lightblue)](https://www.linkedin.com/company/sktime/) [![!twitter](https://img.shields.io/static/v1?logo=twitter&label=Twitter&message=news&color=lightblue)](https://twitter.com/sktime_toolbox) [![!youtube](https://img.shields.io/static/v1?logo=youtube&label=YouTube&message=tutorials&color=red)](https://www.youtube.com/playlist?list=PLKs3UgGjlWHqNzu0LEOeLKvnjvvest2d0) | -| **Citation** | [![!zenodo](https://zenodo.org/badge/DOI/10.5281/zenodo.3749000.svg)](https://doi.org/10.5281/zenodo.3749000) | - -## :books: Documentation - -| Documentation | | -| -------------------------- | -------------------------------------------------------------- | -| :star: **[Tutorials]** | New to sktime? 
Here's everything you need to know! | -| :clipboard: **[Binder Notebooks]** | Example notebooks to play with in your browser. | -| :woman_technologist: **[User Guides]** | How to use sktime and its features. | -| :scissors: **[Extension Templates]** | How to build your own estimator using sktime's API. | -| :control_knobs: **[API Reference]** | The detailed reference for sktime's API. | -| :tv: **[Video Tutorial]** | Our video tutorial from 2021 PyData Global. | -| :hammer_and_wrench: **[Changelog]** | Changes and version history. | -| :deciduous_tree: **[Roadmap]** | sktime's software and community development plan. | -| :pencil: **[Related Software]** | A list of related software. | - -[tutorials]: https://www.sktime.org/en/latest/tutorials.html -[binder notebooks]: https://mybinder.org/v2/gh/sktime/sktime/main?filepath=examples -[user guides]: https://www.sktime.org/en/latest/user_guide.html -[video tutorial]: https://github.com/sktime/sktime-tutorial-pydata-global-2021 -[api reference]: https://www.sktime.org/en/latest/api_reference.html -[changelog]: https://www.sktime.org/en/latest/changelog.html -[roadmap]: https://www.sktime.org/en/latest/roadmap.html -[related software]: https://www.sktime.org/en/latest/related_software.html - -## :speech_balloon: Where to ask questions - -Questions and feedback are extremely welcome! Please understand that we won't be able to provide individual support via email. We also believe that help is much more valuable if it's shared publicly, so that more people can benefit from it. 
- -| Type | Platforms | -| ------------------------------- | --------------------------------------- | -| :bug: **Bug Reports** | [GitHub Issue Tracker] | -| :sparkles: **Feature Requests & Ideas** | [GitHub Issue Tracker] | -| :woman_technologist: **Usage Questions** | [GitHub Discussions] · [Stack Overflow] | -| :speech_balloon: **General Discussion** | [GitHub Discussions] | -| :factory: **Contribution & Development** | [Slack], contributors channel · [Discord] | -| :globe_with_meridians: **Community collaboration session** | [Discord] - Fridays 1pm UTC, dev/meet-ups channel | - -[github issue tracker]: https://github.com/sktime/sktime/issues -[github discussions]: https://github.com/sktime/sktime/discussions -[stack overflow]: https://stackoverflow.com/questions/tagged/sktime -[discord]: https://discord.com/invite/gqSab2K -[slack]: https://join.slack.com/t/sktime-group/shared_invite/zt-1cghagwee-sqLJ~eHWGYgzWbqUX937ig - -## :dizzy: Features -Our aim is to make the time series analysis ecosystem more interoperable and usable as a whole. sktime provides a __unified interface for distinct but related time series learning tasks__. It features [__dedicated time series algorithms__](https://www.sktime.org/en/stable/estimator_overview.html) and __tools for composite model building__ including pipelining, ensembling, tuning and reduction that enables users to apply an algorithm for one task to another. - -sktime also provides **interfaces to related libraries**, for example [scikit-learn], [statsmodels], [tsfresh], [PyOD] and [fbprophet], among others. - -For **deep learning**, see our companion package: [sktime-dl](https://github.com/sktime/sktime-dl). 
- -[statsmodels]: https://www.statsmodels.org/stable/index.html -[tsfresh]: https://tsfresh.readthedocs.io/en/latest/ -[pyod]: https://pyod.readthedocs.io/en/latest/ -[fbprophet]: https://facebook.github.io/prophet/ - -| Module | Status | Links | -|---|---|---| -| **[Forecasting]** | stable | [Tutorial](https://www.sktime.org/en/latest/examples/01_forecasting.html) · [API Reference](https://www.sktime.org/en/latest/api_reference.html#sktime-forecasting-time-series-forecasting) · [Extension Template](https://github.com/sktime/sktime/blob/main/extension_templates/forecasting.py) | -| **[Time Series Classification]** | stable | [Tutorial](https://github.com/sktime/sktime/blob/main/examples/02_classification.ipynb) · [API Reference](https://www.sktime.org/en/latest/api_reference.html#sktime-classification-time-series-classification) · [Extension Template](https://github.com/sktime/sktime/blob/main/extension_templates/classification.py) | -| **[Time Series Regression]** | stable | [API Reference](https://www.sktime.org/en/latest/api_reference.html#sktime-classification-time-series-regression) | -| **[Transformations]** | stable | [API Reference](https://www.sktime.org/en/latest/api_reference.html#sktime-transformations-time-series-transformers) · [Extension Template](https://github.com/sktime/sktime/blob/main/extension_templates/transformer.py) | -| **[Time Series Clustering]** | maturing | [Extension Template](https://github.com/sktime/sktime/blob/main/extension_templates/clustering.py) | -| **[Time Series Distances/Kernels]** | experimental | [Extension Template](https://github.com/sktime/sktime/blob/main/extension_templates/dist_kern_panel.py) | -| **[Annotation]** | experimental | [Extension Template](https://github.com/sktime/sktime/blob/main/extension_templates/annotation.py) | - -[forecasting]: https://github.com/sktime/sktime/tree/main/sktime/forecasting -[time series classification]: https://github.com/sktime/sktime/tree/main/sktime/classification -[time series 
regression]: https://github.com/sktime/sktime/tree/main/sktime/regression -[time series clustering]: https://github.com/sktime/sktime/tree/main/sktime/clustering -[annotation]: https://github.com/sktime/sktime/tree/main/sktime/annotation -[time series distances/kernels]: https://github.com/sktime/sktime/tree/main/sktime/dists_kernels -[transformations]: https://github.com/sktime/sktime/tree/main/sktime/transformations - - -## :hourglass_flowing_sand: Install sktime -For trouble shooting and detailed installation instructions, see the [documentation](https://www.sktime.org/en/latest/installation.html). - -- **Operating system**: macOS X · Linux · Windows 8.1 or higher -- **Python version**: Python 3.7, 3.8, 3.9, and 3.10 (only 64 bit) -- **Package managers**: [pip] · [conda] (via `conda-forge`) - -[pip]: https://pip.pypa.io/en/stable/ -[conda]: https://docs.conda.io/en/latest/ - -### pip -Using pip, sktime releases are available as source packages and binary wheels. You can see all available wheels [here](https://pypi.org/simple/sktime/). - -```bash -pip install sktime -``` - -or, with maximum dependencies, - -```bash -pip install sktime[all_extras] -``` - -### conda -You can also install sktime from `conda` via the `conda-forge` channel. For the feedstock including the build recipe and configuration, check out [this repository](https://github.com/conda-forge/sktime-feedstock). 
- -```bash -conda install -c conda-forge sktime -``` - -or, with maximum dependencies, - -```bash -conda install -c conda-forge sktime-all-extras -``` - -## :zap: Quickstart - -### Forecasting - -```python -from sktime.datasets import load_airline -from sktime.forecasting.base import ForecastingHorizon -from sktime.forecasting.model_selection import temporal_train_test_split -from sktime.forecasting.theta import ThetaForecaster -from sktime.performance_metrics.forecasting import mean_absolute_percentage_error - -y = load_airline() -y_train, y_test = temporal_train_test_split(y) -fh = ForecastingHorizon(y_test.index, is_relative=False) -forecaster = ThetaForecaster(sp=12) # monthly seasonal periodicity -forecaster.fit(y_train) -y_pred = forecaster.predict(fh) -mean_absolute_percentage_error(y_test, y_pred) ->>> 0.08661467738190656 -``` - -### Time Series Classification - -```python -from sktime.classification.interval_based import TimeSeriesForestClassifier -from sktime.datasets import load_arrow_head -from sklearn.model_selection import train_test_split -from sklearn.metrics import accuracy_score - -X, y = load_arrow_head() -X_train, X_test, y_train, y_test = train_test_split(X, y) -classifier = TimeSeriesForestClassifier() -classifier.fit(X_train, y_train) -y_pred = classifier.predict(X_test) -accuracy_score(y_test, y_pred) ->>> 0.8679245283018868 -``` - -## :wave: How to get involved - -There are many ways to join the sktime community. We follow the [all-contributors](https://github.com/all-contributors/all-contributors) specification: all kinds of contributions are welcome - not just code. - -| Documentation | | -| -------------------------- | -------------------------------------------------------------- | -| :gift_heart: **[Contribute]** | How to contribute to sktime. | -| :school_satchel: **[Mentoring]** | New to open source? Apply to our mentoring program! | -| :date: **[Meetings]** | Join our discussions, tutorials, workshops and sprints! 
| -| :woman_mechanic: **[Developer Guides]** | How to further develop sktime's code base. | -| :construction: **[Enhancement Proposals]** | Design a new feature for sktime. | -| :medal_sports: **[Contributors]** | A list of all contributors. | -| :raising_hand: **[Roles]** | An overview of our core community roles. | -| :money_with_wings: **[Donate]** | Fund sktime maintenance and development. | -| :classical_building: **[Governance]** | How and by whom decisions are made in sktime's community. | - -[contribute]: https://www.sktime.org/en/latest/get_involved/contributing.html -[donate]: https://opencollective.com/sktime -[extension templates]: https://github.com/sktime/sktime/tree/main/extension_templates -[developer guides]: https://www.sktime.org/en/latest/developer_guide.html -[contributors]: https://github.com/sktime/sktime/blob/main/CONTRIBUTORS.md -[governance]: https://www.sktime.org/en/latest/governance.html -[mentoring]: https://github.com/sktime/mentoring -[meetings]: https://calendar.google.com/calendar/u/0/embed?src=sktime.toolbox@gmail.com&ctz=UTC -[enhancement proposals]: https://github.com/sktime/enhancement-proposals -[roles]: https://www.sktime.org/en/latest/about/team.html - -## :bulb: Project vision - -* **by the community, for the community** -- developed by a friendly and collaborative community. -* the **right tool for the right task** -- helping users to diagnose their learning problem and suitable scientific model types. -* **embedded in state-of-art ecosystems** and **provider of interoperable interfaces** -- interoperable with [scikit-learn], [statsmodels], [tsfresh], and other community favourites. -* **rich model composition and reduction functionality** -- build tuning and feature extraction pipelines, solve forecasting tasks with [scikit-learn] regressors. -* **clean, descriptive specification syntax** -- based on modern object-oriented design principles for data science. 
-* **fair model assessment and benchmarking** -- build your models, inspect your models, check your models, avoid pitfalls. -* **easily extensible** -- easy extension templates to add your own algorithms compatible with sktime's API. +## DISCLAIMER: This is a fork of the sktime repository. Work is ongoing to change links and documentation to reflect this. We do not recommend using this repository in any production setting, but welcome any contributors willing to help us update the project. diff --git a/README_OLD.md b/README_OLD.md new file mode 100644 index 0000000000..968c2ce643 --- /dev/null +++ b/README_OLD.md @@ -0,0 +1,196 @@ + + +# Welcome to scikit-time + +> A unified interface for machine learning with time series + +:rocket: **Version 0.16.0 out now!** [Check out the release notes here](https://www.scikit-time.org/en/latest/changelog.html). + +scikit-time is a library for time series analysis in Python. It provides a unified interface for multiple time series learning tasks. Currently, this includes time series classification, regression, clustering, annotation and forecasting. It comes with [time series algorithms](https://www.scikit-time.org/en/stable/estimator_overview.html) and [scikit-learn] compatible tools to build, tune and validate time series models. 
+ +[scikit-learn]: https://scikit-learn.org/stable/ + +| Overview | | +|---------------|-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------| +| **CI/CD** | [![github-actions](https://img.shields.io/github/actions/workflow/status/scikit-time/scikit-time/wheels.yml?logo=github)](https://github.com/scikit-time/scikit-time/actions/workflows/wheels.yml) [![!codecov](https://img.shields.io/codecov/c/github/scikit-time/scikit-time?label=codecov&logo=codecov)](https://codecov.io/gh/scikit-time/scikit-time) [![readthedocs](https://img.shields.io/readthedocs/scikit-time?logo=readthedocs)](https://www.scikit-time.org/en/latest/?badge=latest) [![platform](https://img.shields.io/conda/pn/conda-forge/scikit-time)](https://github.com/scikit-time/scikit-time) | +| **Code** | [![!pypi](https://img.shields.io/pypi/v/scikit-time?color=orange)](https://pypi.org/project/scikit-time/) [![!conda](https://img.shields.io/conda/vn/conda-forge/scikit-time)](https://anaconda.org/conda-forge/scikit-time) [![!python-versions](https://img.shields.io/pypi/pyversions/scikit-time)](https://www.python.org/) [![!black](https://img.shields.io/badge/code%20style-black-000000.svg)](https://github.com/psf/black) [![Binder](https://mybinder.org/badge_logo.svg)](https://mybinder.org/v2/gh/scikit-time/scikit-time/main?filepath=examples) | +| **Downloads** | 
[![Downloads](https://static.pepy.tech/personalized-badge/scikit-time?period=week&units=international_system&left_color=grey&right_color=blue&left_text=weekly%20(pypi))](https://pepy.tech/project/scikit-time) [![Downloads](https://static.pepy.tech/personalized-badge/scikit-time?period=month&units=international_system&left_color=grey&right_color=blue&left_text=monthly%20(pypi))](https://pepy.tech/project/scikit-time) [![Downloads](https://static.pepy.tech/personalized-badge/scikit-time?period=total&units=international_system&left_color=grey&right_color=blue&left_text=cumulative%20(pypi))](https://pepy.tech/project/scikit-time) | +| **Community** | [![!slack](https://img.shields.io/static/v1?logo=slack&label=slack&message=chat&color=lightgreen)](https://join.slack.com/t/scikit-time-group/shared_invite/zt-1cghagwee-sqLJ~eHWGYgzWbqUX937ig) [![!linkedin](https://img.shields.io/static/v1?logo=linkedin&label=LinkedIn&message=news&color=lightblue)](https://www.linkedin.com/company/scikit-time/) [![!twitter](https://img.shields.io/static/v1?logo=twitter&label=Twitter&message=news&color=lightblue)](https://twitter.com/scikit-time_toolbox) | + +## :books: Documentation + +| Documentation | | +|----------------------------------------|----------------------------------------------------------| +| :star: **[Tutorials]** | New to scikit-time? Here's everything you need to know! | +| :clipboard: **[Binder Notebooks]** | Example notebooks to play with in your browser. | +| :woman_technologist: **[User Guides]** | How to use scikit-time and its features. | +| :scissors: **[Extension Templates]** | How to build your own estimator using scikit-time's API. | +| :control_knobs: **[API Reference]** | The detailed reference for scikit-time's API. | +| :hammer_and_wrench: **[Changelog]** | Changes and version history. | +| :deciduous_tree: **[Roadmap]** | scikit-time's software and community development plan. | +| :pencil: **[Related Software]** | A list of related software.
| + +[tutorials]: https://www.scikit-time.org/en/latest/tutorials.html +[binder notebooks]: https://mybinder.org/v2/gh/scikit-time/scikit-time/main?filepath=examples +[user guides]: https://www.scikit-time.org/en/latest/user_guide.html +[api reference]: https://www.scikit-time.org/en/latest/api_reference.html +[changelog]: https://www.scikit-time.org/en/latest/changelog.html +[roadmap]: https://www.scikit-time.org/en/latest/roadmap.html +[related software]: https://www.scikit-time.org/en/latest/related_software.html + +## :speech_balloon: Where to ask questions + +Questions and feedback are extremely welcome! Please understand that we won't be able to provide individual support via email. We also believe that help is much more valuable if it's shared publicly, so that more people can benefit from it. + +| Type | Platforms | +|------------------------------------------------------------|------------------------| +| :bug: **Bug Reports** | [GitHub Issue Tracker] | +| :sparkles: **Feature Requests & Ideas** | [GitHub Issue Tracker] | +| :woman_technologist: **Usage Questions** | [GitHub Discussions] | +| :speech_balloon: **General Discussion** | [GitHub Discussions] | +| :factory: **Contribution & Development** | [Slack] | +| :globe_with_meridians: **Community collaboration session** | ? | + +[github issue tracker]: https://github.com/scikit-time/scikit-time/issues +[github discussions]: https://github.com/scikit-time/scikit-time/discussions +[slack]: https://join.slack.com/t/scikit-time-group/shared_invite/zt-1cghagwee-sqLJ~eHWGYgzWbqUX937ig + +## :dizzy: Features +Our aim is to make the time series analysis ecosystem more interoperable and usable as a whole. scikit-time provides a __unified interface for distinct but related time series learning tasks__. 
It features [__dedicated time series algorithms__](https://www.scikit-time.org/en/stable/estimator_overview.html) and __tools for composite model building__ including pipelining, ensembling, tuning and reduction that enables users to apply an algorithm for one task to another. + +scikit-time also provides **interfaces to related libraries**, for example [scikit-learn], [statsmodels], [tsfresh], [PyOD] and [fbprophet], among others. + +[statsmodels]: https://www.statsmodels.org/stable/index.html +[tsfresh]: https://tsfresh.readthedocs.io/en/latest/ +[pyod]: https://pyod.readthedocs.io/en/latest/ +[fbprophet]: https://facebook.github.io/prophet/ + +| Module | Status | Links | +|---|---|---| +| **[Forecasting]** | stable | [Tutorial](https://www.scikit-time.org/en/latest/examples/01_forecasting.html) · [API Reference](https://www.scikit-time.org/en/latest/api_reference.html#scikit-time-forecasting-time-series-forecasting) · [Extension Template](https://github.com/scikit-time/scikit-time/blob/main/extension_templates/forecasting.py) | +| **[Time Series Classification]** | stable | [Tutorial](https://github.com/scikit-time/scikit-time/blob/main/examples/02_classification.ipynb) · [API Reference](https://www.scikit-time.org/en/latest/api_reference.html#scikit-time-classification-time-series-classification) · [Extension Template](https://github.com/scikit-time/scikit-time/blob/main/extension_templates/classification.py) | +| **[Time Series Regression]** | stable | [API Reference](https://www.scikit-time.org/en/latest/api_reference.html#scikit-time-classification-time-series-regression) | +| **[Transformations]** | stable | [API Reference](https://www.scikit-time.org/en/latest/api_reference.html#scikit-time-transformations-time-series-transformers) · [Extension Template](https://github.com/scikit-time/scikit-time/blob/main/extension_templates/transformer.py) | +| **[Time Series Clustering]** | maturing | [Extension 
Template](https://github.com/scikit-time/scikit-time/blob/main/extension_templates/clustering.py) | +| **[Time Series Distances/Kernels]** | experimental | [Extension Template](https://github.com/scikit-time/scikit-time/blob/main/extension_templates/dist_kern_panel.py) | +| **[Annotation]** | experimental | [Extension Template](https://github.com/scikit-time/scikit-time/blob/main/extension_templates/annotation.py) | + +[forecasting]: https://github.com/scikit-time/scikit-time/tree/main/sktime/forecasting +[time series classification]: https://github.com/scikit-time/scikit-time/tree/main/sktime/classification +[time series regression]: https://github.com/scikit-time/scikit-time/tree/main/sktime/regression +[time series clustering]: https://github.com/scikit-time/scikit-time/tree/main/sktime/clustering +[annotation]: https://github.com/scikit-time/scikit-time/tree/main/sktime/annotation +[time series distances/kernels]: https://github.com/scikit-time/scikit-time/tree/main/sktime/dists_kernels +[transformations]: https://github.com/scikit-time/scikit-time/tree/main/sktime/transformations + + +## :hourglass_flowing_sand: Install scikit-time +For troubleshooting and detailed installation instructions, see the [documentation](https://www.scikit-time.org/en/latest/installation.html). + +- **Operating system**: Mac OS X · Linux · Windows 8.1 or higher +- **Python version**: Python 3.7, 3.8, 3.9, and 3.10 (only 64 bit) +- **Package managers**: [pip] · [conda] (via `conda-forge`) + +[pip]: https://pip.pypa.io/en/stable/ +[conda]: https://docs.conda.io/en/latest/ + +### pip +Using pip, scikit-time releases are available as source packages and binary wheels. You can see all available wheels [here](https://pypi.org/simple/scikit-time/). + +```bash +pip install scikit-time +``` + +or, with maximum dependencies, + +```bash +pip install scikit-time[all_extras] +``` + +### conda +You can also install scikit-time from `conda` via the `conda-forge` channel. 
For the feedstock including the build recipe and configuration, check out [this repository](https://github.com/conda-forge/scikit-time-feedstock). + +```bash +conda install -c conda-forge scikit-time +``` + +or, with maximum dependencies, + +```bash +conda install -c conda-forge scikit-time-all-extras +``` + +## :zap: Quickstart + +### Forecasting + +```python +from sktime.datasets import load_airline +from sktime.forecasting.base import ForecastingHorizon +from sktime.forecasting.model_selection import temporal_train_test_split +from sktime.forecasting.theta import ThetaForecaster +from sktime.performance_metrics.forecasting import mean_absolute_percentage_error + +y = load_airline() +y_train, y_test = temporal_train_test_split(y) +fh = ForecastingHorizon(y_test.index, is_relative=False) +forecaster = ThetaForecaster(sp=12) # monthly seasonal periodicity +forecaster.fit(y_train) +y_pred = forecaster.predict(fh) +mean_absolute_percentage_error(y_test, y_pred) +>>> 0.08661467738190656 +``` + +### Time Series Classification + +```python +from sktime.classification.interval_based import TimeSeriesForestClassifier +from sktime.datasets import load_arrow_head +from sklearn.model_selection import train_test_split +from sklearn.metrics import accuracy_score + +X, y = load_arrow_head() +X_train, X_test, y_train, y_test = train_test_split(X, y) +classifier = TimeSeriesForestClassifier() +classifier.fit(X_train, y_train) +y_pred = classifier.predict(X_test) +accuracy_score(y_test, y_pred) +>>> 0.8679245283018868 +``` + +## :wave: How to get involved + +There are many ways to join the scikit-time community. We follow the [all-contributors](https://github.com/all-contributors/all-contributors) specification: all kinds of contributions are welcome - not just code. + +| Documentation | | +| -------------------------- | -------------------------------------------------------------- | +| :gift_heart: **[Contribute]** | How to contribute to scikit-time. 
| +| :school_satchel: **[Mentoring]** | New to open source? Apply to our mentoring program! | +| :date: **[Meetings]** | Join our discussions, tutorials, workshops and sprints! | +| :woman_mechanic: **[Developer Guides]** | How to further develop scikit-time's code base. | +| :construction: **[Enhancement Proposals]** | Design a new feature for scikit-time. | +| :medal_sports: **[Contributors]** | A list of all contributors. | +| :raising_hand: **[Roles]** | An overview of our core community roles. | +| :money_with_wings: **[Donate]** | Fund scikit-time maintenance and development. | +| :classical_building: **[Governance]** | How and by whom decisions are made in scikit-time's community. | + +[contribute]: https://www.scikit-time.org/en/latest/get_involved/contributing.html +[donate]: https://opencollective.com/scikit-time +[extension templates]: https://github.com/scikit-time/scikit-time/tree/main/extension_templates +[developer guides]: https://www.scikit-time.org/en/latest/developer_guide.html +[contributors]: https://github.com/scikit-time/scikit-time/blob/main/CONTRIBUTORS.md +[governance]: https://www.scikit-time.org/en/latest/governance.html +[mentoring]: https://github.com/scikit-time/mentoring +[meetings]: https://calendar.google.com/calendar/u/0/embed?src=scikit-time.toolbox@gmail.com&ctz=UTC +[enhancement proposals]: https://github.com/scikit-time/enhancement-proposals +[roles]: https://www.scikit-time.org/en/latest/about/team.html + +## :bulb: Project vision + +* **by the community, for the community** -- developed by a friendly and collaborative community. +* the **right tool for the right task** -- helping users to diagnose their learning problem and identify suitable scientific model types. +* **embedded in state-of-the-art ecosystems** and **provider of interoperable interfaces** -- interoperable with [scikit-learn], [statsmodels], [tsfresh], and other community favourites. 
+* **rich model composition and reduction functionality** -- build tuning and feature extraction pipelines, solve forecasting tasks with [scikit-learn] regressors. +* **clean, descriptive specification syntax** -- based on modern object-oriented design principles for data science. +* **fair model assessment and benchmarking** -- build your models, inspect your models, check your models, avoid pitfalls. +* **easily extensible** -- easy extension templates to add your own algorithms compatible with scikit-time's API. diff --git a/docs/source/about/team.rst b/docs/source/about/team.rst index a5236dd8ba..ec3813ba2b 100644 --- a/docs/source/about/team.rst +++ b/docs/source/about/team.rst @@ -4,7 +4,7 @@ Roles ===== -The roles are described in sktime's :ref:`governance` document. +The roles are described in scikit-time's :ref:`governance` document. A list of all contributors can be found `here `_. Community Council @@ -14,23 +14,8 @@ Community Council * - Name - GitHub ID - * - Franz Király - - :user:`fkiraly` - -Community Council Observers ---------------------------- - -.. 
list-table:: - :header-rows: 1 - - * - Name - - GitHub ID - * - Leonidas Tsaprounis - - :user:`ltsaprounis` - * - Lovkush Agarwal - - :user:`lovkush-a` - * - Mirae Parker - - :user:`miraep8` + * - tbd + - :user:`tbd` Code of Conduct Committee ------------------------- @@ -40,8 +25,14 @@ Code of Conduct Committee * - Name - GitHub ID - * - Franz Király - - :user:`fkiraly` + * - Guzal Bulatova + - :user:`GuzalBulatova` + * - Katie Buchhorn + - :user:`KatieBuc` + * - Matthew Middlehurst + - :user:`mattewmiddlehurst` + * - Patrick Schäfer + - :user:`patrickzib` Core Developers --------------- @@ -51,35 +42,25 @@ Core Developers * - Name - GitHub ID + * - Anthony Bagnall + - :user:`TonyBagnall` * - Chris Holder - :user:`chrisholder` - * - Daniel Bartling - - :user:`danbartl` - * - Franz Király - - :user:`fkiraly` - * - Freddy A Boulton - - :user:`freddyaboulton` * - Guzal Bulatova - :user:`GuzalBulatova` - * - Lovkush Agarwal - - :user:`lovkush-a` + * - Leonidas Tsaprounis + - :user:`ltsaprounis` * - Łukasz Mentel - :user:`lmmentel` - * - Markus Löning - - :user:`mloning` - * - Mirae Parker - - :user:`miraep8` - * - Ryan Kuhns - - :user:`rnkuhns` - * - Sagar Mishra - - :user:`achieveordie` - * - Stanislav Khrapov - - :user:`khrapovs` - * - Svea Marie Meyer - - :user:`SveaMeyer13` + * - Martin Walter + - :user:`aiwalter` + * - Matthew Middlehurst + - :user:`mattewmiddlehurst` + * - Patrick Schäfer + - :user:`patrickzib` -Former Core Developers ----------------------- +Pre-fork `sktime` Core Developers +---------------------------------- .. 
list-table:: :header-rows: 1 @@ -88,8 +69,6 @@ Former Core Developers - GitHub ID * - Aaron Bostrom - :user:`abostrom` - * - Anthony Bagnall - - :user:`TonyBagnall` * - Ayushmaan Seth - :user:`ayushmaanseth` * - George Oastler @@ -100,19 +79,31 @@ Former Core Developers - :user:`james-large` * - Jason Lines - :user:`jasonlines` - * - Leonidas Tsaprounis - - :user:`ltsaprounis` - * - Martin Walter - - :user:`aiwalter` * - Mathew Smith - :user:`matteogales` - * - Matthew Middlehurst - - :user:`mattewmiddlehurst` * - Patrick Rockenschaub - :user:`prockenschaub` - * - Patrick Schäfer - - :user:`patrickzib` * - Sajaysurya Ganesh - :user:`sajaysurya` + * - Franz Király + - :user:`fkiraly` + * - Markus Löning + - :user:`mloning` + * - Mirae Parker + - :user:`miraep8` + * - Ryan Kuhns + - :user:`rnkuhns` + * - Sagar Mishra + - :user:`achieveordie` + * - Stanislav Khrapov + - :user:`khrapovs` + * - Svea Marie Meyer + - :user:`SveaMeyer13` + * - Lovkush Agarwal + - :user:`lovkush-a` + * - Freddy A Boulton + - :user:`freddyaboulton` + * - Daniel Bartling + - :user:`danbartl` * - Anonymous upon contributor's request - :user:`big-o` diff --git a/docs/source/conf.py b/docs/source/conf.py index 998e31c161..54bfbe6369 100644 --- a/docs/source/conf.py +++ b/docs/source/conf.py @@ -9,6 +9,13 @@ import sktime +rst_prolog = """.. attention:: + DISCLAIMER: This is a fork of the sktime repository. Work is ongoing to change links + and documentation to reflect this. We do not recommend using this repository in any + production setting, but welcome any contributors willing to help us update the + project. 
+""" + # -- Path setup -------------------------------------------------------------- # If extensions (or modules to document with autodoc) are in another directory, diff --git a/docs/source/get_involved/code_of_conduct.rst b/docs/source/get_involved/code_of_conduct.rst index 3d3c3f14df..b4e6c76fd5 100644 --- a/docs/source/get_involved/code_of_conduct.rst +++ b/docs/source/get_involved/code_of_conduct.rst @@ -9,16 +9,14 @@ experience. Accordingly, everyone who participates in the sktime project is expected to show respect and courtesy to other community members at all times. -Dr Franz Király, as principal investigator of this project, and all -project members, are dedicated to a **harassment-free experience for -everyone**, regardless of gender, gender identity and expression, -sexual orientation, disability, physical appearance, body size, race, -age or religion. **We do not tolerate harassment by and/or of members of -our community in any form**. - -We are dedicated to a **fair and equal opportunities environment** for everyone, -and therefore are particularly motivated to support new collaborators, -people who are looking to learn and develop their skills, +The CoCC and all project members are dedicated to a **harassment-free +experience for everyone**, regardless of gender, gender identity and +expression, sexual orientation, disability, physical appearance, body +size, race, age or religion. **We do not tolerate harassment by +and/or of members of our community in any form**. + +We are particularly motivated to support new and/or anxious +collaborators, people who are looking to learn and develop their skills, and anyone who has experienced discrimination in the past. To make clear what is expected, we ask all members of the community to @@ -29,64 +27,39 @@ conform to the following Code of Conduct. 1 Introduction -------------- -sktime is a community-oriented and -led project. 
- -We value the involvement of everyone in the community, and strive to be an open -community. We commit to provide fair and rewarding opportunities for everyone without -restriction, to participate, learn, and become leaders of the community. We expect from -leaders of our community to create such opportunities for others, and we stronly encourage -all members of our community to create opportunities as have been created for them. - -We are committed to scientific quality and strive to be an open forum on matters -of technical content and open community governance. All community members are expected -to adhere to the principle of free speech in academia and science, and, to the best -of their abilities, contribute to jointly creating -the best possible science and the best possible open community. - -We are committed to creating a friendly and respectful place for learning, -teaching and contributing. All participants in our in-person events and online -communications are expected to show respect and courtesy to others at all times. +sktime is a community-oriented and -led project. We value the +involvement of everyone in the community. We are committed to creating a +friendly and respectful place for learning, teaching and contributing. +All participants in our in-person events and online communications are +expected to show respect and courtesy to others at all times. To make clear what is expected, everyone participating in activities -associated with the ``sktime`` project is required to conform to this Code +associated with the sktime project is required to conform to this Code of Conduct. This Code of Conduct applies to all spaces managed by the sktime project including, but not limited to, in-person focus groups and workshops, and communications online via GitHub. -The lead investigator of the sktime - Dr Franz Király - is responsible -for enforcing the Code of Conduct. He can be contacted by emailing -sktime.toolbox@gmail.com. 
- -Incidents can be reported to the Code of Conduct Committee as outlined in Section 3, and -are handled in accordance with Sections 3 and 4. +The CoCC is responsible +for enforcing the Code of Conduct. It can be reached via info@sktime.org. -The community council of sktime (see current composition on "roles") -is responsible for ensuring that resolutions of the Code of Conduct committee -are adhered to, and can be contacted under sktime.toolbox@gmail.com. +Reports may be reviewed by other members of the core development team, +unless there is a conflict of interest, and will be kept confidential. 2 Code of Conduct ----------------- The sktime team are dedicated to providing a welcoming and supportive environment for all people, regardless of background or identity. As -such, we do not tolerate: - -* behaviour that is disrespectful to our - community members or that excludes, intimidates, or causes discomfort to others. -* discrimination or harassment based on protected characteristics, including - restriction of fair opportunities based on protected characteristics -* stifling of free speech on topics within the project scope (science and open community operations) - -In the rare cases where our ideals are in conflict, we strive to protect, -in the context of power dynamics, the less powerful party or parties. - -'Protected characteristics' as referred to above include, but are not limited to: -gender identity, gender expression, sex assigned at birth, biological sex, -genetic information, sexual orientation, disability status, physical appearance, -body size, citizenship, nationality, national origin, ethnic or social origin, pregnancy, -familial status, family background, veteran status, trade union membership, -religion or belief (or lack thereof), membership of a national minority, property, age, -socio-economic status, neurotypicality or -atypicality, education, and experience level. 
+such, we do not tolerate behaviour that is disrespectful to our +community members or that excludes, intimidates, or causes discomfort to +others. We do not tolerate discrimination or harassment based on +characteristics that include, but are not limited to: gender identity +and expression, sexual orientation, disability, physical appearance, +body size, citizenship, nationality, ethnic or social origin, pregnancy, +familial status, veteran status, genetic information, religion or belief +(or lack thereof), membership of a national minority, property, age, +education, socio-economic status, technical choices, and experience +level. Everyone who participates in the sktime project activities is required to conform to this Code of Conduct. This Code of Conduct applies to all @@ -108,7 +81,6 @@ hope you will consider to be appropriate community guidelines: - **Be respectful of different viewpoints and experiences**. Do not engage in homophobic, racist, transphobic, ageist, ableist, sexist, or otherwise exclusionary behaviour. - In disagreements, assume best intentions. - **Use welcoming and inclusive language**. Exclusionary comments or jokes, threats or violent language are not acceptable. Do not address others in an angry, intimidating, or demeaning manner. Be considerate @@ -130,11 +102,6 @@ hope you will consider to be appropriate community guidelines: succinct so as to allow maximum engagement by all participants. Do not interrupt others on the basis of disagreement; hold such comments until they have finished speaking. -- **Do not hesitate to voice critical opinions, and accept constructive - criticism gracefully**. A diverse space of opinions and observations is - crucial for fairness and diversity of our community, as well as its scientific quality. - Free speech and constant scrutiny directed at those in power is also essential to ensure - accountability and fair operations of our community, and to prevent groupthink or in-group dynamics. 
- **Don’t be a bystander**. If you see something inappropriate happening, speak up. If you don't feel comfortable intervening but feel someone should, please feel free to ask a member of the Code of @@ -155,7 +122,7 @@ Examples of unacceptable behaviour by sktime community members at any project event or platform include: - written or verbal comments which have the effect of excluding people - on the basis of protected characteristics + on the basis of membership of any specific group - causing someone to fear for their safety, such as through stalking, following, or intimidation - violent threats or language directed against another person @@ -164,21 +131,13 @@ project event or platform include: - nonconsensual or unwelcome physical contact - sustained disruption of talks, events or communications - insults or put downs -- sexist, racist, homophobic, transphobic, ableist, or exclusionary jokes +- sexist, racist, homophobic, transphobic, ableist, or exclusionary + jokes - excessive swearing - incitement to violence, suicide, or self-harm - continuing to initiate interaction (including photography or recording) with someone after being asked to stop - publication of private communication without consent -- discrimination in resource allocation, decision making that excludes people - on the basis of protected characteristics -- favouritism in resource allocation, decision making that confers advantages to personal networks, - and/or excludes others, such as preventing advertising of opportunities, not applying best practice in recruitment -- reducing transparency of operations of sktime, e.g., by obfuscation of the paper trail -- invoking the code of conduct dishonestly, or as a form of bullying, - especially from a position of power - -Some more principled guidance is given in the appendix "Guidance in applying the CoC". 2.3 Consequences of Unacceptable Behaviour ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ @@ -197,9 +156,8 @@ details). 
This Code of Conduct is not intended as a static set of rules by which everyone must abide. Rather, you are invited to make suggestions for -updates or clarifications by contacting Dr Franz Király at -sktime.toolbox@gmail.com, or by making a pull request to this document on -GitHub. +updates or clarifications by contacting the CoCC via info@sktime.org +or by making a pull request to this document on GitHub. 3 Incident Reporting Guidelines ------------------------------- @@ -207,14 +165,12 @@ GitHub. 3.1 Contact points ~~~~~~~~~~~~~~~~~~ -Please contact the current chair of the Code of Conduct Committee directly by email, see 4.1. +Please contact the CoCC directly by email, see 4.1. 3.2 Alternate contact points ~~~~~~~~~~~~~~~~~~~~~~~~~~~~ -In case of conflict of interest of sktime CoC Committee members, you can report -through the `NumFOCUS CoC process `__, -or, in case of events, to the organisation at which the event is taking place (e.g., a university). +tbd 3.3 What to do if someone is in physical danger ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ @@ -237,18 +193,14 @@ team. It's used when we respond to an issue to make sure we're consistent and fair. Enforcement of the Code of Conduct should be respectful and not include any harassing behaviours. -4.1 The Code of Conduct Committee +4.1 The Code of Conduct Committee (CoCC) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ -The sktime Code of Conduct committee currently consists of: - -Dr Franz Király (f.kiraly@ucl.ac.uk) +The sktime Code of Conduct committee currently consists of members listed +`here `__. We encourage community members to step up to become a member of the committee. -You can use the `NumFOCUS CoC process `__ as an alternative entry point -for reporting, see 3.2. - 4.2 Urgent Situations: Acting Unilaterally ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ @@ -265,12 +217,11 @@ enforcement manual, including bans and blocks online, or removal from a physical space. 
In situations where an individual community member acts unilaterally, -they must inform Dr Franz Király as soon as possible, and report their +they must inform the CoCC as soon as possible, and report their actions for review within 24 hours. -and report their actions for review within 24 hours. -4.3 Code of Conduct Investigation Process -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +4.3 Less-Urgent Situations +~~~~~~~~~~~~~~~~~~~~~~~~~~ Upon receiving a report of an incident, the Code of Conduct committee will review the incident and determine, to the best of her ability: @@ -294,15 +245,11 @@ within one week. In the event that a resolution can't be determined in that time, a member of the Code of Conduct committee will respond to the reporter(s) with an update and projected timeline for resolution. -Reports of code of conduct committee case proceedings will be kept confidential. - 4.4 Resolutions ~~~~~~~~~~~~~~~ The Code of Conduct committee will seek to agree on a resolution by -consensus of all members investigating the report in question. If the -committee cannot reach consensus and deadlocks for over a week, Dr Franz -Király, as currently longest serving committee member, will break the tie. +consensus of all members investigating the report in question. Possible responses may include: @@ -319,13 +266,12 @@ Possible responses may include: the conversation will provide a written summary for record keeping. - A private written reprimand from a member of the research team to the individual(s) involved. In this case, the research team member will - deliver that reprimand to the individual(s) over email, cc'ing Dr - Franz Király for record keeping. + deliver that reprimand to the individual(s) over email. - A public announcement of an incident, ideally in the same venue that the violation occurred (i.e. on the listserv for a listserv violation; GitHub for a GitHub violation, etc.). The committee may choose to publish this message elsewhere for posterity. 
-- An imposed "time out" from online spaces. Dr Franz Király will +- An imposed "time out" from online spaces. The CoCC will communicate this "time out" to the individual(s) involved. - A permanent or temporary ban from some or all sktime project spaces (GitHub, in-person events etc). The research team will maintain @@ -345,28 +291,14 @@ committee is not required to act on this feedback. 4.5 Conflicts of Interest ~~~~~~~~~~~~~~~~~~~~~~~~~ -In case of conflict of interest of sktime CoC committee members, you can report -through the `NumFOCUS CoC process `__, -or, in case of events, to the organisation at which the event is taking place (e.g., a university). - -4.6 Audit of Investigation Process -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - -At any point in time, core developers may request the number of ongoing and concluded -investigations over the last year before the date of the query. -Any group of three core developers may also request access to all investigation reports -in a time period, or reports to cases with specified properties. -If there is no conflict of interest (as determined by the Code of Conduct Committee), -access to anonymized reports will be shared for a period of 2 weeks. Shared reports -must not be copied or otherwise proliferated. +In case of conflict of interest of sktime CoC committee members, you can report to the +Community Council, or, in case of events, to the organisation at which the event is +taking place (e.g., a university). 5 Acknowledgements ------------------ -This Code of Conduct is a living document, -maintanined and developed by the ``sktime`` project and its contributors. - -Its original form is adapted from the `The Turing +This code is adapted from `The Turing Way `__ project with Dr Kirstie Whitaker as lead investigator and based on the `Carpentries Code of @@ -398,183 +330,14 @@ Guide `__ and we are also grateful for `this guidance from Ada Initiative `__. 
-We highly appreciate the work that all of the communities linked above -have put into creating code of conduct documents and processes. +We really appreciate the work that all of the communities linked above +have put into creating such a well considered process. This Code of Conduct is licensed under a `Creative Commons Attribution 4.0 International `__ (CC BY 4.0 CA) license which means you are free to share and adapt the work -so long as attribution is maintained to substantial sources of contribution: - -* The `Django project `__ -* The `Contributor Covenant `__, -* The `Citizen Lab `__ -* The `Alan Turing Institute Data Study Groups `__ organising team -* `The Turing Way `__ project -* `The Carpentries `__ -* The ``sktime`` project -* other sources and influences mentioned above - - -Appendix A: Guidance in applying the CoC ----------------------------------------- - -This appendix provides some additional guidance in applying the CoC. -It contains sections on: - -* how to apply the CoC in some common situations where principles might conflict -* investigating CoC violations in resource allocation - -A.1 Resolving conflicting CoC principles -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - -Sometimes, CoC requirements end up in a trade-off, or in conflict with each other. -We outline a few guiding examples in how these should be traded off. - -Examples: - -* contributor criticizing leadership for handing resources to personal network, leadership invoking "discomfort" -* passionate disagreement on technical content between contributors -* accidental violations of CoC causally due to protected characteristics of the person violating the CoC -* systemic discrimination issues whose resolution would cause discomfort in the community - -Guidance rules: - -* the less powerful party should be protected, i.e., CoC cannot be invoked by lead community member to silence critique of how funds are spent. 
- But, CoC should protect new contributors or early career members from more powerful community members, e.g., in operational or technical disputes. -* technical content is not invalidated by tone, but tone must remain civil at all times. - I.e., valid arguments are not discarded because of tone, but participants in a technical - dispute may be punished for tone that is unwelcoming, e.g., name calling etc. -* conditions that make compliance with certain aspects of CoC more difficult should be taken into account, - especially in parties that are the less powerful in a dynamic. Common examples are certain forms of neuroatypicality. - A more illustrative but very rare example would be Tourette's with the rare swearing tic that cannot be "turned off". -* issues of systemic exclusion or discrimination should be addressed when apparent, even at the cost of some community discomfort. - E.g., meetings should be moved to less convenient times, or processes made more "formal", - if not doing so would exclude others in a discriminatory fashion. - -Behaviour that is clearly racist, sexist, etc, is always a CoC violation, and never part of trade-offs. - - -A.2 Guidance on applying the CoC for resource allocation decisions -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - -This section deals with resource allocation decisions by ``sktime`` community members, -and criteria for decision making on resource allocation violating the code of conduct. - -Legal status -^^^^^^^^^^^^ - -This guidance of applying the CoC for resource allocation decisions impacts only -adjudication by the CoC committee within the jurisdiction of the CoC committee. - -It does not impact the terms of the license by -which ``sktime`` - the software package - is distributed. - -Out-Of-Scope -^^^^^^^^^^^^ - -Out of scope are spaces outside ``sktime`` jurisdiction as per Section 1. 
- -In-Scope -^^^^^^^^ - -In scope for this section are all resource allocation decisions that are made -within ``sktime`` jurisdiction as per Section 1, including decisions that are: - -* made by elected members of the ``sktime`` community, i.e., core developers, - Community Council members, Code of Conduct committee members, or -* made as part of an ``sktime`` activity or event. - -A resource allocation is in scope if it satisfies at least one of the following criteria: - -* any decisions that allocate resources primarily obtained due to the ``sktime`` project and the volunteer contributions of the community. - This includes research grants awarded directly for the benefit of the ``sktime`` project, - commercial opportunities arising from or directly directed to the ``sktime`` project, - and any activities that use the ``sktime`` brand as a primary branding. - This condition applies even if relevant decisions are made in personal roles, - or roles other than ``sktime`` official roles, as long as it falls within the general jurisdiction scope outlined above. -* any decisions by members of the ``sktime`` developer community, in their execution of an official ``sktime`` role. - This especially includes core developer, Community Council, and Code of Conduct Committee roles. 
- -Examples for in-scope: - -* an academic allocating grant monies from a grant, or a data science consultant providing consultancy services, with a primary ``sktime`` branding -* hiring decisions for roles that are advertised with a primary ``sktime`` branding -* spending decisions from ``sktime`` community administered accounts -* decisions arising from academic collaboration or commercial requests directed to the ``sktime`` community, - e.g., via official email, or via digital ``sktime`` discussion tools (slack, GitHub discussion etc) - -"resources" in the above context are to be interpreted in the usual sense of the word, -i.e., valuable possessions, opportunities, qualities, that can be accessed and allocated. - -Examples include but are not limited to: - -* budget, money, financial credit -* job opportunities, business opportunities -* decision making power, soft or hard influence -* dedicated work time of subordinates or dependents -* material possessions of value -* information of value -* access to any of the above - -Best practice -^^^^^^^^^^^^^ - -To ensure decision making in line with the code of conduct, decision making must be: - -* by the ``sktime`` community, through ``sktime`` community decision making mechanisms. - E.g., following decision making outlined in the ``sktime`` governance document. -* in communication transparent to the ``sktime`` community, following communication requirements outlined in the ``sktime`` governance document. -* pursuant of and in line with the charitable mission of ``sktime`` and NumFOCUS. - -Adherence to widely accepted guidelines of anti-corruption and anti-bribery practice -(e.g., United Nations Anti-Corruption Guidelines) is strictly expected, -especially for resource allocation decisions of major magnitude, e.g., in hiring processes, -or decisions in the 5-digit dollar range and upwards. 
- -This strict requirement for best practice remains unchanged even if a local context may require less, -e.g., university administrations, -company policies, national anti-corruption and anti-bribery laws (or their absence), etc. - -For instance, it is in-principle possible to misappropriate ``sktime`` resource, while -being in formal compliance with specific national laws, institutional laws, and local policies. -Irrespective of this, such an action would still count as a severe CoC violation. - -Examples of not acceptable behaviour -^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ - -* obtaining a resource opportunity through ``sktime``, - then retreating or resigning ``sktime`` roles to declare that the opportunity is now in a personal role and not on behalf of ``sktime``. -* pretending to communicate on behalf of ``sktime``, or unauthorized use of the ``sktime`` brand, - including but not limited to violations of the BSD 3-clause license -* soft refusal to implement best anti-corruption and anti-bribery practice, e.g., - soft refusal to make budgets, invoives, or spending records transparent to the ``sktime`` community -* attempts to suppress criticism of bad practice through invoking the code of conduct, e.g., tone policing or DARVO - -Investigating CoC violations in resource allocation decisions -^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ - -CoC violations when making resource allocation decisions are rarely accompanied by violation of communicative norms. - -Such CoC violations can be committed by an individual, or by a networked group of individuals. - -CoC investigations must focus on the facts. -For this, it is helpful to be aware of common obfuscation and misinformation tactics such as: -DARVO, intimidation, noise generation, and plausible deniability. 
- -Important pieces of circumstantial evidence for a CoC violation are: - -* cui bono - the individual or group influencing the decision being a direct or indirect - beneficiary of the changed/influenced resource allocation decision -* substantial "value" of the resource allocation decision in question, e.g., in the order of a month's living wages or above -* premeditation, i.e., indication of substantial, careful consideration and planning to change the outcome of decision making to one's benefit, - or prevent/hinder decision making by the ``sktime`` community -* removing the decision making capacity from the ``sktime`` community, - e.g., by creating accomplished facts or not complying with ``sktime`` community decisions. -* obfuscation of the paper trail, e.g., soft refusal or inability to provide paper documentation such as budgets, invoices or reports. -* hindering of the investigation itself, e.g., by not engaging with it to the fullest extent possible, creating community dissent around it, etc -* promoting decision making on ``sktime`` governance that would weaken accountability, - transparency, decision making capacity, investigation practices, or adherence to best practice in resource allocation -* despite better knowledge - evidence that the decision maker is aware of best practice and nonetheless acts against it. - E.g., if best practice has been pointed out clearly and explicitly to the decision maker prior to the decision. - Note: this does not affect the general point that ignorance of the CoC is no excuse. +so long as the attribution to `The Turing +Way `__ project +with Dr Kirstie Whitaker as the principal investigator is retained, +along with the attribution to the Carpentries, The Alan Turing Institute +Data Study Group organising team, Citizen Lab and the other resources. 
\ No newline at end of file diff --git a/docs/source/get_involved/governance.rst b/docs/source/get_involved/governance.rst index 95b35fbb00..3b64322793 100644 --- a/docs/source/get_involved/governance.rst +++ b/docs/source/get_involved/governance.rst @@ -179,7 +179,7 @@ give up their rights and responsibilities as algorithm maintainers. Unresponsiveness is defined as: * not engaging with decision making procedures within the reasonably time frames defined there -* not reacting to issues or bug reports related to the algorithm, within ten working days +* not reacting to issues or bug reports related to the algorithm, within 14 days .. _core-developers: @@ -232,8 +232,7 @@ place on the project’s private communication channels. The vote will be anonymous. While it is expected that most votes will be unanimous, a 2/3 majority of -the cast votes is enough. The vote needs to be open for five days excluding -weekends. +the cast votes is enough. The vote needs to be open for 7 days. End of tenure ^^^^^^^^^^^^^ @@ -287,9 +286,8 @@ Appointment ^^^^^^^^^^^ Membership of the CoC is by nomination by a core developer and a vote by -all core developers. A nomination will result in discussion which will stay -open for 5 days excluding weekends and then a vote by the core -developers which will stay open for 5 days excluding weekends. CoC committee +all core developers. A vote by the core +developers will stay open for 7 days. CoC committee membership votes are subject to: * a 2/3 majority of all cast votes, and @@ -301,6 +299,14 @@ anonymous. To avoid deadlocks if there is an even number of CoC committee members, one of them will have a tie breaking privilege. +End of tenure +^^^^^^^^^^^^^ + +CoC members can resign voluntarily at any point in time, by informing the CoC or core developers in writing. + +CoC membership ends automatically after a person has been in this role for 2 years. 
If the person would like to continue the membership of CoC, a new appointment vote has to happen following the procedure as for appointment of a new CoC member. +The vote can be requested by the affected CoC member or by any core developer. + .. _cc-members: CC members @@ -336,8 +342,7 @@ Eligibility Only core developers are eligible for appointment as CC members. Non-core-developers can be nominated, but this must be accompanied -by a nomination for core developer, and a core developer appointment vote -concurrent with the 5 day discussion period (see below). +by a nomination for core developer. .. _appointment-3: @@ -345,9 +350,8 @@ Appointment ^^^^^^^^^^^ Appointment to the CC is by nomination by a core developer and a vote by -all core developers. A nomination will result in discussion which stays open -for 5 days excluding weekends and then a vote by core developers which will -stay open for 5 days excluding weekends. Approval of appointment requires: +all core developers. A vote by core developers will +stay open for 7 days. Approval of appointment requires: * a 2/3 majority of all cast votes, and * a simple majority approval of all the current CC members. @@ -367,7 +371,7 @@ and need not follow their anonymous vote if they have voted. End of tenure ^^^^^^^^^^^^^ -CC members can resign voluntarily at any point in time, by informing the CC in writing. +CC members can resign voluntarily at any point in time, by informing the CC or core developers in writing. CC members who do not actively engage with their CC member role responsibilities are expected to resign voluntarily. @@ -375,6 +379,9 @@ expected to resign voluntarily. Tenure also ends automatically when a CC member's tenure as core developer ends, e.g., through resignation or inactivity. +CC membership ends automatically after a person has been in this role for 2 years.
If the person would like to continue the membership of CC, a new appointment vote has to happen following the procedure as for appointment of a new CC member. +The vote can be requested by the affected CC member or by any core developer. + Communications ^^^^^^^^^^^^^^ @@ -421,7 +428,7 @@ Appointment ^^^^^^^^^^^ Membership of the CC observers is by nomination by a core developer and a vote by CC members. A nomination will result in a vote by the CC members which will -stay open for 5 days excluding weekends. CC observer membership votes are +stay open for 7 days. CC observer membership votes are subject to a simple majority approval of all the current CC committee members. In case of ties, the CC member with shortest tenure breaks the tie. @@ -493,14 +500,14 @@ core developers. - Proposers are expected to give reasonable time for consideration, that is, time and opportunity for core developers to review and give their opinion on the PR. - Ten working days excluding week-ends constitute "reasonable time" in the above sense. + 14 days constitute "reasonable time" in the above sense. The period resets at every new change made to the PR. It starts only when all GitHub checks pass. - During this period, the PR can be merged if it has an approval and no rejection, but should be reverted if it receives a rejection in addition. - If the "reasonable time" period elapses and no approval or rejection has been expressed on a PR, the PR is scheduled at the top of agenda for the next developer meetup. - In that meeting, a core developer is assigned to review the PR and either approve or reject within five days of the meeting excluding weekends. + In that meeting, a core developer is assigned to review the PR and either approve or reject within 7 days of the meeting. Failure of lazy consensus, in the above sense, can arise only under the following condition: at least one approval and at least one rejection in the PR. 
@@ -519,7 +526,7 @@ Voting takes place: - The start of a voting period after stage 1 is at the moment the lazy consensus fails. - Start and end time of the vote must be announced in the core developer channel, and on the PR (if on a PR). -- The vote will conclude 5 days excluding weekends from the call for the vote. +- The vote will conclude 7 days from the call for the vote. - Votes are voluntary. Abstentions are allowed. Core developers can abstain by simply not casting a vote. - All votes are a binary vote: for or against accepting the proposal. @@ -546,12 +553,12 @@ If the proposed change cannot gather a 2/3 majority of the votes cast, the CC tries to resolve the deadlock. - The CC will use consensus seeking. -- If no consensus can be found within twenty working days excluding weekends - since the beginning of the stage-1 "reasonable time for consideration" period, +- If no consensus can be found within 21 days since the beginning of the +  stage-1 "reasonable time for consideration" period, the decision is made through a simple majority vote (with tie breaking) among the CC members. - Any proposal reaching stage 3 must be supported by an :ref:`steps`, - which has been made public at least 5 days, excluding weekends, before the vote. + which has been made public at least 7 days before the vote. .. _steps: @@ -563,7 +570,7 @@ sktime enhancement proposals (STEPs) are required for: * certain types of proposed changes, by default, see `decision making process <#Decision-making>`__ * for all stage 3 decisions -If a STEP is required by a vote, it must have been made public at least 5 working days (excluding week-ends) before that vote. +If a STEP is required by a vote, it must have been made public at least 7 days before that vote.
A STEP is a consolidated document, with a concise problem statement, a clear description of the proposed solution and a @@ -681,7 +688,7 @@ In addition, we plan to add more roles for managing/coordinating specific project: * Community manager (mentorship, outreach, social media, etc), -* Sub-councils for project-specific technical leadership (e.g.  for documentation, learning tasks, continuous integration) +* Sub-councils for project-specific technical leadership (e.g. for documentation, learning tasks, continuous integration) .. _references: @@ -696,4 +703,4 @@ structures. In particular, we’d like to acknowledge: * `The Art of Community `__ by Jono Bacon * The `astropy `__ project * The `nipy `__ project -* The `scikit-hep `__ project +* The `scikit-hep `__ project \ No newline at end of file diff --git a/docs/source/images/sktime-favicon.ico b/docs/source/images/sktime-favicon.ico index 078f5ba848..9b7cc204ca 100644 Binary files a/docs/source/images/sktime-favicon.ico and b/docs/source/images/sktime-favicon.ico differ diff --git a/docs/source/images/sktime-logo-no-text-vertical.png b/docs/source/images/sktime-logo-no-text-vertical.png index ce16bb0e65..f9325dc6dd 100644 Binary files a/docs/source/images/sktime-logo-no-text-vertical.png and b/docs/source/images/sktime-logo-no-text-vertical.png differ diff --git a/docs/source/images/sktime-logo-no-text.jpg b/docs/source/images/sktime-logo-no-text.jpg index 1e0a39119f..4e8f09888a 100644 Binary files a/docs/source/images/sktime-logo-no-text.jpg and b/docs/source/images/sktime-logo-no-text.jpg differ diff --git a/docs/source/images/sktime-logo-text-horizontal.png b/docs/source/images/sktime-logo-text-horizontal.png index 070bb31a76..2047e533c7 100644 Binary files a/docs/source/images/sktime-logo-text-horizontal.png and b/docs/source/images/sktime-logo-text-horizontal.png differ diff --git a/docs/source/images/sktime-logo.jpg b/docs/source/images/sktime-logo.jpg index c4a510d96e..1504111235 100644 Binary files 
a/docs/source/images/sktime-logo.jpg and b/docs/source/images/sktime-logo.jpg differ diff --git a/sktime/classification/dictionary_based/_boss.py b/sktime/classification/dictionary_based/_boss.py index 96a271b4f4..a977330b26 100644 --- a/sktime/classification/dictionary_based/_boss.py +++ b/sktime/classification/dictionary_based/_boss.py @@ -1,11 +1,11 @@ # -*- coding: utf-8 -*- """BOSS classifiers. -Dictionary based BOSS classifiers based on SFA transform. Contains a single -BOSS and a BOSS ensemble. +Dictionary based BOSS classifiers based on SFA transform. +Contains a single BOSS and a BOSS ensemble. """ -__author__ = ["MatthewMiddlehurst", "patrickzib"] +__author__ = ["patrickzib", "MatthewMiddlehurst"] __all__ = ["BOSSEnsemble", "IndividualBOSS", "pairwise_distances"] from itertools import compress @@ -63,7 +63,7 @@ class BOSSEnsemble(BaseClassifier): save_train_predictions : bool, default=False Save the ensemble member train predictions in fit for use in _get_train_probs leave-one-out cross-validation. - alphabet_size : default = 2 + alphabet_size : default = 4 Number of possible letters (values) for each word. n_jobs : int, default=1 The number of jobs to run in parallel for both `fit` and `predict`. @@ -140,7 +140,7 @@ def __init__( save_train_predictions=False, feature_selection="none", use_boss_distance=True, - alphabet_size=2, + alphabet_size=4, n_jobs=1, random_state=None, ): @@ -478,7 +478,7 @@ class IndividualBOSS(BaseClassifier): Length of word to use to use in BOSS algorithm. norm : bool, default = False Whether to normalize words by dropping the first Fourier coefficient. - alphabet_size : default = 2 + alphabet_size : default = 4 Number of possible letters (values) for each word. 
save_words : bool, default = True Whether to keep NumPy array of words in SFA transformation even after @@ -535,7 +535,7 @@ def __init__( window_size=10, word_length=8, norm=False, - alphabet_size=2, + alphabet_size=4, save_words=False, typed_dict="deprecated", use_boss_distance=True, diff --git a/sktime/classification/dictionary_based/_cboss.py b/sktime/classification/dictionary_based/_cboss.py index 1bdea2fee8..71a1a1e3ba 100644 --- a/sktime/classification/dictionary_based/_cboss.py +++ b/sktime/classification/dictionary_based/_cboss.py @@ -11,7 +11,6 @@ import math import time -import warnings import numpy as np from sklearn.utils import check_random_state @@ -63,15 +62,6 @@ class ContractableBOSS(BaseClassifier): contract_max_n_parameter_samples : int, default=np.inf Max number of parameter combinations to consider when time_limit_in_minutes is set. - typed_dict : bool, default="deprecated" - Use a numba TypedDict to store word counts. May increase memory usage, but will - be faster for larger datasets. As the Dict cannot be pickled currently, there - will be some overhead converting it to a python dict with multiple threads and - pickling. - - .. deprecated:: 0.13.3 - ``typed_dict`` was deprecated in version 0.13.3 and will be removed in 0.15. - save_train_predictions : bool, default=False Save the ensemble member train predictions in fit for use in _get_train_probs leave-one-out cross-validation. 
@@ -153,7 +143,6 @@ def __init__( min_window=10, time_limit_in_minutes=0.0, contract_max_n_parameter_samples=np.inf, - typed_dict="deprecated", save_train_predictions=False, feature_selection="none", n_jobs=1, @@ -166,7 +155,6 @@ def __init__( self.time_limit_in_minutes = time_limit_in_minutes self.contract_max_n_parameter_samples = contract_max_n_parameter_samples - self.typed_dict = typed_dict self.save_train_predictions = save_train_predictions self.n_jobs = n_jobs self.random_state = random_state @@ -215,12 +203,6 @@ def _fit(self, X, y): self.estimators_ = [] self.weights_ = [] - if self.typed_dict != "deprecated": - warnings.warn( - "``typed_dict`` was deprecated in version 0.13.3 and " - "will be removed in 0.15." - ) - # Window length parameter space dependent on series length max_window_searches = self.series_length_ / 4 max_window = int(self.series_length_ * self.max_win_len_prop) diff --git a/sktime/classification/dictionary_based/_weasel.py b/sktime/classification/dictionary_based/_weasel.py index a4de69ca8c..375d323b7f 100644 --- a/sktime/classification/dictionary_based/_weasel.py +++ b/sktime/classification/dictionary_based/_weasel.py @@ -68,20 +68,23 @@ class WEASEL(BaseClassifier): This is the p-value threshold to use for chi-squared test on bag-of-words (lower means more strict). 1 indicates that the test should not be performed. - alphabet_size : default = 2 + alphabet_size : default = 4 Number of possible letters (values) for each word. feature_selection: {"chi2", "none", "random"}, default: chi2 - Sets the feature selections strategy to be used. *Chi2* reduces the number - of words significantly and is thus much faster (preferred). If set to chi2, - p_threshold is applied. *Random* also reduces the number significantly. - *None* applies not feature selectiona and yields large bag of words, - e.g. much memory may be needed. + Sets the feature selections strategy to be used. 
Large amounts of memory may be + needed depending on the setting of bigrams (true is more) or + alpha (larger is more). + 'chi2' reduces the number of words, keeping those above the 'p_threshold'. + 'random' reduces the number to at most 'max_feature_count', + by randomly selecting features. + 'none' does not apply any feature selection and yields large bag of words support_probabilities: bool, default: False If set to False, a RidgeClassifierCV will be trained, which has higher accuracy and is faster, yet does not support predict_proba. If set to True, a LogisticRegression will be trained, which does support predict_proba(), yet is slower and typically less accuracy. predict_proba() is needed for example in Early-Classification like TEASER. + random_state: int or None, default=None Seed for random, integer @@ -133,7 +136,7 @@ def __init__( binning_strategy="information-gain", window_inc=2, p_threshold=0.05, - alphabet_size=2, + alphabet_size=4, n_jobs=1, feature_selection="chi2", support_probabilities=False, @@ -248,6 +251,11 @@ def _fit(self, X, y): ) self.clf.fit(all_words, y) + + self.total_features_count = all_words.shape[1] + if hasattr(self.clf, "best_score_"): + self.cross_val_score = self.clf.best_score_ + return self def _predict(self, X) -> np.ndarray: diff --git a/sktime/classification/dictionary_based/tests/test_boss.py b/sktime/classification/dictionary_based/tests/test_boss.py index 14ffb05f5b..3089be4bd8 100644 --- a/sktime/classification/dictionary_based/tests/test_boss.py +++ b/sktime/classification/dictionary_based/tests/test_boss.py @@ -1,73 +1,25 @@ # -*- coding: utf-8 -*- """BOSS test code.""" import numpy as np -import pytest +from sklearn.metrics import accuracy_score -from sktime.classification.dictionary_based import BOSSEnsemble, IndividualBOSS +from sktime.classification.dictionary_based import BOSSEnsemble from sktime.datasets import load_unit_test -@pytest.fixture -def dataset(): - """ - Load unit_test train and test data set from 
sktime. - - :return: tuple, (X_train, y_train, X_test, y_test). - """ - X_train, y_train = load_unit_test(split="train") - X_test, y_test = load_unit_test(split="test") - return (X_train, y_train, X_test, y_test) - - -@pytest.mark.parametrize( - "new_class,expected_dtype", - [ - ({"1": "Class1", "2": "Class2"}, object), - ({"1": 1, "2": 2}, int), - ({"1": 1.0, "2": 2.0}, float), - ({"1": True, "2": False}, bool), - ], -) -def test_individual_boss_classes(dataset, new_class, expected_dtype): - """Test Individual BOSS on unit_test data with different datatypes as classes.""" +def test_boss_train_estimate(): + """Test of BOSS train estimate on unit test data.""" # load unit test data - X_train, y_train, X_test, y_test = dataset - - # change class - y_train = np.array([new_class[y] for y in y_train]) - - # train iboss and predict X_test - iboss = IndividualBOSS() - iboss.fit(X_train, y_train) - y_pred = iboss.predict(X_test) - - # assert class type and names - assert y_pred.dtype == expected_dtype - assert set(y_pred) == set(y_train) - - -@pytest.mark.parametrize( - "new_class,expected_dtype", - [ - ({"1": "Class1", "2": "Class2"}, "= 0.6 diff --git a/sktime/transformations/panel/dictionary_based/_sfa_fast.py b/sktime/transformations/panel/dictionary_based/_sfa_fast.py index e86422239d..4fcfaccbf7 100644 --- a/sktime/transformations/panel/dictionary_based/_sfa_fast.py +++ b/sktime/transformations/panel/dictionary_based/_sfa_fast.py @@ -9,7 +9,6 @@ __all__ = ["SFAFast"] import math -import multiprocessing import sys from warnings import simplefilter @@ -21,7 +20,6 @@ njit, objmode, prange, - set_num_threads, ) from numba.core import types from numba.typed import Dict @@ -83,17 +81,25 @@ class SFAFast(BaseTransformer): variance. If False, the first Fourier coefficients are selected. Only applicable if labels are given + dilation: int, default = 0 + When set to dilation > 1, adds dilation to the sliding window operation. 
+ save_words: boolean, default = False whether to save the words generated for each series (default False) bigrams: boolean, default = False - whether to create bigrams of SFA words - - feature_selection: {"chi2", "none", "random"}, default: chi2 - Sets the feature selections strategy to be used. Chi2 reduces the number - of words significantly and is thus much faster (preferred). Random also - reduces the number significantly. None applies not feature selectiona and - yields large bag of words, e.g. much memory may be needed. + whether to create bigrams of SFA words. + + feature_selection: {"chi2", "chi2_top_k", "none", "random"}, default: none + Sets the feature selections strategy to be used. Large amounts of memory + may be needed depending on the setting of bigrams (true is more) or + alpha (larger is more). + 'chi2' reduces the number of words, keeping those above the 'p_threshold'. + 'chi2_top_k' reduces the number of words to at most 'max_feature_count', + dropping values based on p-value. + 'random' reduces the number to at most 'max_feature_count', + by randomly selecting features. 
+ 'none' does not apply any feature selection and yields large bag of words, p_threshold: int, default=0.05 (disabled by default) If feature_selection=chi2 is chosen, feature selection is applied based on @@ -165,6 +171,8 @@ def __init__( remove_repeat_words=False, lower_bounding=True, save_words=False, + dilation=0, + first_difference=False, feature_selection="none", max_feature_count=256, p_threshold=0.05, @@ -204,6 +212,9 @@ def __init__( self.series_length = 0 self.letter_bits = 0 + self.dilation = dilation + self.first_difference = first_difference + # Feature selection part self.feature_selection = feature_selection self.max_feature_count = max_feature_count @@ -217,13 +228,6 @@ def __init__( self.return_pandas_data_series = return_pandas_data_series self.random_state = random_state - - if self.n_jobs < 1 or self.n_jobs > multiprocessing.cpu_count(): - n_jobs = multiprocessing.cpu_count() - else: - n_jobs = self.n_jobs - set_num_threads(n_jobs) - super(SFAFast, self).__init__() if not return_pandas_data_series: @@ -265,12 +269,17 @@ def fit_transform(self, X, y=None): X = check_X(X, enforce_univariate=True, coerce_to_numpy=True) X = X.squeeze(1) - self.n_instances, self.series_length = X.shape - self.breakpoints = self._binning(X, y) + if self.dilation >= 1 or self.first_difference: + X2, self.X_index = _dilation(X, self.dilation, self.first_difference) + else: + X2, self.X_index = X, np.arange(X.shape[-1]) + + self.n_instances, self.series_length = X2.shape + self.breakpoints = self._binning(X2, y) self._is_fitted = True words, dfts = _transform_case( - X, + X2, self.window_size, self.dft_length, self.word_length_actual, @@ -328,8 +337,13 @@ def transform(self, X, y=None): X = check_X(X, enforce_univariate=True, coerce_to_numpy=True) X = X.squeeze(1) + if self.dilation >= 1 or self.first_difference: + X2, self.X_index = _dilation(X, self.dilation, self.first_difference) + else: + X2, self.X_index = X, np.arange(X.shape[-1]) + words, dfts = _transform_case( - 
X, + X2, self.window_size, self.dft_length, self.word_length_actual, @@ -358,11 +372,11 @@ def transform(self, X, y=None): # transform bags = create_bag_transform( + self.X_index, self.feature_count, self.feature_selection, self.relevant_features if self.relevant_features else empty_dict, words, - self.bigrams, self.remove_repeat_words, )[0] @@ -380,9 +394,12 @@ def transform_to_bag(self, words, word_len, y=None): rng = check_random_state(self.random_state) if self.feature_selection == "none" and ( - self.breakpoints.shape[1] <= 2 and not self.bigrams + self.breakpoints.shape[1] <= 2 + and not self.bigrams + and self.word_length <= 8 ): bag_of_words = create_bag_none( + self.X_index, self.breakpoints, words.shape[0], words, @@ -396,6 +413,7 @@ def transform_to_bag(self, words, word_len, y=None): feature_count = len(list(feature_names)) relevant_features_idx = np.arange(feature_count, dtype=np.uint32) bag_of_words, self.relevant_features = create_bag_feature_selection( + self.X_index, words.shape[0], relevant_features_idx, np.array(list(feature_names)), @@ -410,6 +428,7 @@ def transform_to_bag(self, words, word_len, y=None): len(feature_names), replace=False, size=feature_count ) bag_of_words, self.relevant_features = create_bag_feature_selection( + self.X_index, words.shape[0], relevant_features_idx, np.array(list(feature_names)), @@ -417,29 +436,40 @@ def transform_to_bag(self, words, word_len, y=None): self.remove_repeat_words, ) - # Chi-squared feature selection - elif self.feature_selection == "chi2": - feature_count = len(list(feature_names)) + # Chi-squared feature selection taking + # a) the top-k features + # b) a p-threshold + elif ( + self.feature_selection == "chi2_top_k" + or self.feature_selection == "chi2" + ): + feature_names_array = np.array(list(feature_names)) + feature_count = len(feature_names_array) relevant_features_idx = np.arange(feature_count, dtype=np.uint32) bag_of_words, _ = create_bag_feature_selection( + self.X_index, words.shape[0], 
relevant_features_idx, - np.array(list(feature_names)), + feature_names_array, words, self.remove_repeat_words, ) + # apply chi2-based feature selection chi2_statistics, p = chi2(bag_of_words, y) - relevant_features_idx = np.where(p <= self.p_threshold)[0] - self.relevant_features = Dict.empty( - key_type=types.uint32, value_type=types.uint32 - ) - for k, v in zip( - np.array(list(feature_names))[relevant_features_idx], - np.arange(len(relevant_features_idx)), - ): - self.relevant_features[k] = v + # p-threshold using 'p_threshold' + if self.feature_selection == "chi2": + relevant_features_idx = np.where(p <= self.p_threshold)[0] + + # top-k using 'max_feature_count' + else: + relevant_features_idx = np.argsort(p)[: self.max_feature_count] + + self.relevant_features = create_dict( + feature_names_array[relevant_features_idx], + np.arange(len(relevant_features_idx), dtype=np.uint32), + ) # select subset of features bag_of_words = bag_of_words[:, relevant_features_idx] @@ -707,7 +737,7 @@ def _fast_fourier_transform(X, norm, dft_length, inverse_sqrt_win_size): for i in range(len(stds)): stds[i] = np.std(X[i]) # stds = np.std(X, axis=1) # not available in numba - stds = np.where(stds < 1e-8, 1e-8, stds) + stds = np.where(stds < 1e-8, 1, stds) with objmode(X_ffts="complex128[:,:]"): X_ffts = np.fft.rfft(X, axis=1) # complex128 @@ -791,7 +821,7 @@ def _calc_incremental_mean_std(series, end, window_size): r_window_length = 1.0 / window_size mean = series_sum * r_window_length buf = math.sqrt(max(square_sum * r_window_length - mean * mean, 0.0)) - stds[0] = buf if buf > 1e-8 else 1e-8 + stds[0] = buf if buf > 1e-8 else 1 for w in range(1, end): series_sum += series[w + window_size - 1] - series[w - 1] @@ -801,7 +831,7 @@ def _calc_incremental_mean_std(series, end, window_size): - series[w - 1] * series[w - 1] ) buf = math.sqrt(max(square_sum * r_window_length - mean * mean, 0.0)) - stds[w] = buf if buf > 1e-8 else 1e-8 + stds[w] = buf if buf > 1e-8 else 1 return stds @@ 
-869,15 +899,6 @@ def generate_words( return words -@njit(cache=True, fastmath=True) -def create_feature_names(sfa_words): - feature_names = set() - for t_words in sfa_words: - for t_word in t_words: - feature_names.add(t_word) - return feature_names - - @njit(fastmath=True, cache=True) def _mft( X, @@ -958,9 +979,54 @@ def _mft( ] +def _dilation(X, d, first_difference): + padding = np.zeros((len(X), 10)) + X = np.concatenate((padding, X, padding), axis=1) + + # using only first order differences + if first_difference: + X = np.diff(X, axis=1, prepend=0) + + # adding dilation + X_dilated = _dilation2(X, d) + X_index = _dilation2( + np.arange(X_dilated.shape[-1], dtype=np.float_).reshape(1, -1), d + )[0] + + return ( + X_dilated, + X_index, + ) + + +@njit(cache=True, fastmath=True) +def _dilation2(X, d): + # dilation on actual data + if d > 1: + start = 0 + data = np.zeros(X.shape, dtype=np.float_) + for i in range(0, d): + curr = X[:, i::d] + end = curr.shape[1] + data[:, start : start + end] = curr + start += end + return data + else: + return X.astype(np.float_) + + +@njit(cache=True, fastmath=True) +def create_feature_names(sfa_words): + feature_names = set() + for t_words in sfa_words: + for t_word in t_words: + feature_names.add(t_word) + return feature_names + + @njit(cache=True, fastmath=True) def create_bag_none( - breakpoints, n_instances, sfa_words, word_length, remove_repeat_words + X_index, breakpoints, n_instances, sfa_words, word_length, remove_repeat_words ): feature_count = np.uint32(breakpoints.shape[1] ** word_length) all_win_words = np.zeros((n_instances, feature_count), dtype=np.uint32) @@ -980,7 +1046,12 @@ def create_bag_none( @njit(cache=True, fastmath=True) def create_bag_feature_selection( - n_instances, relevant_features_idx, feature_names, sfa_words, remove_repeat_words + X_index, + n_instances, + relevant_features_idx, + feature_names, + sfa_words, + remove_repeat_words, ): relevant_features = Dict.empty(key_type=types.uint32, 
value_type=types.uint32) for k, v in zip( @@ -1003,14 +1074,13 @@ def create_bag_feature_selection( @njit(cache=True, fastmath=True) def create_bag_transform( + X_index, feature_count, feature_selection, relevant_features, sfa_words, - bigrams, remove_repeat_words, ): - # merging arrays all_win_words = np.zeros((len(sfa_words), feature_count), np.uint32) for j in prange(sfa_words.shape[0]): if len(relevant_features) == 0 and feature_selection == "none": @@ -1035,6 +1105,18 @@ def create_bag_transform( return all_win_words, all_win_words.shape[1] +@njit(fastmath=True, cache=True) +def create_dict(feature_names, features_idx): + relevant_features = Dict.empty(key_type=types.uint32, value_type=types.uint32) + for k, v in zip( + feature_names[features_idx], + np.arange(len(features_idx), dtype=np.uint32), + ): + relevant_features[k] = v + + return relevant_features + + @njit(fastmath=True, cache=True) def shorten_words(words, amount, letter_bits): new_words = np.zeros((words.shape[0], words.shape[1]), dtype=np.uint32) diff --git a/sktime/utils/validation/__init__.py b/sktime/utils/validation/__init__.py index 551fe87eb2..4e39be3a0e 100644 --- a/sktime/utils/validation/__init__.py +++ b/sktime/utils/validation/__init__.py @@ -126,24 +126,28 @@ def all_inputs_are_time_like(args: list) -> bool: def check_n_jobs(n_jobs: int) -> int: """Check `n_jobs` parameter according to the scikit-learn convention. + https://scikit-learn.org/stable/glossary.html#term-n_jobs + Parameters ---------- - n_jobs : int, positive or -1 + n_jobs : int or None The number of jobs for parallelization. + If None or 0, 1 is used. + If negative, (n_cpus + 1 + n_jobs) is used. In such a case, -1 would use all + available CPUs and -2 would use all but one. If the number of CPUs used would + fall under 1, 1 is returned instead. Returns ------- n_jobs : int - Checked number of jobs. + The number of threads to be used. 
""" - # scikit-learn convention - # https://scikit-learn.org/stable/glossary.html#term-n-jobs - if n_jobs is None: + if n_jobs is None or n_jobs == 0: return 1 elif not is_int(n_jobs): raise ValueError(f"`n_jobs` must be None or an integer, but found: {n_jobs}") elif n_jobs < 0: - return os.cpu_count() - n_jobs + 1 + return max(1, os.cpu_count() + 1 + n_jobs) else: return n_jobs