diff --git a/.cursor/Dockerfile b/.cursor/Dockerfile
new file mode 100644
index 0000000..695fb6e
--- /dev/null
+++ b/.cursor/Dockerfile
@@ -0,0 +1,34 @@
+# Base image for the Cursor cloud-agent environment. It only provides system
+# packages; Python 2.7.18 is built afterwards by scripts/bootstrap-agent-env.sh.
+FROM debian:bullseye-slim
+
+# Build headers for compiling Python 2 and the native dependencies (lxml,
+# Pillow), plus Firefox ESR and Xvfb for the Selenium-driven scripts. The apt
+# lists are removed in the same layer so they do not end up in the image.
+RUN apt-get update && \
+    DEBIAN_FRONTEND=noninteractive apt-get install -y --no-install-recommends \
+        bash \
+        build-essential \
+        ca-certificates \
+        curl \
+        firefox-esr \
+        git \
+        libbz2-dev \
+        libffi-dev \
+        libfreetype6-dev \
+        libjpeg62-turbo-dev \
+        liblzma-dev \
+        libncursesw5-dev \
+        libreadline-dev \
+        libsqlite3-dev \
+        libssl-dev \
+        libtiff5-dev \
+        libxml2-dev \
+        libxslt1-dev \
+        tk-dev \
+        uuid-dev \
+        wget \
+        xvfb \
+        xz-utils \
+        zlib1g-dev && \
+    rm -rf /var/lib/apt/lists/*
diff --git a/.cursor/environment.json b/.cursor/environment.json
new file mode 100644
index 0000000..fa729ce
--- /dev/null
+++ b/.cursor/environment.json
@@ -0,0 +1,10 @@
+{
+  "build": {
+    "dockerfile": "Dockerfile",
+    "context": ".."
+  },
+  "install": "bash ./scripts/bootstrap-agent-env.sh",
+  "env": {
+    "PIP_DISABLE_PIP_VERSION_CHECK": "1"
+  }
+}
diff --git a/.gitignore b/.gitignore
index 1bf5bef..c2fba25 100644
--- a/.gitignore
+++ b/.gitignore
@@ -1,2 +1,3 @@
 *.pyc
 *.png
+.venv/
diff --git a/.python-version b/.python-version
new file mode 100644
index 0000000..a616492
--- /dev/null
+++ b/.python-version
@@ -0,0 +1 @@
+2.7.18
diff --git a/docs/environment.md b/docs/environment.md
new file mode 100644
index 0000000..f7a5866
--- /dev/null
+++ b/docs/environment.md
@@ -0,0 +1,46 @@
+# Environment setup
+
+This repository is a legacy Python 2 automation project. The source files use
+Python 2 syntax (`print` statements, `urllib2`, `StringIO`) and will not run on
+the default Python 3 toolchain that ships with most modern cloud agents.
+
+## What the repo-local Cursor environment does
+
+- builds a Debian Bullseye image with the native libraries needed by lxml,
+  Pillow, and the browser automation scripts;
+- installs Firefox ESR and Xvfb for Selenium-driven flows;
+- bootstraps `pyenv` and uses it to install Python 2.7.18;
+- creates a repo-local virtualenv in `.venv`;
+- installs the pinned Python dependencies from `requirements.txt`.
+
+The entry point for Cursor cloud agents is `.cursor/environment.json`, which
+runs `scripts/bootstrap-agent-env.sh` during environment setup.
+
+## Manual bootstrap
+
+If you need to reproduce the setup in a shell, first make sure the native build
+packages from `.cursor/Dockerfile` are installed on the host. The bootstrap
+script assumes those headers and libraries already exist.
+
+Then run:
+
+```bash
+bash ./scripts/bootstrap-agent-env.sh
+source .venv/bin/activate
+python -V
+```
+
+Expected Python version:
+
+```text
+Python 2.7.18
+```
+
+## Notes
+
+- `formasaurus` is included in `requirements.txt` because it is imported by
+  `registration_form_filler.py` but was previously undeclared.
+- The Selenium dependency is very old (`2.48.0`). Firefox is provided so the
+  browser scripts have the expected runtime pieces, but the demo spiders may
+  still need code modernization before they work reliably against modern sites
+  and drivers.
diff --git a/requirements.txt b/requirements.txt
index 6824e6d..927a327 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -1,5 +1,6 @@
 beautifulsoup4==4.4.1
 fake-factory==0.5.3
+formasaurus==0.2
 lxml==3.4.4
 Pillow==3.0.0
 requests==2.8.1
diff --git a/scripts/bootstrap-agent-env.sh b/scripts/bootstrap-agent-env.sh
new file mode 100755
index 0000000..2fa70a9
--- /dev/null
+++ b/scripts/bootstrap-agent-env.sh
@@ -0,0 +1,96 @@
+#!/usr/bin/env bash
+# Bootstrap the legacy Python 2 toolchain for this repository:
+#   1. install pyenv (if missing) and build Python 2.7.18 with it;
+#   2. create or repair the repo-local virtualenv in .venv;
+#   3. install the pinned dependencies from requirements.txt;
+#   4. write a small profile file and hook it into ~/.bashrc.
+# Expects the system packages from .cursor/Dockerfile to be installed.
+set -euo pipefail
+
+REPO_ROOT="$(cd "$(dirname "${BASH_SOURCE[0]}")/.." && pwd)"
+PYENV_ROOT="${PYENV_ROOT:-$HOME/.pyenv}"
+PYTHON_VERSION="2.7.18"
+VENV_DIR="$REPO_ROOT/.venv"
+
+export PYENV_ROOT
+export PATH="$PYENV_ROOT/bin:$PATH"
+
+# Shallow clone: only the current pyenv checkout is needed, not its history.
+if [ ! -d "$PYENV_ROOT" ]; then
+  git clone --depth 1 --single-branch https://github.com/pyenv/pyenv.git "$PYENV_ROOT"
+fi
+
+PYENV_BIN="$PYENV_ROOT/bin/pyenv"
+# -s skips the build when this version is already installed.
+if ! "$PYENV_BIN" install -s "$PYTHON_VERSION"; then
+  printf '%s\n' \
+    "Python $PYTHON_VERSION failed to build." \
+    "This bootstrap expects the system packages from .cursor/Dockerfile, or an" \
+    "equivalent host setup that provides OpenSSL, SQLite, bzip2, readline, and" \
+    "other Python 2 build headers." \
+    "For Cursor cloud agents, run the repo through .cursor/environment.json so" \
+    "the image is built before install runs." >&2
+  exit 1
+fi
+
+PYTHON_BIN="$("$PYENV_BIN" prefix "$PYTHON_VERSION")/bin/python2.7"
+# Best effort: an ensurepip failure is ignored here; the pip call below fails loudly instead.
+"$PYTHON_BIN" -m ensurepip --upgrade >/dev/null 2>&1 || true
+
+# NOTE(review): the upper bounds are presumably there to stay on releases that still run on Python 2.7.
+"$PYTHON_BIN" -m pip install --upgrade \
+  "pip<21" \
+  "setuptools<45" \
+  "virtualenv<20.22" \
+  "wheel<1"
+
+# Rebuild .venv when it is missing or its interpreter does not report Python 2.7.x.
+recreate_venv=0
+if [ ! -x "$VENV_DIR/bin/python" ]; then
+  recreate_venv=1
+else
+  version_output="$("$VENV_DIR/bin/python" -V 2>&1 || true)"
+  case "$version_output" in
+    "Python 2.7."*)
+      ;;
+    *)
+      recreate_venv=1
+      ;;
+  esac
+fi
+
+if [ "$recreate_venv" -eq 1 ]; then
+  rm -rf "$VENV_DIR"
+  "$PYTHON_BIN" -m virtualenv "$VENV_DIR"
+fi
+
+"$VENV_DIR/bin/pip" install --upgrade \
+  "pip<21" \
+  "setuptools<45" \
+  "wheel<1"
+"$VENV_DIR/bin/pip" install -r "$REPO_ROOT/requirements.txt"
+
+# Profile snippet for later shells. The heredoc delimiter is unquoted, so plain
+# $VARS are expanded now while \$-escaped ones are written out literally.
+PROFILE_FILE="$HOME/.autoregister-agent-env"
+cat > "$PROFILE_FILE" <<EOF
+export PYENV_ROOT="$PYENV_ROOT"
+export PATH="\$PYENV_ROOT/bin:\$PATH"
+if command -v pyenv >/dev/null 2>&1; then
+  eval "\$(pyenv init - 2>/dev/null || true)"
+fi
+EOF
+
+# Hook the profile into ~/.bashrc unless the marker is already there, so reruns do not append it twice.
+if [ -f "$HOME/.bashrc" ]; then
+  bashrc_contents="$(<"$HOME/.bashrc")"
+  case "$bashrc_contents" in
+    *autoregister-agent-env*)
+      ;;
+    *)
+      if ! printf '\n[ -f "%s" ] && . "%s"\n' "$PROFILE_FILE" "$PROFILE_FILE" >> "$HOME/.bashrc"; then
+        echo "Warning: Failed to update .bashrc. You may need to manually source $PROFILE_FILE" >&2
+      fi
+      ;;
+  esac
+fi