You signed in with another tab or window. Reload to refresh your session.You signed out in another tab or window. Reload to refresh your session.You switched accounts on another tab or window. Reload to refresh your session.Dismiss alert
I played around a bit with Paperless-ngx, which seems to be a very nice and popular document management system (and is easy to install on a Raspberry Pi).
Purely as a proof of concept, I vibe-coded a small Python script that polls a TruSpace workspace and inserts new files into a Paperless folder. It works like a charm on a Raspberry Pi 💯
#!/usr/bin/env python3
"""
paperless_sync_truspace.py

TruSpace → Paperless-ngx bridge

- Lists documents from TruSpace: /api/documents?workspace=...
- Detects new/updated by (docId, meta.version)
- Downloads file bytes via: /api/documents/version/{cid} (preferred on your instance)
  (falls back to several docId routes and then IPFS gateway if provided)
- Uploads to Paperless-ngx via: /api/documents/post_document/
- Tracks progress in a local state file (no duplicate uploads)

ENV or flags:
  # TruSpace
  TRUSPACE_URL              (default http://smartspace.local:8000)
  TRUSPACE_WORKSPACE_ID     (required if not passed via --workspace-id)
  TRUSPACE_CSRFTOKEN
  TRUSPACE_SESSIONID
  TRUSPACE_AUTH_TOKEN
  TRUSPACE_LOGIN_JSON
  TRUSPACE_IPFS_GATEWAY     (optional, default http://smartspace.local:8080)

  # Paperless
  PAPERLESS_URL             (required)
  PAPERLESS_TOKEN           (required)
  PAPERLESS_STORAGE_PATH_ID (optional)

  # General
  PAPERLESS_INTERVAL        (seconds, default 600)
  STATE_FILE                (default .truspace_paperless_state.json)
  LOG_LEVEL                 (default INFO)
"""

import argparse
import json
import logging
import os
import signal
import sys
import time
from pathlib import Path
from typing import Dict, List, Optional

import requests

STATE_FILE_NAME = ".truspace_paperless_state.json"

# Set by the signal handler; the long-running loops check it so the process
# can finish the current step and exit cleanly.
STOP = False


# -------------------- signal handling --------------------
def _sigterm_handler(signum, frame):
    """Request a graceful shutdown by setting the module-level STOP flag."""
    global STOP
    STOP = True
    logging.info("Received signal %s; will exit after this iteration.", signum)


for sig in (signal.SIGINT, signal.SIGTERM):
    try:
        signal.signal(sig, _sigterm_handler)
    except (ValueError, OSError):
        # Best effort: signal.signal() raises ValueError outside the main
        # thread and OSError for signals the platform rejects. Deliberately
        # not logged here: calling logging.*() before main() configures
        # logging would implicitly install a default root handler.
        pass


# -------------------- state helpers --------------------
def load_state(state_path: Path) -> Dict[str, Dict]:
    """Load the sync state mapping (document id -> last transferred info).

    Args:
        state_path: Location of the JSON state file.

    Returns:
        The stored mapping, or an empty dict when the file is missing,
        unreadable, not valid JSON, or not a JSON object. Entries whose
        value is not an object are dropped so callers can rely on every
        value being a dict.
    """
    if not state_path.exists():
        return {}
    try:
        loaded = json.loads(state_path.read_text(encoding="utf-8"))
    except (OSError, ValueError):
        # ValueError covers both json.JSONDecodeError and UnicodeDecodeError.
        logging.warning("State file corrupted; starting fresh.")
        return {}
    if not isinstance(loaded, dict):
        logging.warning("State file corrupted; starting fresh.")
        return {}
    return {key: entry for key, entry in loaded.items() if isinstance(entry, dict)}
def save_state(state_path: Path, state: Dict[str, Dict]) -> None:
    """Persist the sync state as JSON, replacing the previous file in one step.

    The data is first written to a sibling ``<name>.tmp`` file which is then
    renamed over ``state_path``, so an interrupted write does not leave a
    truncated state file behind.

    Args:
        state_path: Destination of the JSON state file. Missing parent
            directories are created.
        state: Mapping of document id to its last transferred info.
    """
    state_path.parent.mkdir(parents=True, exist_ok=True)
    # Append ".tmp" rather than swapping the suffix, so "a.json" and "a.yaml"
    # in the same directory never share a temp file.
    tmp = state_path.with_name(state_path.name + ".tmp")
    tmp.write_text(json.dumps(state, indent=2, sort_keys=True), encoding="utf-8")
    tmp.replace(state_path)
# -------------------- paperless API --------------------
def upload_bytes_to_paperless(
    data: bytes,
    filename: str,
    base_url: str,
    token: str,
    storage_path_id: Optional[int] = None,
    timeout: int = 180,
    api_version: int = 9,
) -> Optional[str]:
    """Upload a single file (bytes) to paperless-ngx.

    Handles responses that are:
      - {"task_id": "..."} or {"id": "..."} or {"uuid": "..."}
      - plain JSON string: "...."
      - plain text body with UUID
      - empty body (rare)

    Args:
        data: Raw file content.
        filename: Name sent along with the multipart upload.
        base_url: Paperless base URL (a trailing slash is tolerated).
        token: Paperless API token.
        storage_path_id: Optional storage path id sent as ``storage_path``.
        timeout: HTTP timeout in seconds.
        api_version: API version requested through the Accept header.

    Returns:
        The task UUID as a string if one could be extracted, else None
        (transport error, HTTP status >= 400, or empty response body).
    """
    url = base_url.rstrip("/") + "/api/documents/post_document/"
    headers = {
        "Authorization": f"Token {token}",
        "Accept": f"application/json; version={api_version}",
    }
    form = {}
    if storage_path_id is not None:
        form["storage_path"] = str(storage_path_id)
    files = {"document": (filename, data)}

    try:
        r = requests.post(url, headers=headers, data=form, files=files, timeout=timeout)
    except requests.RequestException as e:
        logging.error("Paperless HTTP error uploading %s: %s", filename, e)
        return None

    if r.status_code >= 400:
        logging.error("Paperless upload failed for %s: %s %s", filename, r.status_code, r.text[:500])
        return None

    task_id = None
    try:
        js = r.json()  # may be dict OR str
        if isinstance(js, dict):
            task_id = js.get("task_id") or js.get("id") or js.get("uuid")
        elif isinstance(js, str):
            task_id = js.strip().strip('"')
    except ValueError:
        pass  # not JSON; fall back to the raw body below

    if not task_id:
        body = (r.text or "").strip()
        if body:
            task_id = body.strip('"')
        else:
            logging.info("Paperless returned empty body for %s (HTTP %s).", filename, r.status_code)

    # Normalise to str so the declared return type holds even if the server
    # sent a numeric id.
    return str(task_id) if task_id else None


def _first_task(payload) -> Optional[Dict]:
    """Return the first task record of a tasks response, or None.

    Accepts both a bare JSON list of tasks and a paginated
    ``{"results": [...]}`` object.
    """
    if isinstance(payload, dict):
        payload = payload.get("results")
    if isinstance(payload, list) and payload and isinstance(payload[0], dict):
        return payload[0]
    return None


def _task_document_id(task: Dict) -> Optional[int]:
    """Extract the created document id from a finished task record, if any."""
    result = task.get("result") or task.get("task_result") or {}
    candidates = []
    if isinstance(result, dict):
        candidates.extend((result.get("document_id"), result.get("id")))
    # NOTE(review): the Paperless-ngx API docs describe `related_document` as
    # the created document's id; some versions appear to send it as a string.
    candidates.append(task.get("related_document"))
    for value in candidates:
        if not value or isinstance(value, bool):
            continue
        if isinstance(value, int):
            return value
        if isinstance(value, str) and value.strip().isdigit():
            return int(value.strip())
    return None


def poll_paperless_task_for_document_id(
    base_url: str, token: str, task_id: str, timeout: int = 120, api_version: int = 9
) -> Optional[int]:
    """Poll /api/tasks/?task_id=<uuid> for up to `timeout` seconds.

    Args:
        base_url: Paperless base URL (a trailing slash is tolerated).
        token: Paperless API token.
        task_id: Task UUID returned by the upload.
        timeout: Overall polling budget in seconds.
        api_version: API version requested through the Accept header.

    Returns:
        The created document ID (int) if the task succeeded and reported one;
        None if the task failed, was revoked, reported no id, the budget ran
        out, or a shutdown was requested via STOP.
    """
    url = base_url.rstrip("/") + "/api/tasks/"
    headers = {
        "Authorization": f"Token {token}",
        "Accept": f"application/json; version={api_version}",
    }
    deadline = time.time() + timeout
    last_state = None

    while time.time() < deadline and not STOP:
        delay = 2
        task = None
        try:
            r = requests.get(url, headers=headers, params={"task_id": task_id}, timeout=15)
            if r.status_code < 400:
                task = _first_task(r.json())
        except requests.RequestException:
            delay = 3  # back off slightly longer after a transport error
        except ValueError:
            pass  # body was not JSON; treat like "no task info yet"

        if task is not None:
            # The task state is looked up under either "state" or "status".
            state = task.get("state") or task.get("status")
            last_state = state
            if state == "SUCCESS":
                return _task_document_id(task)
            if state in {"FAILURE", "REVOKED"}:
                logging.error("Paperless task %s ended in %s: %s", task_id, state, task)
                return None

        # Always pause between polls so an empty or error response can never
        # turn this loop into a busy loop against the server.
        time.sleep(delay)

    if last_state:
        logging.info("Paperless task %s last seen state: %s", task_id, last_state)
    return None


# -------------------- TruSpace API --------------------
def truspace_headers(base_url: str, csrftoken: Optional[str], jwt: Optional[str]) -> Dict[str, str]:
    """Build the request headers sent to TruSpace.

    Args:
        base_url: TruSpace base URL; used (with one trailing slash) as Referer.
        csrftoken: Optional CSRF token, sent under both common header names.
        jwt: Optional bearer token for the Authorization header.

    Returns:
        A new header dict.
    """
    h = {
        "Accept": "application/json",
        "X-Requested-With": "XMLHttpRequest",
        "Referer": base_url.rstrip("/") + "/",
    }
    if csrftoken:
        h["X-CSRFToken"] = csrftoken
        h["X-CSRF-Token"] = csrftoken
    if jwt:
        # Usually not needed if cookies are present, but harmless:
        h["Authorization"] = f"Bearer {jwt}"
    return h


def truspace_cookies(csrftoken, sessionid, auth_token, login) -> Dict[str, str]:
    """Build the TruSpace cookie jar, including only the values that are set."""
    candidates = {
        "csrftoken": csrftoken,
        "sessionid": sessionid,
        "auth_token": auth_token,
        "login": login,
    }
    return {name: value for name, value in candidates.items() if value}


def list_truspace_documents(
    base_url: str,
    workspace_id: str,
    cookies: Dict[str, str],
    headers: Dict[str, str],
    start: int = 0,
    limit: int = 50,
    search: str = "",
    timeout: int = 20,
) -> Dict:
    """Fetch one page of documents from /api/documents.

    Args:
        base_url: TruSpace base URL (a trailing slash is tolerated).
        workspace_id: Workspace to list; sent as ``workspace``.
        cookies: Cookies sent with the request.
        headers: Headers sent with the request.
        start: Offset of the first document; sent as ``from``.
        limit: Page size.
        search: Search string; sent as-is.
        timeout: HTTP timeout in seconds.

    Returns:
        JSON payload from /api/documents with { count, data: [...] }.

    Raises:
        requests.HTTPError: If the server answers with an error status.
    """
    url = base_url.rstrip("/") + "/api/documents"
    params = {
        "from": str(start),
        "limit": str(limit),
        "workspace": workspace_id,
        "search": search,
    }
    r = requests.get(url, params=params, headers=headers, cookies=cookies, timeout=timeout)
    r.raise_for_status()
    return r.json()
def iterate_truspace_pages(
    base_url: str, workspace_id: str, cookies: Dict[str, str], headers: Dict[str, str], page_size: int = 50
) -> List[Dict]:
    """Fetch all documents for the workspace (paged).

    Args:
        base_url: TruSpace base URL.
        workspace_id: Workspace to list.
        cookies: Cookies sent with each request.
        headers: Headers sent with each request.
        page_size: Number of documents requested per page.

    Returns:
        The concatenated ``data`` entries of every page.
    """
    start = 0
    out: List[Dict] = []
    while True:
        payload = list_truspace_documents(base_url, workspace_id, cookies, headers, start=start, limit=page_size)
        data = payload.get("data") or []
        out.extend(data)
        if not data:
            break
        try:
            count = int(payload.get("count"))
        except (TypeError, ValueError):
            count = None  # total not reported (or unusable)
        if count is None:
            # Without a total, a short page is the only end-of-list signal.
            if len(data) < page_size:
                break
        elif start + page_size >= count:
            break
        start += page_size
    return out


def discover_new_or_updated(items: List[Dict], state: Dict[str, Dict]) -> List[Dict]:
    """Decide which documents to transfer based on docId + meta.version.

    Args:
        items: Document records as returned by the TruSpace listing.
        state: Previously transferred documents, keyed by document id.

    Returns:
        One dict per document that is new or whose version differs from the
        stored one, carrying ``docId``, ``filename``, ``version`` and ``cid``
        for download/upload. Records that are not dicts or have no id are
        skipped.
    """
    to_transfer = []
    for it in items:
        if not isinstance(it, dict):
            continue
        raw_id = it.get("docId") or it.get("id")
        if not raw_id:
            continue
        # State is persisted as JSON, where object keys are always strings;
        # normalising here keeps numeric ids matching after a restart.
        doc_id = str(raw_id)

        meta = it.get("meta")
        if not isinstance(meta, dict):
            meta = {}
        version = str(meta.get("version") or "")
        filename = meta.get("filename") or it.get("filename") or f"{doc_id}.bin"
        cid = it.get("cid") or meta.get("cid")

        prev = state.get(doc_id)
        known = prev if isinstance(prev, dict) else None
        if not known or str(known.get("version", "")) != version:
            to_transfer.append({"docId": doc_id, "filename": filename, "version": version, "cid": cid})
    return to_transfer


def download_truspace_document_file(
    base_url: str,
    doc_id: str,
    cookies: Dict[str, str],
    headers: Dict[str, str],
    timeout: int = 180,
    ipfs_gateway: Optional[str] = None,
    cid: Optional[str] = None,
) -> bytes:
    """Download a document's bytes, trying several routes in order.

    Your instance serves files by CID at /api/documents/version/{cid}.
    Use that first; then try docId routes; lastly IPFS gateway if provided.

    Args:
        base_url: TruSpace base URL (a trailing slash is tolerated).
        doc_id: Document id used for the docId-based routes.
        cookies: Cookies sent to TruSpace (not to the IPFS gateway).
        headers: Headers sent to TruSpace (not to the IPFS gateway); Accept
            is overridden on a copy.
        timeout: HTTP timeout in seconds for each attempt.
        ipfs_gateway: Optional gateway base URL for the final fallback.
        cid: Optional content id for the CID route and the IPFS fallback.

    Returns:
        The file content of the first attempt that succeeds.

    Raises:
        requests.HTTPError: If every attempt failed; the message lists each
            attempted URL with its outcome.
    """
    base = base_url.rstrip("/")
    h = dict(headers)
    h["Accept"] = "application/octet-stream"
    errors = []

    candidates = []
    # 1) CID-based route (works on your instance)
    if cid:
        candidates.append(f"{base}/api/documents/version/{cid}")
    # 2) docId routes (in case CID missing)
    candidates.extend(
        base + path
        for path in (
            f"/api/documents/{doc_id}/file",
            f"/api/documents/{doc_id}/download",
            f"/api/document/{doc_id}/file",
            f"/api/document/{doc_id}/download",
        )
    )

    for url in candidates:
        try:
            # The context manager closes the streamed response on every path.
            with requests.get(url, headers=h, cookies=cookies, timeout=timeout, stream=True) as r:
                if r.status_code == 200:
                    return r.content
                errors.append(f"{url} -> {r.status_code} {r.text[:120]!r}")
        except requests.RequestException as e:
            errors.append(f"{url} -> {e}")

    # 3) IPFS fallback (optional)
    if ipfs_gateway and cid:
        ipfs_url = f"{ipfs_gateway.rstrip('/')}/ipfs/{cid}"
        try:
            with requests.get(ipfs_url, timeout=timeout, stream=True) as r:
                r.raise_for_status()
                return r.content
        except requests.RequestException as e:
            errors.append(f"{ipfs_url} -> {e}")

    raise requests.HTTPError("All TruSpace download attempts failed:\n" + "\n".join(errors))
# -------------------- core sync --------------------
def run_once(
    truspace_base: str,
    workspace_id: str,
    cookies: Dict[str, str],
    headers: Dict[str, str],
    paperless_url: str,
    paperless_token: str,
    storage_path_id: Optional[int],
    state: Dict[str, Dict],
    state_path: Path,
) -> None:
    """Run one sync pass: list, diff against state, download, upload, record.

    Args:
        truspace_base: TruSpace base URL.
        workspace_id: TruSpace workspace to sync.
        cookies: Cookies for TruSpace requests.
        headers: Headers for TruSpace requests.
        paperless_url: Paperless base URL.
        paperless_token: Paperless API token.
        storage_path_id: Optional Paperless storage path id.
        state: In-memory sync state; updated in place per transferred document.
        state_path: File the state is saved to after each transferred document.

    Errors while processing a single document are logged and do not stop the
    remaining documents. A document is recorded in `state` only once
    Paperless has accepted the upload, so failed uploads are retried on the
    next pass.
    """
    # 1) list TruSpace docs
    all_items = iterate_truspace_pages(truspace_base, workspace_id, cookies, headers, page_size=50)

    # 2) find new/updated
    to_xfer = discover_new_or_updated(all_items, state)
    if not to_xfer:
        logging.info("No new/updated TruSpace documents.")
        return
    logging.info("Found %d new/updated document(s) to transfer.", len(to_xfer))

    # Loop-invariant, so resolve it once per pass.
    ipfs_gateway = os.getenv("TRUSPACE_IPFS_GATEWAY", "http://smartspace.local:8080")

    # 3) transfer each
    for item in to_xfer:
        if STOP:
            break
        doc_id = item["docId"]
        filename = item["filename"]
        version = item["version"]
        cid = item.get("cid")
        try:
            logging.info("Downloading TruSpace document %s (%s) ...", doc_id, filename)
            blob = download_truspace_document_file(
                truspace_base, doc_id, cookies, headers,
                ipfs_gateway=ipfs_gateway,
                cid=cid,
            )

            logging.info("Uploading to Paperless: %s", filename)
            task_id = upload_bytes_to_paperless(
                data=blob,
                filename=filename,
                base_url=paperless_url,
                token=paperless_token,
                storage_path_id=storage_path_id,
            )
            if not task_id:
                # No task id means the upload was rejected or failed (or,
                # rarely, an empty response body). Do NOT mark the document
                # as synced, so it is attempted again on the next pass.
                logging.warning(
                    "Paperless did not return a task id for %s; will retry on the next iteration.",
                    filename,
                )
                continue

            logging.info("Paperless accepted upload (task %s). Polling ...", task_id)
            doc_no = poll_paperless_task_for_document_id(paperless_url, paperless_token, task_id)
            if doc_no is not None:
                logging.info("Paperless ingested #%s from %s", doc_no, filename)
            else:
                logging.info(
                    "Paperless accepted %s (task %s) but reported no document id while polling.",
                    filename, task_id,
                )

            # Update state on success to avoid re-uploading the same version
            state[doc_id] = {"version": version, "filename": filename, "cid": cid}
            save_state(state_path, state)
        except requests.HTTPError as e:
            logging.error("Download failed for %s: %s", doc_id, e)
        except Exception as e:
            # Per-document boundary: log with traceback and move on.
            logging.exception("Failed processing %s: %s", doc_id, e)
# -------------------- CLI --------------------
def parse_args() -> argparse.Namespace:
    """Parse command-line flags, falling back to environment variables.

    Returns:
        The parsed namespace. ``storage_path_id`` is an int or None and
        ``interval`` is an int.

    Integer settings taken from the environment are handed to argparse as
    strings: argparse applies ``type`` to string defaults, so a bad value
    yields a regular usage error instead of a traceback. Empty or
    whitespace-only variables are treated as unset.
    """
    storage_default = (os.getenv("PAPERLESS_STORAGE_PATH_ID") or "").strip() or None
    interval_default = (os.getenv("PAPERLESS_INTERVAL") or "").strip() or "600"

    p = argparse.ArgumentParser(description="Sync new/updated TruSpace documents to Paperless-ngx.")

    # TruSpace
    p.add_argument("--truspace-url", default=os.getenv("TRUSPACE_URL", "http://smartspace.local:8000"))
    p.add_argument("--workspace-id", default=os.getenv("TRUSPACE_WORKSPACE_ID"))
    p.add_argument("--csrftoken", default=os.getenv("TRUSPACE_CSRFTOKEN"))
    p.add_argument("--sessionid", default=os.getenv("TRUSPACE_SESSIONID"))
    p.add_argument("--auth-token", default=os.getenv("TRUSPACE_AUTH_TOKEN"))
    p.add_argument("--login", default=os.getenv("TRUSPACE_LOGIN_JSON"))

    # Paperless
    p.add_argument("--url", default=os.getenv("PAPERLESS_URL"), help="Paperless base URL")
    p.add_argument("--token", default=os.getenv("PAPERLESS_TOKEN"), help="Paperless API token")
    p.add_argument("--storage-path-id", type=int, default=storage_default)

    # General
    p.add_argument("--interval", type=int, default=interval_default)
    p.add_argument("--state-file", default=os.getenv("STATE_FILE", STATE_FILE_NAME))
    p.add_argument("--log-level", default=os.getenv("LOG_LEVEL", "INFO"))
    return p.parse_args()
def main():
    """Entry point: validate settings, then sync repeatedly until STOP is set."""
    args = parse_args()

    log_level = getattr(logging, args.log_level.upper(), logging.INFO)
    logging.basicConfig(level=log_level, format="%(asctime)s %(levelname)s %(message)s")

    # Required settings; exit code 2 signals a usage error.
    if not args.workspace_id:
        print("Error: --workspace-id or TRUSPACE_WORKSPACE_ID is required.", file=sys.stderr)
        sys.exit(2)
    if not (args.url and args.token):
        print("Error: Paperless --url and --token are required (or env vars).", file=sys.stderr)
        sys.exit(2)

    headers = truspace_headers(args.truspace_url, args.csrftoken, args.auth_token)
    cookies = truspace_cookies(args.csrftoken, args.sessionid, args.auth_token, args.login)

    state_path = Path(args.state_file).expanduser().resolve()
    state = load_state(state_path)

    logging.info(
        "Starting bridge. TruSpace=%s Workspace=%s Paperless=%s Interval=%ss",
        args.truspace_url, args.workspace_id, args.url, args.interval,
    )

    sync_kwargs = dict(
        truspace_base=args.truspace_url,
        workspace_id=args.workspace_id,
        cookies=cookies,
        headers=headers,
        paperless_url=args.url,
        paperless_token=args.token,
        storage_path_id=args.storage_path_id,
        state=state,
        state_path=state_path,
    )

    while not STOP:
        try:
            run_once(**sync_kwargs)
        except Exception as e:
            # Top-level boundary: one failed pass must not end the bridge.
            logging.exception("Iteration failed: %s", e)

        # Sleep in slices of at most 5 seconds so a stop request is noticed
        # without waiting out the whole interval.
        waited = 0
        while waited < args.interval and not STOP:
            time.sleep(min(5, args.interval - waited))
            waited += 5

    logging.info("Exiting.")


if __name__ == "__main__":
    main()
reacted with thumbs up emoji reacted with thumbs down emoji reacted with laugh emoji reacted with hooray emoji reacted with confused emoji reacted with heart emoji reacted with rocket emoji reacted with eyes emoji
Uh oh!
There was an error while loading. Please reload this page.
-
I played around a bit with Paperless-ngx, which seems to be a very nice and popular document management system (and is easy to install on a Raspberry Pi).
Purely as a proof of concept, I vibe-coded a small Python script that polls a TruSpace workspace and inserts new files into a Paperless folder. It works like a charm on a Raspberry Pi 💯
Beta Was this translation helpful? Give feedback.
All reactions