Files
paperless-ngx/pyproject.toml
Trenton H 701735f6e5 Chore: Drop old signal and unneeded apps, transition to parser registry instead (#12405)
* refactor: switch consumer and callers to ParserRegistry (Phase 4)

Replace all Django signal-based parser discovery with direct registry
calls. Removes `_parser_cleanup`, `parser_is_new_style` shims, and all
old-style isinstance checks. All parser instantiation now uses the
`with parser_class() as parser:` context manager pattern.

- documents/parsers.py: delegate to get_parser_registry(); drop lru_cache
- documents/consumer.py: use registry + context manager; remove shims
- documents/tasks.py: same pattern
- documents/management/commands/document_thumbnails.py: same pattern
- documents/views.py: get_metadata uses context manager
- documents/checks.py: use get_parser_registry().all_parsers()
- paperless/parsers/registry.py: add all_parsers() public method
- tests: update mocks to target documents.consumer.get_parser_class_for_mime_type

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>

* refactor: drop get_parser_class_for_mime_type; callers use registry directly

All callers now call get_parser_registry().get_parser_for_file() with
the actual filename and path, enabling score() to use file extension
hints. The MIME-only helper is removed.

- consumer.py: passes self.filename + self.working_copy
- tasks.py: passes document.original_filename + document.source_path
- document_thumbnails.py: same pattern
- views.py: passes Path(file).name + Path(file)
- parsers.py: internal helpers inline the registry call with filename=""
- test_parsers.py: drop TestParserDiscovery (was testing mock behavior);
  TestParserAvailability uses registry directly
- test_consumer.py: mocks switch to documents.consumer.get_parser_registry

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>

* refactor: remove document_consumer_declaration signal infrastructure

Remove the document_consumer_declaration signal that was previously used
for parser registration. Each parser app no longer connects to this signal,
and the signal declaration itself has been removed from documents/signals.

Changes:
- Remove document_consumer_declaration from documents/signals/__init__.py
- Remove ready() methods and signal imports from all parser app configs
- Delete signal shim files (signals.py) from all parser apps:
  - paperless_tesseract/signals.py
  - paperless_text/signals.py
  - paperless_tika/signals.py
  - paperless_mail/signals.py
  - paperless_remote/signals.py

Parser discovery now happens exclusively through the ParserRegistry
system introduced in the previous refactor phases.

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>

* refactor: remove empty paperless_text and paperless_tika Django apps

After parser classes were moved to paperless/parsers/ in the plugin
refactor, these Django apps contained only empty AppConfig classes
with no models, views, tasks, migrations, or other functionality.

- Remove paperless_text and paperless_tika from INSTALLED_APPS
- Delete empty app directories entirely
- Update pyproject.toml test exclusions
- Clean stale mypy baseline entries for moved parser files

paperless_remote app is retained as it contains meaningful system
checks for Azure AI configuration.

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>

* Moves the checks and tests to the main application and removes the old applications

* Adds a comment to satisy Sonar

* refactor: remove automatic log_summary() call from get_parser_registry()

The summary was logged once per process, causing it to appear repeatedly
during Docker startup (management commands, web server, each Celery
worker subprocess). External parsers are already announced individually
at INFO when discovered; the full summary is redundant noise.
log_summary() is retained on ParserRegistry for manual/debug use.

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>

* Cleans up the duplicate test file/fixture

* Fixes a race condition where webserver threads could race to populate the registry

---------

Co-authored-by: Claude Sonnet 4.6 <noreply@anthropic.com>
2026-03-22 06:53:32 -07:00

352 lines
9.3 KiB
TOML

[project]
name = "paperless-ngx"
version = "2.20.13"
description = "A community-supported supercharged document management system: scan, index and archive all your physical documents"
readme = "README.md"
requires-python = ">=3.11"
classifiers = [
"Programming Language :: Python :: 3 :: Only",
"Programming Language :: Python :: 3.11",
"Programming Language :: Python :: 3.12",
"Programming Language :: Python :: 3.13",
"Programming Language :: Python :: 3.14",
]
# TODO: Move certain things to groups and then utilize that further
# This will allow testing to not install a webserver, mysql, etc
dependencies = [
"azure-ai-documentintelligence>=1.0.2",
"babel>=2.17",
"bleach~=6.3.0",
"celery[redis]~=5.6.2",
"channels~=4.2",
"channels-redis~=4.2",
"concurrent-log-handler~=0.9.25",
"dateparser~=1.2",
# WARNING: django does not use semver.
# Only patch versions are guaranteed to not introduce breaking changes.
"django~=5.2.10",
"django-allauth[mfa,socialaccount]~=65.15.0",
"django-auditlog~=3.4.1",
"django-cachalot~=2.9.0",
"django-celery-results~=2.6.0",
"django-compression-middleware~=0.5.0",
"django-cors-headers~=4.9.0",
"django-extensions~=4.1",
"django-filter~=25.1",
"django-guardian~=3.3.0",
"django-multiselectfield~=1.0.1",
"django-rich~=2.2.0",
"django-soft-delete~=1.0.18",
"django-treenode>=0.23.2",
"djangorestframework~=3.16",
"djangorestframework-guardian~=0.4.0",
"drf-spectacular~=0.28",
"drf-spectacular-sidecar~=2026.3.1",
"drf-writable-nested~=0.7.1",
"faiss-cpu>=1.10",
"filelock~=3.25.2",
"flower~=2.0.1",
"gotenberg-client~=0.13.1",
"httpx-oauth~=0.16",
"ijson>=3.2",
"imap-tools~=1.11.0",
"jinja2~=3.1.5",
"langdetect~=1.0.9",
"llama-index-core>=0.14.12",
"llama-index-embeddings-huggingface>=0.6.1",
"llama-index-embeddings-openai>=0.5.1",
"llama-index-llms-ollama>=0.9.1",
"llama-index-llms-openai>=0.6.13",
"llama-index-vector-stores-faiss>=0.5.2",
"nltk~=3.9.1",
"ocrmypdf~=17.3.0",
"openai>=1.76",
"pathvalidate~=3.3.1",
"pdf2image~=1.17.0",
"python-dateutil~=2.9.0",
"python-dotenv~=1.2.1",
"python-gnupg~=0.5.4",
"python-ipware~=3.0.0",
"python-magic~=0.4.27",
"rapidfuzz~=3.14.0",
"redis[hiredis]~=5.2.1",
"regex>=2025.9.18",
"scikit-learn~=1.8.0",
"sentence-transformers>=4.1",
"setproctitle~=1.3.4",
"tika-client~=0.10.0",
"torch~=2.10.0",
"watchfiles>=1.1.1",
"whitenoise~=6.11",
"whoosh-reloaded>=2.7.5",
"zxing-cpp~=3.0.0",
]
optional-dependencies.mariadb = [
"mysqlclient~=2.2.7",
]
optional-dependencies.postgres = [
"psycopg[c,pool]==3.3",
# Direct dependency for proper resolution of the pre-built wheels
"psycopg-c==3.3",
"psycopg-pool==3.3",
]
optional-dependencies.webserver = [
"granian[uvloop]~=2.7.0",
]
[dependency-groups]
dev = [
{ "include-group" = "docs" },
{ "include-group" = "testing" },
{ "include-group" = "lint" },
]
docs = [
"zensical>=0.0.21",
]
testing = [
"daphne",
"factory-boy~=3.3.1",
"faker~=40.8.0",
"imagehash",
"pytest~=9.0.0",
"pytest-cov~=7.0.0",
"pytest-django~=4.12.0",
"pytest-env~=1.5.0",
"pytest-httpx",
"pytest-mock~=3.15.1",
#"pytest-randomly~=4.0.1",
"pytest-rerunfailures~=16.1",
"pytest-sugar",
"pytest-xdist~=3.8.0",
]
lint = [
"prek~=0.3.0",
"ruff~=0.15.0",
]
typing = [
"celery-types",
"django-filter-stubs",
"django-stubs[compatible-mypy]",
"djangorestframework-stubs[compatible-mypy]",
"lxml-stubs",
"microsoft-python-type-stubs @ git+https://github.com/microsoft/python-type-stubs.git",
"mypy",
"mypy-baseline",
"pyrefly",
"types-bleach",
"types-channels",
"types-colorama",
"types-dateparser",
"types-markdown",
"types-pygments",
"types-python-dateutil",
"types-pytz",
"types-redis",
"types-setuptools",
]
[tool.uv]
required-version = ">=0.9.0"
package = false
environments = [
"sys_platform == 'darwin'",
"sys_platform == 'linux'",
]
[[tool.uv.index]]
name = "pytorch-cpu"
url = "https://download.pytorch.org/whl/cpu"
explicit = true
[tool.uv.sources]
# Markers are chosen to select these almost exclusively when building the Docker image
psycopg-c = [
{ url = "https://github.com/paperless-ngx/builder/releases/download/psycopg-trixie-3.3.0/psycopg_c-3.3.0-cp312-cp312-linux_x86_64.whl", marker = "sys_platform == 'linux' and platform_machine == 'x86_64' and python_version == '3.12'" },
{ url = "https://github.com/paperless-ngx/builder/releases/download/psycopg-trixie-3.3.0/psycopg_c-3.3.0-cp312-cp312-linux_aarch64.whl", marker = "sys_platform == 'linux' and platform_machine == 'aarch64' and python_version == '3.12'" },
]
torch = [
{ index = "pytorch-cpu" },
]
[tool.ruff]
target-version = "py311"
line-length = 88
src = [
"src",
]
respect-gitignore = true
# https://docs.astral.sh/ruff/settings/
fix = true
show-fixes = true
output-format = "grouped"
# https://docs.astral.sh/ruff/rules/
lint.extend-select = [
"COM", # https://docs.astral.sh/ruff/rules/#flake8-commas-com
"DJ", # https://docs.astral.sh/ruff/rules/#flake8-django-dj
"EXE", # https://docs.astral.sh/ruff/rules/#flake8-executable-exe
"FBT", # https://docs.astral.sh/ruff/rules/#flake8-boolean-trap-fbt
"FLY", # https://docs.astral.sh/ruff/rules/#flynt-fly
"G201", # https://docs.astral.sh/ruff/rules/#flake8-logging-format-g
"I", # https://docs.astral.sh/ruff/rules/#isort-i
"ICN", # https://docs.astral.sh/ruff/rules/#flake8-import-conventions-icn
"INP", # https://docs.astral.sh/ruff/rules/#flake8-no-pep420-inp
"ISC", # https://docs.astral.sh/ruff/rules/#flake8-implicit-str-concat-isc
"PIE", # https://docs.astral.sh/ruff/rules/#flake8-pie-pie
"PLC", # https://docs.astral.sh/ruff/rules/#pylint-pl
"PLE", # https://docs.astral.sh/ruff/rules/#pylint-pl
"PTH", # https://docs.astral.sh/ruff/rules/#flake8-use-pathlib-pth
"Q", # https://docs.astral.sh/ruff/rules/#flake8-quotes-q
"RSE", # https://docs.astral.sh/ruff/rules/#flake8-raise-rse
"RUF", # https://docs.astral.sh/ruff/rules/#ruff-specific-rules-ruf
"SIM", # https://docs.astral.sh/ruff/rules/#flake8-simplify-sim
"T20", # https://docs.astral.sh/ruff/rules/#flake8-print-t20
"TC", # https://docs.astral.sh/ruff/rules/#flake8-type-checking-tc
"TID", # https://docs.astral.sh/ruff/rules/#flake8-tidy-imports-tid
"UP", # https://docs.astral.sh/ruff/rules/#pyupgrade-up
"W", # https://docs.astral.sh/ruff/rules/#pycodestyle-e-w
]
lint.ignore = [
"DJ001",
"PLC0415",
"RUF012",
"SIM105",
]
# Migrations
lint.per-file-ignores."*/migrations/*.py" = [
"E501",
"SIM",
"T201",
]
# Testing
lint.per-file-ignores."*/tests/*.py" = [
"E501",
"SIM117",
]
lint.per-file-ignores.".github/scripts/*.py" = [
"E501",
"INP001",
"SIM117",
]
# Docker specific
lint.per-file-ignores."docker/rootfs/usr/local/bin/wait-for-redis.py" = [
"INP001",
"T201",
]
lint.per-file-ignores."docker/wait-for-redis.py" = [
"INP001",
"T201",
]
lint.per-file-ignores."src/documents/models.py" = [
"SIM115",
]
lint.isort.force-single-line = true
[tool.codespell]
write-changes = true
ignore-words-list = "criterias,afterall,valeu,ureue,equest,ure,assertIn,Oktober,commitish"
skip = "src-ui/src/locale/*,src-ui/pnpm-lock.yaml,src-ui/e2e/*,src/paperless_mail/tests/samples/*,src/paperless/tests/samples/mail/*,src/documents/tests/samples/*,*.po,*.json"
[tool.pytest]
minversion = "9.0"
pythonpath = [ "src" ]
strict_config = true
strict_markers = true
strict_parametrization_ids = true
strict_xfail = true
testpaths = [
"src/documents/tests/",
"src/paperless/tests/",
"src/paperless_mail/tests/",
"src/paperless_ai/tests",
]
addopts = [
"--pythonwarnings=all",
"--cov",
"--cov-report=html",
"--cov-report=xml",
"--numprocesses=auto",
"--maxprocesses=16",
"--dist=loadscope",
"--durations=50",
"--durations-min=0.5",
"--junitxml=junit.xml",
"-o",
"junit_family=legacy",
]
norecursedirs = [ "src/locale/", ".venv/", "src-ui/" ]
DJANGO_SETTINGS_MODULE = "paperless.settings"
markers = [
"live: Integration tests requiring external services (Gotenberg, Tika, nginx, etc)",
"nginx: Tests that make HTTP requests to the local nginx service",
"gotenberg: Tests requiring Gotenberg service",
"tika: Tests requiring Tika service",
"greenmail: Tests requiring Greenmail service",
"date_parsing: Tests which cover date parsing from content or filename",
"management: Tests which cover management commands/functionality",
]
[tool.pytest_env]
PAPERLESS_DISABLE_DBHANDLER = "true"
PAPERLESS_CACHE_BACKEND = "django.core.cache.backends.locmem.LocMemCache"
PAPERLESS_CHANNELS_BACKEND = "channels.layers.InMemoryChannelLayer"
[tool.coverage.report]
exclude_also = [
"if settings.AUDIT_LOG_ENABLED:",
"if AUDIT_LOG_ENABLED:",
"if TYPE_CHECKING:",
]
[tool.coverage.run]
source = [
"src/",
]
omit = [
"*/tests/*",
"manage.py",
"paperless/wsgi.py",
"paperless/auth.py",
]
[tool.mypy]
mypy_path = "src"
plugins = [
"mypy_django_plugin.main",
"mypy_drf_plugin.main",
]
check_untyped_defs = true
disallow_any_generics = true
disallow_incomplete_defs = true
disallow_untyped_defs = true
warn_redundant_casts = true
warn_unused_ignores = true
[tool.pyrefly]
search-path = [ "src" ]
baseline = ".pyrefly-baseline.json"
python-platform = "linux"
[tool.django-stubs]
django_settings_module = "paperless.settings"
[tool.mypy-baseline]
baseline_path = ".mypy-baseline.txt"
sort_baseline = true
ignore_categories = [ "note" ]