181 lines
7.3 KiB
YAML
181 lines
7.3 KiB
YAML
# Paperless-ngx stack: a Redis broker, a PostgreSQL database, the
# Paperless-ngx web server, Gotenberg and Tika (both referenced by the
# PAPERLESS_TIKA_* endpoints below), and the paperless-gpt companion service.
#
# Secrets are NOT stored in this file. They are injected through Compose
# variable interpolation, so the following variables must be provided via a
# `.env` file next to this file (kept out of version control) or via the
# shell environment:
#   PAPERLESS_OIDC_CLIENT_SECRET
#   PAPERLESS_EMAIL_HOST_PASSWORD
#   PAPERLESS_API_TOKEN
#   OPENAI_API_KEY
# Any credential that was ever committed in plain text should be rotated.
services:
  broker:
    image: docker.io/library/redis:8
    restart: unless-stopped
    volumes:
      - redisdata:/data

  db:
    image: docker.io/library/postgres:17
    restart: unless-stopped
    volumes:
      - pgdata:/var/lib/postgresql/data
      - backup:/backup
    environment:
      POSTGRES_DB: paperless
      POSTGRES_USER: paperless
      # NOTE(review): weak, hard-coded password. The service publishes no
      # ports, so it is only reachable from the Compose network; changing it
      # presumably also requires setting PAPERLESS_DBPASS on the webserver
      # and updating the already-initialised database - confirm before
      # editing.
      POSTGRES_PASSWORD: paperless

  webserver:
    image: ghcr.io/paperless-ngx/paperless-ngx:2.19.3
    restart: unless-stopped
    depends_on:
      - db
      - broker
      - gotenberg
      - tika
    ports:
      - "8010:8000"
    healthcheck:
      test: ["CMD", "curl", "-fs", "-S", "--max-time", "2", "http://localhost:8000"]
      interval: 30s
      timeout: 10s
      retries: 5
    labels:
      - "com.centurylinklabs.watchtower.monitor-only=true"
    volumes:
      - data:/usr/src/paperless/data
      - media:/usr/src/paperless/media
      - export:/usr/src/paperless/export
      - consume:/usr/src/paperless/consume
    environment:
      # --- Backing services -------------------------------------------------
      PAPERLESS_REDIS: redis://broker:6379
      PAPERLESS_DBHOST: db
      PAPERLESS_TIKA_ENABLED: "1"
      PAPERLESS_TIKA_GOTENBERG_ENDPOINT: http://gotenberg:3000
      PAPERLESS_TIKA_ENDPOINT: http://tika:9998

      # --- Site / hosting ---------------------------------------------------
      PAPERLESS_TIME_ZONE: Europe/Berlin
      PAPERLESS_URL: https://dm.scheidel.biz
      # PAPERLESS_ALLOWED_HOSTS: https://dm.scheidel.biz,http://192.168.2.102
      PAPERLESS_ALLOWED_HOSTS: "192.168.2.102"

      # --- Single sign-on via Authentik (OpenID Connect) --------------------
      # Boolean-looking values are quoted so that they reach the container as
      # the literal strings "true"/"false" regardless of the YAML parser.
      PAPERLESS_DISABLE_REGULAR_LOGIN: "true"
      PAPERLESS_LOGOUT_REDIRECT_URL: https://authentik.scheidel.biz/application/o/paperless-dm/end-session/
      PAPERLESS_APPS: "allauth.socialaccount.providers.openid_connect"
      # JSON document, folded onto a single line by the `>-` block scalar.
      # Line breaks are only placed between JSON tokens, so the parsed JSON is
      # unchanged. The client secret comes from the environment; the plain
      # `${NAME}` form (no `:?` modifier) is used on purpose because the
      # value contains further `}` characters after the variable reference.
      PAPERLESS_SOCIALACCOUNT_PROVIDERS: >-
        {"openid_connect": {"APPS": [{
        "provider_id": "authentik",
        "name": "Authentik SSO",
        "client_id": "FJBRykmuqdnpYtw3pcc38tTVJCTU8MtKK6xbr44P",
        "secret": "${PAPERLESS_OIDC_CLIENT_SECRET}",
        "settings": {"server_url": "https://authentik.scheidel.biz/application/o/paperless-dm/.well-known/openid-configuration"}
        }]}}
      # PAPERLESS_ADMIN_USER: michael
      # PAPERLESS_ADMIN_PASSWORD: "${PAPERLESS_ADMIN_PASSWORD}"

      # --- OCR / consumption ------------------------------------------------
      PAPERLESS_OCR_LANGUAGE: deu+eng
      PAPERLESS_CONSUMPTION_DIR: ../consume
      # PAPERLESS_FILENAME_FORMAT: '{created_year}/{correspondent}/{title}'
      PAPERLESS_OCR_USER_ARGS: '{"invalidate_digital_signatures": true}'
      PAPERLESS_CONSUMER_ENABLE_ASN_BARCODE: "true"

      # --- Outgoing mail ----------------------------------------------------
      PAPERLESS_EMAIL_HOST: smtp.strato.de
      PAPERLESS_EMAIL_PORT: "465"
      PAPERLESS_EMAIL_HOST_USER: michael.scheidel@scheidel.biz
      PAPERLESS_EMAIL_FROM: paperless-ngx@scheidel.biz
      PAPERLESS_EMAIL_HOST_PASSWORD: "${PAPERLESS_EMAIL_HOST_PASSWORD:?set PAPERLESS_EMAIL_HOST_PASSWORD in .env}"
      PAPERLESS_EMAIL_USE_TLS: "false"
      PAPERLESS_EMAIL_USE_SSL: "true"

      # The UID and GID of the user used to run paperless in the container.
      # Set this to your UID and GID on the host so that you have write access
      # to the consumption directory.
      USERMAP_UID: "1000"
      USERMAP_GID: "100"

      # Additional languages to install for text recognition, separated by a
      # whitespace. Note that this is different from PAPERLESS_OCR_LANGUAGE
      # (default=eng), which defines the language used for OCR.
      # The container installs English, German, Italian, Spanish and French by
      # default.
      # See https://packages.debian.org/search?keywords=tesseract-ocr-&searchon=names&suite=buster
      # for available languages.
      # PAPERLESS_OCR_LANGUAGES: tur ces

      # Adjust this key if you plan to make paperless available publicly. It
      # should be a very long sequence of random characters. You don't need to
      # remember it.
      # PAPERLESS_SECRET_KEY: change-me

      # Use this variable to set a timezone for the Paperless Docker
      # containers. If not specified, defaults to UTC.
      # (Already set above to Europe/Berlin.)
      # PAPERLESS_TIME_ZONE: America/Los_Angeles

      # The default language to use for OCR. Set this to the language most of
      # your documents are written in.
      # (Already set above to deu+eng.)
      # PAPERLESS_OCR_LANGUAGE: eng

  gotenberg:
    image: docker.io/gotenberg/gotenberg:8.23.0
    restart: unless-stopped

    # The gotenberg chromium route is used to convert .eml files. We do not
    # want to allow external content like tracking pixels or even javascript.
    command:
      - "gotenberg"
      - "--chromium-disable-javascript=true"
      - "--chromium-allow-list=file:///tmp/.*"

  tika:
    # NOTE(review): `latest` is a moving tag, unlike the pinned versions used
    # by broker, db, webserver and gotenberg - consider pinning a release.
    image: docker.io/apache/tika:latest
    restart: unless-stopped

  paperless-gpt:
    # Use one of these image sources:
    # NOTE(review): `latest` is a moving tag - consider pinning a release.
    image: icereed/paperless-gpt:latest  # Docker Hub
    # image: ghcr.io/icereed/paperless-gpt:latest  # GitHub Container Registry
    # Same restart policy as every other service in this stack.
    restart: unless-stopped
    environment:
      PAPERLESS_BASE_URL: "http://192.168.2.102:8010"
      PAPERLESS_API_TOKEN: "${PAPERLESS_API_TOKEN:?set PAPERLESS_API_TOKEN in .env}"
      # PAPERLESS_PUBLIC_URL: "http://paperless.mydomain.com"  # Optional
      MANUAL_TAG: "paperless-gpt"  # Optional, default: paperless-gpt
      AUTO_TAG: "paperless-gpt-auto"  # Optional, default: paperless-gpt-auto

      # LLM Configuration - Choose one:

      # Option 1: Standard OpenAI
      LLM_PROVIDER: "openai"
      LLM_MODEL: "gpt-4o"
      OPENAI_API_KEY: "${OPENAI_API_KEY:?set OPENAI_API_KEY in .env}"

      # Optional LLM Settings
      LLM_LANGUAGE: "German"  # Optional, default: English
      LLM_REQUESTS_PER_MINUTE: "60"
      LLM_MAX_RETRIES: "3"
      LLM_BACKOFF_MAX_WAIT: "30s"
      TOKEN_LIMIT: "8000"
      # GIN_MODE: release

      # OCR Configuration - Choose one:
      # Option 1: LLM-based OCR
      OCR_PROVIDER: "llm"  # Default OCR provider
      VISION_LLM_PROVIDER: "openai"  # openai or ollama
      VISION_LLM_MODEL: "gpt-4o"  # minicpm-v (ollama) or gpt-4o (openai)

      # OCR Processing Mode
      OCR_PROCESS_MODE: "image"  # Optional, default: image, other options: pdf, whole_pdf
      PDF_SKIP_EXISTING_OCR: "false"  # Optional, skip OCR for PDFs with existing OCR

      # Enhanced OCR Features
      CREATE_LOCAL_HOCR: "false"  # Optional, save hOCR files locally
      LOCAL_HOCR_PATH: "/app/hocr"  # Optional, path for hOCR files
      CREATE_LOCAL_PDF: "false"  # Optional, save enhanced PDFs locally
      LOCAL_PDF_PATH: "/app/pdf"  # Optional, path for PDF files
      PDF_UPLOAD: "false"  # Optional, upload enhanced PDFs to paperless-ngx
      PDF_REPLACE: "false"  # Optional and DANGEROUS, delete original after upload
      PDF_COPY_METADATA: "true"  # Optional, copy metadata from original document
      PDF_OCR_TAGGING: "true"  # Optional, add tag to processed documents
      PDF_OCR_COMPLETE_TAG: "paperless-gpt-ocr-complete"  # Optional, tag name
      AUTO_OCR_TAG: "paperless-gpt-ocr-auto"  # Optional, default: paperless-gpt-ocr-auto
      OCR_LIMIT_PAGES: "5"  # Optional, default: 5. Set to 0 for no limit.
      LOG_LEVEL: "info"  # Optional: debug, warn, error
    volumes:
      - prompts:/app/prompts  # Mount the prompts directory
      - hocr:/app/hocr  # Only if CREATE_LOCAL_HOCR is true
      - pdf:/app/pdf  # Only if CREATE_LOCAL_PDF is true
    ports:
      - "8180:8080"
    depends_on:
      - webserver
|
|
|
|
# Named volumes referenced by the services of this stack. Each one is declared
# without a driver or any options; they are grouped here by the service that
# mounts them.
volumes:
  # broker
  redisdata:

  # db
  pgdata:
  backup:

  # webserver
  data:
  media:
  export:
  consume:

  # paperless-gpt
  prompts:
  hocr:
  pdf:
|
|
|