# homelab-docker/office/paperless-ngx/internet/docker-compose.yml
# (repository file-viewer metadata: 181 lines, 7.3 KiB, YAML)

services:
  # Paperless-ngx stack: Redis broker, PostgreSQL, the Paperless web server,
  # Gotenberg + Tika for document conversion, and paperless-gpt.
  #
  # Secrets are NOT stored in this file. Provide them through a `.env` file
  # next to this compose file (or the shell environment):
  #   PAPERLESS_SECRET_KEY, AUTHENTIK_CLIENT_ID, AUTHENTIK_CLIENT_SECRET,
  #   PAPERLESS_EMAIL_HOST_PASSWORD, PAPERLESS_API_TOKEN, OPENAI_API_KEY
  #   (optional: PAPERLESS_DBPASS, defaults to "paperless").
  # Credentials that were previously committed in plaintext should be treated
  # as compromised and rotated.

  # Redis instance referenced by PAPERLESS_REDIS below.
  broker:
    image: docker.io/library/redis:8
    restart: unless-stopped
    volumes:
      - redisdata:/data

  # PostgreSQL instance referenced by PAPERLESS_DBHOST below.
  db:
    image: docker.io/library/postgres:17
    restart: unless-stopped
    volumes:
      - pgdata:/var/lib/postgresql/data
      - backup:/backup
    environment:
      POSTGRES_DB: paperless
      POSTGRES_USER: paperless
      # Defaults to the previous hard-coded value so existing deployments keep
      # working. NOTE: the postgres image only applies this when the data
      # directory is first initialised; changing it later also requires an
      # ALTER USER inside the database.
      POSTGRES_PASSWORD: "${PAPERLESS_DBPASS:-paperless}"

  webserver:
    image: ghcr.io/paperless-ngx/paperless-ngx:2.19.4
    restart: unless-stopped
    depends_on:
      - db
      - broker
      - gotenberg
      - tika
    ports:
      - "8010:8000"
    healthcheck:
      test: ["CMD", "curl", "-fs", "-S", "--max-time", "2", "http://localhost:8000"]
      interval: 30s
      timeout: 10s
      retries: 5
    labels:
      - "com.centurylinklabs.watchtower.monitor-only=true"
    volumes:
      - data:/usr/src/paperless/data
      - media:/usr/src/paperless/media
      - export:/usr/src/paperless/export
      - consume:/usr/src/paperless/consume
    environment:
      # --- Backing services -------------------------------------------------
      PAPERLESS_REDIS: redis://broker:6379
      PAPERLESS_DBHOST: db
      # Must match POSTGRES_PASSWORD of the db service.
      PAPERLESS_DBPASS: "${PAPERLESS_DBPASS:-paperless}"
      PAPERLESS_TIKA_ENABLED: "1"
      PAPERLESS_TIKA_GOTENBERG_ENDPOINT: http://gotenberg:3000
      PAPERLESS_TIKA_ENDPOINT: http://tika:9998

      # --- Host / URL settings ----------------------------------------------
      PAPERLESS_TIME_ZONE: Europe/Berlin
      PAPERLESS_URL: https://dm.scheidel.biz
      # Host names only (no scheme). "localhost" is listed because the
      # healthcheck above requests http://localhost:8000; the Paperless-ngx
      # docs ask for it whenever this variable is set manually.
      PAPERLESS_ALLOWED_HOSTS: "192.168.2.102,localhost"
      # This instance is reachable under a public URL, so the built-in default
      # secret key must not be used. Use a long random string.
      PAPERLESS_SECRET_KEY: "${PAPERLESS_SECRET_KEY:?set PAPERLESS_SECRET_KEY in .env}"

      # --- Single sign-on via Authentik (OpenID Connect) --------------------
      PAPERLESS_DISABLE_REGULAR_LOGIN: "true"
      PAPERLESS_LOGOUT_REDIRECT_URL: https://authentik.scheidel.biz/application/o/paperless-dm/end-session/
      PAPERLESS_APPS: "allauth.socialaccount.providers.openid_connect"
      # JSON document; folded onto one line at load time. Client id and secret
      # are injected by Compose variable interpolation.
      PAPERLESS_SOCIALACCOUNT_PROVIDERS: >-
        {"openid_connect": {"APPS": [{
        "provider_id": "authentik",
        "name": "Authentik SSO",
        "client_id": "${AUTHENTIK_CLIENT_ID:?set AUTHENTIK_CLIENT_ID in .env}",
        "secret": "${AUTHENTIK_CLIENT_SECRET:?set AUTHENTIK_CLIENT_SECRET in .env}",
        "settings": {"server_url":
        "https://authentik.scheidel.biz/application/o/paperless-dm/.well-known/openid-configuration"}
        }]}}
      # Optional bootstrap admin account; take the password from .env, never
      # from this file.
      # PAPERLESS_ADMIN_USER: michael
      # PAPERLESS_ADMIN_PASSWORD: "${PAPERLESS_ADMIN_PASSWORD}"

      # --- OCR / consumption ------------------------------------------------
      PAPERLESS_OCR_LANGUAGE: deu+eng
      # NOTE(review): relative path; presumably resolves to the
      # /usr/src/paperless/consume mount above - confirm.
      PAPERLESS_CONSUMPTION_DIR: ../consume
      # PAPERLESS_FILENAME_FORMAT: '{created_year}/{correspondent}/{title}'
      PAPERLESS_OCR_USER_ARGS: '{"invalidate_digital_signatures": true}'
      PAPERLESS_CONSUMER_ENABLE_ASN_BARCODE: "true"
      # Additional languages to install for text recognition, separated by
      # whitespace. This is different from PAPERLESS_OCR_LANGUAGE, which
      # defines the language used for OCR. The container installs English,
      # German, Italian, Spanish and French by default. See
      # https://packages.debian.org/search?keywords=tesseract-ocr-&searchon=names&suite=buster
      # for available languages.
      # PAPERLESS_OCR_LANGUAGES: tur ces

      # --- Outgoing mail (implicit TLS on port 465) -------------------------
      PAPERLESS_EMAIL_HOST: smtp.strato.de
      PAPERLESS_EMAIL_PORT: "465"
      PAPERLESS_EMAIL_HOST_USER: michael.scheidel@scheidel.biz
      PAPERLESS_EMAIL_FROM: paperless-ngx@scheidel.biz
      PAPERLESS_EMAIL_HOST_PASSWORD: "${PAPERLESS_EMAIL_HOST_PASSWORD:?set PAPERLESS_EMAIL_HOST_PASSWORD in .env}"
      PAPERLESS_EMAIL_USE_TLS: "false"
      PAPERLESS_EMAIL_USE_SSL: "true"

      # --- Container user ---------------------------------------------------
      # The UID and GID of the user used to run paperless in the container.
      # Set this to your UID and GID on the host so that you have write access
      # to the consumption directory.
      USERMAP_UID: "1000"
      USERMAP_GID: "100"

  gotenberg:
    image: docker.io/gotenberg/gotenberg:8.23.0
    restart: unless-stopped
    # The Gotenberg Chromium route is used to convert .eml files. We do not
    # want to allow external content like tracking pixels or even JavaScript.
    command:
      - "gotenberg"
      - "--chromium-disable-javascript=true"
      - "--chromium-allow-list=file:///tmp/.*"

  tika:
    # NOTE(review): `latest` is a floating tag, unlike the pinned images
    # above; consider pinning a specific version.
    image: docker.io/apache/tika:latest
    restart: unless-stopped

  paperless-gpt:
    # Use one of these image sources:
    image: icereed/paperless-gpt:latest  # Docker Hub
    # image: ghcr.io/icereed/paperless-gpt:latest  # GitHub Container Registry
    # Same restart policy as every other service in this stack.
    restart: unless-stopped
    environment:
      PAPERLESS_BASE_URL: "http://192.168.2.102:8010"
      PAPERLESS_API_TOKEN: "${PAPERLESS_API_TOKEN:?set PAPERLESS_API_TOKEN in .env}"
      # PAPERLESS_PUBLIC_URL: "http://paperless.mydomain.com"  # Optional
      MANUAL_TAG: "paperless-gpt"  # Optional, default: paperless-gpt
      AUTO_TAG: "paperless-gpt-auto"  # Optional, default: paperless-gpt-auto

      # LLM configuration: standard OpenAI
      LLM_PROVIDER: "openai"
      LLM_MODEL: "gpt-4o"
      OPENAI_API_KEY: "${OPENAI_API_KEY:?set OPENAI_API_KEY in .env}"

      # Optional LLM settings
      LLM_LANGUAGE: "German"  # Optional, default: English
      LLM_REQUESTS_PER_MINUTE: "60"
      LLM_MAX_RETRIES: "3"
      LLM_BACKOFF_MAX_WAIT: "30s"
      TOKEN_LIMIT: "8000"
      # GIN_MODE: release

      # OCR configuration: LLM-based OCR
      OCR_PROVIDER: "llm"  # Default OCR provider
      VISION_LLM_PROVIDER: "openai"  # openai or ollama
      VISION_LLM_MODEL: "gpt-4o"  # minicpm-v (ollama) or gpt-4o (openai)

      # OCR processing mode
      OCR_PROCESS_MODE: "image"  # Optional, default: image, other options: pdf, whole_pdf
      PDF_SKIP_EXISTING_OCR: "false"  # Optional, skip OCR for PDFs with existing OCR

      # Enhanced OCR features
      CREATE_LOCAL_HOCR: "false"  # Optional, save hOCR files locally
      LOCAL_HOCR_PATH: "/app/hocr"  # Optional, path for hOCR files
      CREATE_LOCAL_PDF: "false"  # Optional, save enhanced PDFs locally
      LOCAL_PDF_PATH: "/app/pdf"  # Optional, path for PDF files
      PDF_UPLOAD: "false"  # Optional, upload enhanced PDFs to paperless-ngx
      PDF_REPLACE: "false"  # Optional and DANGEROUS, delete original after upload
      PDF_COPY_METADATA: "true"  # Optional, copy metadata from original document
      PDF_OCR_TAGGING: "true"  # Optional, add tag to processed documents
      PDF_OCR_COMPLETE_TAG: "paperless-gpt-ocr-complete"  # Optional, tag name
      AUTO_OCR_TAG: "paperless-gpt-ocr-auto"  # Optional, default: paperless-gpt-ocr-auto
      OCR_LIMIT_PAGES: "5"  # Optional, default: 5. Set to 0 for no limit.
      LOG_LEVEL: "info"  # Optional: debug, warn, error
    volumes:
      - prompts:/app/prompts  # Mount the prompts directory
      - hocr:/app/hocr  # Only if CREATE_LOCAL_HOCR is true
      - pdf:/app/pdf  # Only if CREATE_LOCAL_PDF is true
    ports:
      - "8180:8080"
    depends_on:
      - webserver
volumes:
  # Named volumes with default driver settings, grouped by the service that
  # mounts them.

  # broker
  redisdata: {}

  # db
  pgdata: {}
  backup: {}

  # webserver
  data: {}
  media: {}
  export: {}
  consume: {}

  # paperless-gpt
  prompts: {}
  hocr: {}
  pdf: {}