From b77d42218fe4df00aa7f57d08e4644b1b53a0524 Mon Sep 17 00:00:00 2001
From: michael
Date: Sun, 2 Nov 2025 14:00:08 +0100
Subject: [PATCH] =?UTF-8?q?paperless-ngx/docker-compose-internet.yml=20hin?=
 =?UTF-8?q?zugef=C3=BCgt?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
 paperless-ngx/docker-compose-internet.yml | 198 ++++++++++++++++++++++
 1 file changed, 198 insertions(+)
 create mode 100644 paperless-ngx/docker-compose-internet.yml

diff --git a/paperless-ngx/docker-compose-internet.yml b/paperless-ngx/docker-compose-internet.yml
new file mode 100644
index 0000000..d51e842
--- /dev/null
+++ b/paperless-ngx/docker-compose-internet.yml
@@ -0,0 +1,198 @@
+# Paperless-ngx stack (paperless-ngx, Redis, PostgreSQL, Gotenberg, Tika,
+# paperless-gpt) for the internet-facing instance.
+#
+# Credentials are not stored in this file. Docker Compose substitutes the
+# ${VAR:?message} references below from the shell environment or from a
+# ".env" file next to this compose file, and aborts with the message while a
+# required variable is unset or empty. Keep ".env" out of version control
+# and single-quote values that contain special characters.
+#
+# Required variables:
+#   PAPERLESS_SECRET_KEY           long random string, see comment below
+#   PAPERLESS_EMAIL_HOST_PASSWORD  password for the SMTP account
+#   AUTHENTIK_CLIENT_ID            OIDC client id issued by Authentik
+#   AUTHENTIK_CLIENT_SECRET        OIDC client secret issued by Authentik
+#   PAPERLESS_API_TOKEN            paperless-ngx API token for paperless-gpt
+#   OPENAI_API_KEY                 OpenAI API key for paperless-gpt
+#
+# Credentials that were ever committed in plain text must be rotated.
+services:
+  broker:
+    image: docker.io/library/redis:8
+    restart: unless-stopped
+    volumes:
+      - redisdata:/data
+
+  db:
+    image: docker.io/library/postgres:17
+    restart: unless-stopped
+    volumes:
+      - pgdata:/var/lib/postgresql/data
+      - backup:/backup
+    environment:
+      POSTGRES_DB: paperless
+      POSTGRES_USER: paperless
+      POSTGRES_PASSWORD: paperless
+
+  webserver:
+    image: ghcr.io/paperless-ngx/paperless-ngx:2.19.3
+    restart: unless-stopped
+    depends_on:
+      - db
+      - broker
+      - gotenberg
+      - tika
+    ports:
+      - "8010:8000"
+    healthcheck:
+      test: ["CMD", "curl", "-fs", "-S", "--max-time", "2", "http://localhost:8000"]
+      interval: 30s
+      timeout: 10s
+      retries: 5
+    labels:
+      - "com.centurylinklabs.watchtower.monitor-only=true"
+    volumes:
+      - data:/usr/src/paperless/data
+      - media:/usr/src/paperless/media
+      - export:/usr/src/paperless/export
+      - consume:/usr/src/paperless/consume
+    environment:
+      PAPERLESS_REDIS: redis://broker:6379
+      PAPERLESS_DBHOST: db
+      PAPERLESS_TIKA_ENABLED: "1"
+      PAPERLESS_TIKA_GOTENBERG_ENDPOINT: http://gotenberg:3000
+      PAPERLESS_TIKA_ENDPOINT: http://tika:9998
+      PAPERLESS_TIME_ZONE: Europe/Berlin
+      PAPERLESS_URL: https://dm.scheidel.biz
+      PAPERLESS_ALLOWED_HOSTS: "192.168.2.102"
+
+      # Adjust this key if you plan to make paperless available publicly. It
+      # should be a very long sequence of random characters. You don't need to
+      # remember it.
+      PAPERLESS_SECRET_KEY: "${PAPERLESS_SECRET_KEY:?set PAPERLESS_SECRET_KEY in .env}"
+
+      # Single sign-on via Authentik (OpenID Connect).
+      PAPERLESS_DISABLE_REGULAR_LOGIN: "true"
+      PAPERLESS_LOGOUT_REDIRECT_URL: https://authentik.scheidel.biz/application/o/paperless-dm/end-session/
+      PAPERLESS_APPS: "allauth.socialaccount.providers.openid_connect"
+      # Folded scalar: the lines below are joined with single spaces into one
+      # JSON document.
+      PAPERLESS_SOCIALACCOUNT_PROVIDERS: >-
+        {"openid_connect": {"APPS": [{
+        "provider_id": "authentik",
+        "name": "Authentik SSO",
+        "client_id": "${AUTHENTIK_CLIENT_ID:?set AUTHENTIK_CLIENT_ID in .env}",
+        "secret": "${AUTHENTIK_CLIENT_SECRET:?set AUTHENTIK_CLIENT_SECRET in .env}",
+        "settings": {"server_url":
+        "https://authentik.scheidel.biz/application/o/paperless-dm/.well-known/openid-configuration"}
+        }]}}
+
+      PAPERLESS_OCR_LANGUAGE: deu+eng
+      # Additional languages to install for text recognition, separated by a
+      # whitespace. Note that this is different from PAPERLESS_OCR_LANGUAGE,
+      # which defines the language used for OCR. The container installs
+      # English, German, Italian, Spanish and French by default. See
+      # https://packages.debian.org/search?keywords=tesseract-ocr-&searchon=names&suite=buster
+      # for available languages.
+      #PAPERLESS_OCR_LANGUAGES: tur ces
+      PAPERLESS_OCR_USER_ARGS: '{"invalidate_digital_signatures": true}'
+      PAPERLESS_CONSUMPTION_DIR: ../consume
+      PAPERLESS_CONSUMER_ENABLE_ASN_BARCODE: "true"
+      #PAPERLESS_FILENAME_FORMAT: '{created_year}/{correspondent}/{title}'
+
+      PAPERLESS_EMAIL_HOST: smtp.strato.de
+      PAPERLESS_EMAIL_PORT: "465"
+      PAPERLESS_EMAIL_HOST_USER: michael.scheidel@scheidel.biz
+      PAPERLESS_EMAIL_FROM: paperless-ngx@scheidel.biz
+      PAPERLESS_EMAIL_HOST_PASSWORD: "${PAPERLESS_EMAIL_HOST_PASSWORD:?set PAPERLESS_EMAIL_HOST_PASSWORD in .env}"
+      PAPERLESS_EMAIL_USE_TLS: "false"
+      PAPERLESS_EMAIL_USE_SSL: "true"
+
+      # The UID and GID of the user used to run paperless in the container.
+      # Set this to your UID and GID on the host so that you have write access
+      # to the consumption directory.
+      USERMAP_UID: "1000"
+      USERMAP_GID: "100"
+
+  gotenberg:
+    image: docker.io/gotenberg/gotenberg:8.23.0
+    restart: unless-stopped
+    # The gotenberg chromium route is used to convert .eml files. We do not
+    # want to allow external content like tracking pixels or even javascript.
+    command:
+      - "gotenberg"
+      - "--chromium-disable-javascript=true"
+      - "--chromium-allow-list=file:///tmp/.*"
+
+  tika:
+    image: docker.io/apache/tika:latest
+    restart: unless-stopped
+
+  paperless-gpt:
+    # Docker Hub image; ghcr.io/icereed/paperless-gpt:latest is the GitHub
+    # Container Registry alternative.
+    image: icereed/paperless-gpt:latest
+    # Same restart policy as the other services in this stack.
+    restart: unless-stopped
+    environment:
+      PAPERLESS_BASE_URL: "http://192.168.2.102:8010"
+      PAPERLESS_API_TOKEN: "${PAPERLESS_API_TOKEN:?set PAPERLESS_API_TOKEN in .env}"
+      #PAPERLESS_PUBLIC_URL: "http://paperless.mydomain.com"  # Optional
+      MANUAL_TAG: "paperless-gpt"  # Optional, default: paperless-gpt
+      AUTO_TAG: "paperless-gpt-auto"  # Optional, default: paperless-gpt-auto
+
+      # LLM configuration: standard OpenAI
+      LLM_PROVIDER: "openai"
+      LLM_MODEL: "gpt-4o"
+      OPENAI_API_KEY: "${OPENAI_API_KEY:?set OPENAI_API_KEY in .env}"
+
+      # Optional LLM Settings
+      LLM_LANGUAGE: "German"  # Optional, default: English
+      LLM_REQUESTS_PER_MINUTE: "60"
+      LLM_MAX_RETRIES: "3"
+      LLM_BACKOFF_MAX_WAIT: "30s"
+      TOKEN_LIMIT: "8000"
+      #GIN_MODE: release
+
+      # OCR configuration: LLM-based OCR
+      OCR_PROVIDER: "llm"  # Default OCR provider
+      VISION_LLM_PROVIDER: "openai"  # openai or ollama
+      VISION_LLM_MODEL: "gpt-4o"  # minicpm-v (ollama) or gpt-4o (openai)
+
+      # OCR Processing Mode
+      OCR_PROCESS_MODE: "image"  # Optional, default: image, other options: pdf, whole_pdf
+      PDF_SKIP_EXISTING_OCR: "false"  # Optional, skip OCR for PDFs with existing OCR
+
+      # Enhanced OCR Features
+      CREATE_LOCAL_HOCR: "false"  # Optional, save hOCR files locally
+      LOCAL_HOCR_PATH: "/app/hocr"  # Optional, path for hOCR files
+      CREATE_LOCAL_PDF: "false"  # Optional, save enhanced PDFs locally
+      LOCAL_PDF_PATH: "/app/pdf"  # Optional, path for PDF files
+      PDF_UPLOAD: "false"  # Optional, upload enhanced PDFs to paperless-ngx
+      PDF_REPLACE: "false"  # Optional and DANGEROUS, delete original after upload
+      PDF_COPY_METADATA: "true"  # Optional, copy metadata from original document
+      PDF_OCR_TAGGING: "true"  # Optional, add tag to processed documents
+      PDF_OCR_COMPLETE_TAG: "paperless-gpt-ocr-complete"  # Optional, tag name
+      AUTO_OCR_TAG: "paperless-gpt-ocr-auto"  # Optional, default: paperless-gpt-ocr-auto
+      OCR_LIMIT_PAGES: "5"  # Optional, default: 5. Set to 0 for no limit.
+      LOG_LEVEL: "info"  # Optional: debug, warn, error
+    volumes:
+      - prompts:/app/prompts  # Mount the prompts directory
+      - hocr:/app/hocr  # Only if CREATE_LOCAL_HOCR is true
+      - pdf:/app/pdf  # Only if CREATE_LOCAL_PDF is true
+    ports:
+      - "8180:8080"
+    depends_on:
+      - webserver
+
+volumes:
+  backup:
+  data:
+  media:
+  pgdata:
+  redisdata:
+  export:
+  consume:
+  prompts:
+  hocr:
+  pdf: