diff --git a/README.md b/README.md index e69de29..863cdd9 100644 --- a/README.md +++ b/README.md @@ -0,0 +1,526 @@ +# ZUGFeRD-Service + +A REST API service for extracting and validating ZUGFeRD/Factur-X invoice data from PDF files. Built with FastAPI and Python 3.11+. + +## Overview + +ZUGFeRD-Service provides a simple HTTP API to: + +- Extract structured invoice data from ZUGFeRD-enabled PDFs +- Detect and identify ZUGFeRD profiles (MINIMUM, BASIC, BASIC WL, EN16931, EXTENDED) +- Validate invoice data against business rules and regulatory requirements +- Compare XML data against PDF text content + +ZUGFeRD (Zentraler User Guide des Forums elektronische Rechnung Deutschland) is a German standard for electronic invoices, using the Cross Industry Invoice (CII) XML format embedded in PDF files (also known as Factur-X in France). + +## Quick Start + +### Docker + +The quickest way to get started is using Docker: + +```bash +# Build the image +docker build -t zugferd-service . + +# Run the service +docker run -p 5000:5000 zugferd-service + +# Or use Docker Compose +docker-compose up -d +``` + +### Nix + +If you're using Nix, build and run with: + +```bash +# Build the package +nix build .#zugferd-service + +# Run the service +nix run .#zugferd-service + +# Enter development shell +nix develop +``` + +### Python (Development) + +For local development: + +```bash +# Install dependencies +pip install -e . + +# Run directly +python -m src.main + +# Or use the installed script +zugferd-service +``` + +The service starts on `http://0.0.0.0:5000` by default. + +## API Reference + +### GET /health + +Check if the service is running. + +**Response:** +```json +{ + "status": "healthy", + "version": "1.0.0" +} +``` + +**Example:** +```bash +curl http://localhost:5000/health +``` + +### POST /extract + +Extract ZUGFeRD data from a base64-encoded PDF file. + +**Request:** +```json +{ + "pdf_base64": "JVBERi0xLjQKJeLjz9MK..." 
+} +``` + +**Response (ZUGFeRD PDF):** +```json +{ + "is_zugferd": true, + "zugferd_profil": "EN16931", + "xml_raw": "", + "xml_data": { + "invoice_number": "RE-2024-001", + "invoice_date": "2024-02-01", + "due_date": "2024-02-28", + "supplier": { + "name": "Acme Corp", + "street": "Main Street 123", + "postal_code": "12345", + "city": "Berlin", + "country": "DE", + "vat_id": "DE123456789", + "email": "billing@acme-corp.de" + }, + "buyer": { + "name": "Customer GmbH", + "street": "Market Square 5", + "postal_code": "54321", + "city": "Hamburg", + "country": "DE", + "vat_id": "DE987654321" + }, + "line_items": [ + { + "position": 1, + "article_number": "ART-001", + "article_number_buyer": null, + "description": "Consulting Services", + "quantity": 10.0, + "unit": "HUR", + "unit_price": 100.0, + "line_total": 1000.0, + "vat_rate": 19.0, + "vat_amount": 190.0 + } + ], + "totals": { + "line_total_sum": 1000.0, + "net": 1000.0, + "vat_total": 190.0, + "gross": 1190.0, + "vat_breakdown": [ + { + "rate": 19.0, + "base": 1000.0, + "amount": 190.0 + } + ] + }, + "currency": "EUR", + "payment_terms": { + "iban": "DE89370400440532013000", + "bic": "COBADEFFXXX", + "account_holder": "Acme Corp" + }, + "notes": "Payment due within 30 days" + }, + "pdf_text": "Invoice RE-2024-001\nAcme Corp...", + "extraction_meta": { + "pages": 2, + "xml_attachment_name": "factur-x.xml", + "extraction_time_ms": 45 + } +} +``` + +**Response (Non-ZUGFeRD PDF):** +```json +{ + "is_zugferd": false, + "zugferd_profil": null, + "xml_raw": null, + "xml_data": null, + "pdf_text": "Regular PDF content...", + "extraction_meta": { + "pages": 1, + "xml_attachment_name": null, + "extraction_time_ms": 20 + } +} +``` + +**Example:** +```bash +# Convert PDF to base64 and extract +PDF_BASE64=$(base64 -w 0 invoice.pdf) + +curl -X POST http://localhost:5000/extract \ + -H "Content-Type: application/json" \ + -d "{\"pdf_base64\": \"$PDF_BASE64\"}" +``` + +### POST /validate + +Validate invoice data against 
business rules and regulatory requirements. + +**Request:** +```json +{ + "xml_data": { + "invoice_number": "RE-2024-001", + "invoice_date": "2024-02-01", + "due_date": "2024-02-28", + "supplier": { + "name": "Acme Corp", + "vat_id": "DE123456789" + }, + "buyer": { + "name": "Customer GmbH", + "vat_id": "DE987654321" + }, + "line_items": [ + { + "position": 1, + "description": "Consulting Services", + "quantity": 10.0, + "unit": "HUR", + "unit_price": 100.0, + "line_total": 1000.0, + "vat_rate": 19.0 + } + ], + "totals": { + "line_total_sum": 1000.0, + "net": 1000.0, + "vat_total": 190.0, + "gross": 1190.0, + "vat_breakdown": [ + { + "rate": 19.0, + "base": 1000.0, + "amount": 190.0 + } + ] + }, + "currency": "EUR" + }, + "pdf_text": "Invoice RE-2024-001\nTotal: 1190.00 EUR", + "checks": ["pflichtfelder", "betraege", "ustid", "pdf_abgleich"] +} +``` + +**Response:** +```json +{ + "result": { + "is_valid": true, + "errors": [], + "warnings": [], + "summary": { + "total_checks": 4, + "checks_passed": 4, + "checks_failed": 0, + "critical_errors": 0, + "warnings": 0 + }, + "validation_time_ms": 12 + } +} +``` + +**Example:** +```bash +curl -X POST http://localhost:5000/validate \ + -H "Content-Type: application/json" \ + -d '{ + "xml_data": {"invoice_number": "RE-001", ...}, + "checks": ["pflichtfelder", "betraege"] + }' +``` + +## Validation Checks + +The service supports four validation checks: + +### 1. pflichtfelder (Required Fields) + +Validates that all critical invoice fields are present and non-empty: + +- **Critical errors:** invoice_number, invoice_date, supplier.name, supplier.vat_id, buyer.name, totals.net, totals.gross, totals.vat_total, line_items array, line item fields +- **Warnings:** due_date, payment_terms.iban + +### 2. 
betraege (Amount Calculations) + +Verifies all monetary calculations are correct: + +- Line total = quantity × unit_price (for each line item) +- totals.net = sum of all line totals +- VAT breakdown amount = base × (rate/100) (for each VAT entry) +- totals.vat_total = sum of VAT breakdown amounts +- totals.gross = totals.net + totals.vat_total + +Uses a tolerance of 0.01 for floating-point comparison. + +### 3. ustid (VAT ID Format) + +Validates VAT ID format for supported countries: + +- **Germany (DE):** DE followed by 9 digits (e.g., `DE123456789`) +- **Austria (AT):** ATU followed by 8 digits (e.g., `ATU12345678`) +- **Switzerland (CH):** CHE followed by 9 digits and MWST/TVA/IVA suffix (e.g., `CHE123456789MWST`) + +### 4. pdf_abgleich (PDF Comparison) + +Compares XML data against extracted PDF text: + +- Invoice number exact match +- Totals (net, gross, vat_total) within tolerance +- Returns warnings (not errors) for mismatches + +## ZUGFeRD Profiles + +The service detects and reports the following ZUGFeRD 2.x profiles: + +| Profile | Description | +|---------|-------------| +| MINIMUM | Minimal profile with basic invoice data | +| BASIC | Basic profile for simple B2B invoicing | +| BASIC WL | Basic "Without Lines" profile: document-level data only, no line items | +| EN16931 | Full profile compliant with EN 16931 standard | +| EXTENDED | Extended profile with additional optional fields | + +The profile is automatically detected from the embedded XML metadata. 
+ +## Configuration + +### Environment Variables + +The service supports the following environment variables: + +| Variable | Default | Description | +|----------|---------|-------------| +| `HOST` | `0.0.0.0` | Host address to bind to | +| `PORT` | `5000` | Port to listen on | +| `LOG_LEVEL` | `INFO` | Logging level (DEBUG, INFO, WARNING, ERROR) | + +### Docker Compose + +The provided `docker-compose.yml` includes: + +- Port mapping: `5000:5000` +- Health check endpoint +- Read-only source mount for development +- Restart policy: `unless-stopped` + +### Nix + +The flake provides: + +- `packages.zugferd-service`: Production build +- `devShells.default`: Development shell with all dependencies + +## NixOS Deployment + +Example NixOS module configuration: + +```nix +{ config, pkgs, ... }: + +let + zugferd-service = (import ./zugferd-service {}).packages.zugferd-service; +in { + systemd.services.zugferd-service = { + enable = true; + description = "ZUGFeRD Invoice Service"; + after = [ "network.target" ]; + wantedBy = [ "multi-user.target" ]; + + serviceConfig = { + ExecStart = "${zugferd-service}/bin/zugferd-service"; + Restart = "always"; + RestartSec = "10"; + DynamicUser = true; + ProtectSystem = "strict"; + ProtectHome = true; + PrivateTmp = true; + NoNewPrivileges = true; + }; + + environment = { + HOST = "127.0.0.1"; + PORT = "5000"; + LOG_LEVEL = "INFO"; + }; + }; +} +``` + +For production, consider adding: + +- Reverse proxy (nginx/caddy) with HTTPS +- Authentication middleware +- Rate limiting +- Logging aggregation + +## Development + +### Running Tests + +```bash +# Run all tests +pytest + +# Run with coverage +pytest --cov=src + +# Run specific test file +pytest tests/test_extract.py +``` + +### Project Structure + +``` +zugferd-service/ +├── src/ +│ ├── __init__.py +│ ├── main.py # FastAPI application and endpoints +│ ├── models.py # Pydantic models for requests/responses +│ ├── extractor.py # ZUGFeRD XML extraction logic +│ ├── validator.py # Invoice 
validation logic +│ ├── pdf_parser.py # PDF text extraction +│ └── utils.py # Utility functions +├── tests/ +│ ├── test_extract.py +│ ├── test_validate.py +│ └── fixtures/ # Test PDF files +├── pyproject.toml # Project metadata and dependencies +├── Dockerfile # Multi-stage Docker build +├── docker-compose.yml # Docker Compose configuration +└── flake.nix # Nix flake for reproducible builds +``` + +### Dependencies + +**Core:** +- fastapi>=0.109.0 - Web framework +- uvicorn>=0.27.0 - ASGI server +- pydantic>=2.5.0 - Data validation +- factur-x>=2.5 - ZUGFeRD/Factur-X library +- pypdf>=4.0.0 - PDF text extraction +- lxml>=5.0.0 - XML processing + +**Development:** +- pytest>=8.0.0 - Testing framework +- pytest-asyncio>=0.23.0 - Async test support +- httpx>=0.27.0 - HTTP client for testing + +## Troubleshooting + +### Common Issues + +**Service fails to start with "Address already in use"** + +Change the port: +```bash +# Docker +docker run -p 8000:5000 zugferd-service + +# Nix/Python +PORT=8000 python -m src.main +``` + +**Extraction returns "is_zugferd: false"** + +- Verify the PDF contains ZUGFeRD/Factur-X XML attachment +- Check that the file is not password-protected +- Ensure the file is a valid PDF (not corrupt) + +**Validation fails with "missing_required" errors** + +Check that all required fields are present: +- invoice_number +- invoice_date (YYYY-MM-DD format) +- supplier.name and supplier.vat_id +- buyer.name +- Non-zero totals (net, gross, vat_total) +- At least one line item with description, quantity, and unit_price + +**VAT ID validation fails** + +Verify the VAT ID format: +- German: `DE` + 9 digits +- Austrian: `ATU` + 8 digits +- Swiss: `CHE` + 9 digits + `MWST`/`TVA`/`IVA` + +**Docker build is slow** + +Use BuildKit for faster builds: +```bash +DOCKER_BUILDKIT=1 docker build -t zugferd-service . 
+``` + +### Error Codes + +| Error Code | Description | +|------------|-------------| +| `invalid_base64` | Invalid base64 encoding in request | +| `file_too_large` | PDF exceeds 10MB limit | +| `password_protected_pdf` | PDF is password-protected | +| `invalid_pdf` | File is not a valid PDF | +| `corrupt_pdf` | PDF file is corrupted or unreadable | +| `invalid_xml` | Embedded XML is malformed | + +### Logs + +The service outputs structured JSON logs: + +```json +{ + "timestamp": "2024-02-01T10:30:00Z", + "level": "INFO", + "message": "Extraction completed", + "data": { + "is_zugferd": true, + "profile": "EN16931", + "extraction_time_ms": 45 + } +} +``` + +## License + +MIT + +## Support + +For issues, questions, or contributions, please refer to the project repository. diff --git a/flake.lock b/flake.lock new file mode 100644 index 0000000..668ac53 --- /dev/null +++ b/flake.lock @@ -0,0 +1,61 @@ +{ + "nodes": { + "flake-utils": { + "inputs": { + "systems": "systems" + }, + "locked": { + "lastModified": 1731533236, + "narHash": "sha256-l0KFg5HjrsfsO/JpG+r7fRrqm12kzFHyUHqHCVpMMbI=", + "owner": "numtide", + "repo": "flake-utils", + "rev": "11707dc2f618dd54ca8739b309ec4fc024de578b", + "type": "github" + }, + "original": { + "owner": "numtide", + "repo": "flake-utils", + "type": "github" + } + }, + "nixpkgs": { + "locked": { + "lastModified": 1770115704, + "narHash": "sha256-KHFT9UWOF2yRPlAnSXQJh6uVcgNcWlFqqiAZ7OVlHNc=", + "owner": "NixOS", + "repo": "nixpkgs", + "rev": "e6eae2ee2110f3d31110d5c222cd395303343b08", + "type": "github" + }, + "original": { + "owner": "NixOS", + "ref": "nixos-unstable", + "repo": "nixpkgs", + "type": "github" + } + }, + "root": { + "inputs": { + "flake-utils": "flake-utils", + "nixpkgs": "nixpkgs" + } + }, + "systems": { + "locked": { + "lastModified": 1681028828, + "narHash": "sha256-Vy1rq5AaRuLzOxct8nz4T6wlgyUR7zLU309k9mBC768=", + "owner": "nix-systems", + "repo": "default", + "rev": "da67096a3b9bf56a91d16901293e51ba5b49a27e", + "type": 
"github"
+      },
+      "original": {
+        "owner": "nix-systems",
+        "repo": "default",
+        "type": "github"
+      }
+    }
+  },
+  "root": "root",
+  "version": 7
+}
diff --git a/flake.nix b/flake.nix
new file mode 100644
index 0000000..574f11d
--- /dev/null
+++ b/flake.nix
@@ -0,0 +1,93 @@
+{
+  description = "ZUGFeRD REST API Service";
+
+  inputs = {
+    nixpkgs.url = "github:NixOS/nixpkgs/nixos-unstable";
+    flake-utils.url = "github:numtide/flake-utils";
+  };
+
+  outputs = { self, nixpkgs, flake-utils }:
+    flake-utils.lib.eachDefaultSystem (system:
+      let
+        pkgs = nixpkgs.legacyPackages.${system};
+        pythonPackages = pkgs.python311Packages; # single Python set shared by package and dev shell
+
+        # factur-x package - not available in nixpkgs, so we package it inline
+        factur-x = pythonPackages.buildPythonPackage rec {
+          pname = "factur_x";
+          version = "3.8";
+          format = "wheel"; # installs the prebuilt wheel fetched from PyPI
+
+          src = pythonPackages.fetchPypi {
+            inherit pname version format;
+            hash = "sha256-alctEgMZw79S2UStnt/bYTigE6h9wqCVpm7i1qc5efs=";
+          };
+
+          dependencies = with pythonPackages; [ pypdf lxml ];
+
+          pythonRelaxDeps = true; # relax version bounds on all declared dependencies
+
+          meta = {
+            description =
+              "Python library to generate and read Factur-X invoices";
+            license = pkgs.lib.licenses.mit;
+          };
+        };
+
+        zugferd-service = pythonPackages.buildPythonApplication {
+          pname = "zugferd-service";
+          version = "1.0.0";
+          pyproject = true;
+          src = ./.;
+
+          pythonRelaxDeps = true; # relax version bounds on all declared dependencies
+
+          build-system = [ pythonPackages.hatchling ];
+
+          dependencies = with pythonPackages; [
+            fastapi
+            uvicorn
+            pydantic
+            python-multipart
+            factur-x # resolves to the let-bound derivation above, not pythonPackages
+            pypdf
+            lxml
+          ];
+
+          nativeCheckInputs = with pythonPackages; [
+            pytestCheckHook
+            pytest-asyncio
+            httpx
+          ];
+
+          meta = {
+            description = "REST API for ZUGFeRD invoice extraction";
+            license = pkgs.lib.licenses.mit;
+            # lib.getExe and `nix run` read meta.mainProgram; passthru is not consulted
+            mainProgram = "zugferd-service";
+          };
+        };
+      in {
+        packages.default = zugferd-service;
+        packages.zugferd-service = zugferd-service;
+
+        devShells.default = pkgs.mkShell {
+          packages = [
+            (pkgs.python311.withPackages (ps:
+              with ps; [
+                fastapi
+                uvicorn
+                pydantic
+                python-multipart
+                pypdf
+                lxml
+                pytest
+                pytest-asyncio
+                httpx
+                factur-x # let-bound derivation above (built for python311), so no pip install is needed
+              ]))
+            pkgs.python311Packages.pip
+          ];
+        };
+      });
+}
diff --git a/nix/module.nix b/nix/module.nix
new file mode 100644
index 0000000..d9ac881
--- /dev/null
+++ b/nix/module.nix
@@ -0,0 +1,49 @@
+{ config, lib, pkgs, ... }:
+
+with lib;
+
+let cfg = config.services.zugferd-service;
+in {
+  options.services.zugferd-service = {
+    enable = mkEnableOption "ZUGFeRD REST API service";
+
+    port = mkOption {
+      type = types.port;
+      default = 5000;
+      description = "Port to listen on";
+    };
+
+    host = mkOption {
+      type = types.str;
+      default = "127.0.0.1";
+      description = "Host to bind to";
+    };
+
+    package = mkOption {
+      type = types.package;
+      default = pkgs.zugferd-service; # NOTE(review): presumably provided by an overlay; not part of plain nixpkgs - confirm
+      description = "Package to use for the service";
+    };
+  };
+
+  config = mkIf cfg.enable {
+    systemd.services.zugferd-service = {
+      description = "ZUGFeRD REST API Service";
+      wantedBy = [ "multi-user.target" ];
+      after = [ "network.target" ];
+
+      serviceConfig = {
+        Type = "simple";
+        ExecStart =
+          "${cfg.package}/bin/zugferd-service --host ${cfg.host} --port ${
+            toString cfg.port
+          }"; # NOTE(review): README documents HOST/PORT env vars; confirm the CLI accepts --host/--port
+        Restart = "on-failure";
+        DynamicUser = true;
+        NoNewPrivileges = true;
+        ProtectSystem = "strict";
+        ProtectHome = true;
+      };
+    };
+  };
+}