build(nix): add Nix flake, NixOS module, and README documentation

Ultraworked with [Sisyphus](https://github.com/code-yeongyu/oh-my-opencode)

Co-authored-by: Sisyphus <clio-agent@sisyphuslabs.ai>
This commit is contained in:
m3tm3re
2026-02-04 20:39:43 +01:00
parent 1a01b46ed6
commit 3eddd66003
4 changed files with 729 additions and 0 deletions

526
README.md
View File

@@ -0,0 +1,526 @@
# ZUGFeRD-Service
A REST API service for extracting and validating ZUGFeRD/Factur-X invoice data from PDF files. Built with FastAPI and Python 3.11+.
## Overview
ZUGFeRD-Service provides a simple HTTP API to:
- Extract structured invoice data from ZUGFeRD-enabled PDFs
- Detect and identify ZUGFeRD profiles (MINIMUM, BASIC, BASIC WL, EN16931, EXTENDED)
- Validate invoice data against business rules and regulatory requirements
- Compare XML data against PDF text content
ZUGFeRD (Zentraler User Guide des Forums elektronische Rechnung Deutschland) is a German standard for electronic invoices, known as Factur-X in France. It embeds invoice data in the Cross Industry Invoice (CII) XML format inside PDF files.
## Quick Start
### Docker
The quickest way to get started is using Docker:
```bash
# Build the image
docker build -t zugferd-service .
# Run the service
docker run -p 5000:5000 zugferd-service
# Or use Docker Compose
docker-compose up -d
```
### Nix
If you're using Nix, build and run with:
```bash
# Build the package
nix build .#zugferd-service
# Run the service
nix run .#zugferd-service
# Enter development shell
nix develop
```
### Python (Development)
For local development:
```bash
# Install dependencies
pip install -e .
# Run directly
python -m src.main
# Or use the installed script
zugferd-service
```
The service starts on `http://0.0.0.0:5000` by default.
## API Reference
### GET /health
Check if the service is running.
**Response:**
```json
{
"status": "healthy",
"version": "1.0.0"
}
```
**Example:**
```bash
curl http://localhost:5000/health
```
### POST /extract
Extract ZUGFeRD data from a base64-encoded PDF file.
**Request:**
```json
{
"pdf_base64": "JVBERi0xLjQKJeLjz9MK..."
}
```
**Response (ZUGFeRD PDF):**
```json
{
"is_zugferd": true,
"zugferd_profil": "EN16931",
"xml_raw": "<?xml version=\"1.0\"...?>",
"xml_data": {
"invoice_number": "RE-2024-001",
"invoice_date": "2024-02-01",
"due_date": "2024-02-28",
"supplier": {
"name": "Acme Corp",
"street": "Main Street 123",
"postal_code": "12345",
"city": "Berlin",
"country": "DE",
"vat_id": "DE123456789",
"email": "billing@acme-corp.de"
},
"buyer": {
"name": "Customer GmbH",
"street": "Market Square 5",
"postal_code": "54321",
"city": "Hamburg",
"country": "DE",
"vat_id": "DE987654321"
},
"line_items": [
{
"position": 1,
"article_number": "ART-001",
"article_number_buyer": null,
"description": "Consulting Services",
"quantity": 10.0,
"unit": "HUR",
"unit_price": 100.0,
"line_total": 1000.0,
"vat_rate": 19.0,
"vat_amount": 190.0
}
],
"totals": {
"line_total_sum": 1000.0,
"net": 1000.0,
"vat_total": 190.0,
"gross": 1190.0,
"vat_breakdown": [
{
"rate": 19.0,
"base": 1000.0,
"amount": 190.0
}
]
},
"currency": "EUR",
"payment_terms": {
"iban": "DE89370400440532013000",
"bic": "COBADEFFXXX",
"account_holder": "Acme Corp"
},
"notes": "Payment due within 30 days"
},
"pdf_text": "Invoice RE-2024-001\nAcme Corp...",
"extraction_meta": {
"pages": 2,
"xml_attachment_name": "factur-x.xml",
"extraction_time_ms": 45
}
}
```
**Response (Non-ZUGFeRD PDF):**
```json
{
"is_zugferd": false,
"zugferd_profil": null,
"xml_raw": null,
"xml_data": null,
"pdf_text": "Regular PDF content...",
"extraction_meta": {
"pages": 1,
"xml_attachment_name": null,
"extraction_time_ms": 20
}
}
```
**Example:**
```bash
# Convert PDF to base64 and extract (-w 0 is GNU base64; on macOS use: base64 -i invoice.pdf)
PDF_BASE64=$(base64 -w 0 invoice.pdf)
curl -X POST http://localhost:5000/extract \
-H "Content-Type: application/json" \
-d "{\"pdf_base64\": \"$PDF_BASE64\"}"
```
### POST /validate
Validate invoice data against business rules and regulatory requirements.
**Request:**
```json
{
"xml_data": {
"invoice_number": "RE-2024-001",
"invoice_date": "2024-02-01",
"due_date": "2024-02-28",
"supplier": {
"name": "Acme Corp",
"vat_id": "DE123456789"
},
"buyer": {
"name": "Customer GmbH",
"vat_id": "DE987654321"
},
"line_items": [
{
"position": 1,
"description": "Consulting Services",
"quantity": 10.0,
"unit": "HUR",
"unit_price": 100.0,
"line_total": 1000.0,
"vat_rate": 19.0
}
],
"totals": {
"line_total_sum": 1000.0,
"net": 1000.0,
"vat_total": 190.0,
"gross": 1190.0,
"vat_breakdown": [
{
"rate": 19.0,
"base": 1000.0,
"amount": 190.0
}
]
},
"currency": "EUR"
},
"pdf_text": "Invoice RE-2024-001\nTotal: 1190.00 EUR",
"checks": ["pflichtfelder", "betraege", "ustid", "pdf_abgleich"]
}
```
**Response:**
```json
{
"result": {
"is_valid": true,
"errors": [],
"warnings": [],
"summary": {
"total_checks": 4,
"checks_passed": 4,
"checks_failed": 0,
"critical_errors": 0,
"warnings": 0
},
"validation_time_ms": 12
}
}
```
**Example:**
```bash
curl -X POST http://localhost:5000/validate \
-H "Content-Type: application/json" \
-d '{
"xml_data": {"invoice_number": "RE-001", ...},
"checks": ["pflichtfelder", "betraege"]
}'
```
## Validation Checks
The service supports four validation checks:
### 1. pflichtfelder (Required Fields)
Validates that all critical invoice fields are present and non-empty:
- **Critical errors:** invoice_number, invoice_date, supplier.name, supplier.vat_id, buyer.name, totals.net, totals.gross, totals.vat_total, line_items array, line item fields
- **Warnings:** due_date, payment_terms.iban
### 2. betraege (Amount Calculations)
Verifies all monetary calculations are correct:
- Line total = quantity × unit_price (for each line item)
- totals.net = sum of all line totals
- VAT breakdown amount = base × (rate/100) (for each VAT entry)
- totals.vat_total = sum of VAT breakdown amounts
- totals.gross = totals.net + totals.vat_total
Uses a tolerance of 0.01 for floating-point comparison.
### 3. ustid (VAT ID Format)
Validates VAT ID format for supported countries:
- **Germany (DE):** DE followed by 9 digits (e.g., `DE123456789`)
- **Austria (AT):** ATU followed by 8 digits (e.g., `ATU12345678`)
- **Switzerland (CH):** CHE followed by 9 digits and MWST/TVA/IVA suffix (e.g., `CHE123456789MWST`)
### 4. pdf_abgleich (PDF Comparison)
Compares XML data against extracted PDF text:
- Invoice number exact match
- Totals (net, gross, vat_total) within tolerance
- Returns warnings (not errors) for mismatches
## ZUGFeRD Profiles
The service detects and reports the following ZUGFeRD 2.x profiles:
| Profile | Description |
|---------|-------------|
| MINIMUM | Minimal profile with basic invoice data |
| BASIC | Basic profile for simple B2B invoicing |
| BASIC WL | Basic profile without invoice line items (WL = "Without Lines") |
| EN16931 | Full profile compliant with EN 16931 standard |
| EXTENDED | Extended profile with additional optional fields |
The profile is automatically detected from the embedded XML metadata.
## Configuration
### Environment Variables
The service supports the following environment variables:
| Variable | Default | Description |
|----------|---------|-------------|
| `HOST` | `0.0.0.0` | Host address to bind to |
| `PORT` | `5000` | Port to listen on |
| `LOG_LEVEL` | `INFO` | Logging level (DEBUG, INFO, WARNING, ERROR) |
### Docker Compose
The provided `docker-compose.yml` includes:
- Port mapping: `5000:5000`
- Health check endpoint
- Read-only source mount for development
- Restart policy: `unless-stopped`
### Nix
The flake provides:
- `packages.zugferd-service` (also exposed as `packages.default`): Production build
- `devShells.default`: Development shell with all dependencies
## NixOS Deployment
Example NixOS module configuration:
```nix
{ config, pkgs, ... }:
let
  # The project is a flake (it ships no default.nix), so load it with
  # builtins.getFlake and pick the package built for this host's system.
  # Note: getFlake on a local path needs the `flakes` feature and impure
  # evaluation; in a flake-based system configuration, add the project as a
  # flake input instead.
  zugferd-service =
    (builtins.getFlake (toString ./zugferd-service)).packages.${pkgs.stdenv.hostPlatform.system}.zugferd-service;
in {
  systemd.services.zugferd-service = {
    enable = true;
    description = "ZUGFeRD Invoice Service";
    after = [ "network.target" ];
    wantedBy = [ "multi-user.target" ];
    serviceConfig = {
      ExecStart = "${zugferd-service}/bin/zugferd-service";
      Restart = "always";
      RestartSec = "10";
      DynamicUser = true;
      ProtectSystem = "strict";
      ProtectHome = true;
      PrivateTmp = true;
      NoNewPrivileges = true;
    };
    environment = {
      HOST = "127.0.0.1";
      PORT = "5000";
      LOG_LEVEL = "INFO";
    };
  };
}
```
For production, consider adding:
- Reverse proxy (nginx/caddy) with HTTPS
- Authentication middleware
- Rate limiting
- Logging aggregation
## Development
### Running Tests
```bash
# Run all tests
pytest
# Run with coverage
pytest --cov=src
# Run specific test file
pytest tests/test_extract.py
```
### Project Structure
```
zugferd-service/
├── src/
│ ├── __init__.py
│ ├── main.py # FastAPI application and endpoints
│ ├── models.py # Pydantic models for requests/responses
│ ├── extractor.py # ZUGFeRD XML extraction logic
│ ├── validator.py # Invoice validation logic
│ ├── pdf_parser.py # PDF text extraction
│ └── utils.py # Utility functions
├── tests/
│ ├── test_extract.py
│ ├── test_validate.py
│ └── fixtures/ # Test PDF files
├── pyproject.toml # Project metadata and dependencies
├── Dockerfile # Multi-stage Docker build
├── docker-compose.yml # Docker Compose configuration
└── flake.nix # Nix flake for reproducible builds
```
### Dependencies
**Core:**
- fastapi>=0.109.0 - Web framework
- uvicorn>=0.27.0 - ASGI server
- pydantic>=2.5.0 - Data validation
- factur-x>=2.5 - ZUGFeRD/Factur-X library
- pypdf>=4.0.0 - PDF text extraction
- lxml>=5.0.0 - XML processing
**Development:**
- pytest>=8.0.0 - Testing framework
- pytest-asyncio>=0.23.0 - Async test support
- httpx>=0.27.0 - HTTP client for testing
## Troubleshooting
### Common Issues
**Service fails to start with "Address already in use"**
Change the port:
```bash
# Docker
docker run -p 8000:5000 zugferd-service
# Nix/Python
PORT=8000 python -m src.main
```
**Extraction returns "is_zugferd: false"**
- Verify the PDF contains ZUGFeRD/Factur-X XML attachment
- Check that the file is not password-protected
- Ensure the file is a valid PDF (not corrupt)
**Validation fails with "missing_required" errors**
Check that all required fields are present:
- invoice_number
- invoice_date (YYYY-MM-DD format)
- supplier.name and supplier.vat_id
- buyer.name
- Non-zero totals (net, gross, vat_total)
- At least one line item with description, quantity, and unit_price
**VAT ID validation fails**
Verify the VAT ID format:
- German: `DE` + 9 digits
- Austrian: `ATU` + 8 digits
- Swiss: `CHE` + 9 digits + `MWST`/`TVA`/`IVA`
**Docker build is slow**
Use BuildKit for faster builds:
```bash
DOCKER_BUILDKIT=1 docker build -t zugferd-service .
```
### Error Codes
| Error Code | Description |
|------------|-------------|
| `invalid_base64` | Invalid base64 encoding in request |
| `file_too_large` | PDF exceeds 10MB limit |
| `password_protected_pdf` | PDF is password-protected |
| `invalid_pdf` | File is not a valid PDF |
| `corrupt_pdf` | PDF file is corrupted or unreadable |
| `invalid_xml` | Embedded XML is malformed |
### Logs
The service outputs structured JSON logs:
```json
{
"timestamp": "2024-02-01T10:30:00Z",
"level": "INFO",
"message": "Extraction completed",
"data": {
"is_zugferd": true,
"profile": "EN16931",
"extraction_time_ms": 45
}
}
```
## License
MIT
## Support
For issues, questions, or contributions, please refer to the project repository.

61
flake.lock generated Normal file
View File

@@ -0,0 +1,61 @@
{
"nodes": {
"flake-utils": {
"inputs": {
"systems": "systems"
},
"locked": {
"lastModified": 1731533236,
"narHash": "sha256-l0KFg5HjrsfsO/JpG+r7fRrqm12kzFHyUHqHCVpMMbI=",
"owner": "numtide",
"repo": "flake-utils",
"rev": "11707dc2f618dd54ca8739b309ec4fc024de578b",
"type": "github"
},
"original": {
"owner": "numtide",
"repo": "flake-utils",
"type": "github"
}
},
"nixpkgs": {
"locked": {
"lastModified": 1770115704,
"narHash": "sha256-KHFT9UWOF2yRPlAnSXQJh6uVcgNcWlFqqiAZ7OVlHNc=",
"owner": "NixOS",
"repo": "nixpkgs",
"rev": "e6eae2ee2110f3d31110d5c222cd395303343b08",
"type": "github"
},
"original": {
"owner": "NixOS",
"ref": "nixos-unstable",
"repo": "nixpkgs",
"type": "github"
}
},
"root": {
"inputs": {
"flake-utils": "flake-utils",
"nixpkgs": "nixpkgs"
}
},
"systems": {
"locked": {
"lastModified": 1681028828,
"narHash": "sha256-Vy1rq5AaRuLzOxct8nz4T6wlgyUR7zLU309k9mBC768=",
"owner": "nix-systems",
"repo": "default",
"rev": "da67096a3b9bf56a91d16901293e51ba5b49a27e",
"type": "github"
},
"original": {
"owner": "nix-systems",
"repo": "default",
"type": "github"
}
}
},
"root": "root",
"version": 7
}

93
flake.nix Normal file
View File

@@ -0,0 +1,93 @@
# Nix flake for the ZUGFeRD REST API service.
#
# Per-system outputs: packages.{default,zugferd-service} and devShells.default.
# System-independent output: nixosModules.default (wraps ./nix/module.nix).
{
  description = "ZUGFeRD REST API Service";

  inputs = {
    nixpkgs.url = "github:NixOS/nixpkgs/nixos-unstable";
    flake-utils.url = "github:numtide/flake-utils";
  };

  outputs = { self, nixpkgs, flake-utils }:
    flake-utils.lib.eachDefaultSystem (system:
      let
        pkgs = nixpkgs.legacyPackages.${system};
        pythonPackages = pkgs.python311Packages;

        # factur-x package - not available in nixpkgs, so we package it inline
        # from the wheel published on PyPI.
        factur-x = pythonPackages.buildPythonPackage rec {
          pname = "factur_x";
          version = "3.8";
          format = "wheel";

          src = pythonPackages.fetchPypi {
            inherit pname version format;
            hash = "sha256-alctEgMZw79S2UStnt/bYTigE6h9wqCVpm7i1qc5efs=";
          };

          dependencies = with pythonPackages; [ pypdf lxml ];
          pythonRelaxDeps = true;

          meta = {
            description =
              "Python library to generate and read Factur-X invoices";
            license = pkgs.lib.licenses.mit;
          };
        };

        # The service itself, built from this repository with hatchling.
        zugferd-service = pythonPackages.buildPythonApplication {
          pname = "zugferd-service";
          version = "1.0.0";
          pyproject = true;
          src = ./.;

          pythonRelaxDeps = true;
          build-system = [ pythonPackages.hatchling ];

          dependencies = with pythonPackages; [
            fastapi
            uvicorn
            pydantic
            python-multipart
            factur-x
            pypdf
            lxml
          ];

          nativeCheckInputs = with pythonPackages; [
            pytestCheckHook
            pytest-asyncio
            httpx
          ];

          meta = {
            description = "REST API for ZUGFeRD invoice extraction";
            license = pkgs.lib.licenses.mit;
            # mainProgram belongs in `meta` (not `passthru`): that is where
            # `nix run` and `lib.getExe` look for the executable name.
            mainProgram = "zugferd-service";
          };
        };
      in {
        packages.default = zugferd-service;
        packages.zugferd-service = zugferd-service;

        devShells.default = pkgs.mkShell {
          packages = [
            (pkgs.python311.withPackages (ps: [
              ps.fastapi
              ps.uvicorn
              ps.pydantic
              ps.python-multipart
              ps.pypdf
              ps.lxml
              ps.pytest
              ps.pytest-asyncio
              ps.httpx
              # Use the inline factur-x build from the `let` block above so the
              # shell has every runtime dependency without a manual pip install.
              factur-x
            ]))
            pkgs.python311Packages.pip
          ];
        };
      }) // {
        # NixOS module. The wrapper defaults `services.zugferd-service.package`
        # to this flake's build, so users do not need an overlay that provides
        # `pkgs.zugferd-service`.
        nixosModules.default = { lib, pkgs, ... }: {
          imports = [ ./nix/module.nix ];
          services.zugferd-service.package = lib.mkDefault
            self.packages.${pkgs.stdenv.hostPlatform.system}.default;
        };
      };
}

49
nix/module.nix Normal file
View File

@@ -0,0 +1,49 @@
# NixOS module that runs the ZUGFeRD REST API as a hardened systemd service.
#
# Options live under `services.zugferd-service`; nothing is configured unless
# `enable` is set.
{ config, lib, pkgs, ... }:

let
  cfg = config.services.zugferd-service;
in
{
  options.services.zugferd-service = {
    enable = lib.mkEnableOption "ZUGFeRD REST API service";

    port = lib.mkOption {
      type = lib.types.port;
      default = 5000;
      description = "Port to listen on";
    };

    host = lib.mkOption {
      type = lib.types.str;
      default = "127.0.0.1";
      description = "Host to bind to";
    };

    package = lib.mkOption {
      type = lib.types.package;
      # NOTE: `pkgs.zugferd-service` has to come from an overlay; otherwise set
      # this option explicitly to the package built by the project's flake.
      default = pkgs.zugferd-service;
      # Keeps option-documentation builds from evaluating the default above.
      defaultText = lib.literalExpression "pkgs.zugferd-service";
      description = "Package to use for the service";
    };
  };

  config = lib.mkIf cfg.enable {
    systemd.services.zugferd-service = {
      description = "ZUGFeRD REST API Service";
      wantedBy = [ "multi-user.target" ];
      after = [ "network.target" ];

      # The README documents HOST/PORT environment variables as the service's
      # configuration mechanism; export them alongside the CLI flags so the
      # configured address is honoured whichever one the entry point reads.
      environment = {
        HOST = cfg.host;
        PORT = toString cfg.port;
      };

      serviceConfig = {
        Type = "simple";
        ExecStart =
          "${cfg.package}/bin/zugferd-service --host ${cfg.host} --port ${
            toString cfg.port
          }";
        Restart = "on-failure";

        # Sandboxing: run as a transient unprivileged user with a read-only
        # view of the system and no access to home directories.
        DynamicUser = true;
        NoNewPrivileges = true;
        ProtectSystem = "strict";
        ProtectHome = true;
      };
    };
  };
}