From 0db2482bf26f6304c98cf9bd4d85a3d1ae844173 Mon Sep 17 00:00:00 2001
From: m3tm3re <p@m3ta.dev>
Date: Wed, 4 Feb 2026 19:19:44 +0100
Subject: [PATCH] feat(project): initialize ZUGFeRD service with pyproject.toml
 and directory structure

---
 .gitignore                                    |   42 +
 .sisyphus/boulder.json                        |    8 +
 .sisyphus/drafts/zugferd-service.md           |  130 ++
 .../notepads/zugferd-service/decisions.md     |   29 +
 .sisyphus/notepads/zugferd-service/issues.md  |    7 +
 .../notepads/zugferd-service/learnings.md     |   44 +
 .../notepads/zugferd-service/problems.md      |    7 +
 .sisyphus/plans/zugferd-service.md            | 1764 +++++++++++++++++
 pyproject.toml                                |   35 +
 src/__init__.py                               |    3 +
 src/extractor.py                              |    3 +
 src/main.py                                   |   20 +
 src/models.py                                 |    3 +
 src/pdf_parser.py                             |    3 +
 src/utils.py                                  |    3 +
 src/validator.py                              |    3 +
 tests/__init__.py                             |    1 +
 tests/conftest.py                             |    9 +
 tests/fixtures/.gitkeep                       |    0
 19 files changed, 2114 insertions(+)
 create mode 100644 .gitignore
 create mode 100644 .sisyphus/boulder.json
 create mode 100644 .sisyphus/drafts/zugferd-service.md
 create mode 100644 .sisyphus/notepads/zugferd-service/decisions.md
 create mode 100644 .sisyphus/notepads/zugferd-service/issues.md
 create mode 100644 .sisyphus/notepads/zugferd-service/learnings.md
 create mode 100644 .sisyphus/notepads/zugferd-service/problems.md
 create mode 100644 .sisyphus/plans/zugferd-service.md
 create mode 100644 pyproject.toml
 create mode 100644 src/__init__.py
 create mode 100644 src/extractor.py
 create mode 100644 src/main.py
 create mode 100644 src/models.py
 create mode 100644 src/pdf_parser.py
 create mode 100644 src/utils.py
 create mode 100644 src/validator.py
 create mode 100644 tests/__init__.py
 create mode 100644 tests/conftest.py
 create mode 100644 tests/fixtures/.gitkeep

diff --git a/.gitignore b/.gitignore
new file mode 100644
index 0000000..e1559e2
--- /dev/null
+++ b/.gitignore
@@ -0,0 +1,42 @@
+# Python
+__pycache__/
+*.py[cod]
+*$py.class
+*.so
+.Python
+build/
+develop-eggs/
+dist/
+downloads/
+eggs/
+.eggs/
+lib/
+lib64/
+parts/
+sdist/
+var/
+wheels/
+*.egg-info/
+.installed.cfg
+*.egg
+
+# Virtual environments
+venv/
+ENV/
+env/
+
+# IDE
+.vscode/
+.idea/
+*.swp
+*.swo
+
+# Testing
+.pytest_cache/
+.coverage
+htmlcov/
+
+# Sisyphus internal
+.sisyphus/boulder.json
+.sisyphus/notepads/*/issues.md
+.sisyphus/notepads/*/problems.md
diff --git a/.sisyphus/boulder.json b/.sisyphus/boulder.json
new file mode 100644
index 0000000..c91693f
--- /dev/null
+++ b/.sisyphus/boulder.json
@@ -0,0 +1,8 @@
+{
+  "active_plan": "/home/m3tam3re/p/AZ/zugferd-service/.sisyphus/plans/zugferd-service.md",
+  "started_at": "2026-02-04T18:12:44.865Z",
+  "session_ids": [
+    "ses_3d634d45bffeW59tJMBkcCfwyd"
+  ],
+  "plan_name": "zugferd-service"
+}
\ No newline at end of file
diff --git a/.sisyphus/drafts/zugferd-service.md b/.sisyphus/drafts/zugferd-service.md
new file mode 100644
index 0000000..7efaa66
--- /dev/null
+++ b/.sisyphus/drafts/zugferd-service.md
@@ -0,0 +1,130 @@
+# Draft: ZUGFeRD-Service Implementation
+
+## Requirements (confirmed)
+
+### Core Functionality
+- **Purpose**: Python REST API for ZUGFeRD/Factur-X invoice extraction and validation
+- **Framework**: FastAPI (preferred by user)
+- **Runtime**: Python 3.11+
+- **Deployment**: Docker container on NixOS server + native Nix package
+
+### API Endpoints
+1. `GET /health` - Health check endpoint
+2. `POST /extract` - PDF extraction (accepts base64-encoded PDF)
+3. `POST /validate` - Invoice validation (pflichtfelder, betraege, ustid, pdf_abgleich)
+
+### Key Dependencies
+- `factur-x>=2.5` - ZUGFeRD/Factur-X extraction
+- `pypdf>=4.0.0` - PDF text extraction
+- `fastapi>=0.109.0` - API framework
+- `uvicorn>=0.27.0` - ASGI server
+- `pydantic>=2.5.0` - Data models
+- `lxml>=5.0.0` - XML parsing
+- `python-multipart>=0.0.6` - File uploads
+
+### Project Structure (user-specified)
+```
+zugferd-service/
+├── Dockerfile
+├── requirements.txt
+├── README.md
+├── src/
+│   ├── __init__.py
+│   ├── main.py              # FastAPI App + Endpoints
+│   ├── extractor.py         # ZUGFeRD/PDF Extraktion
+│   ├── validator.py         # Validierungslogik
+│   ├── pdf_parser.py        # PDF-Text-Parsing für Abgleich
+│   ├── models.py            # Pydantic Models
+│   └── utils.py             # Hilfsfunktionen
+├── tests/
+│   ├── __init__.py
+│   ├── test_extractor.py
+│   ├── test_validator.py
+│   └── fixtures/
+│       ├── sample_zugferd.pdf
+│       └── sample_no_zugferd.pdf
+└── docker-compose.yml
+```
+
+## Research Findings
+
+### Nix Packaging (from librarian research)
+- Use `buildPythonApplication` for standalone service
+- `pyproject = true` with hatchling/setuptools
+- `pythonRelaxDeps = true` for dependency flexibility
+- mem0 example pattern: custom server script via `postInstall`
+- Consider flake.nix for modern Nix workflow
+
+### factur-x Library (from librarian research)
+- `get_xml_from_pdf()` - Core extraction function
+- `get_level()` / `get_flavor()` - Profile detection
+- Namespaces: rsm, ram, udt for UN/CEFACT CII format
+- Profile levels: minimum, basicwl, basic, en16931, extended
+
+### UN/ECE Unit Codes
+- C62 = One (unit; commonly displayed as "Piece"), KGM = Kilogram, H87 = Piece
+- Need comprehensive mapping dictionary
+
+## Technical Decisions
+
+### Python Tooling
+- **PENDING**: Use pyproject.toml (modern) or requirements.txt (legacy)?
+- **PENDING**: Build system: setuptools, hatchling, or poetry-core?
+
+### Nix Approach
+- **PENDING**: Flake-based or traditional Nix expressions?
+- **PENDING**: Include NixOS service module?
+
+### Testing Strategy
+- **PENDING**: TDD or tests-after?
+- **PENDING**: Test framework: pytest (standard choice)
+
+## Scope Boundaries
+
+### INCLUDE
+- All 3 API endpoints as specified
+- All validation checks (pflichtfelder, betraege, ustid, pdf_abgleich)
+- Docker multi-stage build
+- Nix packaging
+- Basic test suite
+- README documentation
+
+### EXCLUDE
+- Online USt-ID validation (only format check)
+- Database/persistence (stateless service)
+- Authentication/authorization
+- Rate limiting
+- Metrics/tracing
+
+## Open Questions (RESOLVED)
+
+1. ✅ **Python project structure**: pyproject.toml with hatchling
+2. ✅ **Build system**: hatchling (modern, Nix-friendly)
+3. ✅ **Nix approach**: Flake-based
+4. ✅ **Testing**: TDD (test-first) with pytest
+5. ✅ **Sample PDFs**: Source from official ZUGFeRD repositories
+
+## Metis Gap Analysis (Reviewed)
+
+### Gaps Classified as MINOR (Auto-Resolved)
+- **UN/ECE unit codes**: Start with common codes (C62, KGM, H87, MTR, LTR, etc.), expand as needed
+- **Tolerance**: Hardcode 0.01 EUR as specified
+- **Validation scope**: Check required fields exist for declared profile
+- **Error codes**: Implement as specified in user's detailed spec
+
+### Gaps Classified as DEFAULTS APPLIED
+- **Authentication**: OPEN (no auth mentioned in spec → stateless public API)
+- **ZUGFeRD profiles**: ALL profiles supported (MINIMUM, BASIC, BASIC WL, EN16931, EXTENDED)
+- **Deployment**: Container-based on NixOS (as per NixOS config section in spec)
+- **PDF text extraction**: REQUIRED for pdf_abgleich check (explicitly in spec)
+- **File size limit**: Handle as error for >10MB (spec mentions this edge case)
+
+### Guardrails (Must NOT Have)
+- NO authentication middleware
+- NO database/persistence
+- NO caching layers
+- NO rate limiting
+- NO metrics endpoints (beyond /health)
+- NO CLI interface
+- NO web UI
+- NO abstraction layers for "future extensibility"
diff --git a/.sisyphus/notepads/zugferd-service/decisions.md b/.sisyphus/notepads/zugferd-service/decisions.md
new file mode 100644
index 0000000..037a730
--- /dev/null
+++ b/.sisyphus/notepads/zugferd-service/decisions.md
@@ -0,0 +1,29 @@
+# Decisions - zugferd-service
+
+This file accumulates architectural and design decisions during execution.
+
+## [2026-02-04T18:12:44.864Z] Session Start
+Planning decisions from Prometheus session.
+
+## Tech Stack
+- Python 3.11+
+- FastAPI for REST API
+- Pydantic for data validation
+- pytest for testing
+- factur-x library for ZUGFeRD extraction
+- pypdf for PDF text extraction
+
+## Scope Lock
+- NO authentication (stateless service)
+- NO database
+- NO caching
+- NO rate limiting
+- 10MB file size limit
+- 0.01 EUR tolerance for calculations
+
+## Validation Rules
+- pflichtfelder: Required fields check
+- betraege: Amount calculations check
+- ustid: VAT ID format check (no online validation)
+- pdf_abgleich: XML vs PDF text comparison
+
diff --git a/.sisyphus/notepads/zugferd-service/issues.md b/.sisyphus/notepads/zugferd-service/issues.md
new file mode 100644
index 0000000..a95d7dc
--- /dev/null
+++ b/.sisyphus/notepads/zugferd-service/issues.md
@@ -0,0 +1,7 @@
+# Issues - zugferd-service
+
+This file accumulates problems, errors, and gotchas during execution.
+
+## [2026-02-04T18:12:44.864Z] Session Start
+Initial session started. No issues yet.
+
diff --git a/.sisyphus/notepads/zugferd-service/learnings.md b/.sisyphus/notepads/zugferd-service/learnings.md
new file mode 100644
index 0000000..24331e0
--- /dev/null
+++ b/.sisyphus/notepads/zugferd-service/learnings.md
@@ -0,0 +1,44 @@
+# Learnings - zugferd-service
+
+This file accumulates conventions, patterns, and learnings during execution.
+
+## [2026-02-04T18:12:44.864Z] Session Start
+Initial session for ZUGFeRD-Service implementation.
+
+## Framework Decisions
+- FastAPI (user preference)
+- Pydantic v2+ for data models
+- pytest with pytest-asyncio for testing
+- hatchling for build system
+
+## Packaging Decisions
+- pyproject.toml (modern Python packaging)
+- Docker multi-stage build
+- Nix flake-based packaging with buildPythonApplication
+
+## Testing Decisions
+- TDD (test-first) approach
+- All acceptance criteria must be verifiable without human intervention
+
+
+## [2026-02-04T19:14:00.000Z] Task 1: Project Scaffold
+
+### hatchling Configuration Pattern
+- When the package directory (`src/`) is not named after the project, MUST add a `[tool.hatch.build.targets.wheel]` section
+- Without this, hatchling cannot determine which files to ship
+- Config: `packages = ["src"]` to specify src directory
+
+### Nix Environment Considerations
+- Nix store is read-only, standard pip install fails
+- Use temporary venv for verification: `python -m venv /tmp/test_env`
+- Install to venv, verify imports, then cleanup
+
+### Entry Point Documentation
+- Functions referenced in `[project.scripts]` MUST have docstrings
+- These are public API entry points (CLI commands)
+- Example: `zugferd-service = "src.main:run"` -> run() needs docstring
+
+### Module Docstring Convention
+- Module-level docstrings: minimal, one line, describe purpose
+- Entry point function docstrings: Args/Returns style for CLI documentation
+- Both necessary for scaffolding clarity
diff --git a/.sisyphus/notepads/zugferd-service/problems.md b/.sisyphus/notepads/zugferd-service/problems.md
new file mode 100644
index 0000000..65abfb3
--- /dev/null
+++ b/.sisyphus/notepads/zugferd-service/problems.md
@@ -0,0 +1,7 @@
+# Problems - zugferd-service
+
+This file accumulates unresolved blockers.
+
+## [2026-02-04T18:12:44.864Z] Session Start
+No unresolved problems at this time.
+
diff --git a/.sisyphus/plans/zugferd-service.md b/.sisyphus/plans/zugferd-service.md
new file mode 100644
index 0000000..43533f8
--- /dev/null
+++ b/.sisyphus/plans/zugferd-service.md
@@ -0,0 +1,1764 @@
+# ZUGFeRD-Service: Python REST API for Invoice Extraction and Validation
+
+## TL;DR
+
+> **Quick Summary**: Build a stateless FastAPI service that extracts structured data from ZUGFeRD/Factur-X invoices embedded in PDFs, validates invoice correctness, and is packaged for both Docker and NixOS deployment.
+> 
+> **Deliverables**:
+> - Complete FastAPI application with 3 endpoints (`/health`, `/extract`, `/validate`)
+> - Pydantic models for all request/response schemas
+> - ZUGFeRD extraction pipeline using factur-x library
+> - 4 validation checks (pflichtfelder, betraege, ustid, pdf_abgleich)
+> - Docker multi-stage build (production-ready)
+> - Nix flake packaging (buildPythonApplication)
+> - Comprehensive test suite (TDD, pytest)
+> - README documentation with installation guide
+> 
+> **Estimated Effort**: Medium-Large (18 tasks)
+> **Parallel Execution**: YES - 6 waves
+> **Critical Path**: Project Setup → Models → Extractor → Validator → API → Packaging
+
+---
+
+## Context
+
+### Original Request
+Build a Python-based REST API service for ZUGFeRD invoice extraction and validation. The service runs as a Docker container on NixOS and should also be packaged as a Nix flake. Key requirements include handling all ZUGFeRD profiles (MINIMUM through EXTENDED), validating invoice amounts and required fields, and comparing XML data against PDF text.
+
+### Interview Summary
+**Key Discussions**:
+- **Framework choice**: FastAPI (user preference, excellent for REST APIs)
+- **Project format**: pyproject.toml with hatchling (modern, Nix-friendly)
+- **Nix packaging**: Flake-based approach with buildPythonApplication
+- **Testing strategy**: TDD with pytest, source official sample PDFs
+- **Authentication**: Open endpoints (no auth required)
+- **File handling**: 10MB limit, reject password-protected PDFs
+- **Calculations**: Standard rounding, 0.01 EUR tolerance
+
+**Research Findings**:
+- factur-x library provides `get_xml_from_pdf()`, `get_level()`, `get_flavor()` for extraction
+- Nix packaging follows mem0 pattern: pyproject=true, pythonRelaxDeps=true
+- UN/ECE unit codes require mapping dictionary (C62→Piece, KGM→Kilogram, etc.)
+- ZUGFeRD profiles detected via GuidelineSpecifiedDocumentContextParameter in XML
+
+### Metis Review
+**Identified Gaps** (addressed):
+- Password-protected PDFs → Reject with error
+- File size limits → 10MB hard limit
+- Decimal precision → Standard rounding
+- Authentication → Confirmed open/stateless
+
+---
+
+## Work Objectives
+
+### Core Objective
+Create a production-ready, stateless REST API that extracts ZUGFeRD/Factur-X invoice data from PDFs, validates invoice correctness against business rules, and provides clear error reporting for invoice processing workflows.
+
+### Concrete Deliverables
+- `src/main.py` - FastAPI application with all endpoints
+- `src/models.py` - Pydantic models for all data structures
+- `src/extractor.py` - ZUGFeRD extraction logic
+- `src/validator.py` - Validation checks implementation
+- `src/pdf_parser.py` - PDF text extraction for cross-validation
+- `src/utils.py` - Helper functions (unit codes, decimal handling)
+- `pyproject.toml` - Project configuration with hatchling
+- `Dockerfile` - Multi-stage production build
+- `docker-compose.yml` - Local development setup
+- `flake.nix` - Nix flake packaging
+- `tests/` - Complete test suite with fixtures
+- `README.md` - Installation and usage documentation
+
+### Definition of Done
+- [ ] `nix build .#zugferd-service` completes without errors
+- [ ] `docker build -t zugferd-service .` produces image <500MB
+- [ ] `pytest` runs all tests with 100% pass rate
+- [ ] `curl http://localhost:5000/health` returns `{"status": "healthy", "version": "1.0.0"}`
+- [ ] All ZUGFeRD profiles correctly detected from sample PDFs
+- [ ] All validation checks produce expected errors/warnings
+
+### Must Have
+- All 3 API endpoints as specified
+- Support for all ZUGFeRD profiles (MINIMUM, BASIC, BASIC WL, EN16931, EXTENDED)
+- All 4 validation checks (pflichtfelder, betraege, ustid, pdf_abgleich)
+- Structured JSON error responses with error codes
+- UTF-8 encoding throughout
+- Structured JSON logging
+
+### Must NOT Have (Guardrails)
+- ❌ Authentication middleware (open endpoints)
+- ❌ Database or persistence layer (stateless only)
+- ❌ Caching layers
+- ❌ Rate limiting
+- ❌ Metrics endpoints beyond /health
+- ❌ CLI interface
+- ❌ Web UI or admin dashboard
+- ❌ Batch processing or queue system
+- ❌ Online USt-ID validation (format check only)
+- ❌ Support for ZUGFeRD 1.x (only 2.x)
+- ❌ Abstraction layers for "future extensibility"
+
+---
+
+## Verification Strategy
+
+> **UNIVERSAL RULE: ZERO HUMAN INTERVENTION**
+>
+> ALL tasks in this plan MUST be verifiable WITHOUT any human action.
+> Every verification step is executed by the agent using tools (curl, pytest, nix, docker).
+
+### Test Decision
+- **Infrastructure exists**: NO (new project)
+- **Automated tests**: TDD (test-first)
+- **Framework**: pytest with pytest-asyncio
+
+### TDD Workflow
+Each implementation task follows RED-GREEN-REFACTOR:
+1. **RED**: Write failing test first
+2. **GREEN**: Implement minimum code to pass
+3. **REFACTOR**: Clean up while keeping tests green
+
+### Test Infrastructure Setup (Task 0)
+```bash
+# Install pytest and test dependencies
+pip install pytest pytest-asyncio httpx
+
+# Verify pytest works
+pytest --version
+
+# Run initial test
+pytest tests/ -v
+```
+
+### Agent-Executed QA Scenarios (MANDATORY)
+
+All verifications use:
+- **API Testing**: `curl` commands with JSON assertions
+- **Docker Testing**: `docker build` and `docker run` commands
+- **Nix Testing**: `nix build` and `nix run` commands
+- **Unit Testing**: `pytest` with specific test file targets
+
+---
+
+## Execution Strategy
+
+### Parallel Execution Waves
+
+```
+Wave 1 (Start Immediately; Task 1 first, then Tasks 2-3 in parallel):
+├── Task 1: Project scaffold (pyproject.toml, directories)
+├── Task 2: Download ZUGFeRD sample PDFs
+└── Task 3: Create Pydantic models
+
+Wave 2 (After Wave 1):
+├── Task 4: Extractor unit tests + implementation
+├── Task 5: PDF parser unit tests + implementation
+└── Task 6: Utils (unit codes, decimal handling)
+
+Wave 3 (After Wave 2):
+├── Task 7: Validator unit tests + implementation
+├── Task 8: FastAPI app structure
+└── Task 9: Health endpoint
+
+Wave 4 (After Wave 3):
+├── Task 10: Extract endpoint
+├── Task 11: Validate endpoint
+└── Task 12: Error handling middleware
+
+Wave 5 (After Wave 4):
+├── Task 13: Integration tests
+├── Task 14: Dockerfile
+└── Task 15: docker-compose.yml
+
+Wave 6 (After Wave 5):
+├── Task 16: Nix flake packaging
+├── Task 17: NixOS module example
+└── Task 18: README documentation
+
+Critical Path: Task 1 → Task 4 → Task 7 → Task 11 → Task 13 → Task 16
+```
+
+### Dependency Matrix
+
+| Task | Depends On | Blocks | Can Parallelize With |
+|------|------------|--------|---------------------|
+| 1 | None | 2-18 | None (must be first) |
+| 2 | 1 | 4, 5 | 3 |
+| 3 | 1 | 4, 5, 7 | 2 |
+| 4 | 2, 3 | 7, 10 | 5, 6 |
+| 5 | 2 | 7 | 4, 6 |
+| 6 | 1 | 7 | 4, 5 |
+| 7 | 4, 5, 6 | 11 | 8, 9 |
+| 8 | 3 | 9, 10, 11 | 7 |
+| 9 | 8 | 10, 11 | 7 |
+| 10 | 4, 8, 9 | 13 | 11, 12 |
+| 11 | 7, 8, 9 | 13 | 10, 12 |
+| 12 | 8 | 13 | 10, 11 |
+| 13 | 10, 11, 12 | 14, 15 | None |
+| 14 | 13 | 16 | 15 |
+| 15 | 13 | None | 14 |
+| 16 | 14 | 17, 18 | None |
+| 17 | 16 | 18 | None |
+| 18 | 16, 17 | None | None |
+
+---
+
+## TODOs
+
+### Wave 1: Project Foundation
+
+- [x] 1. Project Scaffold and Configuration
+
+  **What to do**:
+  - Create project directory structure as specified
+  - Create `pyproject.toml` with hatchling build system
+  - Configure pytest in pyproject.toml
+  - Create `src/__init__.py` and `tests/__init__.py`
+  - Set up Python version requirement (3.11+)
+
+  **Must NOT do**:
+  - Do NOT add optional dependencies beyond the `dev` test extras (pytest, pytest-asyncio, httpx)
+  - Do NOT create README yet (separate task)
+  - Do NOT add pre-commit hooks
+
+  **Recommended Agent Profile**:
+  - **Category**: `quick`
+    - Reason: Straightforward file creation with established patterns
+  - **Skills**: [`git-master`]
+    - `git-master`: For proper initial commit after scaffold
+
+  **Parallelization**:
+  - **Can Run In Parallel**: NO
+  - **Parallel Group**: Sequential (must be first)
+  - **Blocks**: All subsequent tasks
+  - **Blocked By**: None
+
+  **References**:
+  - **Pattern References**: mem0 pyproject.toml pattern (from librarian research)
+  - **External References**: https://hatch.pypa.io/latest/config/metadata/ (hatchling docs)
+
+  **Files to Create**:
+  ```
+  pyproject.toml
+  src/__init__.py
+  src/main.py (placeholder)
+  src/models.py (placeholder)
+  src/extractor.py (placeholder)
+  src/validator.py (placeholder)
+  src/pdf_parser.py (placeholder)
+  src/utils.py (placeholder)
+  tests/__init__.py
+  tests/conftest.py (pytest fixtures)
+  tests/fixtures/.gitkeep
+  ```
+
+  **pyproject.toml Structure**:
+  ```toml
+  [build-system]
+  requires = ["hatchling"]
+  build-backend = "hatchling.build"
+
+  [project]
+  name = "zugferd-service"
+  version = "1.0.0"
+  description = "REST API for ZUGFeRD invoice extraction and validation"
+  requires-python = ">=3.11"
+  dependencies = [
+      "fastapi>=0.109.0",
+      "uvicorn>=0.27.0",
+      "python-multipart>=0.0.6",
+      "factur-x>=2.5",
+      "pypdf>=4.0.0",
+      "pydantic>=2.5.0",
+      "lxml>=5.0.0",
+  ]
+
+  [project.optional-dependencies]
+  dev = [
+      "pytest>=8.0.0",
+      "pytest-asyncio>=0.23.0",
+      "httpx>=0.27.0",
+  ]
+
+  [project.scripts]
+  zugferd-service = "src.main:run"
+
+  [tool.pytest.ini_options]
+  asyncio_mode = "auto"
+  testpaths = ["tests"]
+  ```
+
+  **Acceptance Criteria**:
+
+  **Agent-Executed QA Scenarios:**
+
+  ```
+  Scenario: pyproject.toml is valid
+    Tool: Bash
+    Steps:
+      1. python -c "import tomllib; tomllib.load(open('pyproject.toml', 'rb'))"
+      2. Assert: Exit code 0
+    Expected Result: TOML parses without error
+    Evidence: Command output captured
+
+  Scenario: Project structure exists
+    Tool: Bash
+    Steps:
+      1. ls -la src/
+      2. Assert: main.py, models.py, extractor.py, validator.py, pdf_parser.py, utils.py exist
+      3. ls -la tests/
+      4. Assert: conftest.py, fixtures/ exist
+    Expected Result: All required files present
+    Evidence: Directory listing captured
+
+  Scenario: Dependencies install correctly
+    Tool: Bash
+    Steps:
+      1. pip install -e ".[dev]"
+      2. Assert: Exit code 0
+      3. python -c "import fastapi; import facturx; import pypdf"
+      4. Assert: Exit code 0
+    Expected Result: All dependencies importable
+    Evidence: pip output captured
+  ```
+
+  **Commit**: YES
+  - Message: `feat(project): initialize ZUGFeRD service with pyproject.toml and directory structure`
+  - Files: All created files
+  - Pre-commit: `python -c "import tomllib; tomllib.load(open('pyproject.toml', 'rb'))"`
+
+---
+
+- [ ] 2. Download ZUGFeRD Sample PDFs
+
+  **What to do**:
+  - Download official ZUGFeRD sample PDFs from FeRD/ZUGFeRD repositories
+  - Include samples for: MINIMUM, BASIC, BASIC WL, EN16931, EXTENDED profiles
+  - Include a non-ZUGFeRD PDF for negative testing
+  - Store in `tests/fixtures/`
+  - Create fixture manifest documenting each file
+
+  **Must NOT do**:
+  - Do NOT create synthetic PDFs
+  - Do NOT download more than 10 samples (keep focused)
+
+  **Recommended Agent Profile**:
+  - **Category**: `quick`
+    - Reason: Download and organize files
+  - **Skills**: []
+    - No special skills needed
+
+  **Parallelization**:
+  - **Can Run In Parallel**: YES
+  - **Parallel Group**: Wave 1 (with Task 3)
+  - **Blocks**: Tasks 4, 5
+  - **Blocked By**: Task 1
+
+  **References**:
+  - **External References**: 
+    - https://www.ferd-net.de/download/testrechnungen
+    - https://github.com/ZUGFeRD/mustangproject/tree/master/Mustang-CLI/src/test/resources
+    - https://github.com/akretion/factur-x/tree/master/tests
+
+  **Acceptance Criteria**:
+
+  **Agent-Executed QA Scenarios:**
+
+  ```
+  Scenario: Sample PDFs exist for all profiles
+    Tool: Bash
+    Steps:
+      1. ls tests/fixtures/*.pdf | wc -l
+      2. Assert: At least 5 PDF files
+      3. file tests/fixtures/*.pdf
+      4. Assert: All files identified as "PDF document"
+    Expected Result: Multiple valid PDF files in fixtures
+    Evidence: File listing captured
+
+  Scenario: Fixture manifest documents samples
+    Tool: Bash
+    Steps:
+      1. cat tests/fixtures/MANIFEST.md
+      2. Assert: Contains entries for MINIMUM, BASIC, BASIC WL, EN16931, EXTENDED
+    Expected Result: Manifest describes all test fixtures
+    Evidence: Manifest content captured
+  ```
+
+  **Commit**: YES
+  - Message: `test(fixtures): add official ZUGFeRD sample PDFs for all profiles`
+  - Files: `tests/fixtures/*.pdf`, `tests/fixtures/MANIFEST.md`
+
+---
+
+- [ ] 3. Create Pydantic Models
+
+  **What to do**:
+  - Define all Pydantic models as per API specification
+  - Models: ExtractRequest, ExtractResponse, ValidateRequest, ValidateResponse
+  - Nested models: Supplier, Buyer, LineItem, Totals, VatBreakdown, PaymentTerms
+  - Error models: ErrorResponse, ValidationError
+  - Add field validators where appropriate (e.g., VAT ID format)
+
+  **Must NOT do**:
+  - Do NOT add ORM mappings (no database)
+  - Do NOT add serialization beyond Pydantic defaults
+
+  **Recommended Agent Profile**:
+  - **Category**: `quick`
+    - Reason: Straightforward Pydantic model definitions from spec
+  - **Skills**: []
+    - Standard Python work
+
+  **Parallelization**:
+  - **Can Run In Parallel**: YES
+  - **Parallel Group**: Wave 1 (with Task 2)
+  - **Blocks**: Tasks 4, 5, 7
+  - **Blocked By**: Task 1
+
+  **References**:
+  - **API Specification**: User's detailed spec document (request/response schemas)
+  - **Pattern References**: https://docs.pydantic.dev/latest/concepts/models/
+  
+  **Key Models from Spec**:
+  ```python
+  class Supplier(BaseModel):
+      name: str
+      street: str | None = None
+      postal_code: str | None = None
+      city: str | None = None
+      country: str | None = None
+      vat_id: str | None = None
+      email: str | None = None
+
+  class LineItem(BaseModel):
+      position: int
+      article_number: str | None = None
+      article_number_buyer: str | None = None
+      description: str
+      quantity: float
+      unit: str  # Human-readable, translated from UN/ECE code
+      unit_price: float
+      line_total: float
+      vat_rate: float | None = None
+      vat_amount: float | None = None
+
+  class ExtractResponse(BaseModel):
+      is_zugferd: bool
+      zugferd_profil: str | None = None
+      xml_raw: str | None = None
+      xml_data: XmlData | None = None
+      pdf_text: str | None = None
+      extraction_meta: ExtractionMeta
+  ```
+
+  **Acceptance Criteria**:
+
+  **TDD - Write Tests First:**
+  ```python
+  # tests/test_models.py
+  def test_extract_response_zugferd():
+      response = ExtractResponse(
+          is_zugferd=True,
+          zugferd_profil="EN16931",
+          xml_raw="<?xml ...",
+          # ... full data
+      )
+      assert response.is_zugferd is True
+      assert response.zugferd_profil == "EN16931"
+
+  def test_extract_response_non_zugferd():
+      response = ExtractResponse(
+          is_zugferd=False,
+          pdf_text="Invoice text...",
+          extraction_meta=ExtractionMeta(pages=1, extraction_time_ms=50)
+      )
+      assert response.xml_data is None
+
+  def test_line_item_validation():
+      item = LineItem(
+          position=1,
+          description="Widget",
+          quantity=10.0,
+          unit="Piece",
+          unit_price=9.99,
+          line_total=99.90
+      )
+      assert item.line_total == 99.90
+  ```
+
+  **Agent-Executed QA Scenarios:**
+
+  ```
+  Scenario: Models pass all unit tests
+    Tool: Bash
+    Steps:
+      1. pytest tests/test_models.py -v
+      2. Assert: All tests pass (exit code 0)
+    Expected Result: 100% test pass rate
+    Evidence: pytest output captured
+
+  Scenario: Models serialize to JSON correctly
+    Tool: Bash
+    Steps:
+      1. python -c "from src.models import ExtractResponse, ExtractionMeta; r = ExtractResponse(is_zugferd=False, extraction_meta=ExtractionMeta(pages=1, extraction_time_ms=50)); print(r.model_dump_json())"
+      2. Assert: Valid JSON output containing "is_zugferd": false
+    Expected Result: JSON serialization works
+    Evidence: JSON output captured
+  ```
+
+  **Commit**: YES
+  - Message: `feat(models): add Pydantic models for all API request/response schemas`
+  - Files: `src/models.py`, `tests/test_models.py`
+  - Pre-commit: `pytest tests/test_models.py`
+
+---
+
+### Wave 2: Core Extraction Logic
+
+- [ ] 4. ZUGFeRD Extractor Implementation (TDD)
+
+  **What to do**:
+  - Write tests first using sample PDFs from fixtures
+  - Implement `extract_zugferd()` function using factur-x library
+  - Handle profile detection (get_level, get_flavor)
+  - Parse XML to structured data matching models
+  - Handle non-ZUGFeRD PDFs gracefully
+  - Handle errors (corrupt PDF, password-protected)
+
+  **Must NOT do**:
+  - Do NOT implement custom XML parsing if factur-x provides it
+  - Do NOT cache extracted data
+
+  **Recommended Agent Profile**:
+  - **Category**: `unspecified-high`
+    - Reason: Core business logic requiring careful implementation
+  - **Skills**: [`systematic-debugging`]
+    - `systematic-debugging`: For handling edge cases in PDF extraction
+
+  **Parallelization**:
+  - **Can Run In Parallel**: YES
+  - **Parallel Group**: Wave 2 (with Tasks 5, 6)
+  - **Blocks**: Tasks 7, 10
+  - **Blocked By**: Tasks 2, 3
+
+  **References**:
+  - **Library Docs**: https://github.com/akretion/factur-x
+  - **Pattern References**: factur-x get_xml_from_pdf usage (from librarian research)
+  - **Test Fixtures**: `tests/fixtures/*.pdf`
+
+  **Key Functions**:
+  ```python
+  from facturx import get_xml_from_pdf, get_level, get_flavor
+
+  def extract_zugferd(pdf_bytes: bytes) -> ExtractResponse:
+      """Extract ZUGFeRD data from PDF bytes."""
+      # 1. Check file size (<10MB)
+      # 2. Try to extract XML using factur-x
+      # 3. Detect profile and flavor
+      # 4. Parse XML to structured data
+      # 5. Extract PDF text for pdf_abgleich
+      # 6. Return ExtractResponse
+  ```
+
+  **Acceptance Criteria**:
+
+  **TDD - Write Tests First:**
+  ```python
+  # tests/test_extractor.py
+  def test_extract_en16931_profile(sample_en16931_pdf):
+      result = extract_zugferd(sample_en16931_pdf)
+      assert result.is_zugferd is True
+      assert result.zugferd_profil == "EN16931"
+      assert result.xml_data is not None
+      assert result.xml_data.invoice_number is not None
+
+  def test_extract_non_zugferd_pdf(sample_plain_pdf):
+      result = extract_zugferd(sample_plain_pdf)
+      assert result.is_zugferd is False
+      assert result.xml_data is None
+      assert result.pdf_text is not None
+
+  def test_extract_corrupt_pdf():
+      with pytest.raises(ExtractionError) as exc:
+          extract_zugferd(b"not a pdf")
+      assert exc.value.error_code == "invalid_pdf"
+
+  def test_file_size_limit():
+      large_pdf = b"x" * (11 * 1024 * 1024)  # 11MB
+      with pytest.raises(ExtractionError) as exc:
+          extract_zugferd(large_pdf)
+      assert exc.value.error_code == "file_too_large"
+  ```
+
+  **Agent-Executed QA Scenarios:**
+
+  ```
+  Scenario: Extractor passes all unit tests
+    Tool: Bash
+    Steps:
+      1. pytest tests/test_extractor.py -v
+      2. Assert: All tests pass
+    Expected Result: 100% test pass rate
+    Evidence: pytest output captured
+
+  Scenario: EN16931 profile correctly detected
+    Tool: Bash
+    Steps:
+      1. python -c "
+         from src.extractor import extract_zugferd
+         with open('tests/fixtures/sample_en16931.pdf', 'rb') as f:
+             result = extract_zugferd(f.read())
+         print(f'Profile: {result.zugferd_profil}')
+         print(f'Invoice: {result.xml_data.invoice_number}')"
+      2. Assert: Output contains "Profile: EN16931" or "Profile: en16931"
+    Expected Result: Profile correctly identified
+    Evidence: Script output captured
+
+  Scenario: Non-ZUGFeRD PDF handled gracefully
+    Tool: Bash
+    Steps:
+      1. python -c "
+         from src.extractor import extract_zugferd
+         with open('tests/fixtures/sample_no_zugferd.pdf', 'rb') as f:
+             result = extract_zugferd(f.read())
+         assert result.is_zugferd == False
+         assert result.pdf_text is not None
+         print('OK: Non-ZUGFeRD handled correctly')"
+      2. Assert: Output contains "OK:"
+    Expected Result: Graceful handling
+    Evidence: Script output captured
+  ```
+
+  **Commit**: YES
+  - Message: `feat(extractor): implement ZUGFeRD extraction with profile detection`
+  - Files: `src/extractor.py`, `tests/test_extractor.py`
+  - Pre-commit: `pytest tests/test_extractor.py`
+
+---
+
+- [ ] 5. PDF Text Parser Implementation (TDD)
+
+  **What to do**:
+  - Write tests first with expected extraction patterns
+  - Implement PDF text extraction using pypdf
+  - Create regex patterns for extracting key values from text
+  - Extract: invoice_number, invoice_date, amounts (net, gross, vat)
+  - Return confidence scores for each extraction
+
+  **Must NOT do**:
+  - Do NOT use OCR (text extraction only)
+  - Do NOT parse images in PDFs
+
+  **Recommended Agent Profile**:
+  - **Category**: `unspecified-high`
+    - Reason: Regex pattern development requires careful testing
+  - **Skills**: [`systematic-debugging`]
+    - `systematic-debugging`: For developing and testing regex patterns
+
+  **Parallelization**:
+  - **Can Run In Parallel**: YES
+  - **Parallel Group**: Wave 2 (with Tasks 4, 6)
+  - **Blocks**: Task 7
+  - **Blocked By**: Task 2
+
+  **References**:
+  - **User Spec**: Regex patterns provided in spec (invoice_number, gross_amount patterns)
+  - **Library Docs**: https://pypdf.readthedocs.io/en/stable/
+
+  **Key Patterns from Spec**:
+  ```python
+  EXTRACTION_PATTERNS = {
+      "invoice_number": [
+          r"Rechnungs?-?(?:Nr|Nummer)[.:\s]*([A-Z0-9\-]+)",
+          r"Invoice\s*(?:No|Number)?[.:\s]*([A-Z0-9\-]+)",
+          r"Beleg-?Nr[.:\s]*([A-Z0-9\-]+)"
+      ],
+      "gross_amount": [
+          r"Brutto[:\s]*([0-9.,]+)\s*(?:EUR|€)?",
+          r"Gesamtbetrag[:\s]*([0-9.,]+)",
+          r"Total[:\s]*([0-9.,]+)\s*(?:EUR|€)?"
+      ],
+      # ... more patterns
+  }
+  ```
+
+  **Acceptance Criteria**:
+
+  **TDD - Write Tests First:**
+  ```python
+  # tests/test_pdf_parser.py
+  def test_extract_invoice_number():
+      text = "Rechnung\nRechnungs-Nr.: RE-2025-001234\nDatum: 04.02.2025"
+      result = extract_from_text(text)
+      assert result["invoice_number"] == "RE-2025-001234"
+      assert result["invoice_number_confidence"] >= 0.9
+
+  def test_extract_amounts():
+      text = "Netto: 100,00 EUR\nMwSt 19%: 19,00 EUR\nBrutto: 119,00 EUR"
+      result = extract_from_text(text)
+      assert result["gross_amount"] == 119.00
+      assert result["net_amount"] == 100.00
+
+  def test_german_number_format():
+      text = "Gesamtbetrag: 1.234,56 €"
+      result = extract_from_text(text)
+      assert result["gross_amount"] == 1234.56
+  ```
+
+  **Agent-Executed QA Scenarios:**
+
+  ```
+  Scenario: PDF parser passes all unit tests
+    Tool: Bash
+    Steps:
+      1. pytest tests/test_pdf_parser.py -v
+      2. Assert: All tests pass
+    Expected Result: 100% test pass rate
+    Evidence: pytest output captured
+
+  Scenario: Real PDF text extraction works
+    Tool: Bash
+    Steps:
+      1. python -c "
+         from src.pdf_parser import extract_text_from_pdf, extract_from_text
+         with open('tests/fixtures/sample_en16931.pdf', 'rb') as f:
+             text = extract_text_from_pdf(f.read())
+         print(f'Extracted {len(text)} characters')
+         result = extract_from_text(text)
+         print(f'Invoice: {result.get(\"invoice_number\", \"NOT FOUND\")}')"
+      2. Assert: Output shows extracted characters and invoice number
+    Expected Result: Text extraction works on real PDFs
+    Evidence: Script output captured
+  ```
+
+  **Commit**: YES
+  - Message: `feat(pdf-parser): implement PDF text extraction with regex patterns`
+  - Files: `src/pdf_parser.py`, `tests/test_pdf_parser.py`
+  - Pre-commit: `pytest tests/test_pdf_parser.py`
+
+---
+
+- [ ] 6. Utility Functions Implementation
+
+  **What to do**:
+  - Create UN/ECE unit code mapping dictionary
+  - Implement decimal rounding helper (2 decimal places, standard rounding)
+  - Implement tolerance comparison function (0.01 EUR)
+  - Add German date format parser
+
+  **Must NOT do**:
+  - Do NOT create configurable tolerance (hardcode 0.01)
+  - Do NOT add unit codes beyond common ones (expand later if needed)
+
+  **Recommended Agent Profile**:
+  - **Category**: `quick`
+    - Reason: Simple utility functions
+  - **Skills**: []
+    - Standard Python work
+
+  **Parallelization**:
+  - **Can Run In Parallel**: YES
+  - **Parallel Group**: Wave 2 (with Tasks 4, 5)
+  - **Blocks**: Tasks 4, 7
+  - **Blocked By**: Task 1
+
+  **References**:
+  - **UN/ECE Codes**: https://docs.peppol.eu/poacc/upgrade-3/codelist/UNECERec20/
+
+  **Unit Code Dictionary**:
+  ```python
+  UNECE_UNIT_CODES = {
+      "C62": "Stück",
+      "H87": "Stück",
+      "KGM": "Kilogramm",
+      "GRM": "Gramm",
+      "TNE": "Tonne",
+      "MTR": "Meter",
+      "KMT": "Kilometer",
+      "MTK": "Quadratmeter",
+      "LTR": "Liter",
+      "MLT": "Milliliter",
+      "DAY": "Tag",
+      "HUR": "Stunde",
+      "MON": "Monat",
+      "ANN": "Jahr",
+      "SET": "Set",
+      "PCE": "Stück",
+      "EA": "Stück",
+  }
+  ```
+
+  **Acceptance Criteria**:
+
+  **Agent-Executed QA Scenarios:**
+
+  ```
+  Scenario: Unit code translation works
+    Tool: Bash
+    Steps:
+      1. python -c "
+         from src.utils import translate_unit_code
+         assert translate_unit_code('C62') == 'Stück'
+         assert translate_unit_code('KGM') == 'Kilogramm'
+         assert translate_unit_code('UNKNOWN') == 'UNKNOWN'
+         print('OK: Unit codes translate correctly')"
+      2. Assert: Output contains "OK:"
+    Expected Result: All translations correct
+    Evidence: Script output captured
+
+  Scenario: Decimal comparison with tolerance
+    Tool: Bash
+    Steps:
+      1. python -c "
+         from src.utils import amounts_match
+         assert amounts_match(100.00, 100.00) == True
+         assert amounts_match(100.00, 100.005) == True  # Within 0.01
+         assert amounts_match(100.00, 100.02) == False  # Outside tolerance
+         print('OK: Tolerance comparison works')"
+      2. Assert: Output contains "OK:"
+    Expected Result: Tolerance logic correct
+    Evidence: Script output captured
+  ```
+
+  **Commit**: YES
+  - Message: `feat(utils): add unit code mapping and decimal utilities`
+  - Files: `src/utils.py`, `tests/test_utils.py`
+  - Pre-commit: `pytest tests/test_utils.py`
+
+---
+
+### Wave 3: Validation Logic
+
+- [ ] 7. Validator Implementation (TDD)
+
+  **What to do**:
+  - Write tests first for each validation check
+  - Implement all 4 validation checks:
+    1. `pflichtfelder` - Required fields check
+    2. `betraege` - Amount calculations check
+    3. `ustid` - VAT ID format check
+    4. `pdf_abgleich` - XML vs PDF comparison
+  - Return structured ValidationResult with errors and warnings
+
+  **Must NOT do**:
+  - Do NOT implement online USt-ID validation
+  - Do NOT add validation checks beyond spec
+
+  **Recommended Agent Profile**:
+  - **Category**: `unspecified-high`
+    - Reason: Core business logic with multiple validation rules
+  - **Skills**: [`systematic-debugging`]
+    - `systematic-debugging`: For handling edge cases in validation logic
+
+  **Parallelization**:
+  - **Can Run In Parallel**: YES
+  - **Parallel Group**: Wave 3 (with Tasks 8, 9)
+  - **Blocks**: Task 11
+  - **Blocked By**: Tasks 4, 5, 6
+
+  **References**:
+  - **User Spec**: Detailed validation logic tables
+  - **Pattern References**: src/models.py ValidationError model
+
+  **Validation Rules from Spec**:
+
+  **pflichtfelder (Required Fields)**:
+  | Field | Severity |
+  |-------|----------|
+  | invoice_number | critical |
+  | invoice_date | critical |
+  | supplier.name | critical |
+  | supplier.vat_id | critical |
+  | buyer.name | critical |
+  | totals.net | critical |
+  | totals.gross | critical |
+  | totals.vat_total | critical |
+  | due_date | warning |
+  | payment_terms.iban | warning |
+  | line_items (min 1) | critical |
+
+  **betraege (Calculations)**:
+  - line_total ≈ quantity × unit_price (±0.01)
+  - totals.net ≈ Σ(line_items.line_total) (±0.01)
+  - vat_breakdown.amount ≈ base × (rate/100) (±0.01)
+  - totals.vat_total ≈ Σ(vat_breakdown.amount) (±0.01)
+  - totals.gross ≈ totals.net + totals.vat_total (±0.01)
+
+  **ustid (VAT ID Format)**:
+  | Country | Regex |
+  |---------|-------|
+  | DE | `^DE[0-9]{9}$` |
+  | AT | `^ATU[0-9]{8}$` |
+  | CH | `^CHE[0-9]{9}(MWST\|TVA\|IVA)$` |
+
+  **Acceptance Criteria**:
+
+  **TDD - Write Tests First:**
+  ```python
+  # tests/test_validator.py
+  def test_pflichtfelder_missing_invoice_number():
+      data = XmlData(invoice_number=None, ...)
+      result = validate_pflichtfelder(data)
+      assert any(e.field == "invoice_number" for e in result.errors)
+      assert any(e.severity == "critical" for e in result.errors)
+
+  def test_betraege_calculation_mismatch():
+      data = XmlData(
+          line_items=[LineItem(quantity=10, unit_price=9.99, line_total=100.00)],  # Wrong!
+          ...
+      )
+      result = validate_betraege(data)
+      assert any(e.error_code == "calculation_mismatch" for e in result.errors)
+      assert any("99.90" in e.message for e in result.errors)
+
+  def test_ustid_valid_german():
+      result = validate_ustid("DE123456789")
+      assert result.is_valid is True
+
+  def test_ustid_invalid_format():
+      result = validate_ustid("DE12345")  # Too short
+      assert result.is_valid is False
+      assert result.error_code == "invalid_format"
+
+  def test_pdf_abgleich_mismatch():
+      xml_data = XmlData(invoice_number="RE-001", totals=Totals(gross=118.88))
+      pdf_values = {"invoice_number": "RE-002", "gross_amount": 118.88}
+      result = validate_pdf_abgleich(xml_data, pdf_values)
+      assert any(e.field == "invoice_number" for e in result.errors)
+  ```
+
+  **Agent-Executed QA Scenarios:**
+
+  ```
+  Scenario: Validator passes all unit tests
+    Tool: Bash
+    Steps:
+      1. pytest tests/test_validator.py -v
+      2. Assert: All tests pass
+    Expected Result: 100% test pass rate
+    Evidence: pytest output captured
+
+  Scenario: All validation checks work together
+    Tool: Bash
+    Steps:
+      1. python -c "
+         from src.validator import validate_invoice
+         from src.models import XmlData, ValidateRequest
+         # Create a request with known issues
+         request = ValidateRequest(
+             xml_data={'invoice_number': None, ...},
+             checks=['pflichtfelder', 'betraege']
+         )
+         result = validate_invoice(request)
+         print(f'Errors: {len(result.errors)}')"
+      2. Assert: Script runs without error, shows error count
+    Expected Result: Validator processes all checks
+    Evidence: Script output captured
+  ```
+
+  **Commit**: YES
+  - Message: `feat(validator): implement all validation checks (pflichtfelder, betraege, ustid, pdf_abgleich)`
+  - Files: `src/validator.py`, `tests/test_validator.py`
+  - Pre-commit: `pytest tests/test_validator.py`
+
+---
+
+### Wave 3 (continued): API Foundation
+
+- [ ] 8. FastAPI Application Structure
+
+  **What to do**:
+  - Create FastAPI app instance in main.py
+  - Configure exception handlers for custom errors
+  - Set up structured JSON logging
+  - Add CORS middleware (for local development)
+  - Configure app metadata (title, version, description)
+
+  **Must NOT do**:
+  - Do NOT add authentication middleware
+  - Do NOT add rate limiting
+
+  **Recommended Agent Profile**:
+  - **Category**: `quick`
+    - Reason: Standard FastAPI setup
+  - **Skills**: []
+    - Standard FastAPI patterns
+
+  **Parallelization**:
+  - **Can Run In Parallel**: YES
+  - **Parallel Group**: Wave 3 (with Tasks 7, 9)
+  - **Blocks**: Tasks 9, 10, 11
+  - **Blocked By**: Task 3
+
+  **References**:
+  - **FastAPI Docs**: https://fastapi.tiangolo.com/
+  - **Pattern References**: User spec error handling table
+
+  **Acceptance Criteria**:
+
+  **Agent-Executed QA Scenarios:**
+
+  ```
+  Scenario: FastAPI app starts
+    Tool: Bash
+    Steps:
+      1. timeout 5 uvicorn src.main:app --port 5001 &
+      2. sleep 2
+      3. curl -s http://localhost:5001/openapi.json | head -c 100
+      4. Assert: Output contains "openapi"
+      5. kill %1
+    Expected Result: OpenAPI schema accessible
+    Evidence: curl output captured
+  ```
+
+  **Commit**: YES
+  - Message: `feat(api): create FastAPI application structure with error handling`
+  - Files: `src/main.py`
+  - Pre-commit: `python -c "from src.main import app; print(app.title)"`
+
+---
+
+- [ ] 9. Health Endpoint Implementation
+
+  **What to do**:
+  - Implement `GET /health` endpoint
+  - Return version from pyproject.toml
+  - Return simple health status
+
+  **Must NOT do**:
+  - Do NOT add complex health checks (no dependencies to check)
+
+  **Recommended Agent Profile**:
+  - **Category**: `quick`
+    - Reason: Simple endpoint
+  - **Skills**: []
+
+  **Parallelization**:
+  - **Can Run In Parallel**: YES
+  - **Parallel Group**: Wave 3 (with Tasks 7, 8)
+  - **Blocks**: Tasks 10, 11
+  - **Blocked By**: Task 8
+
+  **References**:
+  - **User Spec**: Health endpoint response format
+
+  **Acceptance Criteria**:
+
+  **Agent-Executed QA Scenarios:**
+
+  ```
+  Scenario: Health endpoint returns correct format
+    Tool: Bash
+    Steps:
+      1. uvicorn src.main:app --port 5002 &
+      2. sleep 2
+      3. curl -s http://localhost:5002/health
+      4. Assert: Response contains {"status": "healthy", "version": "1.0.0"}
+      5. kill %1
+    Expected Result: Health check passes
+    Evidence: curl output captured
+  ```
+
+  **Commit**: YES (groups with Task 8)
+  - Message: `feat(api): add health endpoint`
+  - Files: `src/main.py`
+
+---
+
+### Wave 4: API Endpoints
+
+- [ ] 10. Extract Endpoint Implementation (TDD)
+
+  **What to do**:
+  - Write integration tests for `/extract` endpoint
+  - Implement `POST /extract` endpoint
+  - Accept base64-encoded PDF in JSON body
+  - Use extractor module for extraction
+  - Return structured ExtractResponse
+
+  **Must NOT do**:
+  - Do NOT accept multipart file uploads (JSON only per spec)
+
+  **Recommended Agent Profile**:
+  - **Category**: `unspecified-high`
+    - Reason: Integration of extractor into API endpoint
+  - **Skills**: []
+
+  **Parallelization**:
+  - **Can Run In Parallel**: YES
+  - **Parallel Group**: Wave 4 (with Tasks 11, 12)
+  - **Blocks**: Task 13
+  - **Blocked By**: Tasks 4, 8, 9
+
+  **References**:
+  - **User Spec**: /extract request/response format
+  - **Pattern References**: src/extractor.py
+
+  **Acceptance Criteria**:
+
+  **TDD - Write Tests First:**
+  ```python
+  # tests/test_api.py
+  @pytest.fixture
+  def client():
+      return TestClient(app)
+
+  def test_extract_zugferd_pdf(client, sample_en16931_pdf_base64):
+      response = client.post("/extract", json={"pdf_base64": sample_en16931_pdf_base64})
+      assert response.status_code == 200
+      data = response.json()
+      assert data["is_zugferd"] is True
+      assert data["zugferd_profil"] is not None
+
+  def test_extract_invalid_base64(client):
+      response = client.post("/extract", json={"pdf_base64": "not-valid-base64!!!"})
+      assert response.status_code == 400
+      assert response.json()["error"] == "invalid_base64"
+
+  def test_extract_non_pdf(client):
+      # Base64 of "Hello World" (not a PDF)
+      response = client.post("/extract", json={"pdf_base64": "SGVsbG8gV29ybGQ="})
+      assert response.status_code == 400
+      assert response.json()["error"] == "invalid_pdf"
+  ```
+
+  **Agent-Executed QA Scenarios:**
+
+  ```
+  Scenario: Extract endpoint integration test
+    Tool: Bash
+    Steps:
+      1. pytest tests/test_api.py -k test_extract -v
+      2. Assert: All extract tests pass
+    Expected Result: Endpoint works correctly
+    Evidence: pytest output captured
+
+  Scenario: Live extract endpoint test
+    Tool: Bash
+    Steps:
+      1. uvicorn src.main:app --port 5003 &
+      2. sleep 2
+      3. PDF_BASE64=$(base64 -w 0 tests/fixtures/sample_en16931.pdf)
+      4. curl -s -X POST http://localhost:5003/extract \
+           -H "Content-Type: application/json" \
+           -d "{\"pdf_base64\": \"$PDF_BASE64\"}" | jq '.is_zugferd'
+      5. Assert: Output is true (bare JSON boolean; jq prints it without quotes)
+      6. kill %1
+    Expected Result: ZUGFeRD detected
+    Evidence: curl output captured
+  ```
+
+  **Commit**: YES
+  - Message: `feat(api): implement /extract endpoint for PDF processing`
+  - Files: `src/main.py`, `tests/test_api.py`
+  - Pre-commit: `pytest tests/test_api.py -k test_extract`
+
+---
+
+- [ ] 11. Validate Endpoint Implementation (TDD)
+
+  **What to do**:
+  - Write integration tests for `/validate` endpoint
+  - Implement `POST /validate` endpoint
+  - Accept xml_data, pdf_text, and checks array
+  - Use validator module for validation
+  - Return structured ValidateResponse
+
+  **Must NOT do**:
+  - Do NOT run checks not in the request's checks array
+
+  **Recommended Agent Profile**:
+  - **Category**: `unspecified-high`
+    - Reason: Integration of validator into API endpoint
+  - **Skills**: []
+
+  **Parallelization**:
+  - **Can Run In Parallel**: YES
+  - **Parallel Group**: Wave 4 (with Tasks 10, 12)
+  - **Blocks**: Task 13
+  - **Blocked By**: Tasks 7, 8, 9
+
+  **References**:
+  - **User Spec**: /validate request/response format
+  - **Pattern References**: src/validator.py
+
+  **Acceptance Criteria**:
+
+  **TDD - Write Tests First:**
+  ```python
+  def test_validate_all_checks(client):
+      response = client.post("/validate", json={
+          "xml_data": {...},
+          "pdf_text": "...",
+          "checks": ["pflichtfelder", "betraege", "ustid", "pdf_abgleich"]
+      })
+      assert response.status_code == 200
+      data = response.json()
+      assert "is_valid" in data
+      assert "errors" in data
+      assert "summary" in data
+
+  def test_validate_partial_checks(client):
+      response = client.post("/validate", json={
+          "xml_data": {...},
+          "checks": ["pflichtfelder"]
+      })
+      assert response.status_code == 200
+      # Only pflichtfelder check should run
+  ```
+
+  **Agent-Executed QA Scenarios:**
+
+  ```
+  Scenario: Validate endpoint integration test
+    Tool: Bash
+    Steps:
+      1. pytest tests/test_api.py -k test_validate -v
+      2. Assert: All validate tests pass
+    Expected Result: Endpoint works correctly
+    Evidence: pytest output captured
+
+  Scenario: Live validate endpoint with invalid data
+    Tool: Bash
+    Steps:
+      1. uvicorn src.main:app --port 5004 &
+      2. sleep 2
+      3. curl -s -X POST http://localhost:5004/validate \
+           -H "Content-Type: application/json" \
+           -d '{"xml_data": {"invoice_number": null}, "checks": ["pflichtfelder"]}' | jq '.is_valid'
+      4. Assert: Output is false (bare JSON boolean; jq prints it without quotes)
+      5. kill %1
+    Expected Result: Validation detects missing field
+    Evidence: curl output captured
+  ```
+
+  **Commit**: YES
+  - Message: `feat(api): implement /validate endpoint for invoice validation`
+  - Files: `src/main.py`, `tests/test_api.py`
+  - Pre-commit: `pytest tests/test_api.py -k test_validate`
+
+---
+
+- [ ] 12. Error Handling Middleware
+
+  **What to do**:
+  - Implement exception handlers for all error types
+  - Map exceptions to HTTP status codes and error responses
+  - Ensure all errors return JSON format
+  - Add request logging
+
+  **Must NOT do**:
+  - Do NOT expose stack traces in production
+
+  **Recommended Agent Profile**:
+  - **Category**: `quick`
+    - Reason: Standard FastAPI error handling
+  - **Skills**: []
+
+  **Parallelization**:
+  - **Can Run In Parallel**: YES
+  - **Parallel Group**: Wave 4 (with Tasks 10, 11)
+  - **Blocks**: Task 13
+  - **Blocked By**: Task 8
+
+  **References**:
+  - **User Spec**: Error handling table
+
+  **Error Mapping from Spec**:
+  | Error | Status | error_code |
+  |-------|--------|------------|
+  | Invalid JSON | 400 | invalid_json |
+  | Not a PDF | 400 | invalid_pdf |
+  | PDF corrupt | 400 | corrupt_pdf |
+  | Base64 invalid | 400 | invalid_base64 |
+  | File too large | 400 | file_too_large |
+  | Password protected | 400 | password_protected_pdf |
+  | Internal error | 500 | internal_error |
+
+  **Acceptance Criteria**:
+
+  **Agent-Executed QA Scenarios:**
+
+  ```
+  Scenario: Error responses are JSON
+    Tool: Bash
+    Steps:
+      1. uvicorn src.main:app --port 5005 &
+      2. sleep 2
+      3. curl -s -X POST http://localhost:5005/extract \
+           -H "Content-Type: application/json" \
+           -d '{"pdf_base64": "invalid!!!"}' | jq '.error'
+      4. Assert: Output is "invalid_base64"
+      5. kill %1
+    Expected Result: JSON error response
+    Evidence: curl output captured
+  ```
+
+  **Commit**: YES (groups with Task 10, 11)
+  - Message: `feat(api): add comprehensive error handling middleware`
+  - Files: `src/main.py`
+
+---
+
+### Wave 5: Packaging
+
+- [ ] 13. Integration Tests
+
+  **What to do**:
+  - Create end-to-end integration tests
+  - Test full workflow: extract → validate
+  - Test with all sample PDFs
+  - Test error scenarios
+
+  **Must NOT do**:
+  - Do NOT create performance tests
+
+  **Recommended Agent Profile**:
+  - **Category**: `unspecified-high`
+    - Reason: Integration testing requires comprehensive coverage
+  - **Skills**: [`systematic-debugging`]
+
+  **Parallelization**:
+  - **Can Run In Parallel**: NO
+  - **Parallel Group**: Sequential (depends on all endpoints)
+  - **Blocks**: Tasks 14, 16
+  - **Blocked By**: Tasks 10, 11, 12
+
+  **Acceptance Criteria**:
+
+  **Agent-Executed QA Scenarios:**
+
+  ```
+  Scenario: Full integration test suite passes
+    Tool: Bash
+    Steps:
+      1. pytest tests/ -v --tb=short
+      2. Assert: All tests pass (exit code 0)
+    Expected Result: 100% test pass rate
+    Evidence: pytest output captured
+  ```
+
+  **Commit**: YES
+  - Message: `test(integration): add end-to-end integration tests`
+  - Files: `tests/test_integration.py`
+  - Pre-commit: `pytest tests/`
+
+---
+
+- [ ] 14. Dockerfile Creation
+
+  **What to do**:
+  - Create multi-stage Dockerfile as per spec
+  - Build stage: install dependencies
+  - Production stage: slim image with app
+  - Non-root user for security
+  - Expose port 5000
+
+  **Must NOT do**:
+  - Do NOT include dev dependencies in production image
+
+  **Recommended Agent Profile**:
+  - **Category**: `quick`
+    - Reason: Standard Dockerfile from spec template
+  - **Skills**: []
+
+  **Parallelization**:
+  - **Can Run In Parallel**: YES
+  - **Parallel Group**: Wave 5 (with Task 15)
+  - **Blocks**: Task 16
+  - **Blocked By**: Task 13
+
+  **References**:
+  - **User Spec**: Complete Dockerfile template provided
+
+  **Acceptance Criteria**:
+
+  **Agent-Executed QA Scenarios:**
+
+  ```
+  Scenario: Docker build succeeds
+    Tool: Bash
+    Steps:
+      1. docker build -t zugferd-service:test .
+      2. Assert: Exit code 0
+      3. docker images zugferd-service:test --format "{{.Size}}"
+      4. Assert: Size < 500MB
+    Expected Result: Image builds and is reasonably sized
+    Evidence: Build output captured
+
+  Scenario: Container runs and responds
+    Tool: Bash
+    Steps:
+      1. docker run -d --name zugferd-test -p 5006:5000 zugferd-service:test
+      2. sleep 3
+      3. curl -s http://localhost:5006/health | jq '.status'
+      4. Assert: Output is "healthy"
+      5. docker stop zugferd-test && docker rm zugferd-test
+    Expected Result: Container is functional
+    Evidence: curl output captured
+  ```
+
+  **Commit**: YES
+  - Message: `build(docker): add multi-stage Dockerfile for production`
+  - Files: `Dockerfile`
+  - Pre-commit: `docker build -t zugferd-service:test .`
+
+---
+
+- [ ] 15. Docker Compose Configuration
+
+  **What to do**:
+  - Create docker-compose.yml for local development
+  - Include volume mount for live reload
+  - Configure environment variables
+  - Add health check
+
+  **Must NOT do**:
+  - Do NOT add additional services (no DB, no cache)
+
+  **Recommended Agent Profile**:
+  - **Category**: `quick`
+    - Reason: Simple docker-compose setup
+  - **Skills**: []
+
+  **Parallelization**:
+  - **Can Run In Parallel**: YES
+  - **Parallel Group**: Wave 5 (with Task 14)
+  - **Blocks**: None
+  - **Blocked By**: Task 13
+
+  **Acceptance Criteria**:
+
+  **Agent-Executed QA Scenarios:**
+
+  ```
+  Scenario: Docker Compose starts service
+    Tool: Bash
+    Steps:
+      1. docker-compose up -d
+      2. sleep 5
+      3. curl -s http://localhost:5000/health | jq '.status'
+      4. Assert: Output is "healthy"
+      5. docker-compose down
+    Expected Result: Compose setup works
+    Evidence: curl output captured
+  ```
+
+  **Commit**: YES
+  - Message: `build(docker): add docker-compose.yml for local development`
+  - Files: `docker-compose.yml`
+
+---
+
+### Wave 6: Nix Packaging
+
+- [ ] 16. Nix Flake Packaging
+
+  **What to do**:
+  - Create flake.nix with buildPythonApplication
+  - Use pythonRelaxDeps for dependency flexibility
+  - Include devShell for development
+  - Test with nix build and nix run
+
+  **Must NOT do**:
+  - Do NOT create complex overlay structure
+
+  **Recommended Agent Profile**:
+  - **Category**: `unspecified-high`
+    - Reason: Nix packaging requires careful dependency handling
+  - **Skills**: []
+
+  **Parallelization**:
+  - **Can Run In Parallel**: NO
+  - **Parallel Group**: Sequential
+  - **Blocks**: Tasks 17, 18
+  - **Blocked By**: Task 14
+
+  **References**:
+  - **Librarian Research**: mem0 Nix packaging pattern
+  - **External References**: https://github.com/mem0ai/mem0 Nix package
+
+  **flake.nix Structure**:
+  ```nix
+  {
+    description = "ZUGFeRD REST API Service";
+
+    inputs = {
+      nixpkgs.url = "github:NixOS/nixpkgs/nixos-unstable";
+      flake-utils.url = "github:numtide/flake-utils";
+    };
+
+    outputs = { self, nixpkgs, flake-utils }:
+      flake-utils.lib.eachDefaultSystem (system:
+        let
+          pkgs = nixpkgs.legacyPackages.${system};
+          pythonPackages = pkgs.python311Packages;
+
+          zugferd-service = pythonPackages.buildPythonApplication {
+            pname = "zugferd-service";
+            version = "1.0.0";
+            pyproject = true;
+            src = ./.;
+
+            pythonRelaxDeps = true;
+
+            build-system = [ pythonPackages.hatchling ];
+
+            dependencies = with pythonPackages; [
+              fastapi
+              uvicorn
+              pydantic
+              python-multipart
+              # factur-x - may need packaging
+              pypdf
+              lxml
+            ];
+
+            nativeCheckInputs = with pythonPackages; [
+              pytestCheckHook
+              pytest-asyncio
+              httpx
+            ];
+
+            # mainProgram must live in meta: `nix run` and lib.getExe read
+            # meta.mainProgram, not passthru.mainProgram.
+
+            meta = {
+              description = "REST API for ZUGFeRD invoice extraction";
+              license = pkgs.lib.licenses.mit;
+              mainProgram = "zugferd-service";
+            };
+          };
+        in
+        {
+          packages.default = zugferd-service;
+          packages.zugferd-service = zugferd-service;
+
+          devShells.default = pkgs.mkShell {
+            packages = [
+              (pkgs.python311.withPackages (ps: with ps; [
+                fastapi uvicorn pydantic pypdf lxml
+                pytest pytest-asyncio httpx
+              ]))
+            ];
+          };
+        }
+      );
+  }
+  ```
+
+  **Acceptance Criteria**:
+
+  **Agent-Executed QA Scenarios:**
+
+  ```
+  Scenario: Nix flake builds successfully
+    Tool: Bash
+    Steps:
+      1. nix build .#zugferd-service
+      2. Assert: Exit code 0
+      3. ls -la result/bin/
+      4. Assert: zugferd-service binary exists
+    Expected Result: Nix package builds
+    Evidence: Build output captured
+
+  Scenario: Nix package runs correctly
+    Tool: Bash
+    Steps:
+      1. nix run .#zugferd-service &
+      2. sleep 3
+      3. curl -s http://localhost:5000/health | jq '.status'
+      4. Assert: Output is "healthy"
+      5. kill %1
+    Expected Result: Nix-built service runs
+    Evidence: curl output captured
+
+  Scenario: Dev shell provides dependencies
+    Tool: Bash
+    Steps:
+      1. nix develop -c python -c "import fastapi; import pypdf; print('OK')"
+      2. Assert: Output is "OK"
+    Expected Result: Dev shell has all deps
+    Evidence: Command output captured
+  ```
+
+  **Commit**: YES
+  - Message: `build(nix): add flake.nix for Nix packaging`
+  - Files: `flake.nix`
+  - Pre-commit: `nix flake check`
+
+---
+
+- [ ] 17. NixOS Service Module Example
+
+  **What to do**:
+  - Create example NixOS module for deployment
+  - Include service configuration options
+  - Add systemd service definition
+  - Document usage in README
+
+  **Must NOT do**:
+  - Do NOT create production-ready module (example only)
+
+  **Recommended Agent Profile**:
+  - **Category**: `quick`
+    - Reason: Example module following standard patterns
+  - **Skills**: []
+
+  **Parallelization**:
+  - **Can Run In Parallel**: NO
+  - **Parallel Group**: Sequential
+  - **Blocks**: Task 18
+  - **Blocked By**: Task 16
+
+  **References**:
+  - **User Spec**: NixOS container configuration example
+  - **Librarian Research**: NixOS service module pattern
+
+  **Acceptance Criteria**:
+
+  **Agent-Executed QA Scenarios:**
+
+  ```
+  Scenario: NixOS module syntax is valid
+    Tool: Bash
+    Steps:
+      1. nix-instantiate --eval -E "import ./nix/module.nix"
+      2. Assert: Exit code 0 or specific Nix evaluation output
+    Expected Result: Module parses correctly
+    Evidence: Nix output captured
+  ```
+
+  **Commit**: YES
+  - Message: `docs(nix): add example NixOS service module`
+  - Files: `nix/module.nix`
+
+---
+
+- [ ] 18. README Documentation
+
+  **What to do**:
+  - Create comprehensive README.md
+  - Include: Overview, Installation, Usage, API Reference
+  - Add examples for Docker, Nix, and direct Python usage
+  - Document all endpoints with curl examples
+  - Include troubleshooting section
+
+  **Must NOT do**:
+  - Do NOT duplicate API spec (reference it)
+
+  **Recommended Agent Profile**:
+  - **Category**: `writing`
+    - Reason: Documentation writing
+  - **Skills**: []
+
+  **Parallelization**:
+  - **Can Run In Parallel**: NO
+  - **Parallel Group**: Sequential (last task)
+  - **Blocks**: None
+  - **Blocked By**: Tasks 16, 17
+
+  **README Structure**:
+  ```markdown
+  # ZUGFeRD-Service
+
+  ## Overview
+  ## Quick Start
+  ### Docker
+  ### Nix
+  ### Python (Development)
+  ## API Reference
+  ### GET /health
+  ### POST /extract
+  ### POST /validate
+  ## Configuration
+  ## NixOS Deployment
+  ## Development
+  ## Troubleshooting
+  ## License
+  ```
+
+  **Acceptance Criteria**:
+
+  **Agent-Executed QA Scenarios:**
+
+  ```
+  Scenario: README contains all required sections
+    Tool: Bash
+    Steps:
+      1. grep -c "^## " README.md
+      2. Assert: At least 8 sections
+      3. grep "curl" README.md | wc -l
+      4. Assert: At least 3 curl examples
+    Expected Result: Comprehensive documentation
+    Evidence: grep output captured
+  ```
+
+  **Commit**: YES
+  - Message: `docs: add comprehensive README with installation and usage guide`
+  - Files: `README.md`
+
+---
+
+## Commit Strategy
+
+| After Task | Message | Files | Verification |
+|------------|---------|-------|--------------|
+| 1 | `feat(project): initialize ZUGFeRD service` | pyproject.toml, src/, tests/ | toml parse |
+| 2 | `test(fixtures): add ZUGFeRD sample PDFs` | tests/fixtures/ | file exists |
+| 3 | `feat(models): add Pydantic models` | src/models.py | pytest |
+| 4 | `feat(extractor): implement extraction` | src/extractor.py | pytest |
+| 5 | `feat(pdf-parser): implement PDF parsing` | src/pdf_parser.py | pytest |
+| 6 | `feat(utils): add utilities` | src/utils.py | pytest |
+| 7 | `feat(validator): implement validation` | src/validator.py | pytest |
+| 8-9 | `feat(api): add FastAPI app + health` | src/main.py | curl |
+| 10 | `feat(api): add /extract endpoint` | src/main.py | pytest |
+| 11 | `feat(api): add /validate endpoint` | src/main.py | pytest |
+| 12 | `feat(api): add error handling` | src/main.py | curl |
+| 13 | `test(integration): add e2e tests` | tests/ | pytest |
+| 14 | `build(docker): add Dockerfile` | Dockerfile | docker build |
+| 15 | `build(docker): add docker-compose` | docker-compose.yml | compose up |
+| 16 | `build(nix): add flake.nix` | flake.nix | nix build |
+| 17 | `docs(nix): add NixOS module` | nix/module.nix | nix eval |
+| 18 | `docs: add README` | README.md | grep check |
+
+---
+
+## Success Criteria
+
+### Verification Commands
+```bash
+# All tests pass
+pytest tests/ -v
+# Expected: All tests pass (exit code 0)
+
+# Docker builds and runs
+docker build -t zugferd-service .
+docker run -d --rm --name zugferd-verify -p 5000:5000 zugferd-service && sleep 3
+curl http://localhost:5000/health; docker stop zugferd-verify
+# Expected: {"status": "healthy", "version": "1.0.0"}
+
+# Nix builds and runs
+nix build .#zugferd-service
+./result/bin/zugferd-service & sleep 3
+curl http://localhost:5000/health
+# Expected: {"status": "healthy", "version": "1.0.0"}
+
+# Extract endpoint works
+PDF_BASE64=$(base64 -w 0 tests/fixtures/sample_en16931.pdf)
+curl -X POST http://localhost:5000/extract \
+  -H "Content-Type: application/json" \
+  -d "{\"pdf_base64\": \"$PDF_BASE64\"}" | jq '.is_zugferd'
+# Expected: true
+```
+
+### Final Checklist
+- [ ] All 18 tasks completed
+- [ ] All tests pass (pytest)
+- [ ] Docker image builds (<500MB)
+- [ ] Docker container runs and responds
+- [ ] Nix flake builds without errors
+- [ ] Nix package runs and responds
+- [ ] All endpoints return expected responses
+- [ ] README documents all features
+- [ ] No "Must NOT Have" items present
diff --git a/pyproject.toml b/pyproject.toml
new file mode 100644
index 0000000..52ee3f2
--- /dev/null
+++ b/pyproject.toml
@@ -0,0 +1,35 @@
+[build-system]
+requires = ["hatchling"]
+build-backend = "hatchling.build"
+
+[project]
+name = "zugferd-service"
+version = "1.0.0"
+description = "REST API for ZUGFeRD invoice extraction and validation"
+requires-python = ">=3.11"
+dependencies = [
+    "fastapi>=0.109.0",
+    "uvicorn>=0.27.0",
+    "python-multipart>=0.0.6",
+    "factur-x>=2.5",
+    "pypdf>=4.0.0",
+    "pydantic>=2.5.0",
+    "lxml>=5.0.0",
+]
+
+[project.optional-dependencies]
+dev = [
+    "pytest>=8.0.0",
+    "pytest-asyncio>=0.23.0",
+    "httpx>=0.27.0",
+]
+
+[project.scripts]
+zugferd-service = "src.main:run"
+
+[tool.hatch.build.targets.wheel]
+packages = ["src"]
+
+[tool.pytest.ini_options]
+asyncio_mode = "auto"
+testpaths = ["tests"]
diff --git a/src/__init__.py b/src/__init__.py
new file mode 100644
index 0000000..1dc5bc7
--- /dev/null
+++ b/src/__init__.py
@@ -0,0 +1,3 @@
+"""ZUGFeRD Service Package."""
+
+__version__ = "1.0.0"  # also hard-coded in pyproject.toml and src/main.py; keep in sync
diff --git a/src/extractor.py b/src/extractor.py
new file mode 100644
index 0000000..3add2d2
--- /dev/null
+++ b/src/extractor.py
@@ -0,0 +1,3 @@
+"""ZUGFeRD extraction module (placeholder; no implementation yet)."""
+
+pass
diff --git a/src/main.py b/src/main.py
new file mode 100644
index 0000000..c582506
--- /dev/null
+++ b/src/main.py
@@ -0,0 +1,20 @@
+"""FastAPI application for ZUGFeRD invoice processing."""
+
+import uvicorn
+from fastapi import FastAPI
+
+app = FastAPI(
+    title="ZUGFeRD Service",
+    version="1.0.0",  # duplicates src.__version__ and pyproject.toml; keep in sync
+    description="REST API for ZUGFeRD invoice extraction and validation",
+)
+
+
+def run(host: str = "0.0.0.0", port: int = 5000) -> None:
+    """Serve the module-level ``app`` with uvicorn (``zugferd-service`` script target).
+
+    Args:
+        host: Address to bind to; the default "0.0.0.0" accepts connections on all interfaces.
+        port: TCP port to listen on.
+    """
+    uvicorn.run(app, host=host, port=port)
diff --git a/src/models.py b/src/models.py
new file mode 100644
index 0000000..1d2ac3b
--- /dev/null
+++ b/src/models.py
@@ -0,0 +1,3 @@
+"""Pydantic models for ZUGFeRD service (placeholder; no models defined yet)."""
+
+pass
diff --git a/src/pdf_parser.py b/src/pdf_parser.py
new file mode 100644
index 0000000..a31bccb
--- /dev/null
+++ b/src/pdf_parser.py
@@ -0,0 +1,3 @@
+"""PDF text parsing module (placeholder; no implementation yet)."""
+
+pass
diff --git a/src/utils.py b/src/utils.py
new file mode 100644
index 0000000..fd307a9
--- /dev/null
+++ b/src/utils.py
@@ -0,0 +1,3 @@
+"""Utility functions for ZUGFeRD service (placeholder; none defined yet)."""
+
+pass
diff --git a/src/validator.py b/src/validator.py
new file mode 100644
index 0000000..c43fac1
--- /dev/null
+++ b/src/validator.py
@@ -0,0 +1,3 @@
+"""Validation module for ZUGFeRD invoices (placeholder; no implementation yet)."""
+
+pass
diff --git a/tests/__init__.py b/tests/__init__.py
new file mode 100644
index 0000000..b49798d
--- /dev/null
+++ b/tests/__init__.py
@@ -0,0 +1 @@
+"""Test suite for ZUGFeRD service."""
diff --git a/tests/conftest.py b/tests/conftest.py
new file mode 100644
index 0000000..ebde1d8
--- /dev/null
+++ b/tests/conftest.py
@@ -0,0 +1,9 @@
+"""Pytest configuration and shared fixtures."""
+
+import pytest
+
+
+@pytest.fixture
+def sample_pdf_bytes() -> bytes:
+    """Return placeholder bytes with a PDF header and EOF marker; not a real document."""
+    return b"%PDF-1.4\n%fake pdf content for testing\n%%EOF"
diff --git a/tests/fixtures/.gitkeep b/tests/fixtures/.gitkeep
new file mode 100644
index 0000000..e69de29