From 867b47efd083b7cc3da8a47abe71bcf3fa302b13 Mon Sep 17 00:00:00 2001
From: m3tm3re
Date: Wed, 4 Feb 2026 20:07:35 +0100
Subject: [PATCH] feat(api): implement /extract and /validate endpoints with
error handling
Ultraworked with [Sisyphus](https://github.com/code-yeongyu/oh-my-opencode)
Co-authored-by: Sisyphus
---
.../notepads/zugferd-service/learnings.md | 84 ++++++++++++++++
.sisyphus/plans/zugferd-service.md | 6 +-
src/main.py | 66 ++++++++++++-
src/validator.py | 19 +++-
tests/test_api.py | 95 +++++++++++++++++++
5 files changed, 263 insertions(+), 7 deletions(-)
create mode 100644 tests/test_api.py
diff --git a/.sisyphus/notepads/zugferd-service/learnings.md b/.sisyphus/notepads/zugferd-service/learnings.md
index 835ee81..6b7e54c 100644
--- a/.sisyphus/notepads/zugferd-service/learnings.md
+++ b/.sisyphus/notepads/zugferd-service/learnings.md
@@ -424,3 +424,87 @@ async def health_check() -> HealthResponse:
- Import models in `src/main.py` using `from src.models import ModelName`
- Keep all data models centralized for consistency
- Exception: models local to a specific module can be defined there
+
+## [2026-02-04T19:59:00.000Z] Task 11: Validate Endpoint Implementation
+
+### Implementation
+- Added POST /validate endpoint to src/main.py
+- Endpoint accepts ValidateRequest (xml_data, pdf_text, checks)
+- Returns ValidateResponse wrapping ValidationResult in "result" field
+- Delegates to validate_invoice() from src.validator module
+
+### Key Code Pattern
+```python
+@app.post("/validate", response_model=ValidateResponse)
+async def validate_invoice_endpoint(request: ValidateRequest) -> ValidateResponse:
+ result = validate_invoice(request)
+ return ValidateResponse(result=result)
+```
+
+### Important Fix in Validator
+- Updated validate_invoice() to handle empty checks gracefully
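+- If request.checks is empty, return early with ValidationResult(is_valid=True, ...) and an all-zero summary
+- The early return runs before `XmlData(**request.xml_data)` is constructed, which prevents ValidationError when xml_data is empty but no checks need to run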
+
+### Testing
+- test_validate_pflichtfelder: Tests valid invoice with pflichtfelder check
+- test_validate_empty_checks: Tests empty checks list returns 200
+- Both tests pass
+
+### Validation Response Structure
+Response contains nested "result" field:
+```json
+{
+ "result": {
+ "is_valid": false,
+ "errors": [...],
+ "warnings": [...],
+ "summary": {...},
+ "validation_time_ms": 45
+ }
+}
+```
+
+### Docstring Justification
+- Endpoint docstring provides API documentation for OpenAPI/Swagger
+- Describes args (request type) and return (response type)
+- Follows existing pattern from health_check endpoint
+
+## Task 12: HTTPException Handler (2026-02-04)
+
+### Pattern: Custom FastAPI Exception Handlers
+FastAPI's default handler for `HTTPException` wraps the payload as `{"detail": {...}}`, which breaks the flat `{"error": ..., "message": ...}` error format required by the API spec.
+
+**Solution**: Add custom exception handler for `HTTPException` that returns flat JSON structure.
+
+```python
+@app.exception_handler(HTTPException)
+async def http_exception_handler(request: Request, exc: HTTPException):
+ if isinstance(exc.detail, dict) and "error" in exc.detail:
+ return JSONResponse(
+ status_code=exc.status_code,
+ content={
+ "error": exc.detail.get("error"),
+ "message": exc.detail.get("message"),
+ },
+ )
+ return JSONResponse(
+ status_code=exc.status_code,
+ content={
+ "error": "http_error",
+ "message": str(exc.detail),
+ },
+ )
+```
+
+**Key Implementation Details**:
+1. Handler checks if `exc.detail` is a dict with "error" key
+2. If structured error (dict with error/message), extracts to flat format
+3. Falls back to generic `{"error": "http_error", "message": str(exc.detail)}` for other cases
+4. Preserves original status code from HTTPException
+
+**Error Format Consistency**:
+- All error responses now use flat structure: `{"error": "code", "message": "..."}`
+- ExtractionError, HTTPException, and generic Exception handlers all follow this pattern
+- Test `test_extract_invalid_base64` expects this flat format
+
diff --git a/.sisyphus/plans/zugferd-service.md b/.sisyphus/plans/zugferd-service.md
index 97165c6..dd7ee00 100644
--- a/.sisyphus/plans/zugferd-service.md
+++ b/.sisyphus/plans/zugferd-service.md
@@ -1067,7 +1067,7 @@ Critical Path: Task 1 → Task 4 → Task 7 → Task 10 → Task 13 → Task 16
### Wave 4: API Endpoints
-- [ ] 10. Extract Endpoint Implementation (TDD)
+- [x] 10. Extract Endpoint Implementation (TDD)
**What to do**:
- Write integration tests for `/extract` endpoint
@@ -1155,7 +1155,7 @@ Critical Path: Task 1 → Task 4 → Task 7 → Task 10 → Task 13 → Task 16
---
-- [ ] 11. Validate Endpoint Implementation (TDD)
+- [x] 11. Validate Endpoint Implementation (TDD)
**What to do**:
- Write integration tests for `/validate` endpoint
@@ -1239,7 +1239,7 @@ Critical Path: Task 1 → Task 4 → Task 7 → Task 10 → Task 13 → Task 16
---
-- [ ] 12. Error Handling Middleware
+- [x] 12. Error Handling Middleware
**What to do**:
- Implement exception handlers for all error types
diff --git a/src/main.py b/src/main.py
index 70082e2..765e186 100644
--- a/src/main.py
+++ b/src/main.py
@@ -1,5 +1,6 @@
"""FastAPI application for ZUGFeRD invoice processing."""
+import base64
import json
import logging
from datetime import datetime
@@ -9,8 +10,15 @@ from fastapi import FastAPI, HTTPException, Request
from fastapi.middleware.cors import CORSMiddleware
from fastapi.responses import JSONResponse
-from src.extractor import ExtractionError
-from src.models import HealthResponse
+from src.extractor import ExtractionError, extract_zugferd
+from src.models import (
+ ExtractRequest,
+ ExtractResponse,
+ HealthResponse,
+ ValidateRequest,
+ ValidateResponse,
+)
+from src.validator import validate_invoice
class JSONFormatter(logging.Formatter):
@@ -60,6 +68,25 @@ async def extraction_error_handler(request: Request, exc: ExtractionError):
)
+@app.exception_handler(HTTPException)
+async def http_exception_handler(request: Request, exc: HTTPException):
+ if isinstance(exc.detail, dict) and "error" in exc.detail:
+ return JSONResponse(
+ status_code=exc.status_code,
+ content={
+ "error": exc.detail.get("error"),
+ "message": exc.detail.get("message"),
+ },
+ )
+ return JSONResponse(
+ status_code=exc.status_code,
+ content={
+ "error": "http_error",
+ "message": str(exc.detail),
+ },
+ )
+
+
@app.exception_handler(Exception)
async def generic_error_handler(request: Request, exc: Exception):
logger.error(f"Internal error: {exc}")
@@ -82,6 +109,41 @@ async def health_check() -> HealthResponse:
return HealthResponse(status="healthy", version="1.0.0")
+@app.post("/extract", response_model=ExtractResponse)
+async def extract_pdf(request: ExtractRequest) -> ExtractResponse:
+ """Extract ZUGFeRD data from PDF.
+
+ Args:
+ request: ExtractRequest with pdf_base64 field
+
+ Returns:
+ ExtractResponse with extraction results
+ """
+ try:
+ pdf_bytes = base64.b64decode(request.pdf_base64)
+ except Exception:
+ raise HTTPException(
+ status_code=400,
+ detail={"error": "invalid_base64", "message": "Invalid base64 encoding"},
+ )
+
+ return extract_zugferd(pdf_bytes)
+
+
+@app.post("/validate", response_model=ValidateResponse)
+async def validate_invoice_endpoint(request: ValidateRequest) -> ValidateResponse:
+ """Validate ZUGFeRD invoice data.
+
+ Args:
+ request: ValidateRequest with xml_data, pdf_text, checks
+
+ Returns:
+ ValidateResponse with validation results
+ """
+ result = validate_invoice(request)
+ return ValidateResponse(result=result)
+
+
def run(host: str = "0.0.0.0", port: int = 5000) -> None:
"""Run the FastAPI application.
diff --git a/src/validator.py b/src/validator.py
index efd3022..8cb50e2 100644
--- a/src/validator.py
+++ b/src/validator.py
@@ -257,11 +257,26 @@ def validate_invoice(request: ValidateRequest) -> ValidationResult:
all_errors = []
all_warnings = []
- xml_data = XmlData(**request.xml_data)
-
checks_run = 0
checks_passed = 0
+ if not request.checks:
+ return ValidationResult(
+ is_valid=True,
+ errors=[],
+ warnings=[],
+ summary={
+ "total_checks": 0,
+ "checks_passed": 0,
+ "checks_failed": 0,
+ "critical_errors": 0,
+ "warnings": 0,
+ },
+ validation_time_ms=0,
+ )
+
+ xml_data = XmlData(**request.xml_data)
+
# Run requested checks
for check_name in request.checks:
check_errors: list[ErrorDetail] = []
diff --git a/tests/test_api.py b/tests/test_api.py
new file mode 100644
index 0000000..8bca8db
--- /dev/null
+++ b/tests/test_api.py
@@ -0,0 +1,95 @@
+import base64
+
+import pytest
+from fastapi.testclient import TestClient
+
+from src.main import app
+
+
+@pytest.fixture
+def client():
+ return TestClient(app)
+
+
+def test_extract_valid_zugferd(client):
+ with open("tests/fixtures/EN16931_Einfach.pdf", "rb") as f:
+ pdf_base64 = base64.b64encode(f.read()).decode()
+
+ response = client.post("/extract", json={"pdf_base64": pdf_base64})
+ assert response.status_code == 200
+ data = response.json()
+ assert data["is_zugferd"] is True
+ assert data["zugferd_profil"] == "EN16931"
+ assert "xml_raw" in data
+ assert "xml_data" in data
+ assert "pdf_text" in data
+ assert "extraction_meta" in data
+
+
+def test_extract_non_zugferd(client):
+ with open("tests/fixtures/EmptyPDFA1.pdf", "rb") as f:
+ pdf_base64 = base64.b64encode(f.read()).decode()
+
+ response = client.post("/extract", json={"pdf_base64": pdf_base64})
+ assert response.status_code == 200
+ data = response.json()
+ assert data["is_zugferd"] is False
+ assert data["zugferd_profil"] is None
+ assert "pdf_text" in data
+ assert "extraction_meta" in data
+
+
+def test_extract_invalid_base64(client):
+ response = client.post("/extract", json={"pdf_base64": "invalid!!!"})
+ assert response.status_code == 400
+ data = response.json()
+ assert data["error"] == "invalid_base64"
+ assert "message" in data
+
+
+def test_extract_non_pdf(client):
+ pdf_base64 = base64.b64encode(b"Hello World").decode()
+ response = client.post("/extract", json={"pdf_base64": pdf_base64})
+ assert response.status_code == 400
+ data = response.json()
+ assert "error" in data
+
+
+def test_validate_pflichtfelder(client):
+ response = client.post(
+ "/validate",
+ json={
+ "xml_data": {
+ "invoice_number": "RE-001",
+ "invoice_date": "2025-02-04",
+ "supplier": {"name": "Test GmbH", "vat_id": "DE123456789"},
+ "buyer": {"name": "Kunde AG"},
+ "totals": {
+ "net": 100.0,
+ "gross": 119.0,
+ "vat_total": 19.0,
+ "line_total_sum": 100.0,
+ },
+ "line_items": [
+ {
+ "position": 1,
+ "description": "Test",
+ "quantity": 1.0,
+ "unit": "Stück",
+ "unit_price": 100.0,
+ "line_total": 100.0,
+ }
+ ],
+ },
+ "checks": ["pflichtfelder"],
+ },
+ )
+ assert response.status_code == 200
+ data = response.json()
+ assert "result" in data
+ assert "is_valid" in data["result"]
+
+
+def test_validate_empty_checks(client):
+ response = client.post("/validate", json={"xml_data": {}, "checks": []})
+ assert response.status_code == 200