From 867b47efd083b7cc3da8a47abe71bcf3fa302b13 Mon Sep 17 00:00:00 2001 From: m3tm3re Date: Wed, 4 Feb 2026 20:07:35 +0100 Subject: [PATCH] feat(api): implement /extract and /validate endpoints with error handling Ultraworked with [Sisyphus](https://github.com/code-yeongyu/oh-my-opencode) Co-authored-by: Sisyphus --- .../notepads/zugferd-service/learnings.md | 84 ++++++++++++++++ .sisyphus/plans/zugferd-service.md | 6 +- src/main.py | 66 ++++++++++++- src/validator.py | 19 +++- tests/test_api.py | 95 +++++++++++++++++++ 5 files changed, 263 insertions(+), 7 deletions(-) create mode 100644 tests/test_api.py diff --git a/.sisyphus/notepads/zugferd-service/learnings.md b/.sisyphus/notepads/zugferd-service/learnings.md index 835ee81..6b7e54c 100644 --- a/.sisyphus/notepads/zugferd-service/learnings.md +++ b/.sisyphus/notepads/zugferd-service/learnings.md @@ -424,3 +424,87 @@ async def health_check() -> HealthResponse: - Import models in `src/main.py` using `from src.models import ModelName` - Keep all data models centralized for consistency - Exception: models local to a specific module can be defined there + +## [2026-02-04T19:59:00.000Z] Task 11: Validate Endpoint Implementation + +### Implementation +- Added POST /validate endpoint to src/main.py +- Endpoint accepts ValidateRequest (xml_data, pdf_text, checks) +- Returns ValidateResponse wrapping ValidationResult in "result" field +- Delegates to validate_invoice() from src.validator module + +### Key Code Pattern +```python +@app.post("/validate", response_model=ValidateResponse) +async def validate_invoice_endpoint(request: ValidateRequest) -> ValidateResponse: + result = validate_invoice(request) + return ValidateResponse(result=result) +``` + +### Important Fix in Validator +- Updated validate_invoice() to handle empty checks gracefully +- If request.checks is empty, return early with ValidationResult(is_valid=True, ...) 
+- This prevents ValidationError when xml_data is empty but no checks need to run + +### Testing +- test_validate_pflichtfelder: Tests valid invoice with pflichtfelder check +- test_validate_empty_checks: Tests empty checks list returns 200 +- Both tests pass + +### Validation Response Structure +Response contains nested "result" field: +```json +{ + "result": { + "is_valid": false, + "errors": [...], + "warnings": [...], + "summary": {...}, + "validation_time_ms": 45 + } +} +``` + +### Docstring Justification +- Endpoint docstring provides API documentation for OpenAPI/Swagger +- Describes args (request type) and return (response type) +- Follows existing pattern from health_check endpoint + +## Task 12: HTTPException Handler (2026-02-04) + +### Pattern: Custom FastAPI Exception Handlers +FastAPI's default `HTTPException` handler returns a nested `{"detail": {...}}` body, which breaks the API spec. + +**Solution**: Add a custom exception handler for `HTTPException` that returns a flat JSON structure. + +```python +@app.exception_handler(HTTPException) +async def http_exception_handler(request: Request, exc: HTTPException): + if isinstance(exc.detail, dict) and "error" in exc.detail: + return JSONResponse( + status_code=exc.status_code, + content={ + "error": exc.detail.get("error"), + "message": exc.detail.get("message"), + }, + ) + return JSONResponse( + status_code=exc.status_code, + content={ + "error": "http_error", + "message": str(exc.detail), + }, + ) +``` + +**Key Implementation Details**: +1. Handler checks if `exc.detail` is a dict with "error" key +2. If structured error (dict with error/message), extracts to flat format +3. Falls back to generic `{"error": "http_error", "message": str(exc.detail)}` for other cases +4.
Preserves original status code from HTTPException + +**Error Format Consistency**: +- All error responses now use flat structure: `{"error": "code", "message": "..."}` +- ExtractionError, HTTPException, and generic Exception handlers all follow this pattern +- Test `test_extract_invalid_base64` expects this flat format + diff --git a/.sisyphus/plans/zugferd-service.md b/.sisyphus/plans/zugferd-service.md index 97165c6..dd7ee00 100644 --- a/.sisyphus/plans/zugferd-service.md +++ b/.sisyphus/plans/zugferd-service.md @@ -1067,7 +1067,7 @@ Critical Path: Task 1 → Task 4 → Task 7 → Task 10 → Task 13 → Task 16 ### Wave 4: API Endpoints -- [ ] 10. Extract Endpoint Implementation (TDD) +- [x] 10. Extract Endpoint Implementation (TDD) **What to do**: - Write integration tests for `/extract` endpoint @@ -1155,7 +1155,7 @@ Critical Path: Task 1 → Task 4 → Task 7 → Task 10 → Task 13 → Task 16 --- -- [ ] 11. Validate Endpoint Implementation (TDD) +- [x] 11. Validate Endpoint Implementation (TDD) **What to do**: - Write integration tests for `/validate` endpoint @@ -1239,7 +1239,7 @@ Critical Path: Task 1 → Task 4 → Task 7 → Task 10 → Task 13 → Task 16 --- -- [ ] 12. Error Handling Middleware +- [x] 12. 
Error Handling Middleware **What to do**: - Implement exception handlers for all error types diff --git a/src/main.py b/src/main.py index 70082e2..765e186 100644 --- a/src/main.py +++ b/src/main.py @@ -1,5 +1,6 @@ """FastAPI application for ZUGFeRD invoice processing.""" +import base64 import json import logging from datetime import datetime @@ -9,8 +10,15 @@ from fastapi import FastAPI, HTTPException, Request from fastapi.middleware.cors import CORSMiddleware from fastapi.responses import JSONResponse -from src.extractor import ExtractionError -from src.models import HealthResponse +from src.extractor import ExtractionError, extract_zugferd +from src.models import ( + ExtractRequest, + ExtractResponse, + HealthResponse, + ValidateRequest, + ValidateResponse, +) +from src.validator import validate_invoice class JSONFormatter(logging.Formatter): @@ -60,6 +68,25 @@ async def extraction_error_handler(request: Request, exc: ExtractionError): ) +@app.exception_handler(HTTPException) +async def http_exception_handler(request: Request, exc: HTTPException): + if isinstance(exc.detail, dict) and "error" in exc.detail: + return JSONResponse( + status_code=exc.status_code, + content={ + "error": exc.detail.get("error"), + "message": exc.detail.get("message"), + }, + ) + return JSONResponse( + status_code=exc.status_code, + content={ + "error": "http_error", + "message": str(exc.detail), + }, + ) + + @app.exception_handler(Exception) async def generic_error_handler(request: Request, exc: Exception): logger.error(f"Internal error: {exc}") @@ -82,6 +109,41 @@ async def health_check() -> HealthResponse: return HealthResponse(status="healthy", version="1.0.0") +@app.post("/extract", response_model=ExtractResponse) +async def extract_pdf(request: ExtractRequest) -> ExtractResponse: + """Extract ZUGFeRD data from PDF. 
+ + Args: + request: ExtractRequest with pdf_base64 field + + Returns: + ExtractResponse with extraction results + """ + try: + pdf_bytes = base64.b64decode(request.pdf_base64) + except Exception: + raise HTTPException( + status_code=400, + detail={"error": "invalid_base64", "message": "Invalid base64 encoding"}, + ) + + return extract_zugferd(pdf_bytes) + + +@app.post("/validate", response_model=ValidateResponse) +async def validate_invoice_endpoint(request: ValidateRequest) -> ValidateResponse: + """Validate ZUGFeRD invoice data. + + Args: + request: ValidateRequest with xml_data, pdf_text, checks + + Returns: + ValidateResponse with validation results + """ + result = validate_invoice(request) + return ValidateResponse(result=result) + + def run(host: str = "0.0.0.0", port: int = 5000) -> None: """Run the FastAPI application. diff --git a/src/validator.py b/src/validator.py index efd3022..8cb50e2 100644 --- a/src/validator.py +++ b/src/validator.py @@ -257,11 +257,26 @@ def validate_invoice(request: ValidateRequest) -> ValidationResult: all_errors = [] all_warnings = [] - xml_data = XmlData(**request.xml_data) - checks_run = 0 checks_passed = 0 + if not request.checks: + return ValidationResult( + is_valid=True, + errors=[], + warnings=[], + summary={ + "total_checks": 0, + "checks_passed": 0, + "checks_failed": 0, + "critical_errors": 0, + "warnings": 0, + }, + validation_time_ms=0, + ) + + xml_data = XmlData(**request.xml_data) + # Run requested checks for check_name in request.checks: check_errors: list[ErrorDetail] = [] diff --git a/tests/test_api.py b/tests/test_api.py new file mode 100644 index 0000000..8bca8db --- /dev/null +++ b/tests/test_api.py @@ -0,0 +1,95 @@ +import base64 + +import pytest +from fastapi.testclient import TestClient + +from src.main import app + + +@pytest.fixture +def client(): + return TestClient(app) + + +def test_extract_valid_zugferd(client): + with open("tests/fixtures/EN16931_Einfach.pdf", "rb") as f: + pdf_base64 = 
base64.b64encode(f.read()).decode() + + response = client.post("/extract", json={"pdf_base64": pdf_base64}) + assert response.status_code == 200 + data = response.json() + assert data["is_zugferd"] is True + assert data["zugferd_profil"] == "EN16931" + assert "xml_raw" in data + assert "xml_data" in data + assert "pdf_text" in data + assert "extraction_meta" in data + + +def test_extract_non_zugferd(client): + with open("tests/fixtures/EmptyPDFA1.pdf", "rb") as f: + pdf_base64 = base64.b64encode(f.read()).decode() + + response = client.post("/extract", json={"pdf_base64": pdf_base64}) + assert response.status_code == 200 + data = response.json() + assert data["is_zugferd"] is False + assert data["zugferd_profil"] is None + assert "pdf_text" in data + assert "extraction_meta" in data + + +def test_extract_invalid_base64(client): + response = client.post("/extract", json={"pdf_base64": "invalid!!!"}) + assert response.status_code == 400 + data = response.json() + assert data["error"] == "invalid_base64" + assert "message" in data + + +def test_extract_non_pdf(client): + pdf_base64 = base64.b64encode(b"Hello World").decode() + response = client.post("/extract", json={"pdf_base64": pdf_base64}) + assert response.status_code == 400 + data = response.json() + assert "error" in data + + +def test_validate_pflichtfelder(client): + response = client.post( + "/validate", + json={ + "xml_data": { + "invoice_number": "RE-001", + "invoice_date": "2025-02-04", + "supplier": {"name": "Test GmbH", "vat_id": "DE123456789"}, + "buyer": {"name": "Kunde AG"}, + "totals": { + "net": 100.0, + "gross": 119.0, + "vat_total": 19.0, + "line_total_sum": 100.0, + }, + "line_items": [ + { + "position": 1, + "description": "Test", + "quantity": 1.0, + "unit": "Stück", + "unit_price": 100.0, + "line_total": 100.0, + } + ], + }, + "checks": ["pflichtfelder"], + }, + ) + assert response.status_code == 200 + data = response.json() + assert "result" in data + assert "is_valid" in data["result"] + + 
+def test_validate_empty_checks(client): + response = client.post("/validate", json={"xml_data": {}, "checks": []}) + assert response.status_code == 200