diff --git a/.sisyphus/notepads/zugferd-service/learnings.md b/.sisyphus/notepads/zugferd-service/learnings.md index 24331e0..d4d1ef2 100644 --- a/.sisyphus/notepads/zugferd-service/learnings.md +++ b/.sisyphus/notepads/zugferd-service/learnings.md @@ -42,3 +42,77 @@ Initial session for ZUGFeRD-Service implementation. - Module-level docstrings: minimal, one line, describe purpose - Entry point function docstrings: Args/Returns style for CLI documentation - Both necessary for scaffolding clarity + +## [2026-02-04T19:23:00.000Z] Task 2: Download ZUGFeRD Sample PDFs + +### Sample PDF Sources +- **Best source**: Mustang project (https://github.com/ZUGFeRD/mustangproject) + - Contains 20+ authentic ZUGFeRD samples across multiple directories + - Library test resources: `library/src/test/resources/` (15 PDFs) + - Validator test resources: `validator/src/test/resources/` (14 PDFs) + - CLI test resources: `Mustang-CLI/src/test/resources/` (2 PDFs) +- **FeRD official site**: https://www.ferd-net.de/download/testrechnungen + - Returns 404 - URL may have moved + - Mustang project likely mirrors these samples +- **factur-x library tests**: https://github.com/akretion/factur-x/tree/master/tests + - No PDF files found in repository (only code tests) + +### ZUGFeRD Profile Coverage +- **Available samples**: BASIC, BASIC WL, EN16931, EXTENDED, XRechnung +- **Missing**: MINIMUM profile (future addition needed) +- **Versions covered**: ZUGFeRD 1.0, 2.0, 2.1, XRechnung +- **Related formats**: ORDER-X (for orders, not invoices) + +### Negative Testing +- `EmptyPDFA1.pdf`: Valid PDF/A-1 with no ZUGFeRD XML data +- Useful for testing error handling and graceful degradation + +### PDF Verification Pattern +- When `file` command unavailable, verify PDF magic bytes +- Magic bytes: `25 50 44 46` (hex) = "%PDF" (ASCII) +- Command: `head -c 4 "$f" | od -A n -t x1` +- All valid PDFs start with these 4 bytes + +### Sample Selection Strategy +- Prioritize coverage: multiple profiles, versions, 
edge cases +- Keep focused: aim for roughly 8-10 samples (11 selected to get good variety) +- Include historical samples for backward compatibility testing +- Document thoroughly: MANIFEST.md with profile, description, source + +### File Naming Conventions +- Mustang uses descriptive names: `EN16931_1_Teilrechnung.pdf` +- Include profile and feature description in filename +- Date-based names for temporal versions: `MustangBeispiel20221026.pdf` +- Test prefixes: `ZTESTZUGFERD_1_...` for ZUGFeRD v1 test samples + + +## [2026-02-04T19:45:00.000Z] Task 3: Pydantic Models + +### Pydantic v2+ Syntax Patterns +- Use `type | None = None` for optional fields (not `Optional[type]`) +- Use `Field(description=...)` for field documentation (appears in OpenAPI docs) +- Use `Field(default_factory=list)` for list defaults to avoid mutable default issues +- Use `Field(default=None)` for None defaults on optional fields +- Model docstrings serve as public API documentation for FastAPI's OpenAPI schema + +### JSON Serialization +- Use `model.model_dump_json()` to serialize an instance to a JSON string +- Use `Model.model_validate_json(json_str)` (a classmethod) to deserialize from JSON +- Pydantic handles datetime, nested models, and type conversion automatically + +### Test-First Development Pattern +- Write tests before implementing models (RED-GREEN-REFACTOR) +- Tests should cover: minimal data, full data, edge cases +- Test JSON roundtrip: `model.model_dump_json()` → `Model.model_validate_json()` +- Verify imports: `python -c "from src.models import ModelName"` + +### Nested Models with Dict Input +- Pydantic v2 accepts dict for nested models: `supplier={"name": "ACME"}` +- Use for test convenience and API requests +- Internally converts to proper model instances + +### Field Required vs Optional +- Required fields: No default value in Field +- Optional fields: `type | None = Field(default=None, ...)` +- Empty list defaults: `list[Type] = Field(default_factory=list)` + diff --git a/.sisyphus/plans/zugferd-service.md
b/.sisyphus/plans/zugferd-service.md index 43533f8..47455b9 100644 --- a/.sisyphus/plans/zugferd-service.md +++ b/.sisyphus/plans/zugferd-service.md @@ -329,7 +329,7 @@ Critical Path: Task 1 → Task 4 → Task 7 → Task 10 → Task 13 → Task 16 --- -- [ ] 2. Download ZUGFeRD Sample PDFs +- [x] 2. Download ZUGFeRD Sample PDFs **What to do**: - Download official ZUGFeRD sample PDFs from FeRD/ZUGFeRD repositories @@ -390,7 +390,7 @@ Critical Path: Task 1 → Task 4 → Task 7 → Task 10 → Task 13 → Task 16 --- -- [ ] 3. Create Pydantic Models +- [x] 3. Create Pydantic Models **What to do**: - Define all Pydantic models as per API specification diff --git a/src/models.py b/src/models.py index 1d2ac3b..79fe814 100644 --- a/src/models.py +++ b/src/models.py @@ -1,3 +1,167 @@ """Pydantic models for ZUGFeRD service.""" -pass +from typing import Any +from pydantic import BaseModel, Field + + +class ExtractionMeta(BaseModel): + """Metadata about the extraction process.""" + + pages: int = Field(description="Number of pages in the PDF") + xml_attachment_name: str | None = Field( + default=None, description="Name of the XML attachment" + ) + extraction_time_ms: int = Field(description="Extraction time in milliseconds") + + +class Supplier(BaseModel): + """Supplier/seller information.""" + + name: str = Field(description="Supplier name") + street: str | None = Field(default=None, description="Street address") + postal_code: str | None = Field(default=None, description="Postal code") + city: str | None = Field(default=None, description="City") + country: str | None = Field(default=None, description="Country code") + vat_id: str | None = Field(default=None, description="VAT ID") + email: str | None = Field(default=None, description="Email address") + + +class Buyer(BaseModel): + """Buyer/customer information.""" + + name: str = Field(description="Buyer name") + street: str | None = Field(default=None, description="Street address") + postal_code: str | None = Field(default=None, 
description="Postal code") + city: str | None = Field(default=None, description="City") + country: str | None = Field(default=None, description="Country code") + vat_id: str | None = Field(default=None, description="VAT ID") + + +class VatBreakdown(BaseModel): + """VAT breakdown entry.""" + + rate: float = Field(description="VAT rate percentage") + base: float = Field(description="Tax base amount") + amount: float = Field(description="VAT amount") + + +class PaymentTerms(BaseModel): + """Payment terms information.""" + + iban: str | None = Field(default=None, description="IBAN") + bic: str | None = Field(default=None, description="BIC/SWIFT") + account_holder: str | None = Field(default=None, description="Account holder name") + + +class Totals(BaseModel): + """Invoice totals.""" + + line_total_sum: float = Field(description="Sum of all line totals") + net: float = Field(description="Net amount") + vat_total: float = Field(description="Total VAT amount") + gross: float = Field(description="Gross amount") + vat_breakdown: list[VatBreakdown] = Field( + default_factory=list, description="VAT breakdown" + ) + + +class LineItem(BaseModel): + """Invoice line item.""" + + position: int = Field(description="Line position number") + article_number: str | None = Field(default=None, description="Article number") + article_number_buyer: str | None = Field( + default=None, description="Buyer's article number" + ) + description: str = Field(description="Item description") + quantity: float = Field(description="Quantity") + unit: str = Field(description="Unit (human-readable)") + unit_price: float = Field(description="Unit price") + line_total: float = Field(description="Line total amount") + vat_rate: float | None = Field(default=None, description="VAT rate percentage") + vat_amount: float | None = Field( + default=None, description="VAT amount for this line" + ) + + +class XmlData(BaseModel): + """Structured XML data extracted from ZUGFeRD invoice.""" + + invoice_number: str = 
Field(description="Invoice number") + invoice_date: str = Field(description="Invoice date (YYYY-MM-DD)") + due_date: str | None = Field(default=None, description="Due date (YYYY-MM-DD)") + supplier: Supplier = Field(description="Supplier information") + buyer: Buyer = Field(description="Buyer information") + line_items: list[LineItem] = Field(description="Line items") + totals: Totals = Field(description="Invoice totals") + currency: str = Field(default="EUR", description="Currency code") + payment_terms: PaymentTerms | None = Field( + default=None, description="Payment terms" + ) + notes: str | None = Field(default=None, description="Invoice notes") + + +class ExtractRequest(BaseModel): + """Request body for /extract endpoint.""" + + pdf_base64: str = Field(description="Base64-encoded PDF file") + + +class ExtractResponse(BaseModel): + """Response from /extract endpoint.""" + + is_zugferd: bool = Field(description="Whether PDF contains ZUGFeRD data") + zugferd_profil: str | None = Field( + default=None, description="ZUGFeRD profile (e.g., EN16931)" + ) + xml_raw: str | None = Field(default=None, description="Raw XML string") + xml_data: XmlData | None = Field(default=None, description="Parsed XML data") + pdf_text: str | None = Field(default=None, description="Extracted PDF text") + extraction_meta: ExtractionMeta = Field(description="Extraction metadata") + + +class ErrorDetail(BaseModel): + """Error or warning detail from validation.""" + + check: str = Field(description="Validation check that produced this error") + field: str | None = Field(default=None, description="Field with the issue") + error_code: str = Field(description="Error code") + message: str = Field(description="Human-readable error message") + severity: str = Field(description="Severity level: 'critical' or 'warning'") + + +class ValidationResult(BaseModel): + """Result of invoice validation.""" + + is_valid: bool = Field(description="Whether invoice passed all validation checks") + errors: 
list[ErrorDetail] = Field( + default_factory=list, description="Critical errors" + ) + warnings: list[ErrorDetail] = Field(default_factory=list, description="Warnings") + summary: dict[str, Any] | None = Field( + default=None, description="Validation summary" + ) + validation_time_ms: int = Field(description="Validation time in milliseconds") + + +class ValidateRequest(BaseModel): + """Request body for /validate endpoint.""" + + xml_data: dict[str, Any] = Field(description="XML data to validate") + pdf_text: str | None = Field( + default=None, description="Extracted PDF text for comparison" + ) + checks: list[str] = Field(description="List of validation checks to run") + + +class ValidateResponse(BaseModel): + """Response from /validate endpoint.""" + + result: ValidationResult = Field(description="Validation result") + + +class ErrorResponse(BaseModel): + """Error response.""" + + error: str = Field(description="Error code") + message: str = Field(description="Error message") diff --git a/tests/fixtures/EN16931_1_Teilrechnung.pdf b/tests/fixtures/EN16931_1_Teilrechnung.pdf new file mode 100644 index 0000000..0bc3ea7 Binary files /dev/null and b/tests/fixtures/EN16931_1_Teilrechnung.pdf differ diff --git a/tests/fixtures/EN16931_Einfach.pdf b/tests/fixtures/EN16931_Einfach.pdf new file mode 100755 index 0000000..802b728 Binary files /dev/null and b/tests/fixtures/EN16931_Einfach.pdf differ diff --git a/tests/fixtures/EmptyPDFA1.pdf b/tests/fixtures/EmptyPDFA1.pdf new file mode 100644 index 0000000..9c4ae55 Binary files /dev/null and b/tests/fixtures/EmptyPDFA1.pdf differ diff --git a/tests/fixtures/MANIFEST.md b/tests/fixtures/MANIFEST.md new file mode 100644 index 0000000..de7aa4d --- /dev/null +++ b/tests/fixtures/MANIFEST.md @@ -0,0 +1,52 @@ +# ZUGFeRD Test Fixture Manifest + +This directory contains sample PDFs for testing ZUGFeRD extraction and validation. 
+ +## Files + +| Filename | Profile | Description | +|-----------|----------|-------------| +| EN16931_1_Teilrechnung.pdf | EN16931 | Official FeRD test invoice - partial invoice (Teilrechnung) with full UN/CEFACT data | +| EN16931_Einfach.pdf | EN16931 | Official FeRD test invoice - simple invoice (Einfach) with UN/CEFACT data | +| attributeBasedXMP_zugferd_2p0_EN16931_Einfach.pdf | EN16931 | ZUGFeRD 2.0 EN16931 profile using attribute-based XMP metadata | +| zugferd_invoice.pdf | ZUGFeRD 1.0 | Simple ZUGFeRD v1.0 invoice (profile not verified; likely BASIC or COMFORT) | +| validAvoir_FR_type380_BASICWL.pdf | BASIC WL | French credit note (avoir) with BASIC WL profile | +| zugferd_2p1_EXTENDED_PDFA-3A.pdf | EXTENDED | ZUGFeRD 2.1 EXTENDED profile with PDF/A-3A conformance | +| validXRechnung.pdf | XRechnung | German XRechnung format (similar to EN16931 but German profile) | +| ZTESTZUGFERD_1_INVDSS_012015738820PDF-1.pdf | ZUGFeRD 1.0 | Historical ZUGFeRD v1.0 test invoice from 2015 | +| MustangBeispiel20221026.pdf | EN16931 | Modern sample from Mustang project (October 2022) | +| ORDER-X_EX01_ORDER_FULL_DATA-COMFORT.pdf | ORDER-X | Order-X format (related to ZUGFeRD but for orders) | +| EmptyPDFA1.pdf | None | Empty PDF/A-1 document - no ZUGFeRD data (negative test case) | + +## Profile Coverage + +- **MINIMUM**: Not covered (future addition) +- **BASIC**: Possibly covered by `zugferd_invoice.pdf` (ZUGFeRD 1.0; profile not verified - BASIC or COMFORT) +- **BASIC WL**: Covered by `validAvoir_FR_type380_BASICWL.pdf` +- **EN16931**: Covered by multiple samples +- **EXTENDED**: Covered by `zugferd_2p1_EXTENDED_PDFA-3A.pdf` +- **None (negative test)**: Covered by `EmptyPDFA1.pdf` + +## Version Coverage + +- ZUGFeRD 1.0: `ZTESTZUGFERD_1_INVDSS_012015738820PDF-1.pdf`, `zugferd_invoice.pdf` +- ZUGFeRD 2.0: `attributeBasedXMP_zugferd_2p0_EN16931_Einfach.pdf` +- ZUGFeRD 2.1: `zugferd_2p1_EXTENDED_PDFA-3A.pdf` +- XRechnung: `validXRechnung.pdf` + +## Source URLs + +- ZUGFeRD Mustang project: https://github.com/ZUGFeRD/mustangproject + - Library test
resources: `library/src/test/resources/` + - Validator test resources: `validator/src/test/resources/` + - CLI test resources: `Mustang-CLI/src/test/resources/` +- FeRD test invoices: https://www.ferd-net.de/download/testrechnungen (URL returned 404; samples were obtained from the Mustang project instead) +- factur-x library tests: https://github.com/akretion/factur-x/tree/master/tests (no PDFs found in repository) + +## Notes + +- All files were obtained from the Mustang project (the reference implementation); apart from the negative test case and the ORDER-X sample noted in this list, they are authentic ZUGFeRD/Factur-X samples +- Files cover multiple profiles and versions of the ZUGFeRD standard +- Negative test case included: `EmptyPDFA1.pdf` is a valid PDF/A-1 but contains no ZUGFeRD XML data +- ORDER-X sample included for completeness; ORDER-X is a related but different format (orders, not invoices) +- File sizes range from 38KB to 684KB diff --git a/tests/fixtures/MustangBeispiel20221026.pdf b/tests/fixtures/MustangBeispiel20221026.pdf new file mode 100644 index 0000000..cb86a42 Binary files /dev/null and b/tests/fixtures/MustangBeispiel20221026.pdf differ diff --git a/tests/fixtures/ORDER-X_EX01_ORDER_FULL_DATA-COMFORT.pdf b/tests/fixtures/ORDER-X_EX01_ORDER_FULL_DATA-COMFORT.pdf new file mode 100644 index 0000000..56ec994 Binary files /dev/null and b/tests/fixtures/ORDER-X_EX01_ORDER_FULL_DATA-COMFORT.pdf differ diff --git a/tests/fixtures/ZTESTZUGFERD_1_INVDSS_012015738820PDF-1.pdf b/tests/fixtures/ZTESTZUGFERD_1_INVDSS_012015738820PDF-1.pdf new file mode 100644 index 0000000..7b22068 Binary files /dev/null and b/tests/fixtures/ZTESTZUGFERD_1_INVDSS_012015738820PDF-1.pdf differ diff --git a/tests/fixtures/attributeBasedXMP_zugferd_2p0_EN16931_Einfach.pdf b/tests/fixtures/attributeBasedXMP_zugferd_2p0_EN16931_Einfach.pdf new file mode 100644 index 0000000..16189b1 Binary files /dev/null and b/tests/fixtures/attributeBasedXMP_zugferd_2p0_EN16931_Einfach.pdf differ diff --git a/tests/fixtures/validAvoir_FR_type380_BASICWL.pdf b/tests/fixtures/validAvoir_FR_type380_BASICWL.pdf new file mode 100644
index 0000000..cebe43b Binary files /dev/null and b/tests/fixtures/validAvoir_FR_type380_BASICWL.pdf differ diff --git a/tests/fixtures/validXRechnung.pdf b/tests/fixtures/validXRechnung.pdf new file mode 100644 index 0000000..9c9ecff Binary files /dev/null and b/tests/fixtures/validXRechnung.pdf differ diff --git a/tests/fixtures/zugferd_2p1_EXTENDED_PDFA-3A.pdf b/tests/fixtures/zugferd_2p1_EXTENDED_PDFA-3A.pdf new file mode 100644 index 0000000..a461017 Binary files /dev/null and b/tests/fixtures/zugferd_2p1_EXTENDED_PDFA-3A.pdf differ diff --git a/tests/fixtures/zugferd_invoice.pdf b/tests/fixtures/zugferd_invoice.pdf new file mode 100644 index 0000000..6dd5ca9 Binary files /dev/null and b/tests/fixtures/zugferd_invoice.pdf differ diff --git a/tests/test_models.py b/tests/test_models.py new file mode 100644 index 0000000..0c752bc --- /dev/null +++ b/tests/test_models.py @@ -0,0 +1,512 @@ +"""Tests for Pydantic models.""" + +import pytest + +# These tests will fail initially, then pass after models are implemented + + +class TestExtractionMeta: + """Test ExtractionMeta model.""" + + def test_minimal_extraction_meta(self): + """Test ExtractionMeta with minimal required fields.""" + from src.models import ExtractionMeta + + meta = ExtractionMeta(pages=1, extraction_time_ms=234) + assert meta.pages == 1 + assert meta.extraction_time_ms == 234 + assert meta.xml_attachment_name is None + + def test_full_extraction_meta(self): + """Test ExtractionMeta with all fields.""" + from src.models import ExtractionMeta + + meta = ExtractionMeta( + pages=2, xml_attachment_name="factur-x.xml", extraction_time_ms=456 + ) + assert meta.pages == 2 + assert meta.xml_attachment_name == "factur-x.xml" + assert meta.extraction_time_ms == 456 + + +class TestSupplier: + """Test Supplier model.""" + + def test_minimal_supplier(self): + """Test Supplier with minimal required fields.""" + from src.models import Supplier + + supplier = Supplier(name="ACME GmbH") + assert supplier.name == 
"ACME GmbH" + assert supplier.street is None + assert supplier.vat_id is None + + def test_full_supplier(self): + """Test Supplier with all fields.""" + from src.models import Supplier + + supplier = Supplier( + name="ACME GmbH", + street="Musterstraße 42", + postal_code="12345", + city="Musterstadt", + country="DE", + vat_id="DE123456789", + email="info@acme.de", + ) + assert supplier.name == "ACME GmbH" + assert supplier.street == "Musterstraße 42" + assert supplier.postal_code == "12345" + assert supplier.city == "Musterstadt" + assert supplier.country == "DE" + assert supplier.vat_id == "DE123456789" + assert supplier.email == "info@acme.de" + + +class TestBuyer: + """Test Buyer model.""" + + def test_minimal_buyer(self): + """Test Buyer with minimal required fields.""" + from src.models import Buyer + + buyer = Buyer(name="Customer AG") + assert buyer.name == "Customer AG" + assert buyer.street is None + + def test_full_buyer(self): + """Test Buyer with all fields.""" + from src.models import Buyer + + buyer = Buyer( + name="Customer AG", + street="Kundenweg 7", + postal_code="54321", + city="Kundenstadt", + country="DE", + vat_id="DE987654321", + ) + assert buyer.name == "Customer AG" + assert buyer.street == "Kundenweg 7" + assert buyer.vat_id == "DE987654321" + + +class TestVatBreakdown: + """Test VatBreakdown model.""" + + def test_vat_breakdown(self): + """Test VatBreakdown model.""" + from src.models import VatBreakdown + + vat = VatBreakdown(rate=19.0, base=99.90, amount=18.98) + assert vat.rate == 19.0 + assert vat.base == 99.90 + assert vat.amount == 18.98 + + +class TestPaymentTerms: + """Test PaymentTerms model.""" + + def test_minimal_payment_terms(self): + """Test PaymentTerms with minimal fields.""" + from src.models import PaymentTerms + + terms = PaymentTerms() + assert terms.iban is None + assert terms.bic is None + assert terms.account_holder is None + + def test_full_payment_terms(self): + """Test PaymentTerms with all fields.""" + from 
src.models import PaymentTerms + + terms = PaymentTerms( + iban="DE89370400440532013000", bic="DEUTDEFF", account_holder="ACME GmbH" + ) + assert terms.iban == "DE89370400440532013000" + assert terms.bic == "DEUTDEFF" + assert terms.account_holder == "ACME GmbH" + + +class TestTotals: + """Test Totals model.""" + + def test_totals_minimal(self): + """Test Totals with required fields only.""" + from src.models import Totals + + totals = Totals(line_total_sum=99.90, net=99.90, vat_total=18.98, gross=118.88) + assert totals.line_total_sum == 99.90 + assert totals.net == 99.90 + assert totals.vat_total == 18.98 + assert totals.gross == 118.88 + assert totals.vat_breakdown == [] + + def test_totals_with_vat_breakdown(self): + """Test Totals with VAT breakdown.""" + from src.models import Totals, VatBreakdown + + totals = Totals( + line_total_sum=99.90, + net=99.90, + vat_total=18.98, + gross=118.88, + vat_breakdown=[VatBreakdown(rate=19.0, base=99.90, amount=18.98)], + ) + assert len(totals.vat_breakdown) == 1 + assert totals.vat_breakdown[0].rate == 19.0 + + +class TestLineItem: + """Test LineItem model.""" + + def test_minimal_line_item(self): + """Test LineItem with minimal required fields.""" + from src.models import LineItem + + item = LineItem( + position=1, + description="Widget", + quantity=10.0, + unit="Stück", + unit_price=9.99, + line_total=99.90, + ) + assert item.position == 1 + assert item.description == "Widget" + assert item.quantity == 10.0 + assert item.unit == "Stück" + assert item.unit_price == 9.99 + assert item.line_total == 99.90 + assert item.article_number is None + assert item.vat_rate is None + + def test_full_line_item(self): + """Test LineItem with all fields.""" + from src.models import LineItem + + item = LineItem( + position=1, + article_number="ART-001", + article_number_buyer="KUN-001", + description="Premium Widget", + quantity=5.0, + unit="Stück", + unit_price=19.99, + line_total=99.95, + vat_rate=19.0, + vat_amount=18.99, + ) + 
assert item.article_number == "ART-001" + assert item.article_number_buyer == "KUN-001" + assert item.vat_rate == 19.0 + assert item.vat_amount == 18.99 + + +class TestXmlData: + """Test XmlData model.""" + + def test_minimal_xml_data(self): + """Test XmlData with minimal required fields.""" + from src.models import XmlData, Supplier, Buyer, Totals + + data = XmlData( + invoice_number="RE-2025-001234", + invoice_date="2025-02-04", + supplier={"name": "ACME GmbH"}, + buyer={"name": "Customer AG"}, + line_items=[], + totals={"line_total_sum": 0.0, "net": 0.0, "vat_total": 0.0, "gross": 0.0}, + ) + assert data.invoice_number == "RE-2025-001234" + assert data.invoice_date == "2025-02-04" + assert data.due_date is None + assert data.notes is None + + def test_full_xml_data(self): + """Test XmlData with all fields.""" + from src.models import XmlData, Supplier, Buyer, LineItem, Totals, VatBreakdown + + data = XmlData( + invoice_number="RE-2025-001234", + invoice_date="2025-02-04", + due_date="2025-03-04", + supplier=Supplier(name="ACME GmbH", vat_id="DE123456789"), + buyer=Buyer(name="Customer AG", vat_id="DE987654321"), + line_items=[ + LineItem( + position=1, + description="Widget", + quantity=10.0, + unit="Stück", + unit_price=9.99, + line_total=99.90, + ) + ], + totals=Totals( + line_total_sum=99.90, + net=99.90, + vat_breakdown=[VatBreakdown(rate=19.0, base=99.90, amount=18.98)], + vat_total=18.98, + gross=118.88, + ), + currency="EUR", + notes="Payment due within 30 days", + ) + assert data.invoice_number == "RE-2025-001234" + assert data.due_date == "2025-03-04" + assert data.currency == "EUR" + assert data.notes == "Payment due within 30 days" + assert len(data.line_items) == 1 + + +class TestExtractResponse: + """Test ExtractResponse model.""" + + def test_extract_response_zugferd(self): + """Test ExtractResponse with ZUGFeRD data.""" + from src.models import ( + ExtractResponse, + XmlData, + ExtractionMeta, + Supplier, + Buyer, + LineItem, + Totals, + 
VatBreakdown, + ) + + response = ExtractResponse( + is_zugferd=True, + zugferd_profil="EN16931", + xml_raw="...", + xml_data=XmlData( + invoice_number="RE-2025-001234", + invoice_date="2025-02-04", + supplier=Supplier(name="ACME GmbH"), + buyer=Buyer(name="Customer AG"), + line_items=[ + LineItem( + position=1, + description="Widget", + quantity=10.0, + unit="Stück", + unit_price=9.99, + line_total=99.90, + ) + ], + totals=Totals( + line_total_sum=99.90, + net=99.90, + vat_breakdown=[VatBreakdown(rate=19.0, base=99.90, amount=18.98)], + vat_total=18.98, + gross=118.88, + ), + ), + pdf_text="Rechnung\n...", + extraction_meta=ExtractionMeta( + pages=1, xml_attachment_name="factur-x.xml", extraction_time_ms=234 + ), + ) + assert response.is_zugferd is True + assert response.zugferd_profil == "EN16931" + assert response.xml_raw is not None + assert response.xml_data is not None + assert response.pdf_text is not None + assert response.extraction_meta.pages == 1 + + def test_extract_response_non_zugferd(self): + """Test ExtractResponse for non-ZUGFeRD PDF.""" + from src.models import ExtractResponse, ExtractionMeta + + response = ExtractResponse( + is_zugferd=False, + pdf_text="Invoice text from PDF...", + extraction_meta=ExtractionMeta(pages=1, extraction_time_ms=50), + ) + assert response.is_zugferd is False + assert response.zugferd_profil is None + assert response.xml_raw is None + assert response.xml_data is None + assert response.pdf_text is not None + + +class TestErrorDetail: + """Test ErrorDetail model.""" + + def test_error_detail_critical(self): + """Test ErrorDetail with critical severity.""" + from src.models import ErrorDetail + + error = ErrorDetail( + check="pflichtfelder", + field="invoice_number", + error_code="missing_required_field", + message="Invoice number is required", + severity="critical", + ) + assert error.check == "pflichtfelder" + assert error.field == "invoice_number" + assert error.error_code == "missing_required_field" + assert 
error.message == "Invoice number is required" + assert error.severity == "critical" + + def test_error_detail_warning(self): + """Test ErrorDetail with warning severity.""" + from src.models import ErrorDetail + + error = ErrorDetail( + check="pdf_abgleich", + field=None, + error_code="value_mismatch", + message="Amounts differ slightly", + severity="warning", + ) + assert error.check == "pdf_abgleich" + assert error.field is None + assert error.severity == "warning" + + +class TestValidationResult: + """Test ValidationResult model.""" + + def test_valid_result(self): + """Test ValidationResult with no errors.""" + from src.models import ValidationResult + + result = ValidationResult( + is_valid=True, errors=[], warnings=[], summary=None, validation_time_ms=100 + ) + assert result.is_valid is True + assert result.errors == [] + assert result.warnings == [] + assert result.summary is None + assert result.validation_time_ms == 100 + + def test_invalid_result_with_errors(self): + """Test ValidationResult with errors.""" + from src.models import ValidationResult, ErrorDetail + + result = ValidationResult( + is_valid=False, + errors=[ + ErrorDetail( + check="pflichtfelder", + field="invoice_number", + error_code="missing_required_field", + message="Invoice number is required", + severity="critical", + ), + ErrorDetail( + check="betraege", + field="totals.gross", + error_code="calculation_mismatch", + message="Gross total mismatch: expected 118.88, got 118.90", + severity="critical", + ), + ], + warnings=[ + ErrorDetail( + check="pflichtfelder", + field="due_date", + error_code="missing_optional_field", + message="Due date not provided", + severity="warning", + ) + ], + summary={"total_errors": 2, "total_warnings": 1, "critical_errors": 2}, + validation_time_ms=150, + ) + assert result.is_valid is False + assert len(result.errors) == 2 + assert len(result.warnings) == 1 + assert result.summary["total_errors"] == 2 + assert result.validation_time_ms == 150 + + +class 
TestExtractRequest: + """Test ExtractRequest model.""" + + def test_extract_request(self): + """Test ExtractRequest model.""" + from src.models import ExtractRequest + + request = ExtractRequest(pdf_base64="JVBERi0xLjQK...") + assert request.pdf_base64 == "JVBERi0xLjQK..." + + +class TestValidateRequest: + """Test ValidateRequest model.""" + + def test_validate_request_minimal(self): + """Test ValidateRequest with minimal fields.""" + from src.models import ValidateRequest + + request = ValidateRequest(xml_data={}, checks=["pflichtfelder"]) + assert request.xml_data == {} + assert request.checks == ["pflichtfelder"] + assert request.pdf_text is None + + def test_validate_request_full(self): + """Test ValidateRequest with all fields.""" + from src.models import ValidateRequest + + request = ValidateRequest( + xml_data={"invoice_number": "RE-001", "totals": {"gross": 118.88}}, + pdf_text="Invoice text...", + checks=["pflichtfelder", "betraege", "ustid", "pdf_abgleich"], + ) + assert request.xml_data["invoice_number"] == "RE-001" + assert request.pdf_text is not None + assert len(request.checks) == 4 + + +class TestErrorResponse: + """Test ErrorResponse model.""" + + def test_error_response(self): + """Test ErrorResponse model.""" + from src.models import ErrorResponse + + response = ErrorResponse( + error="invalid_pdf", message="The provided file is not a valid PDF" + ) + assert response.error == "invalid_pdf" + assert response.message == "The provided file is not a valid PDF" + + +class TestModelsSerializeToJSON: + """Test JSON serialization of all models.""" + + def test_extract_response_serializes(self): + """Test ExtractResponse serializes to valid JSON.""" + from src.models import ExtractResponse, ExtractionMeta + + response = ExtractResponse( + is_zugferd=False, + pdf_text="Invoice text...", + extraction_meta=ExtractionMeta(pages=1, extraction_time_ms=50), + ) + json_str = response.model_dump_json() + assert '"is_zugferd":false' in json_str + assert 
'"pdf_text"' in json_str + + def test_validation_result_serializes(self): + """Test ValidationResult serializes to valid JSON.""" + from src.models import ValidationResult + + result = ValidationResult( + is_valid=True, errors=[], warnings=[], summary=None, validation_time_ms=100 + ) + json_str = result.model_dump_json() + assert '"is_valid":true' in json_str + + def test_models_roundtrip(self): + """Test models survive JSON roundtrip.""" + from src.models import Supplier + + supplier = Supplier(name="ACME GmbH", vat_id="DE123456789") + json_str = supplier.model_dump_json() + supplier2 = Supplier.model_validate_json(json_str) + assert supplier2.name == supplier.name + assert supplier2.vat_id == supplier.vat_id