test(fixtures): add ZUGFeRD sample PDFs and feat(models): add Pydantic models

- Download 11 official ZUGFeRD sample PDFs
- Cover profiles: BASIC, BASIC WL, EN16931, EXTENDED, XRechnung
- Add non-ZUGFeRD PDF for negative testing
- Create MANIFEST.md documenting all samples
- Implement all Pydantic models from spec
- Add 28 TDD tests for models
- All tests pass
This commit is contained in:
m3tm3re
2026-02-04 19:26:01 +01:00
parent 0db2482bf2
commit 29bd8453ec
16 changed files with 805 additions and 3 deletions

View File

@@ -1,3 +1,167 @@
"""Pydantic models for ZUGFeRD service."""
# Standard library first, third-party second, separated by a blank line.
# The stray no-op `pass` that used to precede these imports was removed so
# the imports sit directly below the module docstring.
from typing import Any

from pydantic import BaseModel, Field
class ExtractionMeta(BaseModel):
    """Metadata about the extraction process."""

    # `pages` and `extraction_time_ms` are required integers; only the
    # attachment name may be omitted, in which case it is None.
    pages: int = Field(
        description="Number of pages in the PDF",
    )
    xml_attachment_name: str | None = Field(
        default=None,
        description="Name of the XML attachment",
    )
    extraction_time_ms: int = Field(
        description="Extraction time in milliseconds",
    )
class Supplier(BaseModel):
    """Supplier/seller information."""

    # Only `name` is mandatory; the address parts, VAT ID and email all
    # default to None when omitted.
    name: str = Field(
        description="Supplier name",
    )
    street: str | None = Field(
        default=None,
        description="Street address",
    )
    postal_code: str | None = Field(
        default=None,
        description="Postal code",
    )
    city: str | None = Field(
        default=None,
        description="City",
    )
    country: str | None = Field(
        default=None,
        description="Country code",
    )
    vat_id: str | None = Field(
        default=None,
        description="VAT ID",
    )
    email: str | None = Field(
        default=None,
        description="Email address",
    )
class Buyer(BaseModel):
    """Buyer/customer information."""

    # Only `name` is mandatory; the address parts and VAT ID default to
    # None when omitted.
    name: str = Field(
        description="Buyer name",
    )
    street: str | None = Field(
        default=None,
        description="Street address",
    )
    postal_code: str | None = Field(
        default=None,
        description="Postal code",
    )
    city: str | None = Field(
        default=None,
        description="City",
    )
    country: str | None = Field(
        default=None,
        description="Country code",
    )
    vat_id: str | None = Field(
        default=None,
        description="VAT ID",
    )
class VatBreakdown(BaseModel):
    """VAT breakdown entry."""

    # All three values are required floats; none has a default.
    rate: float = Field(
        description="VAT rate percentage",
    )
    base: float = Field(
        description="Tax base amount",
    )
    amount: float = Field(
        description="VAT amount",
    )
class PaymentTerms(BaseModel):
    """Payment terms information."""

    # Every field defaults to None, so the model can be built with no
    # arguments at all.
    iban: str | None = Field(
        default=None,
        description="IBAN",
    )
    bic: str | None = Field(
        default=None,
        description="BIC/SWIFT",
    )
    account_holder: str | None = Field(
        default=None,
        description="Account holder name",
    )
class Totals(BaseModel):
    """Invoice totals."""

    # The four amounts are required floats.
    line_total_sum: float = Field(
        description="Sum of all line totals",
    )
    net: float = Field(
        description="Net amount",
    )
    vat_total: float = Field(
        description="Total VAT amount",
    )
    gross: float = Field(
        description="Gross amount",
    )
    # default_factory builds a new empty list for each instance instead of
    # sharing one list object between instances.
    vat_breakdown: list[VatBreakdown] = Field(
        default_factory=list,
        description="VAT breakdown",
    )
class LineItem(BaseModel):
    """Invoice line item."""

    # Required: position, description, quantity, unit, unit_price and
    # line_total. Both article numbers and both VAT figures default to None.
    position: int = Field(
        description="Line position number",
    )
    article_number: str | None = Field(
        default=None,
        description="Article number",
    )
    article_number_buyer: str | None = Field(
        default=None,
        description="Buyer's article number",
    )
    description: str = Field(
        description="Item description",
    )
    quantity: float = Field(
        description="Quantity",
    )
    unit: str = Field(
        description="Unit (human-readable)",
    )
    unit_price: float = Field(
        description="Unit price",
    )
    line_total: float = Field(
        description="Line total amount",
    )
    vat_rate: float | None = Field(
        default=None,
        description="VAT rate percentage",
    )
    vat_amount: float | None = Field(
        default=None,
        description="VAT amount for this line",
    )
class XmlData(BaseModel):
    """Structured XML data extracted from ZUGFeRD invoice."""

    # Dates are typed as plain strings: the descriptions document a
    # YYYY-MM-DD layout, but these field declarations add no format check.
    invoice_number: str = Field(
        description="Invoice number",
    )
    invoice_date: str = Field(
        description="Invoice date (YYYY-MM-DD)",
    )
    due_date: str | None = Field(
        default=None,
        description="Due date (YYYY-MM-DD)",
    )
    # Nested models; all four are required (line_items has no default).
    supplier: Supplier = Field(
        description="Supplier information",
    )
    buyer: Buyer = Field(
        description="Buyer information",
    )
    line_items: list[LineItem] = Field(
        description="Line items",
    )
    totals: Totals = Field(
        description="Invoice totals",
    )
    # Falls back to "EUR" when the caller does not pass a currency.
    currency: str = Field(
        default="EUR",
        description="Currency code",
    )
    payment_terms: PaymentTerms | None = Field(
        default=None,
        description="Payment terms",
    )
    notes: str | None = Field(
        default=None,
        description="Invoice notes",
    )
class ExtractRequest(BaseModel):
    """Request body for /extract endpoint."""

    # Single required field: the PDF payload carried as a string.
    pdf_base64: str = Field(
        description="Base64-encoded PDF file",
    )
class ExtractResponse(BaseModel):
    """Response from /extract endpoint."""

    # `is_zugferd` and `extraction_meta` are always required; every field in
    # between defaults to None.
    is_zugferd: bool = Field(
        description="Whether PDF contains ZUGFeRD data",
    )
    # NOTE(review): spelled `zugferd_profil` (no trailing "e"). Kept as-is
    # because the name is part of the serialized interface -- confirm it
    # matches the spec.
    zugferd_profil: str | None = Field(
        default=None,
        description="ZUGFeRD profile (e.g., EN16931)",
    )
    xml_raw: str | None = Field(
        default=None,
        description="Raw XML string",
    )
    xml_data: XmlData | None = Field(
        default=None,
        description="Parsed XML data",
    )
    pdf_text: str | None = Field(
        default=None,
        description="Extracted PDF text",
    )
    extraction_meta: ExtractionMeta = Field(
        description="Extraction metadata",
    )
class ErrorDetail(BaseModel):
    """Error or warning detail from validation."""

    # Everything except `field` is required.
    check: str = Field(
        description="Validation check that produced this error",
    )
    field: str | None = Field(
        default=None,
        description="Field with the issue",
    )
    error_code: str = Field(
        description="Error code",
    )
    message: str = Field(
        description="Human-readable error message",
    )
    # Typed as a plain str: the description names 'critical' and 'warning',
    # but the annotation itself does not restrict the value.
    severity: str = Field(
        description="Severity level: 'critical' or 'warning'",
    )
class ValidationResult(BaseModel):
    """Result of invoice validation."""

    is_valid: bool = Field(
        description="Whether invoice passed all validation checks",
    )
    # Both lists use default_factory, so each instance starts with its own
    # empty list when none is supplied.
    errors: list[ErrorDetail] = Field(
        default_factory=list,
        description="Critical errors",
    )
    warnings: list[ErrorDetail] = Field(
        default_factory=list,
        description="Warnings",
    )
    # Optional mapping with string keys and unconstrained values.
    summary: dict[str, Any] | None = Field(
        default=None,
        description="Validation summary",
    )
    validation_time_ms: int = Field(
        description="Validation time in milliseconds",
    )
class ValidateRequest(BaseModel):
    """Request body for /validate endpoint."""

    # `xml_data` is accepted as a raw dict here rather than as a typed model.
    xml_data: dict[str, Any] = Field(
        description="XML data to validate",
    )
    pdf_text: str | None = Field(
        default=None,
        description="Extracted PDF text for comparison",
    )
    # Required: no default set of checks is declared.
    checks: list[str] = Field(
        description="List of validation checks to run",
    )
class ValidateResponse(BaseModel):
    """Response from /validate endpoint."""

    # Envelope with one required member holding the validation outcome.
    result: ValidationResult = Field(
        description="Validation result",
    )
class ErrorResponse(BaseModel):
    """Error response."""

    # Both fields are required strings.
    error: str = Field(
        description="Error code",
    )
    message: str = Field(
        description="Error message",
    )