Ultraworked with [Sisyphus](https://github.com/code-yeongyu/oh-my-opencode) Co-authored-by: Sisyphus <clio-agent@sisyphuslabs.ai>
234 lines
7.8 KiB
Python
234 lines
7.8 KiB
Python
"""Integration tests for full ZUGFeRD workflow: extract → validate."""
|
|
|
|
import base64
|
|
|
|
import pytest
|
|
from fastapi.testclient import TestClient
|
|
|
|
from src.main import app
|
|
|
|
|
|
@pytest.fixture
def client():
    """Provide a ``TestClient`` bound to the FastAPI ``app`` under test.

    Returns:
        A new ``TestClient`` instance wrapping ``app``; pytest builds a fresh
        one for every test that requests this fixture.
    """
    return TestClient(app)
|
|
|
|
|
|
def read_pdf_as_base64(filepath: str) -> str:
    """Read a file in binary mode and return its content base64-encoded.

    Args:
        filepath: Path to the PDF file.

    Returns:
        Base64-encoded file content as a string (empty string for an
        empty file).

    Raises:
        OSError: If the file cannot be opened or read.
    """
    with open(filepath, "rb") as f:
        # b64encode yields bytes; decode so the value can be placed in JSON.
        return base64.b64encode(f.read()).decode()
|
|
|
|
|
|
def test_integration_en16931_full_workflow(client):
    """Test full workflow: extract → validate with EN16931 invoice.

    Posts the EN16931 fixture to ``/extract``, checks the extraction payload,
    then feeds the extracted data into ``/validate`` with all four checks.
    """
    pdf_base64 = read_pdf_as_base64("tests/fixtures/EN16931_Einfach.pdf")
    extract_response = client.post("/extract", json={"pdf_base64": pdf_base64})

    assert extract_response.status_code == 200
    extract_data = extract_response.json()
    assert extract_data["is_zugferd"] is True
    assert extract_data["zugferd_profil"] == "EN16931"
    assert "xml_data" in extract_data
    assert "pdf_text" in extract_data

    # Second stage: the extraction output is the validation input.
    validate_response = client.post(
        "/validate",
        json={
            "xml_data": extract_data["xml_data"],
            "pdf_text": extract_data["pdf_text"],
            "checks": ["pflichtfelder", "betraege", "ustid", "pdf_abgleich"],
        },
    )

    assert validate_response.status_code == 200
    validate_data = validate_response.json()
    assert "result" in validate_data
    assert "is_valid" in validate_data["result"]
|
|
|
|
|
|
def test_integration_basic_wl_full_workflow(client):
    """Test full workflow: extract → validate with BASIC WL invoice.

    Posts the BASIC WL fixture to ``/extract`` and then runs the
    ``pflichtfelder`` check on the extracted data via ``/validate``.
    """
    pdf_base64 = read_pdf_as_base64("tests/fixtures/validAvoir_FR_type380_BASICWL.pdf")
    extract_response = client.post("/extract", json={"pdf_base64": pdf_base64})

    assert extract_response.status_code == 200
    extract_data = extract_response.json()
    assert extract_data["is_zugferd"] is True
    assert "xml_data" in extract_data
    # pdf_text is read below; assert it so a missing key is reported as a
    # test failure instead of a KeyError.
    assert "pdf_text" in extract_data

    validate_response = client.post(
        "/validate",
        json={
            "xml_data": extract_data["xml_data"],
            "pdf_text": extract_data["pdf_text"],
            "checks": ["pflichtfelder"],
        },
    )

    assert validate_response.status_code == 200
    validate_data = validate_response.json()
    assert "result" in validate_data
|
|
|
|
|
|
def test_integration_extended_profile_full_workflow(client):
    """Test full workflow: extract → validate with EXTENDED profile.

    Posts the EXTENDED fixture to ``/extract`` and then runs the
    ``pflichtfelder`` and ``betraege`` checks via ``/validate``.
    """
    pdf_base64 = read_pdf_as_base64("tests/fixtures/zugferd_2p1_EXTENDED_PDFA-3A.pdf")
    extract_response = client.post("/extract", json={"pdf_base64": pdf_base64})

    assert extract_response.status_code == 200
    extract_data = extract_response.json()
    assert extract_data["is_zugferd"] is True
    assert "xml_data" in extract_data
    # pdf_text is read below; assert it so a missing key is reported as a
    # test failure instead of a KeyError.
    assert "pdf_text" in extract_data

    validate_response = client.post(
        "/validate",
        json={
            "xml_data": extract_data["xml_data"],
            "pdf_text": extract_data["pdf_text"],
            "checks": ["pflichtfelder", "betraege"],
        },
    )

    assert validate_response.status_code == 200
    validate_data = validate_response.json()
    assert "result" in validate_data
|
|
|
|
|
|
def test_integration_invalid_base64_error(client):
    """Test error scenario: invalid base64 in extract request.

    Expects ``/extract`` to answer with HTTP 400 and a JSON body whose
    ``error`` is ``"invalid_base64"`` and which carries a ``message`` key.
    """
    extract_response = client.post(
        "/extract", json={"pdf_base64": "not_valid_base64!!!"}
    )

    assert extract_response.status_code == 400
    extract_data = extract_response.json()
    assert extract_data["error"] == "invalid_base64"
    assert "message" in extract_data
|
|
|
|
|
|
def test_integration_non_zugferd_pdf_workflow(client):
    """Test workflow with non-ZUGFeRD PDF.

    Expects ``/extract`` to succeed but report no ZUGFeRD content: no
    profile and no XML data, while still returning ``pdf_text``.
    """
    pdf_base64 = read_pdf_as_base64("tests/fixtures/EmptyPDFA1.pdf")
    extract_response = client.post("/extract", json={"pdf_base64": pdf_base64})

    assert extract_response.status_code == 200
    extract_data = extract_response.json()
    assert extract_data["is_zugferd"] is False
    assert extract_data["zugferd_profil"] is None
    assert extract_data["xml_data"] is None
    assert "pdf_text" in extract_data

    # No /validate step: xml_data was just asserted to be None, so there is
    # nothing to validate (a guarded validate call here could never run).
|
|
|
|
|
|
def test_integration_various_validation_checks(client):
    """Test full workflow with different validation check combinations.

    Extracts the EN16931 fixture once, then calls ``/validate`` once per
    check list and expects HTTP 200 each time.
    """
    pdf_base64 = read_pdf_as_base64("tests/fixtures/EN16931_Einfach.pdf")
    extract_response = client.post("/extract", json={"pdf_base64": pdf_base64})

    assert extract_response.status_code == 200
    extract_data = extract_response.json()
    assert extract_data["is_zugferd"] is True

    # Same extraction payload, one validation request per check list.
    for checks in (["pflichtfelder"], ["betraege"]):
        validate_response = client.post(
            "/validate",
            json={
                "xml_data": extract_data["xml_data"],
                "pdf_text": extract_data["pdf_text"],
                "checks": checks,
            },
        )
        assert validate_response.status_code == 200
|
|
|
|
|
|
def test_integration_multiple_profiles_sequentially(client):
    """Test extraction from multiple ZUGFeRD profiles in sequence.

    Sends three fixtures to ``/extract`` one after another with the same
    client. Every request must return HTTP 200; the reported profile is
    only checked for the EN16931 fixture.
    """
    # (fixture path, expected zugferd_profil or None when it is not checked)
    cases = [
        ("tests/fixtures/EN16931_Einfach.pdf", "EN16931"),
        ("tests/fixtures/validAvoir_FR_type380_BASICWL.pdf", None),
        ("tests/fixtures/zugferd_2p1_EXTENDED_PDFA-3A.pdf", None),
    ]

    for fixture_path, expected_profile in cases:
        pdf_base64 = read_pdf_as_base64(fixture_path)
        response = client.post("/extract", json={"pdf_base64": pdf_base64})
        assert response.status_code == 200
        if expected_profile is not None:
            assert response.json()["zugferd_profil"] == expected_profile
|
|
|
|
|
|
def test_integration_empty_checks_list(client):
    """Test workflow with empty checks list in validation.

    Extracts the EN16931 fixture and calls ``/validate`` with
    ``"checks": []``; expects HTTP 200 and a ``result`` key in the body.
    """
    pdf_base64 = read_pdf_as_base64("tests/fixtures/EN16931_Einfach.pdf")
    extract_response = client.post("/extract", json={"pdf_base64": pdf_base64})

    assert extract_response.status_code == 200
    extract_data = extract_response.json()

    validate_response = client.post(
        "/validate",
        json={
            "xml_data": extract_data["xml_data"],
            "pdf_text": extract_data["pdf_text"],
            "checks": [],
        },
    )

    assert validate_response.status_code == 200
    validate_data = validate_response.json()
    assert "result" in validate_data
|
|
|
|
|
|
def test_integration_corrupt_xml_data_validation(client):
    """Test validation with corrupt or malformed XML data returns validation errors.

    Sends a hand-built ``xml_data`` dict with a non-numeric ``totals.net``
    straight to ``/validate`` (no extract step) and expects an invalid
    result with at least one error.
    """
    corrupt_data = {
        "invoice_number": "TEST-001",
        "totals": {"net": "invalid_number"},
    }

    validate_response = client.post(
        "/validate",
        json={
            "xml_data": corrupt_data,
            "pdf_text": "",
            "checks": ["pflichtfelder"],
        },
    )

    # Validator catches Pydantic errors and returns 200 with validation result
    assert validate_response.status_code == 200
    validate_data = validate_response.json()
    assert "result" in validate_data
    assert validate_data["result"]["is_valid"] is False
    assert len(validate_data["result"]["errors"]) > 0
|