"""Integration tests for full ZUGFeRD workflow: extract → validate.""" import base64 import pytest from fastapi.testclient import TestClient from src.main import app @pytest.fixture def client(): """Create TestClient fixture for FastAPI app.""" return TestClient(app) def read_pdf_as_base64(filepath: str) -> str: """Helper function to read a PDF file and encode as base64. Args: filepath: Path to the PDF file. Returns: Base64-encoded PDF content as string. """ with open(filepath, "rb") as f: return base64.b64encode(f.read()).decode() def test_integration_en16931_full_workflow(client): """Test full workflow: extract → validate with EN16931 invoice.""" pdf_base64 = read_pdf_as_base64("tests/fixtures/EN16931_Einfach.pdf") extract_response = client.post("/extract", json={"pdf_base64": pdf_base64}) assert extract_response.status_code == 200 extract_data = extract_response.json() assert extract_data["is_zugferd"] is True assert extract_data["zugferd_profil"] == "EN16931" assert "xml_data" in extract_data assert "pdf_text" in extract_data validate_response = client.post( "/validate", json={ "xml_data": extract_data["xml_data"], "pdf_text": extract_data["pdf_text"], "checks": ["pflichtfelder", "betraege", "ustid", "pdf_abgleich"], }, ) assert validate_response.status_code == 200 validate_data = validate_response.json() assert "result" in validate_data assert "is_valid" in validate_data["result"] def test_integration_basic_wl_full_workflow(client): """Test full workflow: extract → validate with BASIC WL invoice.""" pdf_base64 = read_pdf_as_base64("tests/fixtures/validAvoir_FR_type380_BASICWL.pdf") extract_response = client.post("/extract", json={"pdf_base64": pdf_base64}) assert extract_response.status_code == 200 extract_data = extract_response.json() assert extract_data["is_zugferd"] is True assert "xml_data" in extract_data validate_response = client.post( "/validate", json={ "xml_data": extract_data["xml_data"], "pdf_text": extract_data["pdf_text"], "checks": ["pflichtfelder"], }, ) assert validate_response.status_code == 200 validate_data = validate_response.json() assert "result" in validate_data def test_integration_extended_profile_full_workflow(client): """Test full workflow: extract → validate with EXTENDED profile.""" pdf_base64 = read_pdf_as_base64("tests/fixtures/zugferd_2p1_EXTENDED_PDFA-3A.pdf") extract_response = client.post("/extract", json={"pdf_base64": pdf_base64}) assert extract_response.status_code == 200 extract_data = extract_response.json() assert extract_data["is_zugferd"] is True assert "xml_data" in extract_data validate_response = client.post( "/validate", json={ "xml_data": extract_data["xml_data"], "pdf_text": extract_data["pdf_text"], "checks": ["pflichtfelder", "betraege"], }, ) assert validate_response.status_code == 200 validate_data = validate_response.json() assert "result" in validate_data def test_integration_invalid_base64_error(client): """Test error scenario: invalid base64 in extract request.""" extract_response = client.post( "/extract", json={"pdf_base64": "not_valid_base64!!!"} ) assert extract_response.status_code == 400 extract_data = extract_response.json() assert extract_data["error"] == "invalid_base64" assert "message" in extract_data def test_integration_non_zugferd_pdf_workflow(client): """Test workflow with non-ZUGFeRD PDF.""" pdf_base64 = read_pdf_as_base64("tests/fixtures/EmptyPDFA1.pdf") extract_response = client.post("/extract", json={"pdf_base64": pdf_base64}) assert extract_response.status_code == 200 extract_data = extract_response.json() assert extract_data["is_zugferd"] is False assert extract_data["zugferd_profil"] is None assert "pdf_text" in extract_data validate_response = client.post( "/validate", json={ "xml_data": extract_data.get("xml_data", {}), "pdf_text": extract_data["pdf_text"], "checks": ["pflichtfelder"], }, ) assert validate_response.status_code == 200 validate_data = validate_response.json() assert "result" in validate_data def test_integration_various_validation_checks(client): """Test full workflow with different validation check combinations.""" pdf_base64 = read_pdf_as_base64("tests/fixtures/EN16931_Einfach.pdf") extract_response = client.post("/extract", json={"pdf_base64": pdf_base64}) assert extract_response.status_code == 200 extract_data = extract_response.json() assert extract_data["is_zugferd"] is True validate_response = client.post( "/validate", json={ "xml_data": extract_data["xml_data"], "pdf_text": extract_data["pdf_text"], "checks": ["pflichtfelder"], }, ) assert validate_response.status_code == 200 validate_response = client.post( "/validate", json={ "xml_data": extract_data["xml_data"], "pdf_text": extract_data["pdf_text"], "checks": ["betraege"], }, ) assert validate_response.status_code == 200 def test_integration_multiple_profiles_sequentially(client): """Test extraction from multiple ZUGFeRD profiles in sequence.""" pdf_base64 = read_pdf_as_base64("tests/fixtures/EN16931_Einfach.pdf") response = client.post("/extract", json={"pdf_base64": pdf_base64}) assert response.status_code == 200 assert response.json()["zugferd_profil"] == "EN16931" pdf_base64 = read_pdf_as_base64("tests/fixtures/validAvoir_FR_type380_BASICWL.pdf") response = client.post("/extract", json={"pdf_base64": pdf_base64}) assert response.status_code == 200 pdf_base64 = read_pdf_as_base64("tests/fixtures/zugferd_2p1_EXTENDED_PDFA-3A.pdf") response = client.post("/extract", json={"pdf_base64": pdf_base64}) assert response.status_code == 200 def test_integration_empty_checks_list(client): """Test workflow with empty checks list in validation.""" pdf_base64 = read_pdf_as_base64("tests/fixtures/EN16931_Einfach.pdf") extract_response = client.post("/extract", json={"pdf_base64": pdf_base64}) assert extract_response.status_code == 200 extract_data = extract_response.json() validate_response = client.post( "/validate", json={ "xml_data": extract_data["xml_data"], "pdf_text": extract_data["pdf_text"], "checks": [], }, ) assert validate_response.status_code == 200 validate_data = validate_response.json() assert "result" in validate_data def test_integration_corrupt_xml_data_validation(client): """Test validation with corrupt or malformed XML data.""" corrupt_data = { "invoice_number": "TEST-001", "totals": {"net": "invalid_number"}, } validate_response = client.post( "/validate", json={ "xml_data": corrupt_data, "pdf_text": "", "checks": ["pflichtfelder"], }, ) assert validate_response.status_code == 200 validate_data = validate_response.json() assert "result" in validate_data