feat(core): implement extractor, pdf_parser, and utils with TDD

Wave 2 tasks complete: - Task 4: ZUGFeRD extractor with profile detection (factur-x) - Task 5: PDF text parser with regex patterns - Task 6: Utils with unit code mapping and tolerance checks Features: - extract_zugferd() extracts XML and text from PDFs - parse_zugferd_xml() parses UN/CEFACT CII XML to models - extract_from_text() extracts values using regex patterns - translate_unit_code() maps UN/ECE codes to German - amounts_match() checks with 0.01 EUR tolerance - German number/date format handling Tests: 27 utils tests, 27 pdf_parser tests, extractor tests
2026-02-04 19:42:32 +01:00
parent 29bd8453ec
commit c1f603cd46
8 changed files with 1642 additions and 8 deletions
--- a/tests/test_extractor.py
+++ b/tests/test_extractor.py
@@ -0,0 +1,303 @@
+"""Tests for ZUGFeRD extractor.
+
+Tests are written following TDD: FAILING TESTS FIRST (RED phase),
+then implementation makes them pass (GREEN phase).
+"""
+
+import pytest
+import base64
+
+
+class TestExtractionError:
+    """Checks for the ExtractionError exception type."""
+
+    def test_extraction_error_initialization(self):
+        """All three constructor fields are stored as attributes."""
+        from src.extractor import ExtractionError
+
+        fields = {
+            "error_code": "corrupt_pdf",
+            "message": "PDF is corrupted",
+            "details": "Trailer not found",
+        }
+        exc = ExtractionError(**fields)
+        for attr, expected in fields.items():
+            assert getattr(exc, attr) == expected
+
+    def test_extraction_error_without_details(self):
+        """Omitting ``details`` leaves it as an empty string."""
+        from src.extractor import ExtractionError
+
+        exc = ExtractionError(error_code="invalid_pdf", message="Not a PDF file")
+        assert exc.details == ""
+        assert exc.message == "Not a PDF file"
+        assert exc.error_code == "invalid_pdf"
+
+    def test_extraction_error_is_exception(self):
+        """ExtractionError is an Exception whose str() is the message."""
+        from src.extractor import ExtractionError
+
+        exc = ExtractionError(error_code="file_too_large", message="File too large")
+        assert str(exc) == "File too large"
+        assert isinstance(exc, Exception)
+
+
+class TestFileSizeValidation:
+    """Test file size validation in extract_zugferd()."""
+
+    def test_file_size_limit_exactly_10mb(self):
+        """Test PDF of exactly 10MB is rejected with ``file_too_large``.
+
+        The assertion below pins the limit as exclusive: a payload of
+        exactly 10 * 1024 * 1024 bytes is already treated as too large.
+        """
+        from src.extractor import extract_zugferd, ExtractionError
+
+        # 10MB = 10 * 1024 * 1024 bytes
+        large_pdf = b"X" * (10 * 1024 * 1024)
+
+        with pytest.raises(ExtractionError) as exc_info:
+            extract_zugferd(large_pdf)
+
+        assert exc_info.value.error_code == "file_too_large"
+
+    def test_file_size_limit_10mb_plus_one_byte(self):
+        """Test PDF one byte over 10MB limit is rejected."""
+        from src.extractor import extract_zugferd, ExtractionError
+
+        # 10MB + 1 byte
+        too_large = b"X" * (10 * 1024 * 1024 + 1)
+
+        with pytest.raises(ExtractionError) as exc_info:
+            extract_zugferd(too_large)
+
+        assert exc_info.value.error_code == "file_too_large"
+
+    def test_file_size_under_10mb_accepted(self):
+        """Test PDF under 10MB is accepted for processing."""
+        from src.extractor import extract_zugferd, ExtractionError
+
+        # Small PDF (9MB)
+        small_pdf = b"X" * (9 * 1024 * 1024)
+
+        # Should process (even if invalid PDF, different error)
+        try:
+            extract_zugferd(small_pdf)
+        except ExtractionError as e:
+            # Different error is expected (not file_too_large)
+            assert e.error_code != "file_too_large"
+
+
+class TestNonZUGFeRDPDF:
+    """Extraction behaviour for a PDF that carries no ZUGFeRD XML."""
+
+    def test_non_zugferd_pdf(self):
+        """A non-ZUGFeRD PDF yields is_zugferd=False plus text and metadata."""
+        from src.extractor import extract_zugferd
+
+        # Read the non-ZUGFeRD sample fixture
+        with open("tests/fixtures/EmptyPDFA1.pdf", "rb") as fixture:
+            raw = fixture.read()
+
+        extracted = extract_zugferd(raw)
+
+        assert extracted.is_zugferd is False
+        for absent in ("zugferd_profil", "xml_raw", "xml_data"):
+            assert getattr(extracted, absent) is None
+        assert extracted.pdf_text is not None
+        assert len(extracted.pdf_text) > 0
+        meta = extracted.extraction_meta
+        assert meta.pages >= 1
+        assert meta.extraction_time_ms >= 0
+
+
+class TestEN16931Extraction:
+    """Extraction checks against the EN16931 sample invoice."""
+
+    def test_extract_en16931_profile(self):
+        """The EN16931 sample is recognised as ZUGFeRD with profile EN16931."""
+        from src.extractor import extract_zugferd
+
+        with open("tests/fixtures/EN16931_Einfach.pdf", "rb") as fixture:
+            raw = fixture.read()
+
+        extracted = extract_zugferd(raw)
+
+        assert extracted.is_zugferd is True
+        assert extracted.zugferd_profil == "EN16931"
+        assert extracted.xml_raw is not None and len(extracted.xml_raw) > 0
+        assert extracted.xml_data is not None
+        assert extracted.pdf_text is not None
+        meta = extracted.extraction_meta
+        assert meta.xml_attachment_name is not None
+        assert meta.pages >= 1
+        assert meta.extraction_time_ms >= 0
+
+    def test_extract_all_required_fields(self):
+        """Every XmlData section of the EN16931 sample carries usable values."""
+        from src.extractor import extract_zugferd
+
+        with open("tests/fixtures/EN16931_Einfach.pdf", "rb") as fixture:
+            raw = fixture.read()
+
+        extracted = extract_zugferd(raw)
+
+        assert extracted.xml_data is not None
+        invoice = extracted.xml_data
+
+        # Header values and top-level sections
+        assert invoice.invoice_number is not None and len(invoice.invoice_number) > 0
+        assert invoice.invoice_date is not None and len(invoice.invoice_date) > 0
+        assert invoice.supplier is not None
+        assert invoice.buyer is not None
+        assert invoice.line_items is not None
+        assert invoice.totals is not None
+
+        # Supplier party
+        assert invoice.supplier.name is not None and len(invoice.supplier.name) > 0
+
+        # Buyer party
+        assert invoice.buyer.name is not None and len(invoice.buyer.name) > 0
+
+        # First line item
+        assert len(invoice.line_items) > 0
+        item = invoice.line_items[0]
+        assert item.position >= 1
+        assert item.description is not None and len(item.description) > 0
+        assert item.quantity > 0
+        assert item.unit is not None and len(item.unit) > 0
+        assert item.unit_price > 0
+        assert item.line_total > 0
+
+        # Monetary totals
+        assert invoice.totals.line_total_sum > 0
+        assert invoice.totals.net > 0
+        assert invoice.totals.vat_total >= 0
+        assert invoice.totals.gross > 0
+
+
+class TestErrorHandling:
+    """Test error handling for various PDF issues."""
+
+    def test_corrupt_pdf_raises_error(self):
+        """Test corrupt PDF raises ExtractionError with correct code."""
+        from src.extractor import extract_zugferd, ExtractionError
+
+        # Invalid PDF data
+        corrupt_pdf = b"NOT A PDF FILE AT ALL"
+
+        with pytest.raises(ExtractionError) as exc_info:
+            extract_zugferd(corrupt_pdf)
+
+        # Should raise either corrupt_pdf or invalid_pdf
+        assert exc_info.value.error_code in ["corrupt_pdf", "invalid_pdf"]
+
+    def test_empty_pdf_raises_error(self):
+        """Test empty PDF raises ExtractionError."""
+        from src.extractor import extract_zugferd, ExtractionError
+
+        with pytest.raises(ExtractionError):
+            extract_zugferd(b"")
+
+    def test_invalid_base64(self):
+        """Test invalid base64 is rejected when decoded strictly.
+
+        Decoding is expected to happen in the API layer, so this pins the
+        contract that layer relies on: with ``validate=True`` malformed
+        input raises before any bytes could reach ``extract_zugferd``.
+        """
+        # Imported directly instead of via the undocumented
+        # ``base64.binascii`` attribute.
+        import binascii
+
+        invalid_base64 = b"$$$INVALID$$$"
+
+        # pytest.raises makes the test fail if decoding ever succeeds;
+        # a try/except that accepts every outcome cannot fail at all.
+        with pytest.raises(binascii.Error):
+            base64.b64decode(invalid_base64, validate=True)
+
+
+class TestPDFTextExtraction:
+    """Test PDF text extraction."""
+
+    def test_pdf_text_extraction(self):
+        """Test non-empty text is extracted from the EN16931 sample."""
+        from src.extractor import extract_zugferd
+
+        with open("tests/fixtures/EN16931_Einfach.pdf", "rb") as f:
+            pdf_bytes = f.read()
+
+        result = extract_zugferd(pdf_bytes)
+
+        assert result.pdf_text is not None
+        assert len(result.pdf_text) > 0
+        # Only presence of text is checked; the wording is not pinned
+        # because the text may contain invoice-related terms in German
+        # or English.
+
+
+class TestExtractionMeta:
+    """Checks on the metadata attached to an extraction result."""
+
+    def test_extraction_meta_populated(self):
+        """Metadata for the EN16931 sample reports pages and timing."""
+        from src.extractor import extract_zugferd
+
+        with open("tests/fixtures/EN16931_Einfach.pdf", "rb") as fixture:
+            raw = fixture.read()
+
+        meta = extract_zugferd(raw).extraction_meta
+
+        assert meta is not None
+        assert meta.pages >= 1
+        assert meta.extraction_time_ms >= 0
+
+    def test_extraction_meta_non_zugferd(self):
+        """Metadata for a non-ZUGFeRD PDF has no XML attachment name."""
+        from src.extractor import extract_zugferd
+
+        with open("tests/fixtures/EmptyPDFA1.pdf", "rb") as fixture:
+            raw = fixture.read()
+
+        meta = extract_zugferd(raw).extraction_meta
+
+        assert meta is not None
+        assert meta.pages >= 1
+        assert meta.extraction_time_ms >= 0
+        assert meta.xml_attachment_name is None
+
+
+class TestExtendedProfile:
+    """Extraction checks against the EXTENDED profile sample."""
+
+    def test_extract_extended_profile(self):
+        """The EXTENDED sample is recognised with profile EXTENDED."""
+        from src.extractor import extract_zugferd
+
+        with open("tests/fixtures/zugferd_2p1_EXTENDED_PDFA-3A.pdf", "rb") as fixture:
+            raw = fixture.read()
+
+        extracted = extract_zugferd(raw)
+
+        assert extracted.is_zugferd is True
+        assert extracted.zugferd_profil == "EXTENDED"
+        assert extracted.xml_data is not None
+
+
+class TestZUGFeRDProfileVariations:
+    """Profile detection across further ZUGFeRD samples."""
+
+    def test_detect_basicwl_profile(self):
+        """The BASIC WL sample is recognised as ZUGFeRD with some profile."""
+        from src.extractor import extract_zugferd
+
+        with open("tests/fixtures/validAvoir_FR_type380_BASICWL.pdf", "rb") as fixture:
+            raw = fixture.read()
+
+        extracted = extract_zugferd(raw)
+
+        assert extracted.is_zugferd is True
+        # The exact profile label is not pinned (BASIC, BASICWL, etc.)
+        assert extracted.zugferd_profil is not None
+        assert extracted.xml_data is not None
--- a/tests/test_pdf_parser.py
+++ b/tests/test_pdf_parser.py
@@ -0,0 +1,308 @@
+"""
+Unit tests for PDF text extraction and parsing.
+
+TDD approach: Tests written first, implementation follows.
+"""
+
+import pytest
+from src.pdf_parser import extract_text_from_pdf, extract_from_text
+
+
+class TestExtractTextFromPDF:
+    """Text extraction from raw PDF bytes via extract_text_from_pdf."""
+
+    def test_extract_text_from_sample_pdf(self):
+        """The EN16931_Einfach.pdf sample yields its expected wording."""
+        # Read the fixture bytes
+        with open("tests/fixtures/EN16931_Einfach.pdf", "rb") as fixture:
+            raw = fixture.read()
+
+        # Run the extraction
+        extracted = extract_text_from_pdf(raw)
+
+        # Something must come back
+        assert extracted is not None
+        assert len(extracted) > 0
+
+        # Known strings from the sample invoice
+        assert "Lieferant GmbH" in extracted
+        assert "Rechnung" in extracted
+
+    def test_extract_text_from_empty_pdf(self):
+        """An empty PDF still produces a string result."""
+        with open("tests/fixtures/EmptyPDFA1.pdf", "rb") as fixture:
+            raw = fixture.read()
+
+        extracted = extract_text_from_pdf(raw)
+
+        # Content may be empty or minimal; only the type is checked
+        assert isinstance(extracted, str)
+
+    def test_extract_text_from_invalid_pdf(self):
+        """Bytes that are not a PDF make the extractor raise."""
+        garbage = b"Not a valid PDF"
+
+        # Any exception type is accepted here
+        with pytest.raises(Exception):
+            extract_text_from_pdf(garbage)
+
+
+class TestExtractFromText:
+    """Regex-based extraction of invoice fields from plain text."""
+
+    def test_extract_invoice_number_german(self):
+        """A "Rechnungs-Nr" label yields the number with high confidence."""
+        snippet = "Rechnungs-Nr: RE-2025-001234"
+
+        fields = extract_from_text(snippet)
+
+        assert "invoice_number" in fields
+        assert fields["invoice_number"] == "RE-2025-001234"
+        assert "invoice_number_confidence" in fields
+        assert fields["invoice_number_confidence"] > 0.8
+
+    def test_extract_invoice_number_english(self):
+        """An "Invoice No" label yields the invoice number."""
+        snippet = "Invoice No: INV-2025-001234"
+
+        fields = extract_from_text(snippet)
+
+        assert "invoice_number" in fields
+        assert fields["invoice_number"] == "INV-2025-001234"
+
+    def test_extract_invoice_number_beleg(self):
+        """A "Beleg-Nr" label yields the invoice number."""
+        snippet = "Beleg-Nr: 471102"
+
+        fields = extract_from_text(snippet)
+
+        assert "invoice_number" in fields
+        assert fields["invoice_number"] == "471102"
+
+    def test_extract_invoice_date_german(self):
+        """A DD.MM.YYYY invoice date comes back in ISO form."""
+        snippet = "Rechnungsdatum: 04.02.2025"
+
+        fields = extract_from_text(snippet)
+
+        assert "invoice_date" in fields
+        assert fields["invoice_date"] == "2025-02-04"
+
+    def test_extract_invoice_date_iso(self):
+        """An ISO invoice date is returned as written."""
+        snippet = "Invoice Date: 2025-02-04"
+
+        fields = extract_from_text(snippet)
+
+        assert "invoice_date" in fields
+        assert fields["invoice_date"] == "2025-02-04"
+
+    def test_extract_gross_amount_german(self):
+        """A German-formatted gross amount is parsed, with a confidence."""
+        snippet = "Brutto: 1.234,56 EUR"
+
+        fields = extract_from_text(snippet)
+
+        assert "gross_amount" in fields
+        assert fields["gross_amount"] == 1234.56
+        assert "gross_amount_confidence" in fields
+
+    def test_extract_gross_amount_variations(self):
+        """Each listed gross-amount label is recognised."""
+        cases = [
+            ("Brutto: 118,88", 118.88),
+            ("Gesamtbetrag: 118,88 EUR", 118.88),
+            ("Total: 118.88", 118.88),
+            ("Endbetrag: 529,87", 529.87),
+            ("Summe: 100,00", 100.00),
+        ]
+
+        for line, amount in cases:
+            fields = extract_from_text(line)
+            assert "gross_amount" in fields
+            assert fields["gross_amount"] == amount
+
+    def test_extract_net_amount(self):
+        """A "Netto" label yields the net amount, with a confidence."""
+        snippet = "Netto: 100,00 EUR"
+
+        fields = extract_from_text(snippet)
+
+        assert "net_amount" in fields
+        assert fields["net_amount"] == 100.00
+        assert "net_amount_confidence" in fields
+
+    def test_extract_net_amount_rechnungsbetrag(self):
+        """A "Rechnungsbetrag" label is also read as the net amount."""
+        snippet = "Rechnungsbetrag: 473,00"
+
+        fields = extract_from_text(snippet)
+
+        assert "net_amount" in fields
+        assert fields["net_amount"] == 473.00
+
+    def test_extract_vat_amount(self):
+        """A "MwSt" label yields the VAT amount, with a confidence."""
+        snippet = "MwSt: 18,88 EUR"
+
+        fields = extract_from_text(snippet)
+
+        assert "vat_amount" in fields
+        assert fields["vat_amount"] == 18.88
+        assert "vat_amount_confidence" in fields
+
+    def test_extract_vat_amount_variations(self):
+        """Each listed VAT label is recognised."""
+        cases = [
+            ("MwSt: 56,87", 56.87),
+            ("USt: 18,88 EUR", 18.88),
+            ("Steuer: 19,00", 19.00),
+        ]
+
+        for line, amount in cases:
+            fields = extract_from_text(line)
+            assert "vat_amount" in fields
+            assert fields["vat_amount"] == amount
+
+    def test_extract_supplier_name(self):
+        """A "Lieferant" label yields the supplier name."""
+        snippet = "Lieferant: Lieferant GmbH"
+
+        fields = extract_from_text(snippet)
+
+        assert "supplier_name" in fields
+        assert fields["supplier_name"] == "Lieferant GmbH"
+
+    def test_extract_supplier_name_verkaeufer(self):
+        """A "Verkäufer" label yields the supplier name."""
+        snippet = "Verkäufer: ACME Corporation Inc."
+
+        fields = extract_from_text(snippet)
+
+        assert "supplier_name" in fields
+        assert fields["supplier_name"] == "ACME Corporation Inc."
+
+    def test_extract_all_fields_comprehensive(self):
+        """A multi-line invoice snippet yields all six fields at once."""
+        snippet = """
+        Rechnungs-Nr: RE-2025-001234
+        Rechnungsdatum: 04.02.2025
+        Lieferant: Lieferant GmbH
+        Netto: 100,00 EUR
+        MwSt: 18,88 EUR
+        Brutto: 118,88 EUR
+        """
+
+        fields = extract_from_text(snippet)
+
+        assert fields["invoice_number"] == "RE-2025-001234"
+        assert fields["invoice_date"] == "2025-02-04"
+        assert fields["supplier_name"] == "Lieferant GmbH"
+        assert fields["net_amount"] == 100.00
+        assert fields["vat_amount"] == 18.88
+        assert fields["gross_amount"] == 118.88
+
+    def test_confidence_scores_in_range(self):
+        """Every ``*_confidence`` value is a number within 0.0-1.0."""
+        snippet = """
+        Rechnungs-Nr: RE-2025-001234
+        Rechnungsdatum: 04.02.2025
+        Lieferant: Lieferant GmbH
+        Netto: 100,00 EUR
+        MwSt: 18,88 EUR
+        Brutto: 118,88 EUR
+        """
+
+        fields = extract_from_text(snippet)
+
+        confidence_keys = [key for key in fields if key.endswith("_confidence")]
+
+        for key in confidence_keys:
+            assert isinstance(fields[key], (int, float))
+            assert 0.0 <= fields[key] <= 1.0
+
+    def test_empty_text(self):
+        """An empty string is tolerated and still returns a dict."""
+        fields = extract_from_text("")
+
+        # Contents are not pinned: an empty dict or keys holding None
+        assert isinstance(fields, dict)
+
+    def test_no_matches(self):
+        """Text without any invoice labels still returns a dict."""
+        snippet = "This is just random text with no invoice data."
+
+        fields = extract_from_text(snippet)
+
+        assert isinstance(fields, dict)
+        # Individual values are expected to be None or absent
+
+
+class TestGermanNumberFormat:
+    """German-formatted amounts are converted to floats."""
+
+    def test_simple_decimal(self):
+        """Decimal comma only: 123,45"""
+        fields = extract_from_text("Brutto: 123,45")
+        amount = fields["gross_amount"]
+        assert amount == 123.45
+
+    def test_thousands_separator(self):
+        """Dot as thousands separator: 1.234,56"""
+        fields = extract_from_text("Brutto: 1.234,56")
+        amount = fields["gross_amount"]
+        assert amount == 1234.56
+
+    def test_large_amount(self):
+        """Five-digit amount: 10.000,00"""
+        fields = extract_from_text("Brutto: 10.000,00")
+        amount = fields["gross_amount"]
+        assert amount == 10000.00
+
+    def test_integer_amount(self):
+        """Whole amount with zero decimals: 100,00"""
+        fields = extract_from_text("Netto: 100,00")
+        amount = fields["net_amount"]
+        assert amount == 100.00
+
+
+class TestGermanDateFormat:
+    """German-formatted dates are converted to ISO strings."""
+
+    def test_dd_mm_yyyy(self):
+        """Two-digit day and month: DD.MM.YYYY becomes YYYY-MM-DD."""
+        fields = extract_from_text("Rechnungsdatum: 15.11.2024")
+        iso_date = fields["invoice_date"]
+        assert iso_date == "2024-11-15"
+
+    def test_d_m_yyyy(self):
+        """Single-digit day and month are zero-padded in the ISO result."""
+        fields = extract_from_text("Rechnungsdatum: 4.2.2025")
+        iso_date = fields["invoice_date"]
+        assert iso_date == "2025-02-04"
+
+
+class TestRealPDFExtraction:
+    """End-to-end field extraction against a real PDF fixture."""
+
+    def test_extract_from_en16931_sample(self):
+        """At least one field is recovered from EN16931_Einfach.pdf."""
+        with open("tests/fixtures/EN16931_Einfach.pdf", "rb") as fixture:
+            raw = fixture.read()
+
+        # Step 1: PDF bytes -> plain text
+        page_text = extract_text_from_pdf(raw)
+
+        # Step 2: plain text -> field dictionary
+        fields = extract_from_text(page_text)
+
+        # The extractor must return something
+        assert fields is not None
+        # Exact values depend on the PDF layout, so only require that
+        # some non-confidence entry carries a value
+        found = [
+            name
+            for name, value in fields.items()
+            if not name.endswith("_confidence") and value is not None
+        ]
+        assert len(found) > 0
--- a/tests/test_utils.py
+++ b/tests/test_utils.py
@@ -0,0 +1,208 @@
+"""Tests for utility functions."""
+
+import pytest
+
+from src.utils import (
+    UNECE_UNIT_CODES,
+    amounts_match,
+    parse_german_date,
+    parse_german_number,
+    round_decimal,
+    translate_unit_code,
+)
+
+
+class TestTranslateUnitCode:
+    """Checks for translate_unit_code and the UNECE_UNIT_CODES table."""
+
+    def test_translates_known_codes(self):
+        """Every known UN/ECE code maps to its German label."""
+        # Several codes share the same German label
+        piece_codes = ("C62", "H87", "PCE", "EA")
+        for code in piece_codes:
+            assert translate_unit_code(code) == "Stück"
+        assert translate_unit_code("KGM") == "Kilogramm"
+        assert translate_unit_code("GRM") == "Gramm"
+        assert translate_unit_code("TNE") == "Tonne"
+        assert translate_unit_code("MTR") == "Meter"
+        assert translate_unit_code("KMT") == "Kilometer"
+        assert translate_unit_code("MTK") == "Quadratmeter"
+        assert translate_unit_code("LTR") == "Liter"
+        assert translate_unit_code("MLT") == "Milliliter"
+        assert translate_unit_code("DAY") == "Tag"
+        assert translate_unit_code("HUR") == "Stunde"
+        assert translate_unit_code("MON") == "Monat"
+        assert translate_unit_code("ANN") == "Jahr"
+        assert translate_unit_code("SET") == "Set"
+
+    def test_returns_original_code_for_unknown(self):
+        """A code missing from the table is passed through untouched."""
+        for code in ("UNKNOWN", "XYZ"):
+            assert translate_unit_code(code) == code
+
+    def test_all_unit_codes_defined(self):
+        """The table holds exactly the 17 codes listed here."""
+        spec_codes = {
+            "C62",
+            "H87",
+            "PCE",
+            "EA",
+            "SET",
+            "GRM",
+            "KGM",
+            "TNE",
+            "MTR",
+            "KMT",
+            "MTK",
+            "MLT",
+            "LTR",
+            "DAY",
+            "HUR",
+            "MON",
+            "ANN",
+        }
+        assert set(UNECE_UNIT_CODES) == spec_codes
+
+
+class TestAmountsMatch:
+    """Checks for the amounts_match tolerance comparison."""
+
+    def test_exact_match(self):
+        """An amount always matches itself."""
+        for amount in (100.0, 123.45):
+            assert amounts_match(amount, amount) is True
+
+    def test_within_tolerance(self):
+        """A one-cent difference in either direction still matches."""
+        for other in (100.01, 99.99):
+            assert amounts_match(100.00, other) is True
+        for other in (123.44, 123.46):
+            assert amounts_match(123.45, other) is True
+
+    def test_at_tolerance_boundary(self):
+        """A difference equal to the default tolerance matches both ways."""
+        for a, b in ((100.00, 100.01), (100.01, 100.00)):
+            assert amounts_match(a, b) is True  # difference = 0.01
+
+    def test_beyond_tolerance(self):
+        """A two-cent difference does not match."""
+        pairs = ((100.00, 100.02), (100.00, 99.98), (0.00, 0.02))
+        for a, b in pairs:
+            assert amounts_match(a, b) is False
+
+    def test_with_custom_tolerance(self):
+        """An explicit ``tolerance`` replaces the default."""
+        assert amounts_match(100.0, 100.06, tolerance=0.05) is False
+        assert amounts_match(100.0, 100.05, tolerance=0.05) is True
+
+    def test_negative_amounts(self):
+        """Negative amounts follow the same tolerance rule."""
+        for other in (-100.01, -99.99):
+            assert amounts_match(-100.00, other) is True
+        assert amounts_match(-100.00, -100.02) is False
+
+    def test_zero_amounts(self):
+        """Zero matches zero and a one-cent amount."""
+        for other in (0.01, 0.00):
+            assert amounts_match(0.00, other) is True
+
+
+class TestParseGermanNumber:
+    """Checks for parse_german_number."""
+
+    def test_integer_without_separator(self):
+        """Plain digit strings parse to the matching float."""
+        for digits, value in (("123", 123.0), ("0", 0.0)):
+            assert parse_german_number(digits) == value
+
+    def test_decimal_without_thousands(self):
+        """A decimal comma without thousands separator is understood."""
+        for digits, value in (("123,45", 123.45), ("0,99", 0.99)):
+            assert parse_german_number(digits) == value
+
+    def test_with_thousands_separator(self):
+        """A dot is treated as the thousands separator."""
+        assert parse_german_number("123.456,78") == 123456.78
+        assert parse_german_number("12.345,67") == 12345.67
+        assert parse_german_number("1.234,56") == 1234.56
+
+    def test_large_number(self):
+        """More than one thousands separator is handled."""
+        assert parse_german_number("1.234.567,89") == 1234567.89
+
+    def test_round_number(self):
+        """Zero decimals after the comma yield a whole float."""
+        assert parse_german_number("1.234,00") == 1234.0
+
+    def test_negative_number(self):
+        """A leading minus sign is preserved."""
+        for digits, value in (("-1.234,56", -1234.56), ("-123,45", -123.45)):
+            assert parse_german_number(digits) == value
+
+
+class TestParseGermanDate:
+    """Checks for parse_german_date."""
+
+    def test_standard_date(self):
+        """DD.MM.YYYY input is converted to YYYY-MM-DD."""
+        for german, iso in (("04.02.2025", "2025-02-04"), ("01.01.2024", "2024-01-01")):
+            assert parse_german_date(german) == iso
+        assert parse_german_date("31.12.2023") == "2023-12-31"
+
+    def test_single_digit_day_or_month(self):
+        """Single-digit day or month parts are zero-padded."""
+        for german in ("4.2.2025", "04.2.2025"):
+            assert parse_german_date(german) == "2025-02-04"
+        assert parse_german_date("1.1.2024") == "2024-01-01"
+        assert parse_german_date("4.12.2025") == "2025-12-04"
+
+    def test_already_iso_format(self):
+        """ISO input comes back unchanged."""
+        for iso in ("2025-02-04", "2024-12-31"):
+            assert parse_german_date(iso) == iso
+
+    def test_invalid_format(self):
+        """Unrecognised input comes back unchanged."""
+        for raw in ("invalid", "01/02/2025"):
+            assert parse_german_date(raw) == raw
+
+    def test_only_dots_not_triple(self):
+        """Dotted input with fewer than three parts comes back unchanged."""
+        assert parse_german_date("01.02") == "01.02"
+
+
+class TestRoundDecimal:
+    """Checks for round_decimal."""
+
+    def test_default_two_places(self):
+        """With no second argument the value is rounded to two decimals."""
+        assert round_decimal(123.456) == 123.46
+        assert round_decimal(123.454) == 123.45
+        for tie, rounded in ((123.455, 123.46), (123.445, 123.45)):
+            assert round_decimal(tie) == rounded  # Standard rounding
+
+    def test_custom_places(self):
+        """The second argument selects the number of decimals."""
+        value = 123.4567
+        for places, rounded in ((3, 123.457), (0, 123.0), (4, 123.4567)):
+            assert round_decimal(value, places) == rounded
+
+    def test_rounding_up(self):
+        """Two-decimal results for 123.449 and 123.994."""
+        for value, rounded in ((123.449, 123.45), (123.994, 123.99)):
+            assert round_decimal(value, 2) == rounded
+
+    def test_rounding_down(self):
+        """A third decimal of 4 is dropped."""
+        for value, rounded in ((123.444, 123.44), (123.004, 123.0)):
+            assert round_decimal(value, 2) == rounded
+
+    def test_negative_numbers(self):
+        """Negative values round symmetrically to positive ones."""
+        for value, rounded in ((-123.456, -123.46), (-123.454, -123.45)):
+            assert round_decimal(value) == rounded
+
+    def test_zero(self):
+        """Zero and values that round to zero give 0.0."""
+        for value in (0.0, 0.004):
+            assert round_decimal(value) == 0.0