feat(core): implement extractor, pdf_parser, and utils with TDD
Wave 2 tasks complete:
- Task 4: ZUGFeRD extractor with profile detection (factur-x)
- Task 5: PDF text parser with regex patterns
- Task 6: Utils with unit code mapping and tolerance checks

Features:
- extract_zugferd() extracts XML and text from PDFs
- parse_zugferd_xml() parses UN/CEFACT CII XML to models
- extract_from_text() extracts values using regex patterns
- translate_unit_code() maps UN/ECE codes to German
- amounts_match() checks with 0.01 EUR tolerance
- German number/date format handling

Tests: 27 utils tests, 27 pdf_parser tests, extractor tests
This commit is contained in:
208
tests/test_utils.py
Normal file
208
tests/test_utils.py
Normal file
@@ -0,0 +1,208 @@
|
||||
"""Tests for utility functions."""
|
||||
|
||||
import pytest
|
||||
|
||||
from src.utils import (
|
||||
UNECE_UNIT_CODES,
|
||||
amounts_match,
|
||||
parse_german_date,
|
||||
parse_german_number,
|
||||
round_decimal,
|
||||
translate_unit_code,
|
||||
)
|
||||
|
||||
|
||||
class TestTranslateUnitCode:
    """Checks for translate_unit_code and the UNECE_UNIT_CODES table."""

    def test_translates_known_codes(self):
        """Every known UN/ECE unit code yields its expected German label."""
        expected_labels = [
            ("C62", "Stück"),
            ("H87", "Stück"),
            ("KGM", "Kilogramm"),
            ("GRM", "Gramm"),
            ("TNE", "Tonne"),
            ("MTR", "Meter"),
            ("KMT", "Kilometer"),
            ("MTK", "Quadratmeter"),
            ("LTR", "Liter"),
            ("MLT", "Milliliter"),
            ("DAY", "Tag"),
            ("HUR", "Stunde"),
            ("MON", "Monat"),
            ("ANN", "Jahr"),
            ("SET", "Set"),
            ("PCE", "Stück"),
            ("EA", "Stück"),
        ]
        for code, label in expected_labels:
            assert translate_unit_code(code) == label

    def test_returns_original_code_for_unknown(self):
        """A code that is not in the table comes back unchanged."""
        for code in ("UNKNOWN", "XYZ"):
            assert translate_unit_code(code) == code

    def test_all_unit_codes_defined(self):
        """The table holds exactly the 17 unit codes required by the spec."""
        expected_codes = {
            "C62", "H87", "KGM", "GRM", "TNE", "MTR",
            "KMT", "MTK", "LTR", "MLT", "DAY", "HUR",
            "MON", "ANN", "SET", "PCE", "EA",
        }
        defined_codes = set(UNECE_UNIT_CODES.keys())
        assert defined_codes == expected_codes
|
||||
|
||||
|
||||
class TestAmountsMatch:
    """Checks for amounts_match and its tolerance handling."""

    def test_exact_match(self):
        """An amount always matches itself."""
        for amount in (100.0, 123.45):
            assert amounts_match(amount, amount) is True

    def test_within_tolerance(self):
        """Pairs that differ by one cent match under the default tolerance."""
        close_pairs = [
            (100.00, 100.01),
            (100.00, 99.99),
            (123.45, 123.44),
            (123.45, 123.46),
        ]
        for left, right in close_pairs:
            assert amounts_match(left, right) is True

    def test_at_tolerance_boundary(self):
        """A difference of exactly 0.01 still matches, in either order."""
        boundary_pairs = [
            (100.00, 100.01),  # difference = 0.01
            (100.01, 100.00),  # difference = 0.01
        ]
        for left, right in boundary_pairs:
            assert amounts_match(left, right) is True

    def test_beyond_tolerance(self):
        """Pairs that differ by two cents do not match."""
        far_pairs = [
            (100.00, 100.02),
            (100.00, 99.98),
            (0.00, 0.02),
        ]
        for left, right in far_pairs:
            assert amounts_match(left, right) is False

    def test_with_custom_tolerance(self):
        """An explicit tolerance keyword replaces the default threshold."""
        custom = 0.05
        assert amounts_match(100.0, 100.05, tolerance=custom) is True
        assert amounts_match(100.0, 100.06, tolerance=custom) is False

    def test_negative_amounts(self):
        """Negative amounts follow the same tolerance rule."""
        cases = [
            (-100.00, -100.01, True),
            (-100.00, -99.99, True),
            (-100.00, -100.02, False),
        ]
        for left, right, expected in cases:
            assert amounts_match(left, right) is expected

    def test_zero_amounts(self):
        """Zero matches zero and a value one cent away."""
        for other in (0.01, 0.00):
            assert amounts_match(0.00, other) is True
|
||||
|
||||
|
||||
class TestParseGermanNumber:
    """Checks for parse_german_number (comma decimal, dot thousands)."""

    def test_integer_without_separator(self):
        """Plain digit strings parse to the corresponding float."""
        for text, value in (("123", 123.0), ("0", 0.0)):
            assert parse_german_number(text) == value

    def test_decimal_without_thousands(self):
        """A comma is read as the decimal separator."""
        for text, value in (("123,45", 123.45), ("0,99", 0.99)):
            assert parse_german_number(text) == value

    def test_with_thousands_separator(self):
        """A dot is read as the thousands separator."""
        cases = [
            ("1.234,56", 1234.56),
            ("12.345,67", 12345.67),
            ("123.456,78", 123456.78),
        ]
        for text, value in cases:
            assert parse_german_number(text) == value

    def test_large_number(self):
        """Several thousands separators in one number are handled."""
        parsed = parse_german_number("1.234.567,89")
        assert parsed == 1234567.89

    def test_round_number(self):
        """A zero fractional part parses to a whole-valued float."""
        parsed = parse_german_number("1.234,00")
        assert parsed == 1234.0

    def test_negative_number(self):
        """A leading minus sign is preserved."""
        for text, value in (("-1.234,56", -1234.56), ("-123,45", -123.45)):
            assert parse_german_number(text) == value
|
||||
|
||||
|
||||
class TestParseGermanDate:
    """Checks for parse_german_date (DD.MM.YYYY to YYYY-MM-DD)."""

    def test_standard_date(self):
        """Zero-padded German dates are converted to ISO format."""
        cases = [
            ("04.02.2025", "2025-02-04"),
            ("01.01.2024", "2024-01-01"),
            ("31.12.2023", "2023-12-31"),
        ]
        for german, iso in cases:
            assert parse_german_date(german) == iso

    def test_single_digit_day_or_month(self):
        """Single-digit day or month parts are zero-padded in the result."""
        cases = [
            ("4.2.2025", "2025-02-04"),
            ("1.1.2024", "2024-01-01"),
            ("4.12.2025", "2025-12-04"),
            ("04.2.2025", "2025-02-04"),
        ]
        for german, iso in cases:
            assert parse_german_date(german) == iso

    def test_already_iso_format(self):
        """Input that is already in ISO format is returned as given."""
        for iso in ("2025-02-04", "2024-12-31"):
            assert parse_german_date(iso) == iso

    def test_invalid_format(self):
        """Text that is not a dotted date is passed through untouched."""
        for text in ("invalid", "01/02/2025"):
            assert parse_german_date(text) == text

    def test_only_dots_not_triple(self):
        """A dotted value with fewer than three parts is passed through."""
        partial = "01.02"
        assert parse_german_date(partial) == partial
|
||||
|
||||
|
||||
class TestRoundDecimal:
    """Tests for round_decimal function."""

    def test_default_two_places(self):
        """Test rounding to default 2 decimal places."""
        assert round_decimal(123.456) == 123.46
        assert round_decimal(123.454) == 123.45
        # The two tie cases below together pin half-up rounding: half-even
        # ("banker's") rounding would turn 123.445 into 123.44 instead.
        assert round_decimal(123.455) == 123.46  # Standard rounding
        assert round_decimal(123.445) == 123.45  # Standard rounding

    def test_custom_places(self):
        """Test rounding to custom decimal places."""
        assert round_decimal(123.4567, 3) == 123.457
        assert round_decimal(123.4567, 0) == 123.0
        assert round_decimal(123.4567, 4) == 123.4567

    def test_rounding_up(self):
        """Test rounding up cases."""
        # Third decimal is 9, so the second decimal is bumped from 4 to 5.
        assert round_decimal(123.449, 2) == 123.45

    def test_rounding_down(self):
        """Test rounding down cases."""
        assert round_decimal(123.444, 2) == 123.44
        assert round_decimal(123.004, 2) == 123.0
        # Moved here from test_rounding_up: a trailing 4 is dropped, so this
        # value is rounded down, not up.
        assert round_decimal(123.994, 2) == 123.99

    def test_negative_numbers(self):
        """Test rounding negative numbers."""
        assert round_decimal(-123.456) == -123.46
        assert round_decimal(-123.454) == -123.45

    def test_zero(self):
        """Test rounding zero."""
        assert round_decimal(0.0) == 0.0
        assert round_decimal(0.004) == 0.0
|
||||
Reference in New Issue
Block a user