fallback in extractor if URN not valid
This commit is contained in:
@@ -402,6 +402,23 @@ def get_pdf_page_count(pdf_bytes: bytes) -> int:
|
||||
return 0
|
||||
|
||||
|
||||
def _profile_from_urn(urn: str) -> str:
    """Extract a short profile name from a Factur-X/ZUGFeRD URN.

    The short name is whatever follows the last ':' inside the last
    '#'-separated segment of the URN. Leading/trailing whitespace (as
    commonly found around XML text nodes) is ignored. If no non-empty
    name can be derived, 'unknown' is returned.

    Args:
        urn: The full profile URN (e.g.
            'urn:cen.eu:en16931:2017#compliant#urn:xeinkauf.de:kosit:xrechnung_3.0')

    Returns:
        Short profile name (e.g. 'xrechnung_3.0'), or 'unknown' when the
        URN is empty, whitespace-only, or ends in a bare '#' / ':'.
    """
    if not urn:
        return "unknown"

    # Only the segment after the final '#' carries the concrete profile.
    last_segment = urn.strip().rsplit("#", maxsplit=1)[-1]
    # Within that segment the profile name is the part after the final ':'.
    profile = last_segment.rsplit(":", maxsplit=1)[-1].strip()

    # A trailing '#' or ':' leaves nothing usable; never return "".
    return profile or "unknown"
|
||||
|
||||
|
||||
def extract_zugferd(pdf_bytes: bytes) -> ExtractResponse:
|
||||
"""Extract ZUGFeRD data from PDF bytes.
|
||||
|
||||
@@ -460,7 +477,17 @@ def extract_zugferd(pdf_bytes: bytes) -> ExtractResponse:
|
||||
|
||||
xml_root = etree.fromstring(xml_bytes)
|
||||
flavor = get_flavor(xml_root)
|
||||
level = get_level(xml_root, flavor)
|
||||
|
||||
try:
|
||||
level = get_level(xml_root, flavor)
|
||||
except ValueError:
|
||||
urn_nodes = xml_root.xpath(
|
||||
"//rsm:ExchangedDocumentContext/"
|
||||
"rsm:GuidelineSpecifiedDocumentContextParameter/ram:ID/text()",
|
||||
namespaces=NAMESPACES,
|
||||
)
|
||||
urn = urn_nodes[0] if urn_nodes else ""
|
||||
level = _profile_from_urn(urn)
|
||||
|
||||
xml_data = parse_zugferd_xml(xml_bytes)
|
||||
pdf_text = extract_text_from_pdf(pdf_bytes)
|
||||
|
||||
Reference in New Issue
Block a user