Fallback in extractor if the URN is not valid
This commit is contained in:
@@ -402,6 +402,23 @@ def get_pdf_page_count(pdf_bytes: bytes) -> int:
|
|||||||
return 0
|
return 0
|
||||||
|
|
||||||
|
|
||||||
|
def _profile_from_urn(urn: str) -> str:
|
||||||
|
"""Extract a short profile name from a Factur-X/ZUGFeRD URN.
|
||||||
|
|
||||||
|
Falls back to the last segment of the URN after '#', or 'unknown'.
|
||||||
|
|
||||||
|
Args:
|
||||||
|
urn: The full profile URN (e.g.
|
||||||
|
'urn:cen.eu:en16931:2017#compliant#urn:xeinkauf.de:kosit:xrechnung_3.0')
|
||||||
|
|
||||||
|
Returns:
|
||||||
|
Short profile name (e.g. 'xrechnung_3.0')
|
||||||
|
"""
|
||||||
|
if not urn:
|
||||||
|
return "unknown"
|
||||||
|
return urn.rsplit("#", maxsplit=1)[-1].rsplit(":", maxsplit=1)[-1]
|
||||||
|
|
||||||
|
|
||||||
def extract_zugferd(pdf_bytes: bytes) -> ExtractResponse:
|
def extract_zugferd(pdf_bytes: bytes) -> ExtractResponse:
|
||||||
"""Extract ZUGFeRD data from PDF bytes.
|
"""Extract ZUGFeRD data from PDF bytes.
|
||||||
|
|
||||||
@@ -460,7 +477,17 @@ def extract_zugferd(pdf_bytes: bytes) -> ExtractResponse:
|
|||||||
|
|
||||||
xml_root = etree.fromstring(xml_bytes)
|
xml_root = etree.fromstring(xml_bytes)
|
||||||
flavor = get_flavor(xml_root)
|
flavor = get_flavor(xml_root)
|
||||||
level = get_level(xml_root, flavor)
|
|
||||||
|
try:
|
||||||
|
level = get_level(xml_root, flavor)
|
||||||
|
except ValueError:
|
||||||
|
urn_nodes = xml_root.xpath(
|
||||||
|
"//rsm:ExchangedDocumentContext/"
|
||||||
|
"rsm:GuidelineSpecifiedDocumentContextParameter/ram:ID/text()",
|
||||||
|
namespaces=NAMESPACES,
|
||||||
|
)
|
||||||
|
urn = urn_nodes[0] if urn_nodes else ""
|
||||||
|
level = _profile_from_urn(urn)
|
||||||
|
|
||||||
xml_data = parse_zugferd_xml(xml_bytes)
|
xml_data = parse_zugferd_xml(xml_bytes)
|
||||||
pdf_text = extract_text_from_pdf(pdf_bytes)
|
pdf_text = extract_text_from_pdf(pdf_bytes)
|
||||||
|
|||||||
Reference in New Issue
Block a user