Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 4 additions & 2 deletions src/fundamend/reader/ahbreader.py
Original file line number Diff line number Diff line change
Expand Up @@ -31,7 +31,7 @@
_is_segment_group,
_is_uebertragungsdatei,
)
from fundamend.utils import lstrip, remove_linebreaks_and_hyphens, strip
from fundamend.utils import lstrip, remove_linebreaks_and_hyphens, remove_unnecessary_hyphens, strip

# pylint:disable=duplicate-code
# yes, it's very similar to the MigReader
Expand Down Expand Up @@ -252,7 +252,9 @@ def _read_anwendungsfall(self, original_element: ET.Element) -> Anwendungsfall:
format_element = next((child for child in original_element[0] if child.tag.startswith("M_")))
return Anwendungsfall(
pruefidentifikator=original_element.attrib["Pruefidentifikator"],
beschreibung=remove_linebreaks_and_hyphens(original_element.attrib["Beschreibung"]),
beschreibung=remove_unnecessary_hyphens(
remove_linebreaks_and_hyphens(original_element.attrib["Beschreibung"])
),
kommunikation_von=original_element.attrib["Kommunikation_von"].strip(),
format=EdifactFormat(lstrip("M_", format_element.tag)),
elements=tuple(segments_and_groups),
Expand Down
30 changes: 28 additions & 2 deletions src/fundamend/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,10 +3,13 @@
"""

import re
from typing import Optional
from typing import Optional, overload

from fundamend.models.kommunikationsrichtung import Kommunikationsrichtung

# Matches a single hyphen that sits directly between two lowercase letters
# (a-z, ä, ü, ö, ß). The lookbehind/lookahead assertions are zero-width, so only
# the hyphen itself is part of the match; the neighbouring letters are not consumed.
# A hyphen next to an uppercase letter, digit, slash or whitespace does not match.
_unnecessary_hyphen_pattern = re.compile(r"(?<=[a-zäüöß])-(?=[a-zäüöß])")
"""if before AND after a hyphen there are only lower case letters, then we can probably remove it"""


def lstrip(prefix: str, text: str) -> str:
"""Strip the given prefix from the given text. If the text does not start with the prefix, return the text as is.
Expand Down Expand Up @@ -143,4 +146,27 @@ def parse_kommunikation_von(kommunikation_von: Optional[str]) -> list[Kommunikat
return result


__all__ = ["lstrip", "rstrip", "strip", "parse_kommunikation_von", "remove_linebreaks_and_hyphens"]
@overload
def remove_unnecessary_hyphens(candidate: str) -> str: ...
@overload
def remove_unnecessary_hyphens(candidate: None) -> None: ...
def remove_unnecessary_hyphens(candidate: Optional[str]) -> Optional[str]:
    """
    Drop hyphens inside words that are most likely superfluous.

    Every hyphen matched by ``_unnecessary_hyphen_pattern`` is deleted; all
    occurrences in the text are handled in one pass. ``None`` is passed
    through unchanged.

    Examples:
        "Ausfallarbeits-summenzeitreihe" -> "Ausfallarbeitssummenzeitreihe"
        "Bestäti-gung" -> "Bestätigung"
        "Bestäti-gung der Stornier-ung" -> "Bestätigung der Stornierung"
        "Sperr-/Entsperrauftrag" -> "Sperr-/Entsperrauftrag" (left as is)
    """
    return None if candidate is None else _unnecessary_hyphen_pattern.sub("", candidate)


# Names exported by a wildcard import of this module.
__all__ = [
"lstrip",
"rstrip",
"strip",
"parse_kommunikation_von",
"remove_linebreaks_and_hyphens",
"remove_unnecessary_hyphens",
]
Loading