# risp/data/configs/euv.toml
# Identity of the law this configuration describes.
[law]
id = 10008048
name = "EUV"
# NOTE(review): presumably the marker that introduces each article in the
# source text; several replace rules in this file rewrite "Artikel N" to this
# upper-case spelling. Confirm against the consumer.
par_sign = "ARTIKEL"
# Classifier entries, kept in their original order. Only "Titel" is flagged
# as root; every entry uses the "contains" match function.
classifiers = [
    { name = "Titel", is_root = true, match_function = "contains" },
    { name = "Kapitel", is_root = false, match_function = "contains" },
    { name = "Abschnitt", is_root = false, match_function = "contains" },
]
# Parser settings for this law.
[parser]
move_para_headers_into_content = true
# Markup fragments listed for removal, one per line.
# NOTE(review): presumably stripped verbatim from the input — confirm.
# The last entry is a literal string so its inner double quotes need no escaping.
remove_strings = [
    "<tab />",
    "<b>",
    "</b>",
    "<i>",
    "</i>",
    "<n>",
    "</n>",
    "<super>",
    "</super>",
    '<abstand ct="text" halign="l" />',
]
# Substitution rules, kept in their original order.
# NOTE(review): the order may matter if the consumer applies them in sequence.

# Swap the <gdash /> element for a plain hyphen.
[[parser.replace_rules]]
find = "<gdash />"
replace_with = "-"

# Upper-case selected article markers to the "ARTIKEL" spelling used by
# law.par_sign.
# NOTE(review): if matching is a plain substring search, "Artikel 7" would
# also hit longer numbers such as "Artikel 70" — confirm this is intended.
[[parser.replace_rules]]
find = "Artikel 7"
replace_with = "ARTIKEL 7"

[[parser.replace_rules]]
find = "Artikel 38"
replace_with = "ARTIKEL 38"

[[parser.replace_rules]]
find = "Artikel 42"
replace_with = "ARTIKEL 42"

# Drop the "+" that precedes this one article marker.
[[parser.replace_rules]]
find = "+ARTIKEL 48"
replace_with = "ARTIKEL 48"
# Re-tag <absatz typ="erltext"> elements as typ="abs". Two rules cover the
# two cases the source text contains.

# Case 1: 'erltext' paragraphs that have subsections. The match requires a
# "(" directly after the opening tag.
[[parser.replace_rules]]
find = '<absatz typ="erltext" ct="text" halign="j">('
replace_with = '<absatz typ="abs" ct="text" halign="j">('

# Case 2: 'erltext' paragraphs that consist of a single section. The match
# requires the opening tag to follow a closing "ueberschrift" tag directly.
[[parser.replace_rules]]
find = '/ueberschrift><absatz typ="erltext" ct="text" halign="j">'
replace_with = '/ueberschrift><absatz typ="abs" ct="text" halign="j">'