add hacks for abgb
This commit is contained in:
38
src/law.rs
38
src/law.rs
@ -108,6 +108,23 @@ pub(crate) fn contains(classifier_name: &str, instance_name: &str) -> bool {
|
||||
.contains(&classifier_name.to_lowercase())
|
||||
}
|
||||
|
||||
/// Reports whether `instance_name` begins with a Roman numeral from I to XX
/// that is immediately followed by a period (e.g. `"IV. Foo"`).
///
/// The first argument (the classifier name) is ignored; it exists so that this
/// function has the same two-`&str` signature as the other classifier
/// predicates. No trimming is performed, so leading whitespace makes the check
/// fail.
fn starts_with_roman_number(_classifier_name: &str, instance_name: &str) -> bool {
    // Roman numerals I through XX; larger values are not recognized.
    const ROMAN_NUMERALS: [&str; 20] = [
        "I", "II", "III", "IV", "V", "VI", "VII", "VIII", "IX", "X", "XI", "XII", "XIII", "XIV",
        "XV", "XVI", "XVII", "XVIII", "XIX", "XX",
    ];

    // None of the numerals contains a '.', so "starts with `<numeral>.`" is the
    // same as "the text before the first '.' is exactly one of the numerals".
    // Splitting once avoids building a temporary `String` per candidate.
    instance_name
        .split_once('.')
        .map_or(false, |(head, _rest)| ROMAN_NUMERALS.contains(&head))
}
|
||||
|
||||
/// Reports whether `instance_name` begins with the character `@`.
///
/// The classifier name is ignored.
fn contains_at_start(_classifier_name: &str, instance_name: &str) -> bool {
    // `starts_with` already yields `false` for an empty string, so no separate
    // emptiness check is needed.
    instance_name.starts_with('@')
}
|
||||
|
||||
/// Reports whether the first non-whitespace character of `instance_name` is an
/// ASCII digit (`0`-`9`).
///
/// The classifier name is ignored.
fn starts_with_number(_classifier_name: &str, instance_name: &str) -> bool {
    instance_name
        .trim_start()
        .starts_with(|first: char| first.is_ascii_digit())
}
|
||||
@ -195,6 +212,20 @@ impl LawBuilder {
|
||||
|
||||
classifiers.push(Classifier::new("Abschnitt", Arc::new(&contains)).root());
|
||||
classifiers.push(Classifier::new("Number", Arc::new(&starts_with_number)));
|
||||
} else if name == "ABGB" {
|
||||
law_id = Some(10001622);
|
||||
|
||||
classifiers.push(Classifier::new("Einleitung", Arc::new(&contains)).root());
|
||||
classifiers.push(Classifier::new("Theil", Arc::new(&contains)).root());
|
||||
|
||||
classifiers.push(Classifier::new("Hauptstück", Arc::new(&contains)));
|
||||
classifiers.push(Classifier::new("Abschnitt", Arc::new(&contains)));
|
||||
classifiers.push(Classifier::new("Abtheilung", Arc::new(&contains)));
|
||||
|
||||
classifiers.push(Classifier::new("heading", Arc::new(&contains_at_start)));
|
||||
classifiers.push(Classifier::new("letter", Arc::new(&starts_with_letter)));
|
||||
classifiers.push(Classifier::new("num", Arc::new(&starts_with_number)));
|
||||
classifiers.push(Classifier::new("rom", Arc::new(&starts_with_roman_number)));
|
||||
}
|
||||
|
||||
let mut builder = Self {
|
||||
@ -209,7 +240,7 @@ impl LawBuilder {
|
||||
|
||||
let paragraphs = overview::parse(law_id.unwrap()).unwrap();
|
||||
|
||||
for paragraph in paragraphs {
|
||||
for paragraph in tqdm::tqdm(paragraphs.into_iter()) {
|
||||
let cont = par::parse(&paragraph, &mut builder).unwrap();
|
||||
if !cont {
|
||||
break;
|
||||
@ -332,6 +363,11 @@ impl LawBuilder {
|
||||
pub(crate) fn new_next_para_header(&mut self, header: &str) {
|
||||
#[cfg(test)]
|
||||
self.history.push(format!("New_new_para_header: {header}"));
|
||||
|
||||
if let Some(next_para_header) = &self.next_para_header {
|
||||
self.new_header(&next_para_header.clone()); // promote to bigger header :-)
|
||||
}
|
||||
|
||||
debug!("new_next_para_header={header}");
|
||||
self.next_para_header = Some(header.trim().into());
|
||||
}
|
||||
|
@ -42,7 +42,7 @@ impl From<roxmltree::Error> for Error {
|
||||
|
||||
/// Program entry point: initializes `env_logger`, builds the "ABGB" law through
/// `LawBuilder::new` and calls `to_md` on the result.
fn main() {
    env_logger::init();

    // Only one law is built per run. The earlier `LawBuilder::new("MSchG")`
    // binding was shadowed immediately by this one and never used, so it has
    // been dropped.
    let law = LawBuilder::new("ABGB");

    law.to_md();
}
|
||||
|
@ -28,11 +28,50 @@ pub(crate) fn parse_from_str(xml: &str, builder: &mut LawBuilder) -> Result<bool
|
||||
r#"<ueberschrift typ="g1" ct="text" halign="c">1. Verwertungsrechte.</ueberschrift>"#,
|
||||
); // 1. Verwertungsrechte. before § 14
|
||||
|
||||
let xml = xml.replace(
|
||||
r#"<ueberschrift typ="g1" ct="text" halign="c">Medizinische Behandlung</ueberschrift>"#,
|
||||
r#"<ueberschrift typ="art" ct="text" halign="c">Medizinische Behandlung</ueberschrift>"#,
|
||||
); // 1. Verwertungsrechte. before § 14
|
||||
|
||||
let xml = xml.replace(
|
||||
r#"<ueberschrift typ="para" ct="text" halign="c">4b. Presseveröffentlichungen.</ueberschrift>"#,
|
||||
r#"<ueberschrift typ="g1" ct="text" halign="c">4b. Presseveröffentlichungen.</ueberschrift>"#,
|
||||
); // § 99d UrhG, Titel kein Para.... //TODO: not working
|
||||
|
||||
let xml = xml.replace(
|
||||
r#"<ueberschrift typ="para" ct="text" halign="c">Erfordernisse zur Ersitzung:</ueberschrift>"#,
|
||||
r#"<ueberschrift typ="art" ct="text" halign="c">Erfordernisse zur Ersitzung:</ueberschrift>"#,
|
||||
);
|
||||
|
||||
let xml = xml.replace(
|
||||
r#"<ueberschrift typ="art" ct="text" halign="c">"#,
|
||||
r#"<ueberschrift typ="g1" ct="text" halign="c">@"#,
|
||||
);
|
||||
|
||||
let xml = xml.replace(
|
||||
r#"<ueberschrift typ="g1" ct="text" halign="c">Zweyter Theil</ueberschrift><ueberschrift typ="g1" ct="text" halign="c">des</ueberschrift><ueberschrift typ="g1" ct="text" halign="c">bürgerlichen Gesetzbuches.</ueberschrift><ueberschrift typ="g1" ct="text" halign="c">Von dem Sachenrechte.</ueberschrift>"#,
|
||||
r#"<ueberschrift typ="g1" ct="text" halign="c">Zweyter Theil des bürgerlichen Gesetzbuches. Von dem Sachenrechte.</ueberschrift>"#,
|
||||
);
|
||||
|
||||
let xml = xml.replace(
|
||||
r#"<ueberschrift typ="g1" ct="text" halign="c">Dritter Theil</ueberschrift><ueberschrift typ="g1min" ct="text" halign="c">des</ueberschrift><ueberschrift typ="g1min" ct="text" halign="c">bürgerlichen Gesetzbuches.</ueberschrift>"#,
|
||||
r#"<ueberschrift typ="g1" ct="text" halign="c">Dritter Theil des bürgerlichen Gesetzbuches.</ueberschrift>"#,
|
||||
);
|
||||
let xml = xml.replace(
|
||||
r#"<ueberschrift typ="g1" ct="text" halign="c">Von den gemeinschaftlichen Bestimmungen der Personen- und Sachenrechte.</ueberschrift>"#,
|
||||
r#"<ueberschrift typ="g2" ct="text" halign="c">Von den gemeinschaftlichen Bestimmungen der Personen- und Sachenrechte.</ueberschrift>"#,
|
||||
);
|
||||
|
||||
let xml = xml.replace(
|
||||
r#"<ueberschrift typ="g1" ct="text" halign="c">Erste Abtheilung</ueberschrift><ueberschrift typ="g2" ct="text" halign="c">des Sachenrechtes.</ueberschrift>"#,
|
||||
r#"<ueberschrift typ="g1" ct="text" halign="c">Erste Abtheilung des Sachenrechtes.</ueberschrift>"#,
|
||||
);
|
||||
|
||||
let xml = xml.replace(
|
||||
r#"<ueberschrift typ="g1min" ct="text" halign="c">Von den dinglichen Rechten.</ueberschrift>"#,
|
||||
r#"<ueberschrift typ="g2" ct="text" halign="c">Von den dinglichen Rechten.</ueberschrift>"#,
|
||||
);
|
||||
|
||||
let xml = xml.replace("<super>", ""); // e.g. § 23a in MSchG
|
||||
let xml = xml.replace("</super>", ""); // e.g. § 23a in MSchG
|
||||
let xml = xml.replace("<i>", ""); // § 69 in MSchG
|
||||
@ -44,6 +83,8 @@ pub(crate) fn parse_from_str(xml: &str, builder: &mut LawBuilder) -> Result<bool
|
||||
let xml = xml.replace("<b>", ""); // § 14 in FSG
|
||||
let xml = xml.replace("</b>", "");
|
||||
|
||||
let xml = xml.replace("<tab />", "");
|
||||
|
||||
let xml = xml.replace(
|
||||
r#"(Anm.: § 69 aufgehoben durch Art. 1 Z 12, BGBl. I Nr. 124/2017)"#,
|
||||
r#"<gldsym>§ 69.</gldsym>(Anm.: § 69 aufgehoben durch Art. 1 Z 12, BGBl. I Nr. 124/2017)"#,
|
||||
|
@ -106,6 +106,13 @@ impl Abschnitt {
|
||||
}
|
||||
}
|
||||
|
||||
if let Some(child) = c.peek() {
|
||||
if Ueberschrift::test(child, "para") {
|
||||
builder
|
||||
.new_next_para_header(&Ueberschrift::parse(c.next().unwrap(), "para").content);
|
||||
}
|
||||
}
|
||||
// e.g. § 405 ABGB is preceded by two consecutive "para" headings (one presumably left over from a repealed paragraph — TODO confirm)
|
||||
if let Some(child) = c.peek() {
|
||||
if Ueberschrift::test(child, "para") {
|
||||
builder
|
||||
|
Reference in New Issue
Block a user