diff --git a/src/paragraph/parser/abschnitt.rs b/src/paragraph/parser/abschnitt.rs index 24068fa..d5bb4f7 100644 --- a/src/paragraph/parser/abschnitt.rs +++ b/src/paragraph/parser/abschnitt.rs @@ -100,7 +100,7 @@ impl Abschnitt { // data. fn handle_metadata(&mut self, c: &mut Peekable, builder: &mut LawBuilder) { while c.peek().is_some() { - let key = Ueberschrift::parse(c.next().unwrap(), "titel").content; + let key = Ueberschrift::parse(c, "titel").0.content; //TODO: fix .0. // We are done with meta-data parsing if key == "Text" { @@ -187,8 +187,8 @@ impl Abschnitt { // while (not if) because we can have two subsequent paraheaders (e.g. § 405 abgb) while let Some(child) = c.peek() { if Ueberschrift::test(child, "para") { - builder - .new_next_para_header(&Ueberschrift::parse(c.next().unwrap(), "para").content); + builder.new_next_para_header(&Ueberschrift::parse(c, "para").0.content); // TODO: + // fix .0. continue; } break; diff --git a/src/paragraph/parser/mod.rs b/src/paragraph/parser/mod.rs index 13b5897..680c5d4 100644 --- a/src/paragraph/parser/mod.rs +++ b/src/paragraph/parser/mod.rs @@ -360,10 +360,102 @@ impl Ueberschrift { assert_eq!(n.attribute("typ").unwrap(), typ); - Self { + let mut ret = Self { content: n.text().unwrap().into(), typ: typ.into(), + }; + + let mut next_para_header = None; + + if ret.content != "Text" { + if let Some(child) = c.peek() { + if Ueberschrift::test(child, "art") { + let (next_para, sub) = Ueberschrift::parse(c, "art"); + if sub.is_some() { + panic!("That should not happen"); + } + + next_para_header = Some(next_para); + } else if typ == "g1" && ret.content.starts_with("Artikel ") { + // wrongly tagged + // artikel + + // TODO: remove code duplication from further down + let mut clone = c.clone(); + if let Some(first_child) = clone.next() { + if let Some(second_child) = clone.next() { + if Ueberschrift::test(&first_child, "para") + && Ueberschrift::test(&second_child, "para") + { + let (first_para, empty) = 
Ueberschrift::parse(c, "para"); + if empty.is_some() { + panic!("That should not happen"); + } + let (second_para, empty) = Ueberschrift::parse(c, "para"); + if empty.is_some() { + panic!("That should not happen"); + } + next_para_header = Some(Ueberschrift { + typ: first_para.typ, + content: format!( + "{} - {}", + first_para.content, second_para.content + ), + }); + } + } + } + } else if typ == "art" && !ret.content.starts_with("Artikel") { + ret.content = format!("Artikel {}", ret.content); + } + } } + + debug!("Parsed Ueberschrift {ret:#?}"); + + (ret, next_para_header) + } + + /** Parses an "art" heading via `Self::parse(n, "art")`, then looks ahead on a clone of `n` for following "para" headings: two in a row are consumed from `n` and joined as "first - second"; a single one followed by a non-"para" sibling is consumed as-is. Returns the heading together with that optional para header. Panics with "That should not happen" whenever an inner `parse` call returns `Some` as its second tuple element. NOTE(review): both branches sit inside `if let Some(second_child)`, so a "para" heading that is the last remaining item is left unconsumed here; confirm this is intended. NOTE(review): `art` is bound as `mut` but never modified in this function. */ fn parse_art(n: &mut Peekable) -> (Self, Option) { + let (mut art, empty) = Self::parse(n, "art"); + + if empty.is_some() { + panic!("That should not happen"); + } + let mut next_para_header = None; + + let mut clone = n.clone(); + if let Some(first_child) = clone.next() { + if let Some(second_child) = clone.next() { + if Ueberschrift::test(&first_child, "para") + && Ueberschrift::test(&second_child, "para") + { + let (first_para, empty) = Ueberschrift::parse(n, "para"); + if empty.is_some() { + panic!("That should not happen"); + } + let (second_para, empty) = Ueberschrift::parse(n, "para"); + if empty.is_some() { + panic!("That should not happen"); + } + next_para_header = Some(Ueberschrift { + typ: first_para.typ, + content: format!("{} - {}", first_para.content, second_para.content), + }); + } else if Ueberschrift::test(&first_child, "para") { + let (first_para, empty) = Ueberschrift::parse(n, "para"); + if empty.is_some() { + panic!("That should not happen"); + } + next_para_header = Some(Ueberschrift { + typ: first_para.typ, + content: format!("{}", first_para.content), + }); + } + } + } + + (art, next_para_header) } pub(crate) fn parse_full(c: &mut Peekable, typ: &str) -> (Self, Option) { let n = c.next().unwrap();