use log::{error, info}; use roxmltree::Node; use crate::{ law::{Content, LawBuilder}, Error, }; #[derive(Debug, PartialEq)] pub(crate) struct Risdok {} impl Risdok { pub(crate) fn parse(n: Node, builder: &mut LawBuilder) -> bool { assert!(n.tag_name().name() == "risdok"); let mut c = n.children(); Metadaten::parse(c.next().unwrap()); let nutzdaten = Nutzdaten::parse(c.next().unwrap(), builder); if !nutzdaten { return false; } Layoutdaten::parse(c.next().unwrap()); assert_eq!(c.next(), None); true } pub(crate) fn from_str(xml: &str, builder: &mut LawBuilder) -> Result { let doc = roxmltree::Document::parse(xml)?; let root = doc.root(); assert_eq!(root.children().count(), 1); Ok(Self::parse(root.children().next().unwrap(), builder)) } } #[derive(Debug, PartialEq)] pub(crate) struct Metadaten; impl Metadaten { pub(crate) fn parse(n: Node) -> Self { assert!(n.tag_name().name() == "metadaten"); assert_eq!(n.children().next(), None); Self {} } } #[derive(Debug, PartialEq)] pub(crate) struct Nutzdaten {} impl Nutzdaten { pub(crate) fn parse(n: Node, builder: &mut LawBuilder) -> bool { assert!(n.tag_name().name() == "nutzdaten"); let mut c = n.children(); let ret = Abschnitt::parse(c.next().unwrap(), builder); assert_eq!(c.next(), None); ret } } #[derive(Debug, PartialEq)] pub(crate) struct Abschnitt; impl Abschnitt { pub(crate) fn parse(n: Node, builder: &mut LawBuilder) -> bool { assert!(n.tag_name().name() == "abschnitt"); let mut c = n.children().peekable(); Kzinhalt::parse(c.next().unwrap()); Kzinhalt::parse(c.next().unwrap()); Fzinhalt::parse(c.next().unwrap()); Fzinhalt::parse(c.next().unwrap()); // Skip all UeberschriftTitle and Absatz while let Some(child) = c.peek() { if Ueberschrift::test(child, "titel") { c.next(); continue; } if Absatz::test_with_typ(child, "erltext") { c.next(); continue; } break; } while let Some(child) = c.peek() { if Ueberschrift::test(child, "g1") { let ueberschrift = Ueberschrift::parse(c.next().unwrap(), "g1"); if ueberschrift.content.trim().starts_with("Artikel") { return false; } builder.new_header(&ueberschrift.content); } else if Ueberschrift::test(child, "g2") { let ueberschrift = Ueberschrift::parse(c.next().unwrap(), "g2"); builder.new_desc(&ueberschrift.content); } else if Ueberschrift::test(child, "g1min") { let ueberschrift = Ueberschrift::parse(c.next().unwrap(), "g1min"); builder.new_header(&ueberschrift.content); } else { break; } } if let Some(child) = c.peek() { if Ueberschrift::test(child, "para") { builder .new_next_para_header(&Ueberschrift::parse(c.next().unwrap(), "para").content); } } // TODO: Continue here: We want to create a `Section`. // // We have 2 tasks // 1) Get paragraph id // 2) Get content let mut absatze = Vec::new(); let absatz = AbsatzAbs::parse(c.next().expect("We need at least one 'Absatz'")); let par_id = absatz .gldsym .clone() .expect("First 'Absatz' needs to have § id"); // If there's a "liste" after an "absatz", the "liste" should be part of the "absatz" if let Some(child) = c.peek() { if Liste::test(child) { let liste = Liste::parse(c.next().unwrap()); absatze.push(Content::List(vec![ Content::Text(absatz.content.replace('\u{a0}', " ")), liste.get_content(), ])); } else if Table::test(child) { // If there's a "table" after an "absatz", the "table" should be part of the "absatz" let table = Table::parse(c.next().unwrap()); if let Some(child) = c.peek() { if Absatz::test_with_typ(child, "erltext") { let after_absatz = Absatz::parse(c.next().unwrap()); absatze.push(Content::List(vec![ Content::Text(absatz.content.replace('\u{a0}', " ")), Content::List(table.get_list()), Content::Text(after_absatz.content), ])) } else { absatze.push(Content::List(vec![ Content::Text(absatz.content.replace('\u{a0}', " ")), Content::List(table.get_list()), ])); } } } else { absatze.push(Content::Text(absatz.content.replace('\u{a0}', " ").clone())); } } else { absatze.push(Content::Text(absatz.content.replace('\u{a0}', " ").clone())); } //There can be as many 'Absätze' as our lovely lawsetter wants while let Some(child) = c.peek() { if AbsatzAbs::test(child) { let abs = AbsatzAbs::parse(c.next().unwrap()); // If there's a "liste" after an "absatz", the "liste" should be part of the "absatz" if let Some(child) = c.peek() { if Liste::test(child) { let liste = Liste::parse(c.next().unwrap()); absatze.push(Content::List(vec![ Content::Text(abs.content.replace('\u{a0}', " ")), liste.get_content(), ])); } else { absatze.push(Content::Text(abs.content.replace('\u{a0}', " "))); } } else { absatze.push(Content::Text(abs.content.replace('\u{a0}', " "))); } continue; } break; } if absatze.len() == 1 { builder.new_par(par_id, absatze[0].clone()); } else { let mut contents = Vec::new(); for a in &absatze { contents.push(a.clone()); } builder.new_par(par_id, Content::Item(contents)); } // Skip all UeberschriftTitle and Absatz while let Some(child) = c.peek() { if Ueberschrift::test(child, "titel") { c.next(); continue; } if Absatz::test(child) { c.next(); continue; } break; } assert_eq!(c.next(), None); true } } #[derive(Debug, PartialEq, Clone)] pub(crate) struct Symbol { stellen: String, content: String, } impl Symbol { pub(crate) fn parse(n: Node) -> Self { assert!(n.tag_name().name() == "symbol"); assert_eq!(n.children().count(), 1); let stellen = n.attribute("stellen").unwrap().into(); let content = n.text().unwrap().into(); Self { stellen, content } } } #[derive(Debug, PartialEq, Clone)] pub(crate) struct Listelem { symbol: Symbol, text: String, } impl Listelem { pub(crate) fn parse(n: Node) -> Self { assert!(n.tag_name().name() == "listelem"); let mut c = n.children(); let symbol = Symbol::parse(c.next().unwrap()); let text = c.next().unwrap().text().unwrap().into(); assert_eq!(c.next(), None); Self { symbol, text } } } #[derive(Debug, PartialEq, Clone)] pub(crate) struct Ziffernliste { ebene: String, listelems: Vec, } impl Ziffernliste { pub(crate) fn test(n: &Node) -> bool { ["ziffernliste", "aufzaehlung", "literaliste"].contains(&n.tag_name().name()) } pub(crate) fn parse(n: Node) -> Self { assert!(Self::test(&n)); let ebene = n.attribute("ebene").unwrap().into(); let mut listelems = Vec::new(); for child in n.children() { listelems.push(Listelem::parse(child)); } Self { ebene, listelems } } pub(crate) fn get_content(&self) -> Content { let mut elems = Vec::new(); for elem in &self.listelems { elems.push(Content::Text( format!("{} {}", elem.symbol.content, elem.text).replace('\u{a0}', " "), )); } Content::List(elems) } } #[derive(Debug, PartialEq)] pub(crate) struct Td { absatz: Absatz, } impl Td { pub(crate) fn parse(n: &Node) -> Self { assert!(n.tag_name().name() == "td"); let mut c = n.children(); let absatz = Absatz::parse(c.next().unwrap()); assert_eq!(c.next(), None); Self { absatz } } } #[derive(Debug, PartialEq)] pub(crate) struct Tr { tds: Vec, } impl Tr { pub(crate) fn parse(n: &Node) -> Self { assert!(n.tag_name().name() == "tr"); let mut tds = Vec::new(); for child in n.children() { tds.push(Td::parse(&child)); } Self { tds } } } #[derive(Debug, PartialEq)] pub(crate) struct Table { trs: Vec, } impl Table { pub(crate) fn test(n: &Node) -> bool { n.tag_name().name() == "table" } pub(crate) fn parse(n: Node) -> Self { assert!(Self::test(&n)); let mut trs = Vec::new(); for child in n.children() { trs.push(Tr::parse(&child)); } Self { trs } } pub(crate) fn get_list(&self) -> Vec { let mut ret = Vec::new(); for tr in &self.trs { let mut txt = String::new(); for td in &tr.tds { txt.push_str(&format!("{} ", td.absatz.content)); } ret.push(Content::Text(format!("- {txt}",).replace('\u{a0}', " "))); } ret } } #[derive(Debug, PartialEq)] pub(crate) struct Schlussteil { content: String, } impl Schlussteil { pub(crate) fn test(n: &Node) -> bool { (n.tag_name().name() == "schlussteil" || n.tag_name().name() == "schluss") && n.children().count() == 1 } pub(crate) fn parse(n: Node) -> Self { assert!(Self::test(&n)); let content = n.children().next().unwrap().text().unwrap().into(); //not sure Self { content } } } #[derive(Debug)] pub(crate) struct Liste { content: Vec, } impl Liste { pub(crate) fn test(n: &Node) -> bool { n.tag_name().name() == "liste" } pub(crate) fn parse(n: Node) -> Self { assert!(Self::test(&n)); let mut content = Vec::new(); let mut c = n.children().peekable(); content.push(Ziffernliste::parse(c.next().unwrap()).get_content()); while let Some(child) = c.peek() { if Ziffernliste::test(child) { content.push(Ziffernliste::parse(c.next().unwrap()).get_content()); } else if Schlussteil::test(child) { content.push(Content::Text( Schlussteil::parse(c.next().unwrap()) .content .replace('\u{a0}', " "), )); } else { break; } } assert_eq!(c.next(), None); Self { content } } pub(crate) fn get_content(&self) -> Content { Content::List(self.content.clone()) } } #[derive(Debug, PartialEq)] pub(crate) struct AbsatzAbs { gldsym: Option, content: String, } impl AbsatzAbs { pub(crate) fn test(n: &Node) -> bool { n.tag_name().name() == "absatz" && n.attribute("typ").unwrap() == "abs" } pub(crate) fn parse(n: Node) -> Self { assert!(n.tag_name().name() == "absatz"); assert_eq!(n.attribute("typ").unwrap(), "abs"); let mut c = n.children().peekable(); let gldsym = match c.peek() { Some(child) => { if Leaf::test(child, "gldsym".into()) { Some(Leaf::parse(c.next().unwrap(), "gldsym".into()).replace('\u{a0}', " ")) } else { None } } None => None, }; let ret = Self { gldsym, content: c.next().unwrap().text().unwrap().trim().into(), }; assert_eq!(c.next(), None); ret } } #[derive(Debug, PartialEq)] pub(crate) struct Leaf { content: String, } impl Leaf { pub(crate) fn test(n: &Node, name: String) -> bool { n.tag_name().name() == name && n.children().count() == 1 } pub(crate) fn parse(n: Node, name: String) -> String { assert!(n.tag_name().name() == name); assert_eq!(n.children().count(), 1); n.text().unwrap().into() } } #[derive(Debug, PartialEq)] pub(crate) struct Absatz { content: String, } impl Absatz { pub(crate) fn test(n: &Node) -> bool { n.tag_name().name() == "absatz" } pub(crate) fn test_with_typ(n: &Node, typ: &str) -> bool { n.tag_name().name() == "absatz" && n.attribute("typ") == Some(typ) } pub(crate) fn parse(n: Node) -> Self { assert!(n.tag_name().name() == "absatz"); if let Some(text) = n.text() { Self { content: text.into(), } } else { Self { content: "".into() } } } } #[derive(Debug, PartialEq)] pub(crate) struct Ueberschrift { typ: String, content: String, } impl Ueberschrift { fn test(n: &Node, typ: &str) -> bool { n.tag_name().name() == "ueberschrift" && n.attribute("typ").unwrap() == typ } pub(crate) fn parse(n: Node, typ: &str) -> Self { assert!(n.tag_name().name() == "ueberschrift"); assert_eq!(n.attribute("typ").unwrap(), typ); Self { content: n.text().unwrap().into(), typ: typ.into(), } } } #[derive(Debug, PartialEq)] pub(crate) struct Kzinhalt; impl Kzinhalt { pub(crate) fn parse(n: Node) -> Self { assert!(n.tag_name().name() == "kzinhalt"); //TODO parse if necessary Self {} } } #[derive(Debug, PartialEq)] pub(crate) struct Fzinhalt; impl Fzinhalt { pub(crate) fn parse(n: Node) -> Self { assert!(n.tag_name().name() == "fzinhalt"); //TODO parse if necessary Self {} } } #[derive(Debug, PartialEq)] pub(crate) struct Layoutdaten; impl Layoutdaten { pub(crate) fn parse(n: Node) -> Self { assert!(n.tag_name().name() == "layoutdaten"); assert_eq!(n.children().next(), None); Self {} } }