// Copyright (C) 2024 Philipp Hofer // // Licensed under the EUPL, Version 1.2 or - as soon they will be approved by // the European Commission - subsequent versions of the EUPL (the "Licence"). // You may not use this work except in compliance with the Licence. // // You should have received a copy of the European Union Public License along // with this program. If not, you may obtain a copy of the Licence at: // // // Unless required by applicable law or agreed to in writing, software // distributed under the Licence is distributed on an "AS IS" basis, // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. // See the Licence for the specific language governing permissions and // limitations under the Licence. mod absatz; mod abschnitt; mod liste; mod table; use std::iter::Peekable; use abschnitt::Abschnitt; use log::trace; use roxmltree::{Children, Node}; use crate::{ law::{Content, LawBuilder}, misc::Error, paragraph::parser::absatz::Absatz, }; struct Expect<'a> { node: &'a Node<'a, 'a>, } impl<'a> From<&'a Node<'a, 'a>> for Expect<'a> { fn from(node: &'a Node<'a, 'a>) -> Self { Expect { node } } } impl<'a> Expect<'a> { fn tag(&self, value: &str) { if self.node.tag_name().name() != value { panic!( "Expected tag '{value}', got {} (tag: {}, content: {:?})", self.node.tag_name().name(), self.node.tag_name().name(), self.node.text(), ); } } } #[derive(Debug, PartialEq)] pub(crate) struct Risdok {} impl Risdok { pub(crate) fn parse(n: Node, builder: &mut LawBuilder) -> bool { assert!(n.tag_name().name() == "risdok"); let mut c = n.children(); Metadaten::parse(c.next().unwrap()); let nutzdaten = Nutzdaten::parse(c.next().unwrap(), builder); if !nutzdaten { return false; } Layoutdaten::parse(c.next().unwrap()); assert_eq!(c.next(), None); true } pub(crate) fn from_str(xml: &str, builder: &mut LawBuilder) -> Result { let doc = roxmltree::Document::parse(xml)?; let root = doc.root(); assert_eq!(root.children().count(), 1); Ok(Self::parse(root.children().next().unwrap(), builder)) } } #[derive(Debug, PartialEq)] pub(crate) struct Metadaten; impl Metadaten { pub(crate) fn parse(n: Node) -> Self { assert!(n.tag_name().name() == "metadaten"); assert_eq!(n.children().next(), None); Self {} } } #[derive(Debug, PartialEq)] pub(crate) struct Nutzdaten {} impl Nutzdaten { pub(crate) fn parse(n: Node, builder: &mut LawBuilder) -> bool { assert!(n.tag_name().name() == "nutzdaten"); let mut c = n.children(); let ret = Abschnitt::parse(c.next().unwrap(), builder); assert_eq!(c.next(), None); ret.cont } } #[derive(Debug, PartialEq, Clone)] pub(crate) struct Symbol { stellen: String, content: String, } impl Symbol { pub(crate) fn parse(n: Node) -> Self { assert!(n.tag_name().name() == "symbol"); assert_eq!(n.children().count(), 1); let stellen = n.attribute("stellen").unwrap().into(); let content = n.text().unwrap().into(); Self { stellen, content } } } #[derive(Debug, PartialEq, Clone)] pub(crate) struct Listelem { symbol: Symbol, text: String, } impl Listelem { pub(crate) fn parse(n: Node) -> Self { assert!(n.tag_name().name() == "listelem"); let mut c = n.children(); let symbol = Symbol::parse(c.next().unwrap()); let text = c.next().unwrap().text().unwrap().into(); trace!("Parsed Listelem with text='{text}'"); assert_eq!(c.next(), None); Self { symbol, text } } } #[derive(Debug, PartialEq, Clone)] pub(crate) struct Ziffernliste { ebene: usize, listelems: Vec, sublist: Option>, schlussteile: Vec, } impl Ziffernliste { pub(crate) fn test(n: &Node) -> bool { // strichliste -> ยง 194b FSG ["ziffernliste", "aufzaehlung", "literaliste", "strichliste"].contains(&n.tag_name().name()) } pub(crate) fn test_with_level(n: &Node, level: usize) -> bool { match n.attribute("ebene") { Some(ebene) => Self::test(n) && ebene == level.to_string(), None => false, } } pub(crate) fn parse(c: &mut Peekable) -> Self { let n = c.next().unwrap(); assert!(Self::test(&n)); let ebene = n.attribute("ebene").unwrap().parse::().unwrap(); let mut listelems = Vec::new(); for child in n.children() { listelems.push(Listelem::parse(child)); } // If next element is ebene + 1 -> part of this list let mut sublist: Option> = None; while let Some(child) = c.peek() { if Ziffernliste::test_with_level(child, ebene + 1) { sublist = Some(Box::new(Ziffernliste::parse(c))); } else { break; } } let mut schlussteile = Vec::new(); while let Some(child) = c.peek() { if Schlussteil::test_with_ebene(child, ebene) { schlussteile.push(Schlussteil::parse(c.next().unwrap())); } else { break; } } Self { ebene, listelems, sublist, schlussteile, } } pub(crate) fn get_content(&self) -> Content { let mut elems = Vec::new(); for elem in &self.listelems { elems.push(Content::Text(format!( "{} {}", elem.symbol.content, elem.text ))); } if let Some(sublist) = &self.sublist { let sublist = *sublist.clone(); elems.push(sublist.get_content()); } for schlussteil in &self.schlussteile { elems.push(Content::Text(schlussteil.content.clone())); } if self.schlussteile.is_empty() { Content::List(elems) } else { Content::List(vec![Content::Multi(elems)]) } } } #[derive(Debug, PartialEq)] pub(crate) struct Td { absatz: Absatz, } impl Td { pub(crate) fn parse(n: &Node) -> Self { assert!(n.tag_name().name() == "td"); let mut c = n.children(); let absatz = Absatz::parse(c.next().unwrap()); assert_eq!(c.next(), None); Self { absatz } } } #[derive(Debug, PartialEq)] pub(crate) struct Tr { tds: Vec, } impl Tr { pub(crate) fn parse(n: &Node) -> Self { assert!(n.tag_name().name() == "tr"); let mut tds = Vec::new(); for child in n.children() { tds.push(Td::parse(&child)); } Self { tds } } } #[derive(Debug, PartialEq, Clone)] pub(crate) struct Schlussteil { pub(crate) content: String, } impl Schlussteil { pub(crate) fn test(n: &Node) -> bool { (n.tag_name().name() == "schlussteil" || n.tag_name().name() == "schluss") && n.children().count() == 1 } pub(crate) fn test_with_ebene(n: &Node, level: usize) -> bool { match n.attribute("ebene") { Some(ebene) => Self::test(n) && ebene == level.to_string(), None => false, } } pub(crate) fn parse(n: Node) -> Self { assert!(Self::test(&n)); let content = n.children().next().unwrap().text().unwrap().into(); //not sure Self { content } } } #[derive(Debug, PartialEq)] pub(crate) struct AbsatzAbs { gldsym: Option, content: String, } impl AbsatzAbs { pub(crate) fn test(n: &Node) -> bool { n.tag_name().name() == "absatz" && n.attribute("typ").unwrap() == "abs" } pub(crate) fn parse(n: Node) -> Self { assert!(Self::test(&n)); let mut c = n.children().peekable(); let gldsym = match c.peek() { Some(child) => { if Leaf::test(child, "gldsym") { Some(Leaf::parse(c.next().unwrap(), "gldsym")) } else { None } } None => None, }; let ret = Self { gldsym, content: c.next().unwrap().text().unwrap().trim().into(), }; assert_eq!(c.next(), None); ret } } #[derive(Debug, PartialEq)] pub(crate) struct Leaf { content: String, } impl Leaf { pub(crate) fn test(n: &Node, name: &str) -> bool { n.tag_name().name() == name && n.children().count() == 1 } pub(crate) fn parse(n: Node, name: &str) -> String { assert!(n.tag_name().name() == name); assert_eq!(n.children().count(), 1); n.text().unwrap().into() } } #[derive(Debug, PartialEq)] pub(crate) struct Ueberschrift { typ: String, content: String, } impl Ueberschrift { fn test(n: &Node, typ: &str) -> bool { n.tag_name().name() == "ueberschrift" && n.attribute("typ").unwrap() == typ } pub(crate) fn parse(n: Node, typ: &str) -> Self { assert!(n.tag_name().name() == "ueberschrift"); assert_eq!(n.attribute("typ").unwrap(), typ); Self { content: n.text().unwrap().into(), typ: typ.into(), } } } #[derive(Debug, PartialEq)] pub(crate) struct Kzinhalt; impl Kzinhalt { pub(crate) fn parse(n: Node) -> Self { assert!(n.tag_name().name() == "kzinhalt"); //TODO parse if necessary Self {} } } #[derive(Debug, PartialEq)] pub(crate) struct Fzinhalt; impl Fzinhalt { pub(crate) fn parse(n: Node) -> Self { assert!(n.tag_name().name() == "fzinhalt"); //TODO parse if necessary Self {} } } #[derive(Debug, PartialEq)] pub(crate) struct Layoutdaten; impl Layoutdaten { pub(crate) fn parse(n: Node) -> Self { assert!(n.tag_name().name() == "layoutdaten"); assert_eq!(n.children().next(), None); Self {} } }