From 04cc7d9183d5d736df8f9554020fb799372cf51d Mon Sep 17 00:00:00 2001 From: philipp Date: Fri, 16 Feb 2024 09:54:38 +0100 Subject: [PATCH] extract to table rs --- src/paragraph/parser/abschnitt.rs | 61 ++++++++----------------------- src/paragraph/parser/liste.rs | 3 +- src/paragraph/parser/mod.rs | 36 +----------------- src/paragraph/parser/table.rs | 60 ++++++++++++++++++++++++++++++ 4 files changed, 77 insertions(+), 83 deletions(-) create mode 100644 src/paragraph/parser/table.rs diff --git a/src/paragraph/parser/abschnitt.rs b/src/paragraph/parser/abschnitt.rs index 5b0b464..b02482b 100644 --- a/src/paragraph/parser/abschnitt.rs +++ b/src/paragraph/parser/abschnitt.rs @@ -5,9 +5,9 @@ use roxmltree::{Children, Node}; use crate::law::LawBuilder; use crate::paragraph::parser::liste::Liste; -use crate::paragraph::parser::{ - Absatz, AbsatzAbs, Content, Fzinhalt, Kzinhalt, Table, Ueberschrift, -}; +use crate::paragraph::parser::{Absatz, AbsatzAbs, Content, Fzinhalt, Kzinhalt, Ueberschrift}; + +use super::table::Table; #[derive(Debug, PartialEq, Default)] pub(crate) struct Abschnitt { @@ -198,58 +198,27 @@ impl Abschnitt { let absatz = AbsatzAbs::parse(c.next().unwrap()); let par_id = absatz.gldsym; + let mut content = Vec::new(); + content.push(Content::Text(absatz.content)); + // If there's a "liste" after an "absatz", the "liste" should be part of the "absatz" if let Some(child) = c.peek() { if Liste::test(child) { - println!("11@"); - ( - par_id, - Content::List(vec![ - Content::Text(absatz.content), - Liste::parse_real(c).get_content(), - ]), - ) + content.push(Liste::parse_full(c).get_content()) } else if Table::test(child) { // If there's a "table" after an "absatz", the "table" should be part of the "absatz" - let table = Table::parse(c.next().unwrap()); - if let Some(child) = c.peek() { - if Absatz::test_with_typ(child, "erltext") { - let after_absatz = Absatz::parse(c.next().unwrap()); - ( - par_id, - Content::List(vec![ - Content::Text(absatz.content), - Content::List(table.get_list()), - Content::Text(after_absatz.content), - ]), - ) - } else { - ( - par_id, - Content::List(vec![ - Content::Text(absatz.content), - Content::List(table.get_list()), - ]), - ) - } - } else { - (par_id, Content::Text("THIS SHOULD NOT HAPPEN".into())) - } + let table = Table::parse_full(c); + content.extend(table.iter().cloned()); } else if Absatz::test_with_typ(child, "satz") { - // After a 'liste' there can be a ') -> Self { + pub(crate) fn parse_full(n: &mut Peekable) -> Self { Expect::from(n.peek().unwrap()).tag("liste"); let mut content = Vec::new(); diff --git a/src/paragraph/parser/mod.rs b/src/paragraph/parser/mod.rs index 224acc3..a7869af 100644 --- a/src/paragraph/parser/mod.rs +++ b/src/paragraph/parser/mod.rs @@ -16,6 +16,7 @@ mod abschnitt; mod liste; +mod table; use abschnitt::Abschnitt; use roxmltree::Node; @@ -216,41 +217,6 @@ impl Tr { Self { tds } } } -#[derive(Debug, PartialEq)] -pub(crate) struct Table { - trs: Vec, -} -impl Table { - pub(crate) fn test(n: &Node) -> bool { - n.tag_name().name() == "table" - } - - pub(crate) fn parse(n: Node) -> Self { - assert!(Self::test(&n)); - let mut trs = Vec::new(); - - for child in n.children() { - trs.push(Tr::parse(&child)); - } - - Self { trs } - } - - pub(crate) fn get_list(&self) -> Vec { - let mut ret = Vec::new(); - - for tr in &self.trs { - let mut txt = String::new(); - for td in &tr.tds { - txt.push_str(&format!("{} ", td.absatz.content)); - } - - ret.push(Content::Text(format!("- {txt}",))); - } - - ret - } -} #[derive(Debug, PartialEq)] pub(crate) struct Schlussteil { diff --git a/src/paragraph/parser/table.rs b/src/paragraph/parser/table.rs new file mode 100644 index 0000000..236ca1a --- /dev/null +++ b/src/paragraph/parser/table.rs @@ -0,0 +1,60 @@ +use std::iter::Peekable; + +use roxmltree::{Children, Node}; + +use crate::{law::Content, paragraph::parser::Expect}; + +use super::{Absatz, Tr}; + +#[derive(Debug, PartialEq)] +pub(crate) struct Table { + trs: Vec, +} +impl Table { + // TODO: Combine mandatory check in parse + optional check in test -> Expect trait? + pub(crate) fn test(n: &Node) -> bool { + n.tag_name().name() == "table" + } + + pub(crate) fn parse_full(n: &mut Peekable) -> Vec { + Expect::from(n.peek().unwrap()).tag("table"); + + let mut content = Vec::new(); + + let mut trs = Vec::new(); + + let c = n.next().unwrap(); + + // Parse table + for child in c.children() { + trs.push(Tr::parse(&child)); + } + content.push(Self::get_list(&trs)); + + // erltext after table belongs to table + while let Some(child) = n.peek() { + if Absatz::test_with_typ(child, "erltext") { + content.push(Content::Text(Absatz::parse(n.next().unwrap()).content)); + } else { + break; + } + } + + content + } + + pub(crate) fn get_list(trs: &Vec) -> Content { + let mut ret = Vec::new(); + + for tr in trs { + let mut txt = String::new(); + for td in &tr.tds { + txt.push_str(&format!("{} ", td.absatz.content)); + } + + ret.push(Content::Text(format!("- {txt}",))); + } + + Content::List(ret) + } +}