extract to table rs
All checks were successful
CI/CD Pipeline / test (push) Successful in 1m49s

This commit is contained in:
philipp 2024-02-16 09:54:38 +01:00
parent bb8e5641f6
commit 04cc7d9183
4 changed files with 77 additions and 83 deletions

View File

@ -5,9 +5,9 @@ use roxmltree::{Children, Node};
use crate::law::LawBuilder;
use crate::paragraph::parser::liste::Liste;
use crate::paragraph::parser::{
Absatz, AbsatzAbs, Content, Fzinhalt, Kzinhalt, Table, Ueberschrift,
};
use crate::paragraph::parser::{Absatz, AbsatzAbs, Content, Fzinhalt, Kzinhalt, Ueberschrift};
use super::table::Table;
#[derive(Debug, PartialEq, Default)]
pub(crate) struct Abschnitt {
@ -198,58 +198,27 @@ impl Abschnitt {
let absatz = AbsatzAbs::parse(c.next().unwrap());
let par_id = absatz.gldsym;
let mut content = Vec::new();
content.push(Content::Text(absatz.content));
// If there's a "liste" after an "absatz", the "liste" should be part of the "absatz"
if let Some(child) = c.peek() {
if Liste::test(child) {
println!("11@");
(
par_id,
Content::List(vec![
Content::Text(absatz.content),
Liste::parse_real(c).get_content(),
]),
)
content.push(Liste::parse_full(c).get_content())
} else if Table::test(child) {
// If there's a "table" after an "absatz", the "table" should be part of the "absatz"
let table = Table::parse(c.next().unwrap());
if let Some(child) = c.peek() {
if Absatz::test_with_typ(child, "erltext") {
let after_absatz = Absatz::parse(c.next().unwrap());
(
par_id,
Content::List(vec![
Content::Text(absatz.content),
Content::List(table.get_list()),
Content::Text(after_absatz.content),
]),
)
} else {
(
par_id,
Content::List(vec![
Content::Text(absatz.content),
Content::List(table.get_list()),
]),
)
}
} else {
(par_id, Content::Text("THIS SHOULD NOT HAPPEN".into()))
}
let table = Table::parse_full(c);
content.extend(table.iter().cloned());
} else if Absatz::test_with_typ(child, "satz") {
// After a 'liste' there can be a '<absatz typ="satz"' which should be part of the list
// After a 'absatz' there can be a '<absatz typ="satz"' which should be part of the first absatz
// (e.g. 1209 ABGB)
(
par_id,
Content::List(vec![
Content::Text(absatz.content.clone()),
Content::Text(Absatz::parse(c.next().unwrap()).content),
]),
)
content.push(Content::Text(Absatz::parse(c.next().unwrap()).content))
}
}
if content.len() == 1 {
(par_id, content[0].clone())
} else {
(par_id, Content::Text(absatz.content.clone()))
}
} else {
(par_id, Content::Text(absatz.content.clone()))
(par_id, Content::List(content))
}
}
}

View File

@ -16,8 +16,7 @@ impl Liste {
n.tag_name().name() == "liste"
}
//TODO: rename: parse_full
pub(crate) fn parse_real(n: &mut Peekable<Children>) -> Self {
pub(crate) fn parse_full(n: &mut Peekable<Children>) -> Self {
Expect::from(n.peek().unwrap()).tag("liste");
let mut content = Vec::new();

View File

@ -16,6 +16,7 @@
mod abschnitt;
mod liste;
mod table;
use abschnitt::Abschnitt;
use roxmltree::Node;
@ -216,41 +217,6 @@ impl Tr {
Self { tds }
}
}
/// A parsed `table` element: holds one [`Tr`] per child node of the table
/// (see `Table::parse`).
#[derive(Debug, PartialEq)]
pub(crate) struct Table {
    // One entry per child node of the `table` element, in document order.
    trs: Vec<Tr>,
}
impl Table {
    /// Tells whether `n` carries the tag name `table`.
    pub(crate) fn test(n: &Node) -> bool {
        matches!(n.tag_name().name(), "table")
    }

    /// Builds a `Table` from the node `n` by parsing every child with `Tr::parse`.
    ///
    /// # Panics
    /// Panics if `n` is not a `table` node.
    pub(crate) fn parse(n: Node) -> Self {
        assert!(Self::test(&n));

        let trs = n.children().map(|row| Tr::parse(&row)).collect();
        Self { trs }
    }

    /// Renders the table as text: one `Content::Text` per row, made of `"- "`
    /// followed by every cell's content, each cell followed by a single space.
    pub(crate) fn get_list(&self) -> Vec<Content> {
        self.trs
            .iter()
            .map(|tr| {
                let cells: String = tr
                    .tds
                    .iter()
                    .map(|td| format!("{} ", td.absatz.content))
                    .collect();
                Content::Text(format!("- {cells}"))
            })
            .collect()
    }
}
#[derive(Debug, PartialEq)]
pub(crate) struct Schlussteil {

View File

@ -0,0 +1,60 @@
use std::iter::Peekable;
use roxmltree::{Children, Node};
use crate::{law::Content, paragraph::parser::Expect};
use super::{Absatz, Tr};
/// Namespace type for parsing `table` elements (see the associated functions
/// `test`, `parse_full` and `get_list`).
///
/// NOTE(review): none of the associated functions visible in this file
/// construct a `Table` or read `trs` — confirm whether the field is still
/// needed elsewhere.
#[derive(Debug, PartialEq)]
pub(crate) struct Table {
    // Rows of the table, one `Tr` each.
    trs: Vec<Tr>,
}
impl Table {
    // TODO: Combine mandatory check in parse + optional check in test -> Expect trait?
    /// Returns `true` when the tag name of `n` is `table`.
    pub(crate) fn test(n: &Node) -> bool {
        n.tag_name().name() == "table"
    }

    /// Consumes a `table` node from `n`, together with every directly following
    /// node that `Absatz::test_with_typ(_, "erltext")` accepts.
    ///
    /// The returned vector starts with the table itself (rendered through
    /// [`Table::get_list`]), followed by one `Content::Text` per consumed
    /// `erltext` node, in document order. The first node that is not an
    /// `erltext` is left in the iterator for the caller.
    ///
    /// # Panics
    /// Panics if `n` is already exhausted. The tag of the first node is checked
    /// via `Expect::tag("table")`; presumably that panics on a mismatch —
    /// TODO confirm against `Expect`.
    pub(crate) fn parse_full(n: &mut Peekable<Children>) -> Vec<Content> {
        Expect::from(
            n.peek()
                .expect("expected a `table` node, but there are no nodes left"),
        )
        .tag("table");
        let table = n.next().expect("a node was peeked just above");

        // Parse table
        let trs: Vec<Tr> = table.children().map(|child| Tr::parse(&child)).collect();
        let mut content = vec![Self::get_list(&trs)];

        // erltext after table belongs to table; `next_if` only advances the
        // iterator when the upcoming node really is an erltext.
        while let Some(erltext) = n.next_if(|child| Absatz::test_with_typ(child, "erltext")) {
            content.push(Content::Text(Absatz::parse(erltext).content));
        }

        content
    }

    /// Renders `trs` as a `Content::List` holding one `Content::Text` per row.
    ///
    /// Each row's text is `"- "` followed by the content of every cell, each
    /// cell followed by a single space. An empty slice yields an empty list.
    pub(crate) fn get_list(trs: &[Tr]) -> Content {
        let rows = trs
            .iter()
            .map(|tr| {
                let txt: String = tr
                    .tds
                    .iter()
                    .map(|td| format!("{} ", td.absatz.content))
                    .collect();
                Content::Text(format!("- {txt}"))
            })
            .collect();

        Content::List(rows)
    }
}