This commit is contained in:
philipp 2023-11-04 20:39:17 +01:00
parent a78ba95775
commit eb6f3e8aba
4 changed files with 152 additions and 64 deletions

View File

@ -25,7 +25,7 @@ RISolve
# Next step
- [ ] call law struct fn with paragraph content
- [ ] UrhG § 17 Abs. 1 not parsed
# Naming

View File

@ -223,7 +223,8 @@ pub(crate) enum Content {
Text(String), //This is my direct law text
TextWithList(String, Vec<Box<Content>>),
Item(Vec<Box<Content>>), //(1) This is general law. (2) This is more specific law
List(Vec<Box<Content>>), //1. my first item
List(Vec<Box<Content>>),
TextWithListAndText(String, Vec<Box<Content>>, String), //1. my first item
}
#[cfg(test)]

View File

@ -10,6 +10,20 @@ pub(crate) fn parse(url: &str, builder: &mut LawBuilder) -> Result<(), Error> {
println!("{url}");
let xml = fetch_page(url)?;
let xml = xml.replace("<gdash />", "-"); // used e.g. in §11 Abs. 3 UrhG
//
//
let xml = xml.replace(
// e.g. in § 17 (2) TODO: check that this only happens here
r#"<liste><schlussteil ebene="0" art="normal" ct="text">"#,
r#"<absatz typ="abs" ct="text" halign="j">"#,
);
let xml = xml.replace(
// e.g. in § 17 (2) TODO: check that this only happens here
r#"</schlussteil></liste>"#,
"</absatz>",
);
println!("{xml}");
let risdok = Risdok::from_str(&xml, builder)?;
println!("{builder:#?}");

View File

@ -1,5 +1,3 @@
use std::fmt::Display;
use roxmltree::Node;
use crate::{
@ -39,20 +37,6 @@ impl Risdok {
}
}
impl Display for Risdok {
    /// Writes every parsed paragraph in order.
    ///
    /// A paragraph that carries a `gldsym` is prefixed with a newline, the
    /// symbol and a single space; every paragraph's content is followed by a
    /// newline.
    ///
    /// # Errors
    /// Propagates the first error reported by the underlying formatter.
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        for abs in &self.nutzdaten.abschnitt.absatze {
            // Write straight into the formatter instead of assembling a
            // temporary `String` per paragraph.
            if let Some(symb) = &abs.gldsym {
                write!(f, "\n{symb} ")?;
            }
            writeln!(f, "{}", abs.content)?;
        }
        Ok(())
    }
}
#[derive(Debug, PartialEq)]
pub(crate) struct Metadaten;
impl Metadaten {
@ -66,30 +50,25 @@ impl Metadaten {
}
#[derive(Debug, PartialEq)]
pub(crate) struct Nutzdaten {
abschnitt: Abschnitt,
}
pub(crate) struct Nutzdaten {}
impl Nutzdaten {
pub(crate) fn parse(n: Node, builder: &mut LawBuilder) -> Self {
assert!(n.tag_name().name() == "nutzdaten");
let mut c = n.children();
let ret = Self {
abschnitt: Abschnitt::parse(c.next().unwrap(), builder),
};
Abschnitt::parse(c.next().unwrap(), builder);
assert_eq!(c.next(), None);
ret
Self {}
}
}
#[derive(Debug, PartialEq)]
pub(crate) struct Abschnitt {
absatze: Vec<AbsatzAbs>,
}
pub(crate) struct Abschnitt;
impl Abschnitt {
pub(crate) fn parse(n: Node, builder: &mut LawBuilder) -> Self {
pub(crate) fn parse(n: Node, builder: &mut LawBuilder) {
assert!(n.tag_name().name() == "abschnitt");
let mut c = n.children().peekable();
@ -156,27 +135,55 @@ impl Abschnitt {
.gldsym
.clone()
.expect("First 'Absatz' needs to have § id");
absatze.push(absatz);
// If there's a "liste" after an "absatz", the "liste" should be part of the "absatz"
if let Some(child) = c.peek() {
if Liste::test(child) {
let liste = Liste::parse(c.next().unwrap());
//TODO do something with list
absatze.push(Content::TextWithList(
absatz.content.clone(),
liste.get_list(),
))
} else if Table::test(child) {
// If there's a "table" after an "absatz", the "table" should be part of the "absatz"
let table = Table::parse(c.next().unwrap());
if let Some(child) = c.peek() {
if Absatz::test_with_typ(child, "erltext") {
let after_absatz = Absatz::parse(c.next().unwrap());
absatze.push(Content::TextWithListAndText(
absatz.content,
table.get_list(),
after_absatz.content,
))
} else {
absatze.push(Content::TextWithList(absatz.content, table.get_list()))
}
}
}
} else {
absatze.push(Content::Text(absatz.content.clone()));
}
//TODO: Continue here, (2) and (3) is somehow skipped
//There can be as many 'Absätze' as our lovely lawsetter wants
loop {
match c.peek() {
Some(child) => {
if AbsatzAbs::test(child) {
absatze.push(AbsatzAbs::parse(c.next().unwrap()));
let abs = AbsatzAbs::parse(c.next().unwrap());
// If there's a "liste" after an "absatz", the "liste" should be part of the "absatz"
if let Some(child) = c.peek() {
if Liste::test(child) {
if Liste::test(&child) {
let liste = Liste::parse(c.next().unwrap());
//TODO do something with list
absatze.push(Content::TextWithList(abs.content, liste.get_list()))
} else {
absatze.push(Content::Text(abs.content));
}
} else {
absatze.push(Content::Text(abs.content));
}
continue;
}
@ -187,38 +194,15 @@ impl Abschnitt {
}
if absatze.len() == 1 {
builder.new_par(par_id, Content::Text(absatze[0].content.clone()));
builder.new_par(par_id, absatze[0].clone());
} else {
let mut contents = Vec::new();
for a in &absatze {
contents.push(Box::new(Content::Text(a.content.clone())));
contents.push(Box::new(a.clone()));
}
builder.new_par(par_id, Content::Item(contents));
}
//if absatze.len() == 1 {
// builder.new_par(Content::Text(format!(
// "{} {}",
// absatze[0].gldsym.clone().unwrap(),
// absatze[0].content
// )));
//} else {
// let mut content = Vec::new();
// for a in &absatze {
// let mut txt = String::new();
// if let Some(sym) = &a.gldsym {
// if symb.is_some() {
// panic!("Two (or more) § symbols in single paragraph ?!?");
// } else {
// symb = Some(sym);
// }
// }
// txt.push_str(&a.content);
// content.push(Box::new(Content::Text(txt)));
// }
// builder.new_par(Content::Item(content));
//}
// Skip all UeberschriftTitle and Absatz
loop {
match c.peek() {
@ -237,11 +221,7 @@ impl Abschnitt {
}
}
println!("====");
println!("{c:#?}");
assert_eq!(c.next(), None);
Self { absatze }
}
}
@ -319,6 +299,78 @@ impl Ziffernliste {
}
}
/// A table cell (`td` element) wrapping exactly one `Absatz`.
#[derive(Debug, PartialEq)]
pub(crate) struct Td {
    absatz: Absatz,
}

impl Td {
    /// Parses a `td` node into a [`Td`].
    ///
    /// # Panics
    /// Panics if the node's tag name is not `td`, if it has no child, or if
    /// it has more than one child.
    pub(crate) fn parse(n: &Node) -> Self {
        assert!(n.tag_name().name() == "td");

        let mut children = n.children();
        let only_child = children.next().unwrap();
        let absatz = Absatz::parse(only_child);

        // A cell must not contain anything besides its single `absatz`.
        assert_eq!(children.next(), None);

        Self { absatz }
    }
}
/// A table row (`tr` element): the cells parsed from its children, in order.
#[derive(Debug, PartialEq)]
pub(crate) struct Tr {
    tds: Vec<Td>,
}

impl Tr {
    /// Parses a `tr` node, turning every child node into a [`Td`].
    ///
    /// # Panics
    /// Panics if the node's tag name is not `tr`, or if any child fails
    /// `Td::parse`'s own assertions.
    pub(crate) fn parse(n: &Node) -> Self {
        assert!(n.tag_name().name() == "tr");
        // The child iterator is consumed exactly once, so no mutable binding
        // is needed; collect the cells directly.
        let tds = n.children().map(|child| Td::parse(&child)).collect();
        Self { tds }
    }
}
/// A `table` element: the rows parsed from its children, in order.
#[derive(Debug, PartialEq)]
pub(crate) struct Table {
    trs: Vec<Tr>,
}

impl Table {
    /// Returns `true` when the node's tag name is `table`.
    pub(crate) fn test(n: &Node) -> bool {
        n.tag_name().name() == "table"
    }

    /// Parses a `table` node, turning every child node into a [`Tr`].
    ///
    /// # Panics
    /// Panics if `n` does not satisfy [`Table::test`], or if any child fails
    /// `Tr::parse`'s own assertions.
    pub(crate) fn parse(n: Node) -> Self {
        assert!(Self::test(&n));
        let trs = n.children().map(|child| Tr::parse(&child)).collect();
        Self { trs }
    }

    /// Renders the table as list items, one `Content::Text` per row.
    ///
    /// Each item has the form `- cell1 cell2 `: a leading `"- "`, then every
    /// cell's text followed by a single space (so a non-empty row ends with a
    /// trailing space, and an empty row yields `"- "`).
    pub(crate) fn get_list(&self) -> Vec<Box<Content>> {
        self.trs
            .iter()
            .map(|tr| {
                // Build the row in a single buffer rather than formatting each
                // cell into its own temporary string.
                let mut line = String::from("- ");
                for td in &tr.tds {
                    line.push_str(&td.absatz.content);
                    line.push(' ');
                }
                Box::new(Content::Text(line))
            })
            .collect()
    }
}
#[derive(Debug, PartialEq)]
pub(crate) struct Liste {
ziffernliste: Ziffernliste,
@ -341,6 +393,20 @@ impl Liste {
Self { ziffernliste }
}
/// Renders the list as one `Content::Text` per list element, formatted as
/// `"<symbol> <text>"` (symbol content, a single space, then the element text).
pub(crate) fn get_list(&self) -> Vec<Box<Content>> {
    self.ziffernliste
        .listelems
        .iter()
        .map(|elem| {
            // `format!` only borrows its arguments, so the element text does
            // not need to be cloned first.
            Box::new(Content::Text(format!(
                "{} {}",
                elem.symbol.content, elem.text
            )))
        })
        .collect()
}
}
#[derive(Debug, PartialEq)]
@ -405,12 +471,19 @@ impl Absatz {
/// Returns `true` when the node's tag name is `absatz`.
pub(crate) fn test(n: &Node) -> bool {
    let tag = n.tag_name().name();
    tag == "absatz"
}
/// Returns `true` when the node's tag name is `absatz` and its `typ`
/// attribute is present and equal to `typ`.
pub(crate) fn test_with_typ(n: &Node, typ: &str) -> bool {
    let is_absatz = n.tag_name().name() == "absatz";
    let typ_matches = n.attribute("typ") == Some(typ);
    is_absatz && typ_matches
}
pub(crate) fn parse(n: Node) -> Self {
assert!(n.tag_name().name() == "absatz");
Self {
content: n.text().unwrap().into(),
if let Some(text) = n.text() {
Self {
content: text.into(),
}
} else {
Self { content: "".into() }
}
}
}