push
This commit is contained in:
parent
a78ba95775
commit
eb6f3e8aba
@ -25,7 +25,7 @@ RISolve
|
||||
|
||||
# Next step
|
||||
|
||||
- [ ] call law struct fn with paragraph content
|
||||
- [ ] UrhG § 17 Abs. 1 not parsed
|
||||
|
||||
# Naming
|
||||
|
||||
|
@ -223,7 +223,8 @@ pub(crate) enum Content {
|
||||
Text(String), //This is my direct law text
|
||||
TextWithList(String, Vec<Box<Content>>),
|
||||
Item(Vec<Box<Content>>), //(1) This is general law. (2) This is more specific law
|
||||
List(Vec<Box<Content>>), //1. my first item
|
||||
List(Vec<Box<Content>>),
|
||||
TextWithListAndText(String, Vec<Box<Content>>, String), //1. my first item
|
||||
}
|
||||
|
||||
#[cfg(test)]
|
||||
|
@ -10,6 +10,20 @@ pub(crate) fn parse(url: &str, builder: &mut LawBuilder) -> Result<(), Error> {
|
||||
println!("{url}");
|
||||
let xml = fetch_page(url)?;
|
||||
let xml = xml.replace("<gdash />", "-"); // used e.g. in §11 Abs. 3 UrhG
|
||||
//
|
||||
//
|
||||
let xml = xml.replace(
|
||||
// e.g. in § 17 (2) TODO: check that this only happens here
|
||||
r#"<liste><schlussteil ebene="0" art="normal" ct="text">"#,
|
||||
r#"<absatz typ="abs" ct="text" halign="j">"#,
|
||||
);
|
||||
let xml = xml.replace(
|
||||
// e.g. in § 17 (2) TODO: check that this only happens here
|
||||
r#"</schlussteil></liste>"#,
|
||||
"</absatz>",
|
||||
);
|
||||
println!("{xml}");
|
||||
|
||||
let risdok = Risdok::from_str(&xml, builder)?;
|
||||
|
||||
println!("{builder:#?}");
|
||||
|
@ -1,5 +1,3 @@
|
||||
use std::fmt::Display;
|
||||
|
||||
use roxmltree::Node;
|
||||
|
||||
use crate::{
|
||||
@ -39,20 +37,6 @@ impl Risdok {
|
||||
}
|
||||
}
|
||||
|
||||
impl Display for Risdok {
|
||||
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
|
||||
for abs in &self.nutzdaten.abschnitt.absatze {
|
||||
let mut w = String::new();
|
||||
if let Some(symb) = &abs.gldsym {
|
||||
w.push_str(&format!("\n{symb} "));
|
||||
}
|
||||
w.push_str(&format!("{}\n", abs.content));
|
||||
f.write_str(&w)?;
|
||||
}
|
||||
Ok(())
|
||||
}
|
||||
}
|
||||
|
||||
#[derive(Debug, PartialEq)]
|
||||
pub(crate) struct Metadaten;
|
||||
impl Metadaten {
|
||||
@ -66,30 +50,25 @@ impl Metadaten {
|
||||
}
|
||||
|
||||
#[derive(Debug, PartialEq)]
|
||||
pub(crate) struct Nutzdaten {
|
||||
abschnitt: Abschnitt,
|
||||
}
|
||||
pub(crate) struct Nutzdaten {}
|
||||
impl Nutzdaten {
|
||||
pub(crate) fn parse(n: Node, builder: &mut LawBuilder) -> Self {
|
||||
assert!(n.tag_name().name() == "nutzdaten");
|
||||
|
||||
let mut c = n.children();
|
||||
let ret = Self {
|
||||
abschnitt: Abschnitt::parse(c.next().unwrap(), builder),
|
||||
};
|
||||
|
||||
Abschnitt::parse(c.next().unwrap(), builder);
|
||||
|
||||
assert_eq!(c.next(), None);
|
||||
|
||||
ret
|
||||
Self {}
|
||||
}
|
||||
}
|
||||
|
||||
#[derive(Debug, PartialEq)]
|
||||
pub(crate) struct Abschnitt {
|
||||
absatze: Vec<AbsatzAbs>,
|
||||
}
|
||||
pub(crate) struct Abschnitt;
|
||||
impl Abschnitt {
|
||||
pub(crate) fn parse(n: Node, builder: &mut LawBuilder) -> Self {
|
||||
pub(crate) fn parse(n: Node, builder: &mut LawBuilder) {
|
||||
assert!(n.tag_name().name() == "abschnitt");
|
||||
|
||||
let mut c = n.children().peekable();
|
||||
@ -156,27 +135,55 @@ impl Abschnitt {
|
||||
.gldsym
|
||||
.clone()
|
||||
.expect("First 'Absatz' needs to have § id");
|
||||
absatze.push(absatz);
|
||||
|
||||
// If there's a "liste" after an "absatz", the "liste" should be part of the "absatz"
|
||||
if let Some(child) = c.peek() {
|
||||
if Liste::test(child) {
|
||||
let liste = Liste::parse(c.next().unwrap());
|
||||
//TODO do something with list
|
||||
absatze.push(Content::TextWithList(
|
||||
absatz.content.clone(),
|
||||
liste.get_list(),
|
||||
))
|
||||
} else if Table::test(child) {
|
||||
// If there's a "table" after an "absatz", the "table" should be part of the "absatz"
|
||||
let table = Table::parse(c.next().unwrap());
|
||||
if let Some(child) = c.peek() {
|
||||
if Absatz::test_with_typ(child, "erltext") {
|
||||
let after_absatz = Absatz::parse(c.next().unwrap());
|
||||
absatze.push(Content::TextWithListAndText(
|
||||
absatz.content,
|
||||
table.get_list(),
|
||||
after_absatz.content,
|
||||
))
|
||||
} else {
|
||||
absatze.push(Content::TextWithList(absatz.content, table.get_list()))
|
||||
}
|
||||
}
|
||||
}
|
||||
} else {
|
||||
absatze.push(Content::Text(absatz.content.clone()));
|
||||
}
|
||||
|
||||
//TODO: Continue here, (2) and (3) is somehow skipped
|
||||
|
||||
//There can be as many 'Absätze' as our lovely lawsetter wants
|
||||
loop {
|
||||
match c.peek() {
|
||||
Some(child) => {
|
||||
if AbsatzAbs::test(child) {
|
||||
absatze.push(AbsatzAbs::parse(c.next().unwrap()));
|
||||
let abs = AbsatzAbs::parse(c.next().unwrap());
|
||||
|
||||
// If there's a "liste" after an "absatz", the "liste" should be part of the "absatz"
|
||||
if let Some(child) = c.peek() {
|
||||
if Liste::test(child) {
|
||||
if Liste::test(&child) {
|
||||
let liste = Liste::parse(c.next().unwrap());
|
||||
//TODO do something with list
|
||||
absatze.push(Content::TextWithList(abs.content, liste.get_list()))
|
||||
} else {
|
||||
absatze.push(Content::Text(abs.content));
|
||||
}
|
||||
} else {
|
||||
absatze.push(Content::Text(abs.content));
|
||||
}
|
||||
continue;
|
||||
}
|
||||
@ -187,38 +194,15 @@ impl Abschnitt {
|
||||
}
|
||||
|
||||
if absatze.len() == 1 {
|
||||
builder.new_par(par_id, Content::Text(absatze[0].content.clone()));
|
||||
builder.new_par(par_id, absatze[0].clone());
|
||||
} else {
|
||||
let mut contents = Vec::new();
|
||||
for a in &absatze {
|
||||
contents.push(Box::new(Content::Text(a.content.clone())));
|
||||
contents.push(Box::new(a.clone()));
|
||||
}
|
||||
builder.new_par(par_id, Content::Item(contents));
|
||||
}
|
||||
|
||||
//if absatze.len() == 1 {
|
||||
// builder.new_par(Content::Text(format!(
|
||||
// "{} {}",
|
||||
// absatze[0].gldsym.clone().unwrap(),
|
||||
// absatze[0].content
|
||||
// )));
|
||||
//} else {
|
||||
// let mut content = Vec::new();
|
||||
// for a in &absatze {
|
||||
// let mut txt = String::new();
|
||||
// if let Some(sym) = &a.gldsym {
|
||||
// if symb.is_some() {
|
||||
// panic!("Two (or more) § symbols in single paragraph ?!?");
|
||||
// } else {
|
||||
// symb = Some(sym);
|
||||
// }
|
||||
// }
|
||||
// txt.push_str(&a.content);
|
||||
// content.push(Box::new(Content::Text(txt)));
|
||||
// }
|
||||
// builder.new_par(Content::Item(content));
|
||||
//}
|
||||
|
||||
// Skip all UeberschriftTitle and Absatz
|
||||
loop {
|
||||
match c.peek() {
|
||||
@ -237,11 +221,7 @@ impl Abschnitt {
|
||||
}
|
||||
}
|
||||
|
||||
println!("====");
|
||||
println!("{c:#?}");
|
||||
assert_eq!(c.next(), None);
|
||||
|
||||
Self { absatze }
|
||||
}
|
||||
}
|
||||
|
||||
@ -319,6 +299,78 @@ impl Ziffernliste {
|
||||
}
|
||||
}
|
||||
|
||||
#[derive(Debug, PartialEq)]
|
||||
pub(crate) struct Td {
|
||||
absatz: Absatz,
|
||||
}
|
||||
impl Td {
|
||||
pub(crate) fn parse(n: &Node) -> Self {
|
||||
assert!(n.tag_name().name() == "td");
|
||||
|
||||
let mut c = n.children();
|
||||
let absatz = Absatz::parse(c.next().unwrap());
|
||||
|
||||
assert_eq!(c.next(), None);
|
||||
|
||||
Self { absatz }
|
||||
}
|
||||
}
|
||||
|
||||
#[derive(Debug, PartialEq)]
|
||||
pub(crate) struct Tr {
|
||||
tds: Vec<Td>,
|
||||
}
|
||||
impl Tr {
|
||||
pub(crate) fn parse(n: &Node) -> Self {
|
||||
assert!(n.tag_name().name() == "tr");
|
||||
|
||||
let mut tds = Vec::new();
|
||||
|
||||
let mut c = n.children();
|
||||
for child in c {
|
||||
tds.push(Td::parse(&child));
|
||||
}
|
||||
|
||||
Self { tds }
|
||||
}
|
||||
}
|
||||
#[derive(Debug, PartialEq)]
|
||||
pub(crate) struct Table {
|
||||
trs: Vec<Tr>,
|
||||
}
|
||||
impl Table {
|
||||
pub(crate) fn test(n: &Node) -> bool {
|
||||
n.tag_name().name() == "table"
|
||||
}
|
||||
|
||||
pub(crate) fn parse(n: Node) -> Self {
|
||||
assert!(Self::test(&n));
|
||||
let mut trs = Vec::new();
|
||||
|
||||
let mut c = n.children();
|
||||
for child in c {
|
||||
trs.push(Tr::parse(&child));
|
||||
}
|
||||
|
||||
Self { trs }
|
||||
}
|
||||
|
||||
pub(crate) fn get_list(&self) -> Vec<Box<Content>> {
|
||||
let mut ret = Vec::new();
|
||||
|
||||
for tr in &self.trs {
|
||||
let mut txt = String::new();
|
||||
for td in &tr.tds {
|
||||
txt.push_str(&format!("{} ", td.absatz.content));
|
||||
}
|
||||
|
||||
ret.push(Box::new(Content::Text(format!("- {txt}",))));
|
||||
}
|
||||
|
||||
ret
|
||||
}
|
||||
}
|
||||
|
||||
#[derive(Debug, PartialEq)]
|
||||
pub(crate) struct Liste {
|
||||
ziffernliste: Ziffernliste,
|
||||
@ -341,6 +393,20 @@ impl Liste {
|
||||
|
||||
Self { ziffernliste }
|
||||
}
|
||||
|
||||
pub(crate) fn get_list(&self) -> Vec<Box<Content>> {
|
||||
let mut ret = Vec::new();
|
||||
|
||||
for a in &self.ziffernliste.listelems {
|
||||
ret.push(Box::new(Content::Text(format!(
|
||||
"{} {}",
|
||||
a.symbol.content,
|
||||
a.text.clone()
|
||||
))));
|
||||
}
|
||||
|
||||
ret
|
||||
}
|
||||
}
|
||||
|
||||
#[derive(Debug, PartialEq)]
|
||||
@ -405,12 +471,19 @@ impl Absatz {
|
||||
pub(crate) fn test(n: &Node) -> bool {
|
||||
n.tag_name().name() == "absatz"
|
||||
}
|
||||
pub(crate) fn test_with_typ(n: &Node, typ: &str) -> bool {
|
||||
n.tag_name().name() == "absatz" && n.attribute("typ") == Some(typ)
|
||||
}
|
||||
|
||||
pub(crate) fn parse(n: Node) -> Self {
|
||||
assert!(n.tag_name().name() == "absatz");
|
||||
|
||||
Self {
|
||||
content: n.text().unwrap().into(),
|
||||
if let Some(text) = n.text() {
|
||||
Self {
|
||||
content: text.into(),
|
||||
}
|
||||
} else {
|
||||
Self { content: "".into() }
|
||||
}
|
||||
}
|
||||
}
|
||||
|
Loading…
Reference in New Issue
Block a user