push
This commit is contained in:
parent
a78ba95775
commit
eb6f3e8aba
@ -25,7 +25,7 @@ RISolve
|
|||||||
|
|
||||||
# Next step
|
# Next step
|
||||||
|
|
||||||
- [ ] call law struct fn with paragraph content
|
- [ ] UrhG § 17 Abs. 1 not parsed
|
||||||
|
|
||||||
# Naming
|
# Naming
|
||||||
|
|
||||||
|
@ -223,7 +223,8 @@ pub(crate) enum Content {
|
|||||||
Text(String), //This is my direct law text
|
Text(String), //This is my direct law text
|
||||||
TextWithList(String, Vec<Box<Content>>),
|
TextWithList(String, Vec<Box<Content>>),
|
||||||
Item(Vec<Box<Content>>), //(1) This is general law. (2) This is more specific law
|
Item(Vec<Box<Content>>), //(1) This is general law. (2) This is more specific law
|
||||||
List(Vec<Box<Content>>), //1. my first item
|
List(Vec<Box<Content>>),
|
||||||
|
TextWithListAndText(String, Vec<Box<Content>>, String), //1. my first item
|
||||||
}
|
}
|
||||||
|
|
||||||
#[cfg(test)]
|
#[cfg(test)]
|
||||||
|
@ -10,6 +10,20 @@ pub(crate) fn parse(url: &str, builder: &mut LawBuilder) -> Result<(), Error> {
|
|||||||
println!("{url}");
|
println!("{url}");
|
||||||
let xml = fetch_page(url)?;
|
let xml = fetch_page(url)?;
|
||||||
let xml = xml.replace("<gdash />", "-"); // used e.g. in §11 Abs. 3 UrhG
|
let xml = xml.replace("<gdash />", "-"); // used e.g. in §11 Abs. 3 UrhG
|
||||||
|
//
|
||||||
|
//
|
||||||
|
let xml = xml.replace(
|
||||||
|
// e.g. in § 17 (2) TODO: check that this only happens here
|
||||||
|
r#"<liste><schlussteil ebene="0" art="normal" ct="text">"#,
|
||||||
|
r#"<absatz typ="abs" ct="text" halign="j">"#,
|
||||||
|
);
|
||||||
|
let xml = xml.replace(
|
||||||
|
// e.g. in § 17 (2) TODO: check that this only happens here
|
||||||
|
r#"</schlussteil></liste>"#,
|
||||||
|
"</absatz>",
|
||||||
|
);
|
||||||
|
println!("{xml}");
|
||||||
|
|
||||||
let risdok = Risdok::from_str(&xml, builder)?;
|
let risdok = Risdok::from_str(&xml, builder)?;
|
||||||
|
|
||||||
println!("{builder:#?}");
|
println!("{builder:#?}");
|
||||||
|
@ -1,5 +1,3 @@
|
|||||||
use std::fmt::Display;
|
|
||||||
|
|
||||||
use roxmltree::Node;
|
use roxmltree::Node;
|
||||||
|
|
||||||
use crate::{
|
use crate::{
|
||||||
@ -39,20 +37,6 @@ impl Risdok {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
impl Display for Risdok {
|
|
||||||
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
|
|
||||||
for abs in &self.nutzdaten.abschnitt.absatze {
|
|
||||||
let mut w = String::new();
|
|
||||||
if let Some(symb) = &abs.gldsym {
|
|
||||||
w.push_str(&format!("\n{symb} "));
|
|
||||||
}
|
|
||||||
w.push_str(&format!("{}\n", abs.content));
|
|
||||||
f.write_str(&w)?;
|
|
||||||
}
|
|
||||||
Ok(())
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
#[derive(Debug, PartialEq)]
|
#[derive(Debug, PartialEq)]
|
||||||
pub(crate) struct Metadaten;
|
pub(crate) struct Metadaten;
|
||||||
impl Metadaten {
|
impl Metadaten {
|
||||||
@ -66,30 +50,25 @@ impl Metadaten {
|
|||||||
}
|
}
|
||||||
|
|
||||||
#[derive(Debug, PartialEq)]
|
#[derive(Debug, PartialEq)]
|
||||||
pub(crate) struct Nutzdaten {
|
pub(crate) struct Nutzdaten {}
|
||||||
abschnitt: Abschnitt,
|
|
||||||
}
|
|
||||||
impl Nutzdaten {
|
impl Nutzdaten {
|
||||||
pub(crate) fn parse(n: Node, builder: &mut LawBuilder) -> Self {
|
pub(crate) fn parse(n: Node, builder: &mut LawBuilder) -> Self {
|
||||||
assert!(n.tag_name().name() == "nutzdaten");
|
assert!(n.tag_name().name() == "nutzdaten");
|
||||||
|
|
||||||
let mut c = n.children();
|
let mut c = n.children();
|
||||||
let ret = Self {
|
|
||||||
abschnitt: Abschnitt::parse(c.next().unwrap(), builder),
|
Abschnitt::parse(c.next().unwrap(), builder);
|
||||||
};
|
|
||||||
|
|
||||||
assert_eq!(c.next(), None);
|
assert_eq!(c.next(), None);
|
||||||
|
|
||||||
ret
|
Self {}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
#[derive(Debug, PartialEq)]
|
#[derive(Debug, PartialEq)]
|
||||||
pub(crate) struct Abschnitt {
|
pub(crate) struct Abschnitt;
|
||||||
absatze: Vec<AbsatzAbs>,
|
|
||||||
}
|
|
||||||
impl Abschnitt {
|
impl Abschnitt {
|
||||||
pub(crate) fn parse(n: Node, builder: &mut LawBuilder) -> Self {
|
pub(crate) fn parse(n: Node, builder: &mut LawBuilder) {
|
||||||
assert!(n.tag_name().name() == "abschnitt");
|
assert!(n.tag_name().name() == "abschnitt");
|
||||||
|
|
||||||
let mut c = n.children().peekable();
|
let mut c = n.children().peekable();
|
||||||
@ -156,27 +135,55 @@ impl Abschnitt {
|
|||||||
.gldsym
|
.gldsym
|
||||||
.clone()
|
.clone()
|
||||||
.expect("First 'Absatz' needs to have § id");
|
.expect("First 'Absatz' needs to have § id");
|
||||||
absatze.push(absatz);
|
|
||||||
// If there's a "liste" after an "absatz", the "liste" should be part of the "absatz"
|
// If there's a "liste" after an "absatz", the "liste" should be part of the "absatz"
|
||||||
if let Some(child) = c.peek() {
|
if let Some(child) = c.peek() {
|
||||||
if Liste::test(child) {
|
if Liste::test(child) {
|
||||||
let liste = Liste::parse(c.next().unwrap());
|
let liste = Liste::parse(c.next().unwrap());
|
||||||
//TODO do something with list
|
absatze.push(Content::TextWithList(
|
||||||
|
absatz.content.clone(),
|
||||||
|
liste.get_list(),
|
||||||
|
))
|
||||||
|
} else if Table::test(child) {
|
||||||
|
// If there's a "table" after an "absatz", the "table" should be part of the "absatz"
|
||||||
|
let table = Table::parse(c.next().unwrap());
|
||||||
|
if let Some(child) = c.peek() {
|
||||||
|
if Absatz::test_with_typ(child, "erltext") {
|
||||||
|
let after_absatz = Absatz::parse(c.next().unwrap());
|
||||||
|
absatze.push(Content::TextWithListAndText(
|
||||||
|
absatz.content,
|
||||||
|
table.get_list(),
|
||||||
|
after_absatz.content,
|
||||||
|
))
|
||||||
|
} else {
|
||||||
|
absatze.push(Content::TextWithList(absatz.content, table.get_list()))
|
||||||
|
}
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
} else {
|
||||||
|
absatze.push(Content::Text(absatz.content.clone()));
|
||||||
}
|
}
|
||||||
|
|
||||||
|
//TODO: Continue here, (2) and (3) are somehow skipped
|
||||||
|
|
||||||
//There can be as many 'Absätze' as our lovely lawsetter wants
|
//There can be as many 'Absätze' as our lovely lawsetter wants
|
||||||
loop {
|
loop {
|
||||||
match c.peek() {
|
match c.peek() {
|
||||||
Some(child) => {
|
Some(child) => {
|
||||||
if AbsatzAbs::test(child) {
|
if AbsatzAbs::test(child) {
|
||||||
absatze.push(AbsatzAbs::parse(c.next().unwrap()));
|
let abs = AbsatzAbs::parse(c.next().unwrap());
|
||||||
|
|
||||||
// If there's a "liste" after an "absatz", the "liste" should be part of the "absatz"
|
// If there's a "liste" after an "absatz", the "liste" should be part of the "absatz"
|
||||||
if let Some(child) = c.peek() {
|
if let Some(child) = c.peek() {
|
||||||
if Liste::test(child) {
|
if Liste::test(&child) {
|
||||||
let liste = Liste::parse(c.next().unwrap());
|
let liste = Liste::parse(c.next().unwrap());
|
||||||
//TODO do something with list
|
//TODO do something with list
|
||||||
|
absatze.push(Content::TextWithList(abs.content, liste.get_list()))
|
||||||
|
} else {
|
||||||
|
absatze.push(Content::Text(abs.content));
|
||||||
}
|
}
|
||||||
|
} else {
|
||||||
|
absatze.push(Content::Text(abs.content));
|
||||||
}
|
}
|
||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
@ -187,38 +194,15 @@ impl Abschnitt {
|
|||||||
}
|
}
|
||||||
|
|
||||||
if absatze.len() == 1 {
|
if absatze.len() == 1 {
|
||||||
builder.new_par(par_id, Content::Text(absatze[0].content.clone()));
|
builder.new_par(par_id, absatze[0].clone());
|
||||||
} else {
|
} else {
|
||||||
let mut contents = Vec::new();
|
let mut contents = Vec::new();
|
||||||
for a in &absatze {
|
for a in &absatze {
|
||||||
contents.push(Box::new(Content::Text(a.content.clone())));
|
contents.push(Box::new(a.clone()));
|
||||||
}
|
}
|
||||||
builder.new_par(par_id, Content::Item(contents));
|
builder.new_par(par_id, Content::Item(contents));
|
||||||
}
|
}
|
||||||
|
|
||||||
//if absatze.len() == 1 {
|
|
||||||
// builder.new_par(Content::Text(format!(
|
|
||||||
// "{} {}",
|
|
||||||
// absatze[0].gldsym.clone().unwrap(),
|
|
||||||
// absatze[0].content
|
|
||||||
// )));
|
|
||||||
//} else {
|
|
||||||
// let mut content = Vec::new();
|
|
||||||
// for a in &absatze {
|
|
||||||
// let mut txt = String::new();
|
|
||||||
// if let Some(sym) = &a.gldsym {
|
|
||||||
// if symb.is_some() {
|
|
||||||
// panic!("Two (or more) § symbols in single paragraph ?!?");
|
|
||||||
// } else {
|
|
||||||
// symb = Some(sym);
|
|
||||||
// }
|
|
||||||
// }
|
|
||||||
// txt.push_str(&a.content);
|
|
||||||
// content.push(Box::new(Content::Text(txt)));
|
|
||||||
// }
|
|
||||||
// builder.new_par(Content::Item(content));
|
|
||||||
//}
|
|
||||||
|
|
||||||
// Skip all UeberschriftTitle and Absatz
|
// Skip all UeberschriftTitle and Absatz
|
||||||
loop {
|
loop {
|
||||||
match c.peek() {
|
match c.peek() {
|
||||||
@ -237,11 +221,7 @@ impl Abschnitt {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
println!("====");
|
|
||||||
println!("{c:#?}");
|
|
||||||
assert_eq!(c.next(), None);
|
assert_eq!(c.next(), None);
|
||||||
|
|
||||||
Self { absatze }
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -319,6 +299,78 @@ impl Ziffernliste {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
#[derive(Debug, PartialEq)]
|
||||||
|
pub(crate) struct Td {
|
||||||
|
absatz: Absatz,
|
||||||
|
}
|
||||||
|
impl Td {
|
||||||
|
pub(crate) fn parse(n: &Node) -> Self {
|
||||||
|
assert!(n.tag_name().name() == "td");
|
||||||
|
|
||||||
|
let mut c = n.children();
|
||||||
|
let absatz = Absatz::parse(c.next().unwrap());
|
||||||
|
|
||||||
|
assert_eq!(c.next(), None);
|
||||||
|
|
||||||
|
Self { absatz }
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
#[derive(Debug, PartialEq)]
|
||||||
|
pub(crate) struct Tr {
|
||||||
|
tds: Vec<Td>,
|
||||||
|
}
|
||||||
|
impl Tr {
|
||||||
|
pub(crate) fn parse(n: &Node) -> Self {
|
||||||
|
assert!(n.tag_name().name() == "tr");
|
||||||
|
|
||||||
|
let mut tds = Vec::new();
|
||||||
|
|
||||||
|
let mut c = n.children();
|
||||||
|
for child in c {
|
||||||
|
tds.push(Td::parse(&child));
|
||||||
|
}
|
||||||
|
|
||||||
|
Self { tds }
|
||||||
|
}
|
||||||
|
}
|
||||||
|
#[derive(Debug, PartialEq)]
|
||||||
|
pub(crate) struct Table {
|
||||||
|
trs: Vec<Tr>,
|
||||||
|
}
|
||||||
|
impl Table {
|
||||||
|
pub(crate) fn test(n: &Node) -> bool {
|
||||||
|
n.tag_name().name() == "table"
|
||||||
|
}
|
||||||
|
|
||||||
|
pub(crate) fn parse(n: Node) -> Self {
|
||||||
|
assert!(Self::test(&n));
|
||||||
|
let mut trs = Vec::new();
|
||||||
|
|
||||||
|
let mut c = n.children();
|
||||||
|
for child in c {
|
||||||
|
trs.push(Tr::parse(&child));
|
||||||
|
}
|
||||||
|
|
||||||
|
Self { trs }
|
||||||
|
}
|
||||||
|
|
||||||
|
pub(crate) fn get_list(&self) -> Vec<Box<Content>> {
|
||||||
|
let mut ret = Vec::new();
|
||||||
|
|
||||||
|
for tr in &self.trs {
|
||||||
|
let mut txt = String::new();
|
||||||
|
for td in &tr.tds {
|
||||||
|
txt.push_str(&format!("{} ", td.absatz.content));
|
||||||
|
}
|
||||||
|
|
||||||
|
ret.push(Box::new(Content::Text(format!("- {txt}",))));
|
||||||
|
}
|
||||||
|
|
||||||
|
ret
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
#[derive(Debug, PartialEq)]
|
#[derive(Debug, PartialEq)]
|
||||||
pub(crate) struct Liste {
|
pub(crate) struct Liste {
|
||||||
ziffernliste: Ziffernliste,
|
ziffernliste: Ziffernliste,
|
||||||
@ -341,6 +393,20 @@ impl Liste {
|
|||||||
|
|
||||||
Self { ziffernliste }
|
Self { ziffernliste }
|
||||||
}
|
}
|
||||||
|
|
||||||
|
pub(crate) fn get_list(&self) -> Vec<Box<Content>> {
|
||||||
|
let mut ret = Vec::new();
|
||||||
|
|
||||||
|
for a in &self.ziffernliste.listelems {
|
||||||
|
ret.push(Box::new(Content::Text(format!(
|
||||||
|
"{} {}",
|
||||||
|
a.symbol.content,
|
||||||
|
a.text.clone()
|
||||||
|
))));
|
||||||
|
}
|
||||||
|
|
||||||
|
ret
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
#[derive(Debug, PartialEq)]
|
#[derive(Debug, PartialEq)]
|
||||||
@ -405,12 +471,19 @@ impl Absatz {
|
|||||||
pub(crate) fn test(n: &Node) -> bool {
|
pub(crate) fn test(n: &Node) -> bool {
|
||||||
n.tag_name().name() == "absatz"
|
n.tag_name().name() == "absatz"
|
||||||
}
|
}
|
||||||
|
pub(crate) fn test_with_typ(n: &Node, typ: &str) -> bool {
|
||||||
|
n.tag_name().name() == "absatz" && n.attribute("typ") == Some(typ)
|
||||||
|
}
|
||||||
|
|
||||||
pub(crate) fn parse(n: Node) -> Self {
|
pub(crate) fn parse(n: Node) -> Self {
|
||||||
assert!(n.tag_name().name() == "absatz");
|
assert!(n.tag_name().name() == "absatz");
|
||||||
|
|
||||||
Self {
|
if let Some(text) = n.text() {
|
||||||
content: n.text().unwrap().into(),
|
Self {
|
||||||
|
content: text.into(),
|
||||||
|
}
|
||||||
|
} else {
|
||||||
|
Self { content: "".into() }
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
Loading…
x
Reference in New Issue
Block a user