554 lines
15 KiB
Rust
554 lines
15 KiB
Rust
use log::{error, info};
|
|
use roxmltree::Node;
|
|
|
|
use crate::{
|
|
law::{Content, LawBuilder},
|
|
Error,
|
|
};
|
|
|
|
#[derive(Debug, PartialEq)]
|
|
pub(crate) struct Risdok {}
|
|
|
|
impl Risdok {
|
|
pub(crate) fn parse(n: Node, builder: &mut LawBuilder) -> bool {
|
|
assert!(n.tag_name().name() == "risdok");
|
|
|
|
let mut c = n.children();
|
|
|
|
Metadaten::parse(c.next().unwrap());
|
|
let nutzdaten = Nutzdaten::parse(c.next().unwrap(), builder);
|
|
if !nutzdaten {
|
|
return false;
|
|
}
|
|
Layoutdaten::parse(c.next().unwrap());
|
|
|
|
assert_eq!(c.next(), None);
|
|
|
|
true
|
|
}
|
|
|
|
pub(crate) fn from_str(xml: &str, builder: &mut LawBuilder) -> Result<bool, Error> {
|
|
let doc = roxmltree::Document::parse(xml)?;
|
|
let root = doc.root();
|
|
assert_eq!(root.children().count(), 1);
|
|
Ok(Self::parse(root.children().next().unwrap(), builder))
|
|
}
|
|
}
|
|
|
|
#[derive(Debug, PartialEq)]
|
|
pub(crate) struct Metadaten;
|
|
impl Metadaten {
|
|
pub(crate) fn parse(n: Node) -> Self {
|
|
assert!(n.tag_name().name() == "metadaten");
|
|
|
|
assert_eq!(n.children().next(), None);
|
|
|
|
Self {}
|
|
}
|
|
}
|
|
|
|
#[derive(Debug, PartialEq)]
|
|
pub(crate) struct Nutzdaten {}
|
|
impl Nutzdaten {
|
|
pub(crate) fn parse(n: Node, builder: &mut LawBuilder) -> bool {
|
|
assert!(n.tag_name().name() == "nutzdaten");
|
|
|
|
let mut c = n.children();
|
|
|
|
let ret = Abschnitt::parse(c.next().unwrap(), builder);
|
|
|
|
assert_eq!(c.next(), None);
|
|
|
|
ret
|
|
}
|
|
}
|
|
|
|
#[derive(Debug, PartialEq)]
|
|
pub(crate) struct Abschnitt;
|
|
impl Abschnitt {
|
|
pub(crate) fn parse(n: Node, builder: &mut LawBuilder) -> bool {
|
|
assert!(n.tag_name().name() == "abschnitt");
|
|
|
|
let mut c = n.children().peekable();
|
|
|
|
Kzinhalt::parse(c.next().unwrap());
|
|
Kzinhalt::parse(c.next().unwrap());
|
|
Fzinhalt::parse(c.next().unwrap());
|
|
Fzinhalt::parse(c.next().unwrap());
|
|
|
|
// Skip all UeberschriftTitle and Absatz
|
|
while let Some(child) = c.peek() {
|
|
if Ueberschrift::test(child, "titel") {
|
|
c.next();
|
|
continue;
|
|
}
|
|
if Absatz::test_with_typ(child, "erltext") {
|
|
c.next();
|
|
continue;
|
|
}
|
|
break;
|
|
}
|
|
|
|
while let Some(child) = c.peek() {
|
|
if Ueberschrift::test(child, "g1") {
|
|
let ueberschrift = Ueberschrift::parse(c.next().unwrap(), "g1");
|
|
if ueberschrift.content.trim().starts_with("Artikel") {
|
|
return false;
|
|
}
|
|
builder.new_header(&ueberschrift.content);
|
|
} else if Ueberschrift::test(child, "g2") {
|
|
let ueberschrift = Ueberschrift::parse(c.next().unwrap(), "g2");
|
|
builder.new_desc(&ueberschrift.content);
|
|
} else if Ueberschrift::test(child, "g1min") {
|
|
let ueberschrift = Ueberschrift::parse(c.next().unwrap(), "g1min");
|
|
builder.new_header(&ueberschrift.content);
|
|
} else {
|
|
break;
|
|
}
|
|
}
|
|
|
|
if let Some(child) = c.peek() {
|
|
if Ueberschrift::test(child, "para") {
|
|
builder
|
|
.new_next_para_header(&Ueberschrift::parse(c.next().unwrap(), "para").content);
|
|
}
|
|
}
|
|
|
|
// TODO: Continue here: We want to create a `Section`.
|
|
//
|
|
// We have 2 tasks
|
|
// 1) Get paragraph id
|
|
// 2) Get content
|
|
|
|
let mut absatze = Vec::new();
|
|
let absatz = AbsatzAbs::parse(c.next().expect("We need at least one 'Absatz'"));
|
|
let par_id = absatz
|
|
.gldsym
|
|
.clone()
|
|
.expect("First 'Absatz' needs to have § id");
|
|
|
|
// If there's a "liste" after an "absatz", the "liste" should be part of the "absatz"
|
|
if let Some(child) = c.peek() {
|
|
if Liste::test(child) {
|
|
let liste = Liste::parse(c.next().unwrap());
|
|
absatze.push(Content::List(vec![
|
|
Content::Text(absatz.content.replace('\u{a0}', " ")),
|
|
liste.get_content(),
|
|
]));
|
|
} else if Table::test(child) {
|
|
// If there's a "table" after an "absatz", the "table" should be part of the "absatz"
|
|
let table = Table::parse(c.next().unwrap());
|
|
if let Some(child) = c.peek() {
|
|
if Absatz::test_with_typ(child, "erltext") {
|
|
let after_absatz = Absatz::parse(c.next().unwrap());
|
|
absatze.push(Content::List(vec![
|
|
Content::Text(absatz.content.replace('\u{a0}', " ")),
|
|
Content::List(table.get_list()),
|
|
Content::Text(after_absatz.content),
|
|
]))
|
|
} else {
|
|
absatze.push(Content::List(vec![
|
|
Content::Text(absatz.content.replace('\u{a0}', " ")),
|
|
Content::List(table.get_list()),
|
|
]));
|
|
}
|
|
}
|
|
} else {
|
|
absatze.push(Content::Text(absatz.content.replace('\u{a0}', " ").clone()));
|
|
}
|
|
} else {
|
|
absatze.push(Content::Text(absatz.content.replace('\u{a0}', " ").clone()));
|
|
}
|
|
|
|
//There can be as many 'Absätze' as our lovely lawsetter wants
|
|
while let Some(child) = c.peek() {
|
|
if AbsatzAbs::test(child) {
|
|
let abs = AbsatzAbs::parse(c.next().unwrap());
|
|
|
|
// If there's a "liste" after an "absatz", the "liste" should be part of the "absatz"
|
|
if let Some(child) = c.peek() {
|
|
if Liste::test(child) {
|
|
let liste = Liste::parse(c.next().unwrap());
|
|
absatze.push(Content::List(vec![
|
|
Content::Text(abs.content.replace('\u{a0}', " ")),
|
|
liste.get_content(),
|
|
]));
|
|
} else {
|
|
absatze.push(Content::Text(abs.content.replace('\u{a0}', " ")));
|
|
}
|
|
} else {
|
|
absatze.push(Content::Text(abs.content.replace('\u{a0}', " ")));
|
|
}
|
|
continue;
|
|
}
|
|
break;
|
|
}
|
|
|
|
if absatze.len() == 1 {
|
|
builder.new_par(par_id, absatze[0].clone());
|
|
} else {
|
|
let mut contents = Vec::new();
|
|
for a in &absatze {
|
|
contents.push(a.clone());
|
|
}
|
|
builder.new_par(par_id, Content::Item(contents));
|
|
}
|
|
|
|
// Skip all UeberschriftTitle and Absatz
|
|
while let Some(child) = c.peek() {
|
|
if Ueberschrift::test(child, "titel") {
|
|
c.next();
|
|
continue;
|
|
}
|
|
if Absatz::test(child) {
|
|
c.next();
|
|
continue;
|
|
}
|
|
break;
|
|
}
|
|
|
|
assert_eq!(c.next(), None);
|
|
|
|
true
|
|
}
|
|
}
|
|
|
|
#[derive(Debug, PartialEq, Clone)]
|
|
pub(crate) struct Symbol {
|
|
stellen: String,
|
|
content: String,
|
|
}
|
|
impl Symbol {
|
|
pub(crate) fn parse(n: Node) -> Self {
|
|
assert!(n.tag_name().name() == "symbol");
|
|
assert_eq!(n.children().count(), 1);
|
|
|
|
let stellen = n.attribute("stellen").unwrap().into();
|
|
let content = n.text().unwrap().into();
|
|
|
|
Self { stellen, content }
|
|
}
|
|
}
|
|
|
|
#[derive(Debug, PartialEq, Clone)]
|
|
pub(crate) struct Listelem {
|
|
symbol: Symbol,
|
|
text: String,
|
|
}
|
|
impl Listelem {
|
|
pub(crate) fn parse(n: Node) -> Self {
|
|
assert!(n.tag_name().name() == "listelem");
|
|
|
|
let mut c = n.children();
|
|
|
|
let symbol = Symbol::parse(c.next().unwrap());
|
|
|
|
let text = c.next().unwrap().text().unwrap().into();
|
|
|
|
assert_eq!(c.next(), None);
|
|
|
|
Self { symbol, text }
|
|
}
|
|
}
|
|
|
|
#[derive(Debug, PartialEq, Clone)]
|
|
pub(crate) struct Ziffernliste {
|
|
ebene: String,
|
|
listelems: Vec<Listelem>,
|
|
}
|
|
impl Ziffernliste {
|
|
pub(crate) fn test(n: &Node) -> bool {
|
|
["ziffernliste", "aufzaehlung", "literaliste"].contains(&n.tag_name().name())
|
|
}
|
|
|
|
pub(crate) fn parse(n: Node) -> Self {
|
|
assert!(Self::test(&n));
|
|
|
|
let ebene = n.attribute("ebene").unwrap().into();
|
|
|
|
let mut listelems = Vec::new();
|
|
|
|
for child in n.children() {
|
|
listelems.push(Listelem::parse(child));
|
|
}
|
|
|
|
Self { ebene, listelems }
|
|
}
|
|
|
|
pub(crate) fn get_content(&self) -> Content {
|
|
let mut elems = Vec::new();
|
|
|
|
for elem in &self.listelems {
|
|
elems.push(Content::Text(
|
|
format!("{} {}", elem.symbol.content, elem.text).replace('\u{a0}', " "),
|
|
));
|
|
}
|
|
|
|
Content::List(elems)
|
|
}
|
|
}
|
|
|
|
#[derive(Debug, PartialEq)]
|
|
pub(crate) struct Td {
|
|
absatz: Absatz,
|
|
}
|
|
impl Td {
|
|
pub(crate) fn parse(n: &Node) -> Self {
|
|
assert!(n.tag_name().name() == "td");
|
|
|
|
let mut c = n.children();
|
|
let absatz = Absatz::parse(c.next().unwrap());
|
|
|
|
assert_eq!(c.next(), None);
|
|
|
|
Self { absatz }
|
|
}
|
|
}
|
|
|
|
#[derive(Debug, PartialEq)]
|
|
pub(crate) struct Tr {
|
|
tds: Vec<Td>,
|
|
}
|
|
impl Tr {
|
|
pub(crate) fn parse(n: &Node) -> Self {
|
|
assert!(n.tag_name().name() == "tr");
|
|
|
|
let mut tds = Vec::new();
|
|
|
|
for child in n.children() {
|
|
tds.push(Td::parse(&child));
|
|
}
|
|
|
|
Self { tds }
|
|
}
|
|
}
|
|
#[derive(Debug, PartialEq)]
|
|
pub(crate) struct Table {
|
|
trs: Vec<Tr>,
|
|
}
|
|
impl Table {
|
|
pub(crate) fn test(n: &Node) -> bool {
|
|
n.tag_name().name() == "table"
|
|
}
|
|
|
|
pub(crate) fn parse(n: Node) -> Self {
|
|
assert!(Self::test(&n));
|
|
let mut trs = Vec::new();
|
|
|
|
for child in n.children() {
|
|
trs.push(Tr::parse(&child));
|
|
}
|
|
|
|
Self { trs }
|
|
}
|
|
|
|
pub(crate) fn get_list(&self) -> Vec<Content> {
|
|
let mut ret = Vec::new();
|
|
|
|
for tr in &self.trs {
|
|
let mut txt = String::new();
|
|
for td in &tr.tds {
|
|
txt.push_str(&format!("{} ", td.absatz.content));
|
|
}
|
|
|
|
ret.push(Content::Text(format!("- {txt}",).replace('\u{a0}', " ")));
|
|
}
|
|
|
|
ret
|
|
}
|
|
}
|
|
|
|
#[derive(Debug, PartialEq)]
|
|
pub(crate) struct Schlussteil {
|
|
content: String,
|
|
}
|
|
impl Schlussteil {
|
|
pub(crate) fn test(n: &Node) -> bool {
|
|
(n.tag_name().name() == "schlussteil" || n.tag_name().name() == "schluss")
|
|
&& n.children().count() == 1
|
|
}
|
|
|
|
pub(crate) fn parse(n: Node) -> Self {
|
|
assert!(Self::test(&n));
|
|
|
|
let content = n.children().next().unwrap().text().unwrap().into(); //not sure
|
|
|
|
Self { content }
|
|
}
|
|
}
|
|
|
|
#[derive(Debug)]
|
|
pub(crate) struct Liste {
|
|
content: Vec<Content>,
|
|
}
|
|
impl Liste {
|
|
pub(crate) fn test(n: &Node) -> bool {
|
|
n.tag_name().name() == "liste"
|
|
}
|
|
|
|
pub(crate) fn parse(n: Node) -> Self {
|
|
assert!(Self::test(&n));
|
|
|
|
let mut content = Vec::new();
|
|
|
|
let mut c = n.children().peekable();
|
|
|
|
content.push(Ziffernliste::parse(c.next().unwrap()).get_content());
|
|
|
|
while let Some(child) = c.peek() {
|
|
if Ziffernliste::test(child) {
|
|
content.push(Ziffernliste::parse(c.next().unwrap()).get_content());
|
|
} else if Schlussteil::test(child) {
|
|
content.push(Content::Text(
|
|
Schlussteil::parse(c.next().unwrap())
|
|
.content
|
|
.replace('\u{a0}', " "),
|
|
));
|
|
} else {
|
|
break;
|
|
}
|
|
}
|
|
|
|
assert_eq!(c.next(), None);
|
|
|
|
Self { content }
|
|
}
|
|
|
|
pub(crate) fn get_content(&self) -> Content {
|
|
Content::List(self.content.clone())
|
|
}
|
|
}
|
|
|
|
#[derive(Debug, PartialEq)]
|
|
pub(crate) struct AbsatzAbs {
|
|
gldsym: Option<String>,
|
|
content: String,
|
|
}
|
|
impl AbsatzAbs {
|
|
pub(crate) fn test(n: &Node) -> bool {
|
|
n.tag_name().name() == "absatz" && n.attribute("typ").unwrap() == "abs"
|
|
}
|
|
pub(crate) fn parse(n: Node) -> Self {
|
|
assert!(n.tag_name().name() == "absatz");
|
|
assert_eq!(n.attribute("typ").unwrap(), "abs");
|
|
|
|
let mut c = n.children().peekable();
|
|
|
|
let gldsym = match c.peek() {
|
|
Some(child) => {
|
|
if Leaf::test(child, "gldsym".into()) {
|
|
Some(Leaf::parse(c.next().unwrap(), "gldsym".into()).replace('\u{a0}', " "))
|
|
} else {
|
|
None
|
|
}
|
|
}
|
|
None => None,
|
|
};
|
|
|
|
let ret = Self {
|
|
gldsym,
|
|
content: c.next().unwrap().text().unwrap().trim().into(),
|
|
};
|
|
|
|
assert_eq!(c.next(), None);
|
|
|
|
ret
|
|
}
|
|
}
|
|
|
|
#[derive(Debug, PartialEq)]
|
|
pub(crate) struct Leaf {
|
|
content: String,
|
|
}
|
|
impl Leaf {
|
|
pub(crate) fn test(n: &Node, name: String) -> bool {
|
|
n.tag_name().name() == name && n.children().count() == 1
|
|
}
|
|
pub(crate) fn parse(n: Node, name: String) -> String {
|
|
assert!(n.tag_name().name() == name);
|
|
|
|
assert_eq!(n.children().count(), 1);
|
|
|
|
n.text().unwrap().into()
|
|
}
|
|
}
|
|
|
|
#[derive(Debug, PartialEq)]
|
|
pub(crate) struct Absatz {
|
|
content: String,
|
|
}
|
|
impl Absatz {
|
|
pub(crate) fn test(n: &Node) -> bool {
|
|
n.tag_name().name() == "absatz"
|
|
}
|
|
pub(crate) fn test_with_typ(n: &Node, typ: &str) -> bool {
|
|
n.tag_name().name() == "absatz" && n.attribute("typ") == Some(typ)
|
|
}
|
|
|
|
pub(crate) fn parse(n: Node) -> Self {
|
|
assert!(n.tag_name().name() == "absatz");
|
|
|
|
if let Some(text) = n.text() {
|
|
Self {
|
|
content: text.into(),
|
|
}
|
|
} else {
|
|
Self { content: "".into() }
|
|
}
|
|
}
|
|
}
|
|
|
|
#[derive(Debug, PartialEq)]
|
|
pub(crate) struct Ueberschrift {
|
|
typ: String,
|
|
content: String,
|
|
}
|
|
impl Ueberschrift {
|
|
fn test(n: &Node, typ: &str) -> bool {
|
|
n.tag_name().name() == "ueberschrift" && n.attribute("typ").unwrap() == typ
|
|
}
|
|
|
|
pub(crate) fn parse(n: Node, typ: &str) -> Self {
|
|
assert!(n.tag_name().name() == "ueberschrift");
|
|
|
|
assert_eq!(n.attribute("typ").unwrap(), typ);
|
|
|
|
Self {
|
|
content: n.text().unwrap().into(),
|
|
typ: typ.into(),
|
|
}
|
|
}
|
|
}
|
|
|
|
#[derive(Debug, PartialEq)]
|
|
pub(crate) struct Kzinhalt;
|
|
impl Kzinhalt {
|
|
pub(crate) fn parse(n: Node) -> Self {
|
|
assert!(n.tag_name().name() == "kzinhalt");
|
|
//TODO parse if necessary
|
|
Self {}
|
|
}
|
|
}
|
|
|
|
#[derive(Debug, PartialEq)]
|
|
pub(crate) struct Fzinhalt;
|
|
impl Fzinhalt {
|
|
pub(crate) fn parse(n: Node) -> Self {
|
|
assert!(n.tag_name().name() == "fzinhalt");
|
|
//TODO parse if necessary
|
|
Self {}
|
|
}
|
|
}
|
|
|
|
#[derive(Debug, PartialEq)]
|
|
pub(crate) struct Layoutdaten;
|
|
impl Layoutdaten {
|
|
pub(crate) fn parse(n: Node) -> Self {
|
|
assert!(n.tag_name().name() == "layoutdaten");
|
|
|
|
assert_eq!(n.children().next(), None);
|
|
|
|
Self {}
|
|
}
|
|
}
|