492 lines
12 KiB
Rust

// Copyright (C) 2024 Philipp Hofer
//
//
// Licensed under the EUPL, Version 1.2 or - as soon they will be approved by
// the European Commission - subsequent versions of the EUPL (the "Licence").
// You may not use this work except in compliance with the Licence.
//
// You should have received a copy of the European Union Public License along
// with this program. If not, you may obtain a copy of the Licence at:
// <https://joinup.ec.europa.eu/software/page/eupl>
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the Licence is distributed on an "AS IS" basis,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the Licence for the specific language governing permissions and
// limitations under the Licence.
mod absatz;
mod abschnitt;
mod liste;
mod table;
use std::{fmt::Display, iter::Peekable};
use abschnitt::Abschnitt;
use liste::Liste;
use roxmltree::{Children, Node};
use crate::{
law::{self, Content},
misc::Error,
paragraph::parser::absatz::Absatz,
};
/// Thin wrapper around a borrowed XML node on which structural expectations
/// (tag name, child count, `typ` attribute) are asserted.
struct Expect<'a> {
    /// The node the expectations are checked against.
    node: &'a Node<'a, 'a>,
}

impl<'a> From<&'a Node<'a, 'a>> for Expect<'a> {
    /// Wraps `node` without inspecting it.
    fn from(node: &'a Node<'a, 'a>) -> Self {
        Self { node }
    }
}
impl<'a> Expect<'a> {
    /// Asserts that the wrapped node's tag name equals `value`.
    ///
    /// Returns `self` so that further expectations can be chained.
    ///
    /// # Panics
    /// Panics if the tag name differs from `value`.
    fn tag(self, value: &str) -> Self {
        let actual = self.node.tag_name().name();
        assert!(
            actual == value,
            "Expected tag '{value}', got {actual} ({self})",
        );
        self
    }

    /// Asserts that the wrapped node has exactly `value` children.
    ///
    /// Returns `self` so that further expectations can be chained.
    ///
    /// # Panics
    /// Panics if the number of children differs from `value`.
    fn amount_children(self, value: usize) -> Self {
        let count = self.node.children().count();
        assert!(
            count == value,
            // The tag placeholder reports the node's tag name, not the expected count.
            "Expected {value} children elements for tag '{}', got {count} ({self})",
            self.node.tag_name().name(),
        );
        self
    }

    /// Asserts that the wrapped node carries a `typ` attribute equal to `value`.
    ///
    /// Returns `self` so that further expectations can be chained.
    ///
    /// # Panics
    /// Panics if the attribute is missing or has a different value.
    fn typ(self, value: &str) -> Self {
        let Some(typ) = self.node.attribute("typ") else {
            panic!("Expected 'typ' attribute on {self}");
        };
        assert!(
            typ == value,
            "Expected 'typ' attribute to have value {value}, got {typ} on node ({self})"
        );
        self
    }

    /// Asserts that an iterator of nodes is exhausted.
    ///
    /// # Panics
    /// Panics if `next` is `Some`, i.e. there is an unexpected further node.
    fn empty(next: Option<Node<'_, '_>>) {
        if let Some(n) = next {
            let expect = Expect::from(&n);
            panic!("Expected no more elements, got {expect}");
        }
    }
}
impl Display for Expect<'_> {
    /// Formats the wrapped node as `tag: <name>, content: <text as Debug>`.
    ///
    /// Formatter errors are propagated to the caller instead of panicking,
    /// and no intermediate `String` is allocated.
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        write!(
            f,
            "tag: {}, content: {:?}",
            self.node.tag_name().name(),
            self.node.text()
        )
    }
}
/// Parser entry point for the `risdok` root element.
#[derive(Debug, PartialEq)]
pub(crate) struct Risdok {}

impl Risdok {
    /// Parses a `risdok` node whose children are, in order, `metadaten`,
    /// `nutzdaten` and `layoutdaten`.
    ///
    /// Returns `false` as soon as [`Nutzdaten::parse`] returns `false` (the
    /// remaining children are then not inspected), otherwise `true`.
    ///
    /// # Panics
    /// Panics if the tag is not `risdok`, if one of the expected children is
    /// missing, or if there are more than three children.
    pub(crate) fn parse(n: Node, builder: &mut law::Builder) -> bool {
        Expect::from(&n).tag("risdok");

        let mut children = n.children();
        Metadaten::parse(children.next().unwrap());

        if !Nutzdaten::parse(children.next().unwrap(), builder) {
            return false;
        }

        Layoutdaten::parse(children.next().unwrap());
        Expect::empty(children.next());
        true
    }

    /// Parses `xml` as a document and feeds its single top-level node to
    /// [`Risdok::parse`].
    ///
    /// # Errors
    /// Returns an error if `xml` cannot be parsed by `roxmltree`.
    ///
    /// # Panics
    /// Panics if the document root does not have exactly one child, or if
    /// [`Risdok::parse`] panics.
    pub(crate) fn from_str(xml: &str, builder: &mut law::Builder) -> Result<bool, Error> {
        let document = roxmltree::Document::parse(xml)?;
        let root = document.root();
        assert_eq!(root.children().count(), 1);

        let risdok = root.children().next().unwrap();
        Ok(Self::parse(risdok, builder))
    }
}
/// Marker for a validated, empty `metadaten` element.
#[derive(Debug, PartialEq)]
pub(crate) struct Metadaten;

impl Metadaten {
    /// Checks that `n` is a `metadaten` node without any children.
    ///
    /// # Panics
    /// Panics if the tag differs or if the node has at least one child.
    pub(crate) fn parse(n: Node) -> Self {
        Expect::from(&n).tag("metadaten");
        let first_child = n.children().next();
        Expect::empty(first_child);
        Self
    }
}
/// Parser for the `nutzdaten` element.
#[derive(Debug, PartialEq)]
pub(crate) struct Nutzdaten {}

impl Nutzdaten {
    /// Parses a `nutzdaten` node, which must contain exactly one child that is
    /// handed to [`Abschnitt::parse`].
    ///
    /// Returns the `cont` field of the value produced by [`Abschnitt::parse`].
    ///
    /// # Panics
    /// Panics if the tag differs, if there is no child, or if there is more
    /// than one child.
    pub(crate) fn parse(n: Node, builder: &mut law::Builder) -> bool {
        Expect::from(&n).tag("nutzdaten");

        let mut children = n.children();
        let abschnitt = Abschnitt::parse(children.next().unwrap(), builder);
        Expect::empty(children.next());

        abschnitt.cont
    }
}
/// A parsed `symbol` element.
#[derive(Debug, PartialEq, Clone)]
pub(crate) struct Symbol {
    /// Value of the `stellen` attribute.
    stellen: String,
    /// Text of the node.
    content: String,
}

impl Symbol {
    /// Parses a `symbol` node with exactly one child.
    ///
    /// # Panics
    /// Panics if the tag differs, the child count is not 1, the `stellen`
    /// attribute is missing, or the node has no text.
    pub(crate) fn parse(n: Node) -> Self {
        Expect::from(&n).tag("symbol").amount_children(1);
        Self {
            stellen: n.attribute("stellen").unwrap().to_owned(),
            content: n.text().unwrap().to_owned(),
        }
    }
}
/// A parsed `listelem` element: a symbol followed by optional text.
#[derive(Debug, PartialEq, Clone)]
pub(crate) struct Listelem {
    /// The leading symbol of the list element.
    symbol: Symbol,
    /// Text of the second child; empty if there is no second child.
    text: String,
}

impl Listelem {
    /// Parses a `listelem` node.
    ///
    /// The first child is parsed as a [`Symbol`]; the text of an optional
    /// second child becomes `text`. No further children are allowed.
    ///
    /// # Panics
    /// Panics if the tag differs, the first child is missing or not a valid
    /// symbol, the second child has no text, or there is a third child.
    pub(crate) fn parse(n: Node) -> Self {
        Expect::from(&n).tag("listelem");

        let mut children = n.children();
        let symbol = Symbol::parse(children.next().unwrap());
        let text = children
            .next()
            .map_or_else(String::new, |node| node.text().unwrap().to_owned());
        Expect::empty(children.next());

        Self { symbol, text }
    }
}
/// A parsed list element (one of several list tags, see [`Ziffernliste::test`])
/// together with an optional nested list and trailing `schlussteil` elements.
#[derive(Debug, PartialEq, Clone)]
pub(crate) struct Ziffernliste {
    /// Nesting level taken from the `ebene` attribute (defaults to 1).
    ebene: usize,
    /// The list elements in document order.
    listelems: Vec<Listelem>,
    /// A directly following list with level `ebene + 1`, if any.
    sublist: Option<Box<Ziffernliste>>,
    /// Directly following `schlussteil` elements with the same level.
    schlussteile: Vec<Schlussteil>,
}

impl Ziffernliste {
    /// Returns `true` if the tag name of `n` is one of the supported list tags.
    pub(crate) fn test(n: &Node) -> bool {
        // strichliste -> § 194b FSG
        [
            "ziffernliste",
            "aufzaehlung",
            "literaliste",
            "strichliste",
            "erlliste",
        ]
        .contains(&n.tag_name().name())
    }

    /// Returns `true` if `n` is a list node (see [`Ziffernliste::test`]) whose
    /// `ebene` attribute is present and equals the decimal rendering of `level`.
    pub(crate) fn test_with_level(n: &Node, level: usize) -> bool {
        match n.attribute("ebene") {
            Some(ebene) => Self::test(n) && ebene == level.to_string(),
            None => false,
        }
    }

    /// Consumes a list node from `c`, plus any directly following sublists
    /// (level `ebene + 1`) and schlussteile (level `ebene`).
    ///
    /// # Panics
    /// Panics if `c` is exhausted, the next node is not a list node, its
    /// `ebene` attribute is not a valid `usize`, or a nested parser panics.
    pub(crate) fn parse(c: &mut Peekable<Children>) -> Self {
        let n = c.next().unwrap();
        assert!(Self::test(&n));

        let ebene = n
            .attribute("ebene")
            .unwrap_or("1")
            .parse::<usize>()
            .unwrap();

        let listelems: Vec<Listelem> = n.children().map(Listelem::parse).collect();

        // If next element is ebene + 1 -> part of this list.
        // NOTE(review): if several lists of level `ebene + 1` follow each other,
        // only the last one is kept; confirm this is intended.
        let mut sublist: Option<Box<Ziffernliste>> = None;
        while let Some(child) = c.peek() {
            if !Self::test_with_level(child, ebene + 1) {
                break;
            }
            sublist = Some(Box::new(Self::parse(c)));
        }

        // Schlussteile of the same level directly after the list belong to it.
        let mut schlussteile = Vec::new();
        while let Some(node) = c.next_if(|child| Schlussteil::test_with_ebene(child, ebene)) {
            schlussteile.push(Schlussteil::parse(node));
        }

        Self {
            ebene,
            listelems,
            sublist,
            schlussteile,
        }
    }

    /// Converts the list into [`Content`].
    ///
    /// Each list element becomes a `Content::Text` of the form
    /// `"<symbol> <text>"`, followed by the sublist's content (if any) and one
    /// `Content::Text` per schlussteil. Without schlussteile the result is
    /// `Content::List(elems)`; otherwise the elements are wrapped as
    /// `Content::List(vec![Content::Multi(elems)])`.
    pub(crate) fn get_content(&self) -> Content {
        let mut elems: Vec<Content> = self
            .listelems
            .iter()
            .map(|elem| Content::Text(format!("{} {}", elem.symbol.content, elem.text)))
            .collect();

        // `get_content` only needs `&self`, so the sublist is borrowed rather
        // than deep-cloned.
        if let Some(sublist) = &self.sublist {
            elems.push(sublist.get_content());
        }

        elems.extend(
            self.schlussteile
                .iter()
                .map(|schlussteil| Content::Text(schlussteil.content.clone())),
        );

        if self.schlussteile.is_empty() {
            Content::List(elems)
        } else {
            Content::List(vec![Content::Multi(elems)])
        }
    }
}
/// A parsed, non-empty `td` table cell.
#[derive(Debug, PartialEq)]
pub(crate) struct Td {
    /// The single paragraph contained in the cell.
    absatz: Absatz,
}

impl Td {
    /// Parses a `td` node containing exactly one child.
    ///
    /// Returns `None` if the `td` has no children at all (used e.g. in § 71b
    /// purely to style the table).
    ///
    /// # Panics
    /// Panics if the tag differs, if there is more than one child, or if
    /// [`Absatz::parse`] panics.
    pub(crate) fn parse(n: &Node) -> Option<Self> {
        Expect::from(n).tag("td");

        let mut cells = n.children();
        let first = cells.next()?;
        let absatz = Absatz::parse(first);
        Expect::empty(cells.next());

        Some(Self { absatz })
    }
}
/// A parsed `tr` table row.
#[derive(Debug, PartialEq)]
pub(crate) struct Tr {
    /// The non-empty cells of the row, in document order.
    tds: Vec<Td>,
}

impl Tr {
    /// Parses a `tr` node, passing every child to [`Td::parse`] and keeping
    /// only those cells for which it returns `Some`.
    ///
    /// # Panics
    /// Panics if the tag differs or if [`Td::parse`] panics for a child.
    pub(crate) fn parse(n: &Node) -> Self {
        Expect::from(n).tag("tr");
        let tds = n
            .children()
            .filter_map(|child| Td::parse(&child))
            .collect();
        Self { tds }
    }
}
/// A parsed `schlussteil` / `schluss` element.
#[derive(Debug, PartialEq, Clone)]
pub(crate) struct Schlussteil {
    /// Text of the element's only child.
    pub(crate) content: String,
}

impl Schlussteil {
    /// Returns `true` if `n` is tagged `schlussteil` or `schluss` and has
    /// exactly one child.
    pub(crate) fn test(n: &Node) -> bool {
        let name = n.tag_name().name();
        matches!(name, "schlussteil" | "schluss") && n.children().count() == 1
    }

    /// Like [`Schlussteil::test`], but additionally requires an `ebene`
    /// attribute equal to the decimal rendering of `level`.
    pub(crate) fn test_with_ebene(n: &Node, level: usize) -> bool {
        let Some(ebene) = n.attribute("ebene") else {
            return false;
        };
        Self::test(n) && ebene == level.to_string()
    }

    /// Parses a node accepted by [`Schlussteil::test`], taking the text of its
    /// only child as content.
    ///
    /// # Panics
    /// Panics if [`Schlussteil::test`] rejects `n` or the child has no text.
    pub(crate) fn parse(n: Node) -> Self {
        assert!(Self::test(&n));
        // NOTE(original author): not sure the child's text is the right source.
        let only_child = n.children().next().unwrap();
        Self {
            content: only_child.text().unwrap().to_owned(),
        }
    }
}
/// A parsed `absatz` element of type `abs`.
#[derive(Debug, PartialEq)]
pub(crate) struct AbsatzAbs {
    /// Content of a leading `gldsym` child, if present.
    gldsym: Option<String>,
    /// Trimmed text of the child following the optional `gldsym`.
    content: String,
}

impl AbsatzAbs {
    /// Returns `true` if `n` is an `absatz` node whose `typ` attribute is `abs`.
    ///
    /// A missing `typ` attribute yields `false` rather than a panic, so the
    /// predicate is safe to call on arbitrary nodes.
    pub(crate) fn test(n: &Node) -> bool {
        n.tag_name().name() == "absatz" && n.attribute("typ") == Some("abs")
    }

    /// Parses a node accepted by [`AbsatzAbs::test`].
    ///
    /// An optional leading `gldsym` leaf is consumed first; the next child
    /// provides the (trimmed) content. No further children are allowed.
    ///
    /// # Panics
    /// Panics if [`AbsatzAbs::test`] rejects `n`, the content child is missing
    /// or has no text, or there are additional children.
    pub(crate) fn parse(n: Node) -> Self {
        assert!(Self::test(&n));

        let mut c = n.children().peekable();
        // Only consume the first child if it really is a `gldsym` leaf.
        let gldsym = c
            .next_if(|child| Leaf::test(child, "gldsym"))
            .map(|node| Leaf::parse(node, "gldsym"));

        let ret = Self {
            gldsym,
            content: c.next().unwrap().text().unwrap().trim().into(),
        };
        Expect::empty(c.next());
        ret
    }
}
/// Helper for elements that have exactly one child and whose text is read
/// directly from the element.
#[derive(Debug, PartialEq)]
pub(crate) struct Leaf {
    /// Text content of the leaf.
    content: String,
}

impl Leaf {
    /// Returns `true` if `n` is tagged `name` and has exactly one child.
    pub(crate) fn test(n: &Node, name: &str) -> bool {
        let tag_matches = n.tag_name().name() == name;
        tag_matches && n.children().count() == 1
    }

    /// Returns the text of a node tagged `name` that has exactly one child.
    ///
    /// # Panics
    /// Panics if the tag differs, the child count is not 1, or the node has no
    /// text.
    pub(crate) fn parse(n: Node, name: &str) -> String {
        Expect::from(&n).tag(name).amount_children(1);
        String::from(n.text().unwrap())
    }
}
/// A parsed `ueberschrift` (heading) element.
#[derive(Debug, PartialEq)]
pub(crate) struct Ueberschrift {
    /// Value of the `typ` attribute the heading was parsed with.
    typ: String,
    /// Text of the heading node.
    content: String,
}

impl Ueberschrift {
    /// Returns `true` if `n` is an `ueberschrift` node whose `typ` attribute
    /// equals `typ`.
    ///
    /// A missing `typ` attribute yields `false` rather than a panic.
    fn test(n: &Node, typ: &str) -> bool {
        n.tag_name().name() == "ueberschrift" && n.attribute("typ") == Some(typ)
    }

    /// Parses an `ueberschrift` node with the given `typ`.
    ///
    /// # Panics
    /// Panics if the tag differs, the `typ` attribute is missing or different,
    /// or the node has no text.
    pub(crate) fn parse(n: Node, typ: &str) -> Self {
        Expect::from(&n).tag("ueberschrift").typ(typ);
        Self {
            content: n.text().unwrap().into(),
            typ: typ.into(),
        }
    }

    /// Consumes a run of `erll` headings together with the elements following
    /// each of them.
    ///
    /// Every `erll` heading starts a new group; `abbobj` paragraphs, lists and
    /// `abs` paragraphs that follow are appended to the current group. The
    /// first node that is none of these ends the run and is left in `n`.
    /// Each group is returned as a `Content::Multi` inside a `Content::List`,
    /// including the last (or only) group.
    ///
    /// # Panics
    /// Panics if `n` is exhausted, the first node is not an `erll` heading, or
    /// a nested parser panics.
    fn parse_full_erll(n: &mut Peekable<Children>) -> Content {
        let mut ret = Vec::new();

        // We need at least 1 erll
        let mut curr = vec![Content::Text(
            Self::parse(n.next().unwrap(), "erll").content,
        )];

        while let Some(child) = n.peek() {
            if Absatz::test_with_typ(child, "abbobj") {
                curr.push(Absatz::parse_abbobj(n.next().unwrap()));
            } else if Liste::test(child) {
                curr.push(Content::List(Liste::parse_full(n).content));
            } else if Absatz::test_with_typ(child, "abs") {
                let (_, absatz) = Absatz::parse_full(n);
                curr.push(absatz);
            } else if Self::test(child, "erll") {
                // A new heading closes the current group and opens the next.
                ret.push(Content::Multi(curr));
                curr = vec![Content::Text(
                    Self::parse(n.next().unwrap(), "erll").content,
                )];
            } else {
                break;
            }
        }

        // Flush the group that was still open when the run ended; without this
        // the last (or only) section would be lost.
        ret.push(Content::Multi(curr));
        Content::List(ret)
    }
}
/// Marker for a validated `kzinhalt` element; its content is not parsed.
#[derive(Debug, PartialEq)]
pub(crate) struct Kzinhalt;

impl Kzinhalt {
    /// Checks that `n` is tagged `kzinhalt`; children are ignored.
    ///
    /// # Panics
    /// Panics if the tag differs.
    pub(crate) fn parse(n: Node) -> Self {
        Expect::from(&n).tag("kzinhalt");
        // TODO: parse the content if it ever becomes necessary.
        Self
    }
}
/// Marker for a validated `fzinhalt` element; its content is not parsed.
#[derive(Debug, PartialEq)]
pub(crate) struct Fzinhalt;

impl Fzinhalt {
    /// Checks that `n` is tagged `fzinhalt`; children are ignored.
    ///
    /// # Panics
    /// Panics if the tag differs.
    pub(crate) fn parse(n: Node) -> Self {
        Expect::from(&n).tag("fzinhalt");
        // TODO: parse the content if it ever becomes necessary.
        Self
    }
}
/// Marker for a validated, empty `layoutdaten` element.
#[derive(Debug, PartialEq)]
pub(crate) struct Layoutdaten;

impl Layoutdaten {
    /// Checks that `n` is a `layoutdaten` node without any children.
    ///
    /// # Panics
    /// Panics if the tag differs or if the node has at least one child.
    pub(crate) fn parse(n: Node) -> Self {
        Expect::from(&n).tag("layoutdaten");
        let first_child = n.children().next();
        Expect::empty(first_child);
        Self
    }
}