This commit is contained in:
parent
d07bc726b9
commit
76ddf9796b
72
src/paragraph/parser/absatz.rs
Normal file
72
src/paragraph/parser/absatz.rs
Normal file
@ -0,0 +1,72 @@
|
||||
use std::iter::Peekable;
|
||||
|
||||
use roxmltree::{Children, Node};
|
||||
|
||||
use crate::law::Content;
|
||||
|
||||
use super::{liste::Liste, table::Table, AbsatzAbs, Expect};
|
||||
|
||||
/// A parsed `<absatz>` XML element.
#[derive(Debug, PartialEq)]
pub(crate) struct Absatz {
    /// Concatenated text gathered from the element's direct children.
    pub(crate) content: String,
    /// Value of the element's `typ` attribute (e.g. `"satz"` or `"erltext"`).
    pub(crate) typ: String,
}
|
||||
impl Absatz {
|
||||
pub(crate) fn test_with_typ(n: &Node, typ: &str) -> bool {
|
||||
n.tag_name().name() == "absatz" && n.attribute("typ") == Some(typ)
|
||||
}
|
||||
|
||||
// Parses one logical 'Absatz'. If there's a List or Table after the Absatz, RIS assumes this
|
||||
// one to be included in the paragraph
|
||||
//
|
||||
// # Returns
|
||||
// - String: (optional) paragraph id
|
||||
// - Content: content of the paragraph
|
||||
pub(crate) fn parse_full(c: &mut Peekable<Children>) -> (Option<String>, Content) {
|
||||
let absatz = AbsatzAbs::parse(c.next().unwrap());
|
||||
let par_id = absatz.gldsym;
|
||||
|
||||
let mut content = Vec::new();
|
||||
content.push(Content::Text(absatz.content));
|
||||
|
||||
// If there's a "liste" after an "absatz", the "liste" should be part of the "absatz"
|
||||
while let Some(child) = c.peek() {
|
||||
if Liste::test(child) {
|
||||
content.push(Liste::parse_full(c).get_content())
|
||||
} else if Table::test(child) {
|
||||
// If there's a "table" after an "absatz", the "table" should be part of the "absatz"
|
||||
let table = Table::parse_full(c);
|
||||
content.extend(table.iter().cloned());
|
||||
} else if Absatz::test_with_typ(child, "satz")
|
||||
|| Absatz::test_with_typ(child, "erltext")
|
||||
{
|
||||
// After a 'absatz' there can be a '<absatz typ="[satz|erltext]"' which should be part of the first absatz
|
||||
// (e.g. 1209 ABGB)
|
||||
content.push(Content::Text(Absatz::parse(c.next().unwrap()).content))
|
||||
} else {
|
||||
break;
|
||||
}
|
||||
}
|
||||
if content.len() == 1 {
|
||||
(par_id, content[0].clone())
|
||||
} else {
|
||||
(par_id, Content::List(content))
|
||||
}
|
||||
}
|
||||
|
||||
pub(crate) fn parse(n: Node) -> Self {
|
||||
Expect::from(&n).tag("absatz");
|
||||
|
||||
let typ = n.attribute("typ").unwrap().into();
|
||||
|
||||
let mut content = String::new();
|
||||
// Get text from this element + all direct childs
|
||||
for c in n.children() {
|
||||
if let Some(text) = c.text() {
|
||||
content.push_str(text);
|
||||
}
|
||||
}
|
||||
|
||||
Self { content, typ }
|
||||
}
|
||||
}
|
@ -4,10 +4,8 @@ use std::iter::Peekable;
|
||||
use roxmltree::{Children, Node};
|
||||
|
||||
use crate::law::LawBuilder;
|
||||
use crate::paragraph::parser::liste::Liste;
|
||||
use crate::paragraph::parser::{Absatz, AbsatzAbs, Content, Fzinhalt, Kzinhalt, Ueberschrift};
|
||||
|
||||
use super::table::Table;
|
||||
use crate::paragraph::parser::absatz::Absatz;
|
||||
use crate::paragraph::parser::{AbsatzAbs, Content, Fzinhalt, Kzinhalt, Ueberschrift};
|
||||
|
||||
#[derive(Debug, PartialEq, Default)]
|
||||
pub(crate) struct Abschnitt {
|
||||
@ -33,7 +31,7 @@ impl Abschnitt {
|
||||
let mut absatze = Vec::new();
|
||||
|
||||
// Special handling of first paragraph (needs id)...
|
||||
let (par_id, first_abs) = ret.parse_absatz(&mut c);
|
||||
let (par_id, first_abs) = Absatz::parse_full(&mut c);
|
||||
let par_id = match par_id {
|
||||
Some(par_id) => par_id,
|
||||
None => panic!("First paragraph needs to have an id, not found"),
|
||||
@ -43,7 +41,7 @@ impl Abschnitt {
|
||||
// ... and then there can be as many 'Absätze' as our law-setter wants
|
||||
while let Some(child) = c.peek() {
|
||||
if AbsatzAbs::test(child) {
|
||||
let (_, absatz) = ret.parse_absatz(&mut c);
|
||||
let (_, absatz) = Absatz::parse_full(&mut c);
|
||||
absatze.push(absatz);
|
||||
} else {
|
||||
break;
|
||||
@ -168,42 +166,4 @@ impl Abschnitt {
|
||||
}
|
||||
true
|
||||
}
|
||||
|
||||
// Parses one logical 'Absatz'. If there's a List or Table after the Absatz, RIS assumes this
|
||||
// one to be included in the paragraph
|
||||
//
|
||||
// # Returns
|
||||
// - String: (optional) paragraph id
|
||||
// - Content: content of the paragraph
|
||||
fn parse_absatz(&self, c: &mut Peekable<Children>) -> (Option<String>, Content) {
|
||||
let absatz = AbsatzAbs::parse(c.next().unwrap());
|
||||
let par_id = absatz.gldsym;
|
||||
|
||||
let mut content = Vec::new();
|
||||
content.push(Content::Text(absatz.content));
|
||||
|
||||
// If there's a "liste" after an "absatz", the "liste" should be part of the "absatz"
|
||||
while let Some(child) = c.peek() {
|
||||
if Liste::test(child) {
|
||||
content.push(Liste::parse_full(c).get_content())
|
||||
} else if Table::test(child) {
|
||||
// If there's a "table" after an "absatz", the "table" should be part of the "absatz"
|
||||
let table = Table::parse_full(c);
|
||||
content.extend(table.iter().cloned());
|
||||
} else if Absatz::test_with_typ(child, "satz")
|
||||
|| Absatz::test_with_typ(child, "erltext")
|
||||
{
|
||||
// After a 'absatz' there can be a '<absatz typ="[satz|erltext]"' which should be part of the first absatz
|
||||
// (e.g. 1209 ABGB)
|
||||
content.push(Content::Text(Absatz::parse(c.next().unwrap()).content))
|
||||
} else {
|
||||
break;
|
||||
}
|
||||
}
|
||||
if content.len() == 1 {
|
||||
(par_id, content[0].clone())
|
||||
} else {
|
||||
(par_id, Content::List(content))
|
||||
}
|
||||
}
|
||||
}
|
||||
|
@ -14,6 +14,7 @@
|
||||
// See the Licence for the specific language governing permissions and
|
||||
// limitations under the Licence.
|
||||
|
||||
mod absatz;
|
||||
mod abschnitt;
|
||||
mod liste;
|
||||
mod table;
|
||||
@ -24,6 +25,7 @@ use roxmltree::Node;
|
||||
use crate::{
|
||||
law::{Content, LawBuilder},
|
||||
misc::Error,
|
||||
paragraph::parser::absatz::Absatz,
|
||||
};
|
||||
|
||||
struct Expect<'a> {
|
||||
@ -289,34 +291,6 @@ impl Leaf {
|
||||
n.text().unwrap().into()
|
||||
}
|
||||
}
|
||||
|
||||
/// A parsed `<absatz>` XML element.
#[derive(Debug, PartialEq)]
pub(crate) struct Absatz {
    /// Concatenated text gathered from the element's direct children.
    content: String,
    /// Value of the element's `typ` attribute.
    typ: String,
}
|
||||
impl Absatz {
|
||||
pub(crate) fn test_with_typ(n: &Node, typ: &str) -> bool {
|
||||
n.tag_name().name() == "absatz" && n.attribute("typ") == Some(typ)
|
||||
}
|
||||
|
||||
pub(crate) fn parse(n: Node) -> Self {
|
||||
Expect::from(&n).tag("absatz");
|
||||
|
||||
let typ = n.attribute("typ").unwrap().into();
|
||||
|
||||
let mut content = String::new();
|
||||
// Get text from this element + all direct childs
|
||||
for c in n.children() {
|
||||
if let Some(text) = c.text() {
|
||||
content.push_str(text);
|
||||
}
|
||||
}
|
||||
|
||||
Self { content, typ }
|
||||
}
|
||||
}
|
||||
|
||||
#[derive(Debug, PartialEq)]
|
||||
pub(crate) struct Ueberschrift {
|
||||
typ: String,
|
||||
|
Loading…
Reference in New Issue
Block a user