This commit is contained in:
parent
90d9982add
commit
ac20dfcb48
@ -8,15 +8,12 @@ use crate::paragraph::parser::{
|
||||
Absatz, AbsatzAbs, Content, Fzinhalt, Kzinhalt, Liste, Table, Ueberschrift,
|
||||
};
|
||||
|
||||
/// One parsed section ("Abschnitt") of a law document.
///
/// `Default` yields an empty metadata map and `cont == false`.
#[derive(Debug, PartialEq, Default)]
pub(crate) struct Abschnitt {
    /// Key/value metadata collected for this section.
    // NOTE(review): presumably filled from the metadata block at the top of each XML file —
    // confirm against `handle_metadata`.
    metadata: HashMap<String, String>,
    // NOTE(review): the meaning of this flag is not visible from here — document it where it
    // is set.
    pub(crate) cont: bool,
}
|
||||
|
||||
|
||||
|
||||
impl Abschnitt {
|
||||
pub(crate) fn parse(n: Node, builder: &mut LawBuilder) -> Abschnitt {
|
||||
assert!(n.tag_name().name() == "abschnitt");
|
||||
@ -26,53 +23,12 @@ impl Abschnitt {
|
||||
|
||||
Self::skip_static_fields(&mut c);
|
||||
|
||||
ret.handle_headers(&mut c, builder);
|
||||
ret.handle_metadata(&mut c, builder);
|
||||
|
||||
while let Some(child) = c.peek() {
|
||||
// Schiffahrtsgesetz: stop @ anlagen (for now)
|
||||
if Ueberschrift::test(child, "anlage") {
|
||||
return ret;
|
||||
}
|
||||
if Ueberschrift::test(child, "g1") {
|
||||
let ueberschrift = Ueberschrift::parse(c.next().unwrap(), "g1");
|
||||
if ueberschrift.content.trim().starts_with("Artikel") {
|
||||
return ret;
|
||||
}
|
||||
builder.new_header(&ueberschrift.content);
|
||||
} else if Ueberschrift::test(child, "g2") {
|
||||
let ueberschrift = Ueberschrift::parse(c.next().unwrap(), "g2");
|
||||
builder.new_desc(&ueberschrift.content);
|
||||
} else if Ueberschrift::test(child, "g1min") {
|
||||
let ueberschrift = Ueberschrift::parse(c.next().unwrap(), "g1min");
|
||||
builder.new_header(&ueberschrift.content);
|
||||
} else if Ueberschrift::test(child, "art") {
|
||||
let ueberschrift = Ueberschrift::parse(c.next().unwrap(), "art");
|
||||
if ueberschrift.content.trim().starts_with("Artikel") {
|
||||
return ret;
|
||||
}
|
||||
} else {
|
||||
break;
|
||||
}
|
||||
if !ret.handle_headers(&mut c, builder) {
|
||||
return ret;
|
||||
}
|
||||
|
||||
if let Some(child) = c.peek() {
|
||||
if Ueberschrift::test(child, "para") {
|
||||
builder
|
||||
.new_next_para_header(&Ueberschrift::parse(c.next().unwrap(), "para").content);
|
||||
}
|
||||
}
|
||||
// e.g. § 405 abgb has two para (of diseased paragraph)
|
||||
if let Some(child) = c.peek() {
|
||||
if Ueberschrift::test(child, "para") {
|
||||
builder
|
||||
.new_next_para_header(&Ueberschrift::parse(c.next().unwrap(), "para").content);
|
||||
}
|
||||
}
|
||||
|
||||
// We have 2 tasks
|
||||
// 1) Get paragraph id
|
||||
// 2) Get content
|
||||
|
||||
let mut absatze = Vec::new();
|
||||
let absatz = AbsatzAbs::parse(c.next().expect("We need at least one 'Absatz'"));
|
||||
let par_id = absatz
|
||||
@ -194,7 +150,7 @@ impl Abschnitt {
|
||||
|
||||
// There is paragraph-specific metadata at the top of each xml file. We parse it. When we
|
||||
// encounter the title "Text", the real content starts and we stop parsing metadata.
|
||||
fn handle_headers(&mut self, c: &mut Peekable<Children>, builder: &mut LawBuilder) {
|
||||
fn handle_metadata(&mut self, c: &mut Peekable<Children>, builder: &mut LawBuilder) {
|
||||
loop {
|
||||
let key = Ueberschrift::parse(c.next().unwrap(), "titel").content;
|
||||
|
||||
@ -230,4 +186,47 @@ impl Abschnitt {
|
||||
Fzinhalt::parse(node.next().unwrap()); // "www.ris.bka.gv.at" and "Seite X von Y"
|
||||
Fzinhalt::parse(node.next().unwrap()); // "www.ris.bka.gv.at" and "Seite X von Y"
|
||||
}
|
||||
|
||||
// we have optionally headers. Such as "Einleitung", "Von den bürgerlichen Gesetzen üerhaupt,"
|
||||
// etc. If we have headers which indicate that we are done and we want to stop parsing
|
||||
// ("anlage" + "Artikel" we indicate this wish by returning false.
|
||||
fn handle_headers(&self, c: &mut Peekable<Children>, builder: &mut LawBuilder) -> bool {
|
||||
while let Some(child) = c.peek() {
|
||||
// Schiffahrtsgesetz: stop @ anlagen (for now)
|
||||
if Ueberschrift::test(child, "anlage") {
|
||||
return false;
|
||||
}
|
||||
if Ueberschrift::test(child, "g1") {
|
||||
let ueberschrift = Ueberschrift::parse(c.next().unwrap(), "g1");
|
||||
if ueberschrift.content.trim().starts_with("Artikel") {
|
||||
return false;
|
||||
}
|
||||
builder.new_header(&ueberschrift.content);
|
||||
} else if Ueberschrift::test(child, "g2") {
|
||||
let ueberschrift = Ueberschrift::parse(c.next().unwrap(), "g2");
|
||||
builder.new_desc(&ueberschrift.content);
|
||||
} else if Ueberschrift::test(child, "g1min") {
|
||||
let ueberschrift = Ueberschrift::parse(c.next().unwrap(), "g1min");
|
||||
builder.new_header(&ueberschrift.content);
|
||||
} else if Ueberschrift::test(child, "art") {
|
||||
let ueberschrift = Ueberschrift::parse(c.next().unwrap(), "art");
|
||||
if ueberschrift.content.trim().starts_with("Artikel") {
|
||||
return false;
|
||||
}
|
||||
} else {
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
// while (not if) because we can have two subsequent paraheaders (e.g. § 405 abgb)
|
||||
while let Some(child) = c.peek() {
|
||||
if Ueberschrift::test(child, "para") {
|
||||
builder
|
||||
.new_next_para_header(&Ueberschrift::parse(c.next().unwrap(), "para").content);
|
||||
continue;
|
||||
}
|
||||
break;
|
||||
}
|
||||
true
|
||||
}
|
||||
}
|
||||
|
Loading…
Reference in New Issue
Block a user