This commit is contained in:
parent
90d9982add
commit
ac20dfcb48
@ -8,15 +8,12 @@ use crate::paragraph::parser::{
|
|||||||
Absatz, AbsatzAbs, Content, Fzinhalt, Kzinhalt, Liste, Table, Ueberschrift,
|
Absatz, AbsatzAbs, Content, Fzinhalt, Kzinhalt, Liste, Table, Ueberschrift,
|
||||||
};
|
};
|
||||||
|
|
||||||
#[derive(Debug, PartialEq)]
|
#[derive(Debug, PartialEq, Default)]
|
||||||
#[derive(Default)]
|
|
||||||
pub(crate) struct Abschnitt {
|
pub(crate) struct Abschnitt {
|
||||||
metadata: HashMap<String, String>,
|
metadata: HashMap<String, String>,
|
||||||
pub(crate) cont: bool,
|
pub(crate) cont: bool,
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
impl Abschnitt {
|
impl Abschnitt {
|
||||||
pub(crate) fn parse(n: Node, builder: &mut LawBuilder) -> Abschnitt {
|
pub(crate) fn parse(n: Node, builder: &mut LawBuilder) -> Abschnitt {
|
||||||
assert!(n.tag_name().name() == "abschnitt");
|
assert!(n.tag_name().name() == "abschnitt");
|
||||||
@ -26,53 +23,12 @@ impl Abschnitt {
|
|||||||
|
|
||||||
Self::skip_static_fields(&mut c);
|
Self::skip_static_fields(&mut c);
|
||||||
|
|
||||||
ret.handle_headers(&mut c, builder);
|
ret.handle_metadata(&mut c, builder);
|
||||||
|
|
||||||
while let Some(child) = c.peek() {
|
if !ret.handle_headers(&mut c, builder) {
|
||||||
// Schiffahrtsgesetz: stop @ anlagen (for now)
|
return ret;
|
||||||
if Ueberschrift::test(child, "anlage") {
|
|
||||||
return ret;
|
|
||||||
}
|
|
||||||
if Ueberschrift::test(child, "g1") {
|
|
||||||
let ueberschrift = Ueberschrift::parse(c.next().unwrap(), "g1");
|
|
||||||
if ueberschrift.content.trim().starts_with("Artikel") {
|
|
||||||
return ret;
|
|
||||||
}
|
|
||||||
builder.new_header(&ueberschrift.content);
|
|
||||||
} else if Ueberschrift::test(child, "g2") {
|
|
||||||
let ueberschrift = Ueberschrift::parse(c.next().unwrap(), "g2");
|
|
||||||
builder.new_desc(&ueberschrift.content);
|
|
||||||
} else if Ueberschrift::test(child, "g1min") {
|
|
||||||
let ueberschrift = Ueberschrift::parse(c.next().unwrap(), "g1min");
|
|
||||||
builder.new_header(&ueberschrift.content);
|
|
||||||
} else if Ueberschrift::test(child, "art") {
|
|
||||||
let ueberschrift = Ueberschrift::parse(c.next().unwrap(), "art");
|
|
||||||
if ueberschrift.content.trim().starts_with("Artikel") {
|
|
||||||
return ret;
|
|
||||||
}
|
|
||||||
} else {
|
|
||||||
break;
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
|
||||||
if let Some(child) = c.peek() {
|
|
||||||
if Ueberschrift::test(child, "para") {
|
|
||||||
builder
|
|
||||||
.new_next_para_header(&Ueberschrift::parse(c.next().unwrap(), "para").content);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
// e.g. § 405 abgb has two para (of diseased paragraph)
|
|
||||||
if let Some(child) = c.peek() {
|
|
||||||
if Ueberschrift::test(child, "para") {
|
|
||||||
builder
|
|
||||||
.new_next_para_header(&Ueberschrift::parse(c.next().unwrap(), "para").content);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
// We have 2 tasks
|
|
||||||
// 1) Get paragraph id
|
|
||||||
// 2) Get content
|
|
||||||
|
|
||||||
let mut absatze = Vec::new();
|
let mut absatze = Vec::new();
|
||||||
let absatz = AbsatzAbs::parse(c.next().expect("We need at least one 'Absatz'"));
|
let absatz = AbsatzAbs::parse(c.next().expect("We need at least one 'Absatz'"));
|
||||||
let par_id = absatz
|
let par_id = absatz
|
||||||
@ -194,7 +150,7 @@ impl Abschnitt {
|
|||||||
|
|
||||||
// There are paragraph-specific meta-data at the top of each xml file. We parse those. When we
|
// There are paragraph-specific meta-data at the top of each xml file. We parse those. When we
|
||||||
// encounter the title "Text" the real content starts, we stop parsing meta data.
|
// encounter the title "Text" the real content starts, we stop parsing meta data.
|
||||||
fn handle_headers(&mut self, c: &mut Peekable<Children>, builder: &mut LawBuilder) {
|
fn handle_metadata(&mut self, c: &mut Peekable<Children>, builder: &mut LawBuilder) {
|
||||||
loop {
|
loop {
|
||||||
let key = Ueberschrift::parse(c.next().unwrap(), "titel").content;
|
let key = Ueberschrift::parse(c.next().unwrap(), "titel").content;
|
||||||
|
|
||||||
@ -230,4 +186,47 @@ impl Abschnitt {
|
|||||||
Fzinhalt::parse(node.next().unwrap()); // "www.ris.bka.gv.at" and "Seite X von Y"
|
Fzinhalt::parse(node.next().unwrap()); // "www.ris.bka.gv.at" and "Seite X von Y"
|
||||||
Fzinhalt::parse(node.next().unwrap()); // "www.ris.bka.gv.at" and "Seite X von Y"
|
Fzinhalt::parse(node.next().unwrap()); // "www.ris.bka.gv.at" and "Seite X von Y"
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// We optionally have headers, such as "Einleitung", "Von den bürgerlichen Gesetzen überhaupt",
|
||||||
|
// etc. If we have headers which indicate that we are done and we want to stop parsing
|
||||||
|
// ("anlage" + "Artikel"), we indicate this wish by returning false.
|
||||||
|
fn handle_headers(&self, c: &mut Peekable<Children>, builder: &mut LawBuilder) -> bool {
|
||||||
|
while let Some(child) = c.peek() {
|
||||||
|
// Schiffahrtsgesetz: stop @ anlagen (for now)
|
||||||
|
if Ueberschrift::test(child, "anlage") {
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
if Ueberschrift::test(child, "g1") {
|
||||||
|
let ueberschrift = Ueberschrift::parse(c.next().unwrap(), "g1");
|
||||||
|
if ueberschrift.content.trim().starts_with("Artikel") {
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
builder.new_header(&ueberschrift.content);
|
||||||
|
} else if Ueberschrift::test(child, "g2") {
|
||||||
|
let ueberschrift = Ueberschrift::parse(c.next().unwrap(), "g2");
|
||||||
|
builder.new_desc(&ueberschrift.content);
|
||||||
|
} else if Ueberschrift::test(child, "g1min") {
|
||||||
|
let ueberschrift = Ueberschrift::parse(c.next().unwrap(), "g1min");
|
||||||
|
builder.new_header(&ueberschrift.content);
|
||||||
|
} else if Ueberschrift::test(child, "art") {
|
||||||
|
let ueberschrift = Ueberschrift::parse(c.next().unwrap(), "art");
|
||||||
|
if ueberschrift.content.trim().starts_with("Artikel") {
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
} else {
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// while (not if) because we can have two subsequent paraheaders (e.g. § 405 abgb)
|
||||||
|
while let Some(child) = c.peek() {
|
||||||
|
if Ueberschrift::test(child, "para") {
|
||||||
|
builder
|
||||||
|
.new_next_para_header(&Ueberschrift::parse(c.next().unwrap(), "para").content);
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
true
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
Loading…
Reference in New Issue
Block a user