Compare commits
2 Commits
136b72c989
...
eff5c546c8
Author | SHA1 | Date | |
---|---|---|---|
eff5c546c8 | |||
d46212ba42 |
@ -42,6 +42,11 @@ name = "dash"
|
||||
is_root = false
|
||||
match_function = "starts_with_dash"
|
||||
|
||||
[[law.classifiers]]
|
||||
name = "Artikel"
|
||||
is_root = false
|
||||
match_function = "contains"
|
||||
|
||||
[parser]
|
||||
remove_strings = [
|
||||
"<tab />",
|
||||
|
@ -154,19 +154,19 @@ impl Abschnitt {
|
||||
return;
|
||||
}
|
||||
if Ueberschrift::test(child, "g1") {
|
||||
let (ueberschrift, para_header) = Ueberschrift::parse(c, "g1");
|
||||
let (ueberschrift, para_header) = Ueberschrift::parse_full(c, "g1");
|
||||
builder.new_header(&ueberschrift.content);
|
||||
if let Some(para_header) = para_header {
|
||||
builder.new_next_para_header(&para_header.content);
|
||||
}
|
||||
} else if Ueberschrift::test(child, "g2") {
|
||||
let (ueberschrift, para_header) = Ueberschrift::parse(c, "g2");
|
||||
let (ueberschrift, para_header) = Ueberschrift::parse_full(c, "g2");
|
||||
builder.new_desc(&ueberschrift.content);
|
||||
if let Some(para_header) = para_header {
|
||||
builder.new_next_para_header(&para_header.content);
|
||||
}
|
||||
} else if Ueberschrift::test(child, "g1min") {
|
||||
let (ueberschrift, para_header) = Ueberschrift::parse(c, "g1min");
|
||||
let (ueberschrift, para_header) = Ueberschrift::parse_full(c, "g1min");
|
||||
builder.new_header(&ueberschrift.content);
|
||||
if let Some(para_header) = para_header {
|
||||
builder.new_next_para_header(&para_header.content);
|
||||
|
@ -355,8 +355,7 @@ impl Ueberschrift {
|
||||
n.tag_name().name() == "ueberschrift" && n.attribute("typ").unwrap() == typ
|
||||
}
|
||||
|
||||
pub(crate) fn parse(c: &mut Peekable<Children>, typ: &str) -> (Self, Option<Self>) {
|
||||
let n = c.next().unwrap();
|
||||
pub(crate) fn parse(n: Node, typ: &str) -> Self {
|
||||
Expect::from(&n).tag("ueberschrift");
|
||||
|
||||
assert_eq!(n.attribute("typ").unwrap(), typ);
|
||||
@ -456,6 +455,86 @@ impl Ueberschrift {
|
||||
}
|
||||
}
|
||||
|
||||
(art, next_para_header)
|
||||
}
|
||||
pub(crate) fn parse_full(c: &mut Peekable<Children>, typ: &str) -> (Self, Option<Self>) {
|
||||
let n = c.next().unwrap();
|
||||
Expect::from(&n).tag("ueberschrift");
|
||||
|
||||
assert_eq!(n.attribute("typ").unwrap(), typ);
|
||||
|
||||
let mut ret = Self {
|
||||
content: n.text().unwrap().into(),
|
||||
typ: typ.into(),
|
||||
};
|
||||
|
||||
let mut next_para_header = None;
|
||||
|
||||
if ret.content != "Text" {
|
||||
if let Some(child) = c.peek() {
|
||||
if Ueberschrift::test(child, "art") {
|
||||
let next_para = Ueberschrift::parse(c.next().unwrap(), "art");
|
||||
next_para_header = Some(next_para);
|
||||
} else if typ == "g1" && ret.content.starts_with("Artikel ") {
|
||||
// wrongly tagged
|
||||
// artikel
|
||||
|
||||
// TODO: remove code duplication from further down
|
||||
let mut clone = c.clone();
|
||||
if let Some(first_child) = clone.next() {
|
||||
if let Some(second_child) = clone.next() {
|
||||
if Ueberschrift::test(&first_child, "para")
|
||||
&& Ueberschrift::test(&second_child, "para")
|
||||
{
|
||||
let first_para = Ueberschrift::parse(c.next().unwrap(), "para");
|
||||
let second_para = Ueberschrift::parse(c.next().unwrap(), "para");
|
||||
next_para_header = Some(Ueberschrift {
|
||||
typ: first_para.typ,
|
||||
content: format!(
|
||||
"{} - {}",
|
||||
first_para.content, second_para.content
|
||||
),
|
||||
});
|
||||
}
|
||||
}
|
||||
}
|
||||
} else if typ == "art" && !ret.content.starts_with("Artikel") {
|
||||
ret.content = format!("Artikel {}", ret.content);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
debug!("Parsed Ueberschrift {ret:#?}");
|
||||
|
||||
(ret, next_para_header)
|
||||
}
|
||||
|
||||
fn parse_art(n: &mut Peekable<Children>) -> (Self, Option<Self>) {
|
||||
let art = Self::parse(n.next().unwrap(), "art");
|
||||
let mut next_para_header = None;
|
||||
|
||||
let mut clone = n.clone();
|
||||
if let Some(first_child) = clone.next() {
|
||||
if let Some(second_child) = clone.next() {
|
||||
if Ueberschrift::test(&first_child, "para")
|
||||
&& Ueberschrift::test(&second_child, "para")
|
||||
{
|
||||
let first_para = Ueberschrift::parse(n.next().unwrap(), "para");
|
||||
let second_para = Ueberschrift::parse(n.next().unwrap(), "para");
|
||||
next_para_header = Some(Ueberschrift {
|
||||
typ: first_para.typ,
|
||||
content: format!("{} - {}", first_para.content, second_para.content),
|
||||
});
|
||||
} else if Ueberschrift::test(&first_child, "para") {
|
||||
let first_para = Ueberschrift::parse(n.next().unwrap(), "para");
|
||||
next_para_header = Some(Ueberschrift {
|
||||
typ: first_para.typ,
|
||||
content: format!("{}", first_para.content),
|
||||
});
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
(art, next_para_header)
|
||||
}
|
||||
}
|
||||
|
Loading…
Reference in New Issue
Block a user