Compare commits
2 Commits
136b72c989
...
eff5c546c8
Author | SHA1 | Date | |
---|---|---|---|
eff5c546c8 | |||
d46212ba42 |
@ -42,6 +42,11 @@ name = "dash"
|
|||||||
is_root = false
|
is_root = false
|
||||||
match_function = "starts_with_dash"
|
match_function = "starts_with_dash"
|
||||||
|
|
||||||
|
[[law.classifiers]]
|
||||||
|
name = "Artikel"
|
||||||
|
is_root = false
|
||||||
|
match_function = "contains"
|
||||||
|
|
||||||
[parser]
|
[parser]
|
||||||
remove_strings = [
|
remove_strings = [
|
||||||
"<tab />",
|
"<tab />",
|
||||||
|
@ -154,19 +154,19 @@ impl Abschnitt {
|
|||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
if Ueberschrift::test(child, "g1") {
|
if Ueberschrift::test(child, "g1") {
|
||||||
let (ueberschrift, para_header) = Ueberschrift::parse(c, "g1");
|
let (ueberschrift, para_header) = Ueberschrift::parse_full(c, "g1");
|
||||||
builder.new_header(&ueberschrift.content);
|
builder.new_header(&ueberschrift.content);
|
||||||
if let Some(para_header) = para_header {
|
if let Some(para_header) = para_header {
|
||||||
builder.new_next_para_header(&para_header.content);
|
builder.new_next_para_header(&para_header.content);
|
||||||
}
|
}
|
||||||
} else if Ueberschrift::test(child, "g2") {
|
} else if Ueberschrift::test(child, "g2") {
|
||||||
let (ueberschrift, para_header) = Ueberschrift::parse(c, "g2");
|
let (ueberschrift, para_header) = Ueberschrift::parse_full(c, "g2");
|
||||||
builder.new_desc(&ueberschrift.content);
|
builder.new_desc(&ueberschrift.content);
|
||||||
if let Some(para_header) = para_header {
|
if let Some(para_header) = para_header {
|
||||||
builder.new_next_para_header(&para_header.content);
|
builder.new_next_para_header(&para_header.content);
|
||||||
}
|
}
|
||||||
} else if Ueberschrift::test(child, "g1min") {
|
} else if Ueberschrift::test(child, "g1min") {
|
||||||
let (ueberschrift, para_header) = Ueberschrift::parse(c, "g1min");
|
let (ueberschrift, para_header) = Ueberschrift::parse_full(c, "g1min");
|
||||||
builder.new_header(&ueberschrift.content);
|
builder.new_header(&ueberschrift.content);
|
||||||
if let Some(para_header) = para_header {
|
if let Some(para_header) = para_header {
|
||||||
builder.new_next_para_header(&para_header.content);
|
builder.new_next_para_header(&para_header.content);
|
||||||
|
@ -355,8 +355,7 @@ impl Ueberschrift {
|
|||||||
n.tag_name().name() == "ueberschrift" && n.attribute("typ").unwrap() == typ
|
n.tag_name().name() == "ueberschrift" && n.attribute("typ").unwrap() == typ
|
||||||
}
|
}
|
||||||
|
|
||||||
pub(crate) fn parse(c: &mut Peekable<Children>, typ: &str) -> (Self, Option<Self>) {
|
pub(crate) fn parse(n: Node, typ: &str) -> Self {
|
||||||
let n = c.next().unwrap();
|
|
||||||
Expect::from(&n).tag("ueberschrift");
|
Expect::from(&n).tag("ueberschrift");
|
||||||
|
|
||||||
assert_eq!(n.attribute("typ").unwrap(), typ);
|
assert_eq!(n.attribute("typ").unwrap(), typ);
|
||||||
@ -456,6 +455,86 @@ impl Ueberschrift {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
(art, next_para_header)
|
||||||
|
}
|
||||||
|
pub(crate) fn parse_full(c: &mut Peekable<Children>, typ: &str) -> (Self, Option<Self>) {
|
||||||
|
let n = c.next().unwrap();
|
||||||
|
Expect::from(&n).tag("ueberschrift");
|
||||||
|
|
||||||
|
assert_eq!(n.attribute("typ").unwrap(), typ);
|
||||||
|
|
||||||
|
let mut ret = Self {
|
||||||
|
content: n.text().unwrap().into(),
|
||||||
|
typ: typ.into(),
|
||||||
|
};
|
||||||
|
|
||||||
|
let mut next_para_header = None;
|
||||||
|
|
||||||
|
if ret.content != "Text" {
|
||||||
|
if let Some(child) = c.peek() {
|
||||||
|
if Ueberschrift::test(child, "art") {
|
||||||
|
let next_para = Ueberschrift::parse(c.next().unwrap(), "art");
|
||||||
|
next_para_header = Some(next_para);
|
||||||
|
} else if typ == "g1" && ret.content.starts_with("Artikel ") {
|
||||||
|
// wrongly tagged
|
||||||
|
// artikel
|
||||||
|
|
||||||
|
// TODO: remove code duplication from further down
|
||||||
|
let mut clone = c.clone();
|
||||||
|
if let Some(first_child) = clone.next() {
|
||||||
|
if let Some(second_child) = clone.next() {
|
||||||
|
if Ueberschrift::test(&first_child, "para")
|
||||||
|
&& Ueberschrift::test(&second_child, "para")
|
||||||
|
{
|
||||||
|
let first_para = Ueberschrift::parse(c.next().unwrap(), "para");
|
||||||
|
let second_para = Ueberschrift::parse(c.next().unwrap(), "para");
|
||||||
|
next_para_header = Some(Ueberschrift {
|
||||||
|
typ: first_para.typ,
|
||||||
|
content: format!(
|
||||||
|
"{} - {}",
|
||||||
|
first_para.content, second_para.content
|
||||||
|
),
|
||||||
|
});
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
} else if typ == "art" && !ret.content.starts_with("Artikel") {
|
||||||
|
ret.content = format!("Artikel {}", ret.content);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
debug!("Parsed Ueberschrift {ret:#?}");
|
||||||
|
|
||||||
|
(ret, next_para_header)
|
||||||
|
}
|
||||||
|
|
||||||
|
fn parse_art(n: &mut Peekable<Children>) -> (Self, Option<Self>) {
|
||||||
|
let art = Self::parse(n.next().unwrap(), "art");
|
||||||
|
let mut next_para_header = None;
|
||||||
|
|
||||||
|
let mut clone = n.clone();
|
||||||
|
if let Some(first_child) = clone.next() {
|
||||||
|
if let Some(second_child) = clone.next() {
|
||||||
|
if Ueberschrift::test(&first_child, "para")
|
||||||
|
&& Ueberschrift::test(&second_child, "para")
|
||||||
|
{
|
||||||
|
let first_para = Ueberschrift::parse(n.next().unwrap(), "para");
|
||||||
|
let second_para = Ueberschrift::parse(n.next().unwrap(), "para");
|
||||||
|
next_para_header = Some(Ueberschrift {
|
||||||
|
typ: first_para.typ,
|
||||||
|
content: format!("{} - {}", first_para.content, second_para.content),
|
||||||
|
});
|
||||||
|
} else if Ueberschrift::test(&first_child, "para") {
|
||||||
|
let first_para = Ueberschrift::parse(n.next().unwrap(), "para");
|
||||||
|
next_para_header = Some(Ueberschrift {
|
||||||
|
typ: first_para.typ,
|
||||||
|
content: format!("{}", first_para.content),
|
||||||
|
});
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
(art, next_para_header)
|
(art, next_para_header)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
Loading…
Reference in New Issue
Block a user