Compare commits
No commits in common. "eff5c546c886c707b0b992528a6dc1209bf49ffe" and "136b72c989840e70a342afcad712fe8c057df11d" have entirely different histories.
eff5c546c8
...
136b72c989
@ -42,11 +42,6 @@ name = "dash"
|
|||||||
is_root = false
|
is_root = false
|
||||||
match_function = "starts_with_dash"
|
match_function = "starts_with_dash"
|
||||||
|
|
||||||
[[law.classifiers]]
|
|
||||||
name = "Artikel"
|
|
||||||
is_root = false
|
|
||||||
match_function = "contains"
|
|
||||||
|
|
||||||
[parser]
|
[parser]
|
||||||
remove_strings = [
|
remove_strings = [
|
||||||
"<tab />",
|
"<tab />",
|
||||||
|
@ -154,19 +154,19 @@ impl Abschnitt {
|
|||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
if Ueberschrift::test(child, "g1") {
|
if Ueberschrift::test(child, "g1") {
|
||||||
let (ueberschrift, para_header) = Ueberschrift::parse_full(c, "g1");
|
let (ueberschrift, para_header) = Ueberschrift::parse(c, "g1");
|
||||||
builder.new_header(&ueberschrift.content);
|
builder.new_header(&ueberschrift.content);
|
||||||
if let Some(para_header) = para_header {
|
if let Some(para_header) = para_header {
|
||||||
builder.new_next_para_header(&para_header.content);
|
builder.new_next_para_header(&para_header.content);
|
||||||
}
|
}
|
||||||
} else if Ueberschrift::test(child, "g2") {
|
} else if Ueberschrift::test(child, "g2") {
|
||||||
let (ueberschrift, para_header) = Ueberschrift::parse_full(c, "g2");
|
let (ueberschrift, para_header) = Ueberschrift::parse(c, "g2");
|
||||||
builder.new_desc(&ueberschrift.content);
|
builder.new_desc(&ueberschrift.content);
|
||||||
if let Some(para_header) = para_header {
|
if let Some(para_header) = para_header {
|
||||||
builder.new_next_para_header(&para_header.content);
|
builder.new_next_para_header(&para_header.content);
|
||||||
}
|
}
|
||||||
} else if Ueberschrift::test(child, "g1min") {
|
} else if Ueberschrift::test(child, "g1min") {
|
||||||
let (ueberschrift, para_header) = Ueberschrift::parse_full(c, "g1min");
|
let (ueberschrift, para_header) = Ueberschrift::parse(c, "g1min");
|
||||||
builder.new_header(&ueberschrift.content);
|
builder.new_header(&ueberschrift.content);
|
||||||
if let Some(para_header) = para_header {
|
if let Some(para_header) = para_header {
|
||||||
builder.new_next_para_header(&para_header.content);
|
builder.new_next_para_header(&para_header.content);
|
||||||
|
@ -355,7 +355,8 @@ impl Ueberschrift {
|
|||||||
n.tag_name().name() == "ueberschrift" && n.attribute("typ").unwrap() == typ
|
n.tag_name().name() == "ueberschrift" && n.attribute("typ").unwrap() == typ
|
||||||
}
|
}
|
||||||
|
|
||||||
pub(crate) fn parse(n: Node, typ: &str) -> Self {
|
pub(crate) fn parse(c: &mut Peekable<Children>, typ: &str) -> (Self, Option<Self>) {
|
||||||
|
let n = c.next().unwrap();
|
||||||
Expect::from(&n).tag("ueberschrift");
|
Expect::from(&n).tag("ueberschrift");
|
||||||
|
|
||||||
assert_eq!(n.attribute("typ").unwrap(), typ);
|
assert_eq!(n.attribute("typ").unwrap(), typ);
|
||||||
@ -455,86 +456,6 @@ impl Ueberschrift {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
(art, next_para_header)
|
|
||||||
}
|
|
||||||
pub(crate) fn parse_full(c: &mut Peekable<Children>, typ: &str) -> (Self, Option<Self>) {
|
|
||||||
let n = c.next().unwrap();
|
|
||||||
Expect::from(&n).tag("ueberschrift");
|
|
||||||
|
|
||||||
assert_eq!(n.attribute("typ").unwrap(), typ);
|
|
||||||
|
|
||||||
let mut ret = Self {
|
|
||||||
content: n.text().unwrap().into(),
|
|
||||||
typ: typ.into(),
|
|
||||||
};
|
|
||||||
|
|
||||||
let mut next_para_header = None;
|
|
||||||
|
|
||||||
if ret.content != "Text" {
|
|
||||||
if let Some(child) = c.peek() {
|
|
||||||
if Ueberschrift::test(child, "art") {
|
|
||||||
let next_para = Ueberschrift::parse(c.next().unwrap(), "art");
|
|
||||||
next_para_header = Some(next_para);
|
|
||||||
} else if typ == "g1" && ret.content.starts_with("Artikel ") {
|
|
||||||
// wrongly tagged
|
|
||||||
// artikel
|
|
||||||
|
|
||||||
// TODO: remove code duplication from further down
|
|
||||||
let mut clone = c.clone();
|
|
||||||
if let Some(first_child) = clone.next() {
|
|
||||||
if let Some(second_child) = clone.next() {
|
|
||||||
if Ueberschrift::test(&first_child, "para")
|
|
||||||
&& Ueberschrift::test(&second_child, "para")
|
|
||||||
{
|
|
||||||
let first_para = Ueberschrift::parse(c.next().unwrap(), "para");
|
|
||||||
let second_para = Ueberschrift::parse(c.next().unwrap(), "para");
|
|
||||||
next_para_header = Some(Ueberschrift {
|
|
||||||
typ: first_para.typ,
|
|
||||||
content: format!(
|
|
||||||
"{} - {}",
|
|
||||||
first_para.content, second_para.content
|
|
||||||
),
|
|
||||||
});
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
} else if typ == "art" && !ret.content.starts_with("Artikel") {
|
|
||||||
ret.content = format!("Artikel {}", ret.content);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
debug!("Parsed Ueberschrift {ret:#?}");
|
|
||||||
|
|
||||||
(ret, next_para_header)
|
|
||||||
}
|
|
||||||
|
|
||||||
fn parse_art(n: &mut Peekable<Children>) -> (Self, Option<Self>) {
|
|
||||||
let art = Self::parse(n.next().unwrap(), "art");
|
|
||||||
let mut next_para_header = None;
|
|
||||||
|
|
||||||
let mut clone = n.clone();
|
|
||||||
if let Some(first_child) = clone.next() {
|
|
||||||
if let Some(second_child) = clone.next() {
|
|
||||||
if Ueberschrift::test(&first_child, "para")
|
|
||||||
&& Ueberschrift::test(&second_child, "para")
|
|
||||||
{
|
|
||||||
let first_para = Ueberschrift::parse(n.next().unwrap(), "para");
|
|
||||||
let second_para = Ueberschrift::parse(n.next().unwrap(), "para");
|
|
||||||
next_para_header = Some(Ueberschrift {
|
|
||||||
typ: first_para.typ,
|
|
||||||
content: format!("{} - {}", first_para.content, second_para.content),
|
|
||||||
});
|
|
||||||
} else if Ueberschrift::test(&first_child, "para") {
|
|
||||||
let first_para = Ueberschrift::parse(n.next().unwrap(), "para");
|
|
||||||
next_para_header = Some(Ueberschrift {
|
|
||||||
typ: first_para.typ,
|
|
||||||
content: format!("{}", first_para.content),
|
|
||||||
});
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
(art, next_para_header)
|
(art, next_para_header)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
Loading…
Reference in New Issue
Block a user