Parse full UrhG
This commit is contained in:
@ -8,7 +8,7 @@ fn fetch_page(url: &str) -> Result<String, Error> {
|
||||
Ok(ureq::get(url).call()?.into_string()?)
|
||||
}
|
||||
|
||||
pub(crate) fn parse(url: &str, builder: &mut LawBuilder) -> Result<(), Error> {
|
||||
pub(crate) fn parse(url: &str, builder: &mut LawBuilder) -> Result<bool, Error> {
|
||||
info!("Parsing {url}");
|
||||
let xml = fetch_page(url)?;
|
||||
let xml = xml.replace("<gdash />", "-"); // used e.g. in §11 Abs. 3 UrhG
|
||||
@ -26,9 +26,14 @@ pub(crate) fn parse(url: &str, builder: &mut LawBuilder) -> Result<(), Error> {
|
||||
); // 1. Verwertungsrechte. before § 14
|
||||
|
||||
let xml = xml.replace("<i>.</i>", "."); // e.g. § 37d Abs. 4 (last point)...
|
||||
|
||||
// Artikel 18 UrhG
|
||||
let xml = xml.replace("<n><i>", "");
|
||||
let xml = xml.replace("</i></n>", "");
|
||||
|
||||
debug!("{xml}");
|
||||
|
||||
let risdok = Risdok::from_str(&xml, builder)?;
|
||||
let continue_parsing = Risdok::from_str(&xml, builder)?;
|
||||
|
||||
Ok(())
|
||||
Ok(continue_parsing)
|
||||
}
|
||||
|
@ -9,32 +9,33 @@ use crate::{
|
||||
#[derive(Debug, PartialEq)]
|
||||
pub(crate) struct Risdok {
|
||||
metadaten: Metadaten,
|
||||
nutzdaten: Nutzdaten,
|
||||
layoutdaten: Layoutdaten,
|
||||
}
|
||||
|
||||
impl Risdok {
|
||||
pub(crate) fn parse(n: Node, builder: &mut LawBuilder) -> Self {
|
||||
pub(crate) fn parse(n: Node, builder: &mut LawBuilder) -> bool {
|
||||
assert!(n.tag_name().name() == "risdok");
|
||||
|
||||
let mut c = n.children();
|
||||
|
||||
let ret = Self {
|
||||
metadaten: Metadaten::parse(c.next().unwrap()),
|
||||
nutzdaten: Nutzdaten::parse(c.next().unwrap(), builder),
|
||||
layoutdaten: Layoutdaten::parse(c.next().unwrap()),
|
||||
};
|
||||
let metadaten = Metadaten::parse(c.next().unwrap());
|
||||
let nutzdaten = Nutzdaten::parse(c.next().unwrap(), builder);
|
||||
if !nutzdaten {
|
||||
return false;
|
||||
}
|
||||
let layoutdaten = Layoutdaten::parse(c.next().unwrap());
|
||||
|
||||
assert_eq!(c.next(), None);
|
||||
|
||||
ret
|
||||
true
|
||||
}
|
||||
|
||||
pub(crate) fn from_str(xml: &str, builder: &mut LawBuilder) -> Result<Self, Error> {
|
||||
pub(crate) fn from_str(xml: &str, builder: &mut LawBuilder) -> Result<bool, Error> {
|
||||
let doc = roxmltree::Document::parse(&xml)?;
|
||||
let root = doc.root();
|
||||
assert_eq!(root.children().into_iter().count(), 1);
|
||||
Ok(Self::parse(root.children().next().unwrap(), builder))
|
||||
let continue_parsing = Self::parse(root.children().next().unwrap(), builder);
|
||||
Ok(continue_parsing)
|
||||
}
|
||||
}
|
||||
|
||||
@ -53,23 +54,23 @@ impl Metadaten {
|
||||
#[derive(Debug, PartialEq)]
|
||||
pub(crate) struct Nutzdaten {}
|
||||
impl Nutzdaten {
|
||||
pub(crate) fn parse(n: Node, builder: &mut LawBuilder) -> Self {
|
||||
pub(crate) fn parse(n: Node, builder: &mut LawBuilder) -> bool {
|
||||
assert!(n.tag_name().name() == "nutzdaten");
|
||||
|
||||
let mut c = n.children();
|
||||
|
||||
Abschnitt::parse(c.next().unwrap(), builder);
|
||||
let ret = Abschnitt::parse(c.next().unwrap(), builder);
|
||||
|
||||
assert_eq!(c.next(), None);
|
||||
|
||||
Self {}
|
||||
ret
|
||||
}
|
||||
}
|
||||
|
||||
#[derive(Debug, PartialEq)]
|
||||
pub(crate) struct Abschnitt;
|
||||
impl Abschnitt {
|
||||
pub(crate) fn parse(n: Node, builder: &mut LawBuilder) {
|
||||
pub(crate) fn parse(n: Node, builder: &mut LawBuilder) -> bool {
|
||||
assert!(n.tag_name().name() == "abschnitt");
|
||||
|
||||
let mut c = n.children().peekable();
|
||||
@ -102,6 +103,9 @@ impl Abschnitt {
|
||||
Some(child) => {
|
||||
if Ueberschrift::test(&child, "g1") {
|
||||
let ueberschrift = Ueberschrift::parse(c.next().unwrap(), "g1");
|
||||
if ueberschrift.content.trim().starts_with("Artikel") {
|
||||
return false;
|
||||
}
|
||||
builder.new_header(&ueberschrift.content);
|
||||
} else if Ueberschrift::test(&child, "g2") {
|
||||
let ueberschrift = Ueberschrift::parse(c.next().unwrap(), "g2");
|
||||
@ -230,6 +234,8 @@ impl Abschnitt {
|
||||
}
|
||||
|
||||
assert_eq!(c.next(), None);
|
||||
|
||||
true
|
||||
}
|
||||
}
|
||||
|
||||
|
Reference in New Issue
Block a user