parse full urhg

This commit is contained in:
philipp 2023-11-05 14:10:38 +01:00
parent 55517758c2
commit bb3fd142c7
5 changed files with 58 additions and 28 deletions

View File

@@ -44,5 +44,6 @@ fn main() {
env_logger::init(); env_logger::init();
let builder = LawBuilder::new("UrhG"); let builder = LawBuilder::new("UrhG");
println!("{:#?}", builder);
println!("{:#?}", builder.toc()); println!("{:#?}", builder.toc());
} }

View File

@@ -17,14 +17,14 @@ fn current_date() -> String {
/// ///
/// # Errors /// # Errors
/// Fails if `ureq` can't create a connection, probably because there's no internet connection? (Or RIS is not online.) /// Fails if `ureq` can't create a connection, probably because there's no internet connection? (Or RIS is not online.)
fn fetch_page(overview_id: usize) -> Result<String, Error> { fn fetch_page(overview_id: usize, page: usize) -> Result<String, Error> {
Ok( Ok(
ureq::post("https://data.bka.gv.at/ris/api/v2.6/Bundesrecht") ureq::post("https://data.bka.gv.at/ris/api/v2.6/Bundesrecht")
.send_form(&[ .send_form(&[
("Applikation", "BrKons"), ("Applikation", "BrKons"),
("Gesetzesnummer", &format!("{}", overview_id)), ("Gesetzesnummer", &format!("{}", overview_id)),
("DokumenteProSeite", "OneHundred"), ("DokumenteProSeite", "OneHundred"),
("Seitennummer", &format!("{}", 1)), ("Seitennummer", &format!("{}", page)),
("Fassung.FassungVom", &current_date()), ("Fassung.FassungVom", &current_date()),
])? ])?
.into_string()?, .into_string()?,
@@ -38,7 +38,9 @@ pub(crate) struct Wrapper {
} }
pub(crate) fn parse(overview_id: usize, builder: &mut LawBuilder) -> Result<(), Error> { pub(crate) fn parse(overview_id: usize, builder: &mut LawBuilder) -> Result<(), Error> {
let json = fetch_page(overview_id)?; let mut page = 1;
loop {
let json = fetch_page(overview_id, page)?;
let wrapper: Wrapper = serde_json::from_str(&json)?; let wrapper: Wrapper = serde_json::from_str(&json)?;
@@ -46,7 +48,16 @@ pub(crate) fn parse(overview_id: usize, builder: &mut LawBuilder) -> Result<(),
// skip bc. first one is // skip bc. first one is
// always not relevant for // always not relevant for
// me :-) // me :-)
crate::par::parse(&par, builder).unwrap(); if !crate::par::parse(&par, builder).unwrap() {
break;
}
}
page += 1;
if !wrapper.ogd_search_result.has_next_page() {
break;
}
} }
Ok(()) Ok(())

View File

@@ -17,8 +17,15 @@ pub(crate) struct OgdSearchResult {
} }
impl OgdSearchResult { impl OgdSearchResult {
fn has_next_page(&self) -> bool { pub(crate) fn has_next_page(&self) -> bool {
todo!(); let hits = &self.ogd_document_results.hits;
let curr_page_number = hits.page_number;
let page_size = hits.page_size;
let elements = hits.text;
let parsed_so_far = curr_page_number * page_size;
elements > parsed_so_far
} }
pub(crate) fn get_par(&self) -> Vec<String> { pub(crate) fn get_par(&self) -> Vec<String> {

View File

@@ -8,7 +8,7 @@ fn fetch_page(url: &str) -> Result<String, Error> {
Ok(ureq::get(url).call()?.into_string()?) Ok(ureq::get(url).call()?.into_string()?)
} }
pub(crate) fn parse(url: &str, builder: &mut LawBuilder) -> Result<(), Error> { pub(crate) fn parse(url: &str, builder: &mut LawBuilder) -> Result<bool, Error> {
info!("Parsing {url}"); info!("Parsing {url}");
let xml = fetch_page(url)?; let xml = fetch_page(url)?;
let xml = xml.replace("<gdash />", "-"); // used e.g. in §11 Abs. 3 UrhG let xml = xml.replace("<gdash />", "-"); // used e.g. in §11 Abs. 3 UrhG
@@ -26,9 +26,14 @@ pub(crate) fn parse(url: &str, builder: &mut LawBuilder) -> Result<(), Error> {
); // 1. Verwertungsrechte. before § 14 ); // 1. Verwertungsrechte. before § 14
let xml = xml.replace("<i>.</i>", "."); // e.g. § 37d Abs. 4 (last point)... let xml = xml.replace("<i>.</i>", "."); // e.g. § 37d Abs. 4 (last point)...
// Artikel 18 UrhG
let xml = xml.replace("<n><i>", "");
let xml = xml.replace("</i></n>", "");
debug!("{xml}"); debug!("{xml}");
let risdok = Risdok::from_str(&xml, builder)?; let continue_parsing = Risdok::from_str(&xml, builder)?;
Ok(()) Ok(continue_parsing)
} }

View File

@@ -9,32 +9,33 @@ use crate::{
#[derive(Debug, PartialEq)] #[derive(Debug, PartialEq)]
pub(crate) struct Risdok { pub(crate) struct Risdok {
metadaten: Metadaten, metadaten: Metadaten,
nutzdaten: Nutzdaten,
layoutdaten: Layoutdaten, layoutdaten: Layoutdaten,
} }
impl Risdok { impl Risdok {
pub(crate) fn parse(n: Node, builder: &mut LawBuilder) -> Self { pub(crate) fn parse(n: Node, builder: &mut LawBuilder) -> bool {
assert!(n.tag_name().name() == "risdok"); assert!(n.tag_name().name() == "risdok");
let mut c = n.children(); let mut c = n.children();
let ret = Self { let metadaten = Metadaten::parse(c.next().unwrap());
metadaten: Metadaten::parse(c.next().unwrap()), let nutzdaten = Nutzdaten::parse(c.next().unwrap(), builder);
nutzdaten: Nutzdaten::parse(c.next().unwrap(), builder), if !nutzdaten {
layoutdaten: Layoutdaten::parse(c.next().unwrap()), return false;
}; }
let layoutdaten = Layoutdaten::parse(c.next().unwrap());
assert_eq!(c.next(), None); assert_eq!(c.next(), None);
ret true
} }
pub(crate) fn from_str(xml: &str, builder: &mut LawBuilder) -> Result<Self, Error> { pub(crate) fn from_str(xml: &str, builder: &mut LawBuilder) -> Result<bool, Error> {
let doc = roxmltree::Document::parse(&xml)?; let doc = roxmltree::Document::parse(&xml)?;
let root = doc.root(); let root = doc.root();
assert_eq!(root.children().into_iter().count(), 1); assert_eq!(root.children().into_iter().count(), 1);
Ok(Self::parse(root.children().next().unwrap(), builder)) let continue_parsing = Self::parse(root.children().next().unwrap(), builder);
Ok(continue_parsing)
} }
} }
@@ -53,23 +54,23 @@ impl Metadaten {
#[derive(Debug, PartialEq)] #[derive(Debug, PartialEq)]
pub(crate) struct Nutzdaten {} pub(crate) struct Nutzdaten {}
impl Nutzdaten { impl Nutzdaten {
pub(crate) fn parse(n: Node, builder: &mut LawBuilder) -> Self { pub(crate) fn parse(n: Node, builder: &mut LawBuilder) -> bool {
assert!(n.tag_name().name() == "nutzdaten"); assert!(n.tag_name().name() == "nutzdaten");
let mut c = n.children(); let mut c = n.children();
Abschnitt::parse(c.next().unwrap(), builder); let ret = Abschnitt::parse(c.next().unwrap(), builder);
assert_eq!(c.next(), None); assert_eq!(c.next(), None);
Self {} ret
} }
} }
#[derive(Debug, PartialEq)] #[derive(Debug, PartialEq)]
pub(crate) struct Abschnitt; pub(crate) struct Abschnitt;
impl Abschnitt { impl Abschnitt {
pub(crate) fn parse(n: Node, builder: &mut LawBuilder) { pub(crate) fn parse(n: Node, builder: &mut LawBuilder) -> bool {
assert!(n.tag_name().name() == "abschnitt"); assert!(n.tag_name().name() == "abschnitt");
let mut c = n.children().peekable(); let mut c = n.children().peekable();
@@ -102,6 +103,9 @@ impl Abschnitt {
Some(child) => { Some(child) => {
if Ueberschrift::test(&child, "g1") { if Ueberschrift::test(&child, "g1") {
let ueberschrift = Ueberschrift::parse(c.next().unwrap(), "g1"); let ueberschrift = Ueberschrift::parse(c.next().unwrap(), "g1");
if ueberschrift.content.trim().starts_with("Artikel") {
return false;
}
builder.new_header(&ueberschrift.content); builder.new_header(&ueberschrift.content);
} else if Ueberschrift::test(&child, "g2") { } else if Ueberschrift::test(&child, "g2") {
let ueberschrift = Ueberschrift::parse(c.next().unwrap(), "g2"); let ueberschrift = Ueberschrift::parse(c.next().unwrap(), "g2");
@@ -230,6 +234,8 @@ impl Abschnitt {
} }
assert_eq!(c.next(), None); assert_eq!(c.next(), None);
true
} }
} }