parse full urhg
This commit is contained in:
parent
55517758c2
commit
bb3fd142c7
@ -44,5 +44,6 @@ fn main() {
|
|||||||
env_logger::init();
|
env_logger::init();
|
||||||
let builder = LawBuilder::new("UrhG");
|
let builder = LawBuilder::new("UrhG");
|
||||||
|
|
||||||
|
println!("{:#?}", builder);
|
||||||
println!("{:#?}", builder.toc());
|
println!("{:#?}", builder.toc());
|
||||||
}
|
}
|
||||||
|
@ -17,14 +17,14 @@ fn current_date() -> String {
|
|||||||
///
|
///
|
||||||
/// # Errors
|
/// # Errors
|
||||||
/// Fails if `ureq` can't create a connection, probably because there's no internet connection? (Or RIS is not online.)
|
/// Fails if `ureq` can't create a connection, probably because there's no internet connection? (Or RIS is not online.)
|
||||||
fn fetch_page(overview_id: usize) -> Result<String, Error> {
|
fn fetch_page(overview_id: usize, page: usize) -> Result<String, Error> {
|
||||||
Ok(
|
Ok(
|
||||||
ureq::post("https://data.bka.gv.at/ris/api/v2.6/Bundesrecht")
|
ureq::post("https://data.bka.gv.at/ris/api/v2.6/Bundesrecht")
|
||||||
.send_form(&[
|
.send_form(&[
|
||||||
("Applikation", "BrKons"),
|
("Applikation", "BrKons"),
|
||||||
("Gesetzesnummer", &format!("{}", overview_id)),
|
("Gesetzesnummer", &format!("{}", overview_id)),
|
||||||
("DokumenteProSeite", "OneHundred"),
|
("DokumenteProSeite", "OneHundred"),
|
||||||
("Seitennummer", &format!("{}", 1)),
|
("Seitennummer", &format!("{}", page)),
|
||||||
("Fassung.FassungVom", &current_date()),
|
("Fassung.FassungVom", &current_date()),
|
||||||
])?
|
])?
|
||||||
.into_string()?,
|
.into_string()?,
|
||||||
@ -38,15 +38,26 @@ pub(crate) struct Wrapper {
|
|||||||
}
|
}
|
||||||
|
|
||||||
pub(crate) fn parse(overview_id: usize, builder: &mut LawBuilder) -> Result<(), Error> {
|
pub(crate) fn parse(overview_id: usize, builder: &mut LawBuilder) -> Result<(), Error> {
|
||||||
let json = fetch_page(overview_id)?;
|
let mut page = 1;
|
||||||
|
loop {
|
||||||
|
let json = fetch_page(overview_id, page)?;
|
||||||
|
|
||||||
let wrapper: Wrapper = serde_json::from_str(&json)?;
|
let wrapper: Wrapper = serde_json::from_str(&json)?;
|
||||||
|
|
||||||
for par in wrapper.ogd_search_result.get_par().into_iter().skip(1) {
|
for par in wrapper.ogd_search_result.get_par().into_iter().skip(1) {
|
||||||
// skip bc. first one is
|
// skip bc. first one is
|
||||||
// always not relevant for
|
// always not relevant for
|
||||||
// me :-)
|
// me :-)
|
||||||
crate::par::parse(&par, builder).unwrap();
|
if !crate::par::parse(&par, builder).unwrap() {
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
page += 1;
|
||||||
|
|
||||||
|
if !wrapper.ogd_search_result.has_next_page() {
|
||||||
|
break;
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
Ok(())
|
Ok(())
|
||||||
|
@ -17,8 +17,15 @@ pub(crate) struct OgdSearchResult {
|
|||||||
}
|
}
|
||||||
|
|
||||||
impl OgdSearchResult {
|
impl OgdSearchResult {
|
||||||
fn has_next_page(&self) -> bool {
|
pub(crate) fn has_next_page(&self) -> bool {
|
||||||
todo!();
|
let hits = &self.ogd_document_results.hits;
|
||||||
|
let curr_page_number = hits.page_number;
|
||||||
|
let page_size = hits.page_size;
|
||||||
|
let elements = hits.text;
|
||||||
|
|
||||||
|
let parsed_so_far = curr_page_number * page_size;
|
||||||
|
|
||||||
|
elements > parsed_so_far
|
||||||
}
|
}
|
||||||
|
|
||||||
pub(crate) fn get_par(&self) -> Vec<String> {
|
pub(crate) fn get_par(&self) -> Vec<String> {
|
||||||
|
@ -8,7 +8,7 @@ fn fetch_page(url: &str) -> Result<String, Error> {
|
|||||||
Ok(ureq::get(url).call()?.into_string()?)
|
Ok(ureq::get(url).call()?.into_string()?)
|
||||||
}
|
}
|
||||||
|
|
||||||
pub(crate) fn parse(url: &str, builder: &mut LawBuilder) -> Result<(), Error> {
|
pub(crate) fn parse(url: &str, builder: &mut LawBuilder) -> Result<bool, Error> {
|
||||||
info!("Parsing {url}");
|
info!("Parsing {url}");
|
||||||
let xml = fetch_page(url)?;
|
let xml = fetch_page(url)?;
|
||||||
let xml = xml.replace("<gdash />", "-"); // used e.g. in §11 Abs. 3 UrhG
|
let xml = xml.replace("<gdash />", "-"); // used e.g. in §11 Abs. 3 UrhG
|
||||||
@ -26,9 +26,14 @@ pub(crate) fn parse(url: &str, builder: &mut LawBuilder) -> Result<(), Error> {
|
|||||||
); // 1. Verwertungsrechte. before § 14
|
); // 1. Verwertungsrechte. before § 14
|
||||||
|
|
||||||
let xml = xml.replace("<i>.</i>", "."); // e.g. § 37d Abs. 4 (last point)...
|
let xml = xml.replace("<i>.</i>", "."); // e.g. § 37d Abs. 4 (last point)...
|
||||||
|
|
||||||
|
// Artikel 18 UrhG
|
||||||
|
let xml = xml.replace("<n><i>", "");
|
||||||
|
let xml = xml.replace("</i></n>", "");
|
||||||
|
|
||||||
debug!("{xml}");
|
debug!("{xml}");
|
||||||
|
|
||||||
let risdok = Risdok::from_str(&xml, builder)?;
|
let continue_parsing = Risdok::from_str(&xml, builder)?;
|
||||||
|
|
||||||
Ok(())
|
Ok(continue_parsing)
|
||||||
}
|
}
|
||||||
|
@ -9,32 +9,33 @@ use crate::{
|
|||||||
#[derive(Debug, PartialEq)]
|
#[derive(Debug, PartialEq)]
|
||||||
pub(crate) struct Risdok {
|
pub(crate) struct Risdok {
|
||||||
metadaten: Metadaten,
|
metadaten: Metadaten,
|
||||||
nutzdaten: Nutzdaten,
|
|
||||||
layoutdaten: Layoutdaten,
|
layoutdaten: Layoutdaten,
|
||||||
}
|
}
|
||||||
|
|
||||||
impl Risdok {
|
impl Risdok {
|
||||||
pub(crate) fn parse(n: Node, builder: &mut LawBuilder) -> Self {
|
pub(crate) fn parse(n: Node, builder: &mut LawBuilder) -> bool {
|
||||||
assert!(n.tag_name().name() == "risdok");
|
assert!(n.tag_name().name() == "risdok");
|
||||||
|
|
||||||
let mut c = n.children();
|
let mut c = n.children();
|
||||||
|
|
||||||
let ret = Self {
|
let metadaten = Metadaten::parse(c.next().unwrap());
|
||||||
metadaten: Metadaten::parse(c.next().unwrap()),
|
let nutzdaten = Nutzdaten::parse(c.next().unwrap(), builder);
|
||||||
nutzdaten: Nutzdaten::parse(c.next().unwrap(), builder),
|
if !nutzdaten {
|
||||||
layoutdaten: Layoutdaten::parse(c.next().unwrap()),
|
return false;
|
||||||
};
|
}
|
||||||
|
let layoutdaten = Layoutdaten::parse(c.next().unwrap());
|
||||||
|
|
||||||
assert_eq!(c.next(), None);
|
assert_eq!(c.next(), None);
|
||||||
|
|
||||||
ret
|
true
|
||||||
}
|
}
|
||||||
|
|
||||||
pub(crate) fn from_str(xml: &str, builder: &mut LawBuilder) -> Result<Self, Error> {
|
pub(crate) fn from_str(xml: &str, builder: &mut LawBuilder) -> Result<bool, Error> {
|
||||||
let doc = roxmltree::Document::parse(&xml)?;
|
let doc = roxmltree::Document::parse(&xml)?;
|
||||||
let root = doc.root();
|
let root = doc.root();
|
||||||
assert_eq!(root.children().into_iter().count(), 1);
|
assert_eq!(root.children().into_iter().count(), 1);
|
||||||
Ok(Self::parse(root.children().next().unwrap(), builder))
|
let continue_parsing = Self::parse(root.children().next().unwrap(), builder);
|
||||||
|
Ok(continue_parsing)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -53,23 +54,23 @@ impl Metadaten {
|
|||||||
#[derive(Debug, PartialEq)]
|
#[derive(Debug, PartialEq)]
|
||||||
pub(crate) struct Nutzdaten {}
|
pub(crate) struct Nutzdaten {}
|
||||||
impl Nutzdaten {
|
impl Nutzdaten {
|
||||||
pub(crate) fn parse(n: Node, builder: &mut LawBuilder) -> Self {
|
pub(crate) fn parse(n: Node, builder: &mut LawBuilder) -> bool {
|
||||||
assert!(n.tag_name().name() == "nutzdaten");
|
assert!(n.tag_name().name() == "nutzdaten");
|
||||||
|
|
||||||
let mut c = n.children();
|
let mut c = n.children();
|
||||||
|
|
||||||
Abschnitt::parse(c.next().unwrap(), builder);
|
let ret = Abschnitt::parse(c.next().unwrap(), builder);
|
||||||
|
|
||||||
assert_eq!(c.next(), None);
|
assert_eq!(c.next(), None);
|
||||||
|
|
||||||
Self {}
|
ret
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
#[derive(Debug, PartialEq)]
|
#[derive(Debug, PartialEq)]
|
||||||
pub(crate) struct Abschnitt;
|
pub(crate) struct Abschnitt;
|
||||||
impl Abschnitt {
|
impl Abschnitt {
|
||||||
pub(crate) fn parse(n: Node, builder: &mut LawBuilder) {
|
pub(crate) fn parse(n: Node, builder: &mut LawBuilder) -> bool {
|
||||||
assert!(n.tag_name().name() == "abschnitt");
|
assert!(n.tag_name().name() == "abschnitt");
|
||||||
|
|
||||||
let mut c = n.children().peekable();
|
let mut c = n.children().peekable();
|
||||||
@ -102,6 +103,9 @@ impl Abschnitt {
|
|||||||
Some(child) => {
|
Some(child) => {
|
||||||
if Ueberschrift::test(&child, "g1") {
|
if Ueberschrift::test(&child, "g1") {
|
||||||
let ueberschrift = Ueberschrift::parse(c.next().unwrap(), "g1");
|
let ueberschrift = Ueberschrift::parse(c.next().unwrap(), "g1");
|
||||||
|
if ueberschrift.content.trim().starts_with("Artikel") {
|
||||||
|
return false;
|
||||||
|
}
|
||||||
builder.new_header(&ueberschrift.content);
|
builder.new_header(&ueberschrift.content);
|
||||||
} else if Ueberschrift::test(&child, "g2") {
|
} else if Ueberschrift::test(&child, "g2") {
|
||||||
let ueberschrift = Ueberschrift::parse(c.next().unwrap(), "g2");
|
let ueberschrift = Ueberschrift::parse(c.next().unwrap(), "g2");
|
||||||
@ -230,6 +234,8 @@ impl Abschnitt {
|
|||||||
}
|
}
|
||||||
|
|
||||||
assert_eq!(c.next(), None);
|
assert_eq!(c.next(), None);
|
||||||
|
|
||||||
|
true
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
Loading…
Reference in New Issue
Block a user