diff --git a/src/config.rs b/src/config.rs index f8ca4cf..a78ca42 100644 --- a/src/config.rs +++ b/src/config.rs @@ -19,6 +19,7 @@ use crate::law::{self, responsible::*}; use crate::misc::Error; use crate::paragraph::Parser; +use crate::{default_type, Typ}; use crate::{law::ClassifierApplicable, misc}; use serde::Deserialize; use std::fs; @@ -90,13 +91,13 @@ impl Config { /// use risp::Config; /// use std::path::Path; /// - /// let (law_id, builder, parser) = Config::load(Path::new("data/configs/abgb.toml")).unwrap(); + /// let (_, law_id, builder, parser) = Config::load(Path::new("data/configs/abgb.toml")).unwrap(); /// /// assert_eq!(law_id, 10001622); /// ``` pub fn load + std::fmt::Debug>( path: P, - ) -> Result<(usize, law::Builder, Parser), Error> { + ) -> Result<(Typ, usize, law::Builder, Parser), Error> { info!("Using cache dir: {}", misc::get_cache_dir().unwrap()); let config_str = fs::read_to_string(path)?; @@ -152,7 +153,7 @@ impl Config { ); parser.move_para_headers_into_content(); } - Ok((config.law.id, builder, parser)) + Ok((config.law.typ, config.law.id, builder, parser)) } } @@ -160,6 +161,8 @@ impl Config { struct Law { id: usize, name: String, + #[serde(default = "default_type")] + typ: Typ, par_sign: Option, classifiers: Option>, } diff --git a/src/law/mod.rs b/src/law/mod.rs index 7f0d5dc..3af5485 100644 --- a/src/law/mod.rs +++ b/src/law/mod.rs @@ -76,8 +76,8 @@ impl Law { pub fn from_config + tracing::Value + std::fmt::Debug>( path: P, ) -> Result { - let (law_id, mut builder, parser) = Config::load(path)?; - let pars = parse(law_id)?; + let (typ, law_id, mut builder, parser) = Config::load(path)?; + let pars = parse(&typ, law_id)?; for par in pars { let cont = parser.parse(&par, &mut builder)?; @@ -308,7 +308,7 @@ impl Builder { /// use risp::{Config, law::{Law, Heading}}; /// use std::path::Path; /// - /// let (_, mut builder, _) = Config::load(Path::new("data/configs/abgb.toml")).unwrap(); + /// let (_, _, mut builder, _) = 
Config::load(Path::new("data/configs/abgb.toml")).unwrap(); /// /// builder.new_header("1. Theil"); /// @@ -401,7 +401,7 @@ impl Builder { /// use risp::{Config, law::{Law, Heading}}; /// use std::path::Path; /// - /// let (_, mut builder, _) = Config::load(Path::new("data/configs/abgb.toml")).unwrap(); + /// let (_, _, mut builder, _) = Config::load(Path::new("data/configs/abgb.toml")).unwrap(); /// /// builder.new_header("1. Theil"); /// builder.new_desc("Description of the first header"); @@ -460,7 +460,7 @@ impl Builder { /// use risp::{Config, law::{Law, Heading, Content, HeadingContent, Section}}; /// use std::path::Path; /// - /// let (_, mut builder, _) = Config::load(Path::new("data/configs/abgb.toml")).unwrap(); + /// let (_, _, mut builder, _) = Config::load(Path::new("data/configs/abgb.toml")).unwrap(); /// builder.new_header("1. Theil"); /// /// let par = "§ 1".to_string(); @@ -736,7 +736,7 @@ mod tests { for config in configs { let path = format!("{}", config.unwrap().path().display()); if path.contains("abgb") { - let (_law_id, mut builder, _) = Config::load(&path).unwrap(); + let (_, _law_id, mut builder, _) = Config::load(&path).unwrap(); builder.new_header("Nullter Theil. Einleitung"); builder.new_par("§ 1.".into(), Content::Text("My test law text.".into())); builder.new_header("Erster Hauptstück. 
Einleitung"); diff --git a/src/lib.rs b/src/lib.rs index 7a515b8..be3fb9f 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -20,7 +20,18 @@ pub use config::Config; mod misc; pub use misc::clear_cache; pub use misc::Error; +use serde::Deserialize; pub mod law; pub mod overview; pub mod paragraph; + +#[derive(Debug, Deserialize)] +pub enum Typ { + Bund, + Land(String), +} + +fn default_type() -> Typ { + Typ::Bund +} diff --git a/src/main.rs b/src/main.rs index 47494fe..b01f605 100644 --- a/src/main.rs +++ b/src/main.rs @@ -58,7 +58,7 @@ fn main() { } else { let config = &args.config.unwrap(); // ok, checked with clap if let Some(par_url) = &args.par_url { - let (_, mut builder, parser) = Config::load(config).unwrap(); + let (_, _, mut builder, parser) = Config::load(config).unwrap(); builder.add_classifier(Classifier::new("always-true", Arc::new(always_true))); // builder.new_header("initial"); parser.parse(par_url, &mut builder).unwrap(); diff --git a/src/overview/mod.rs b/src/overview/mod.rs index 98259fb..55e0b9b 100644 --- a/src/overview/mod.rs +++ b/src/overview/mod.rs @@ -23,7 +23,10 @@ use std::path::Path; use serde::Deserialize; use tracing::{event, instrument, Level}; -use crate::misc::{current_date, get_cache_dir, Error}; +use crate::{ + misc::{current_date, get_cache_dir, Error}, + Typ, +}; use ris_structure::OgdSearchResult; @@ -57,20 +60,20 @@ use ris_structure::OgdSearchResult; /// /// # Example /// ``` -/// use risp::overview::parse; +/// use risp::{Typ, overview::parse}; /// -/// let list_with_xml_links_to_paragraphs = parse(10001905).unwrap(); +/// let list_with_xml_links_to_paragraphs = parse(&Typ::Bund, 10001905).unwrap(); /// assert_eq!(list_with_xml_links_to_paragraphs.len(), 31); // TEG has 31 paragraphs /// assert_eq!(list_with_xml_links_to_paragraphs[0], "https://www.ris.bka.gv.at/Dokumente/Bundesnormen/NOR12025190/NOR12025190.xml"); // Link to first paragraph /// ``` -pub fn parse(law_id: usize) -> Result, Error> { +pub fn parse(typ: &Typ, law_id: 
usize) -> Result, Error> { let mut page = 1; let mut skip = true; let mut ret = Vec::new(); loop { //info!("=== Fetching overview page #{page} ==="); event!(Level::INFO, "Fetching over page #{page}"); - let json = fetch_page(law_id, page)?; + let json = fetch_page(typ, law_id, page)?; let (cont, nodes) = parse_from_str(&json, skip)?; for n in nodes { ret.push(n.clone()); @@ -111,7 +114,7 @@ fn parse_from_str(content: &str, skip_first: bool) -> Result<(bool, Vec) Ok((true, ret)) } -fn fetch_page(overview_id: usize, page: usize) -> Result { +fn fetch_page(typ: &Typ, overview_id: usize, page: usize) -> Result { use std::fs; let expected_filename = format!("{}law-{overview_id}-{page}", get_cache_dir()?); @@ -126,15 +129,37 @@ fn fetch_page(overview_id: usize, page: usize) -> Result { Level::INFO, "Not finding law_id {overview_id} (page {page}) in the cache, downloading..." ); - let data = ureq::post("https://data.bka.gv.at/ris/api/v2.6/Bundesrecht") - .send_form(&[ - ("Applikation", "BrKons"), - ("Gesetzesnummer", &format!("{overview_id}")), - ("DokumenteProSeite", "OneHundred"), - ("Seitennummer", &format!("{page}")), - ("Fassung.FassungVom", &current_date()), - ])? + let (path, application, additional_params) = match typ { + Typ::Bund => ("Bundesrecht", "BrKons", None), + Typ::Land(land) => { + let additional = if land == "OÖ" { + Some(("Bundesland.SucheInOberoesterreich", "true")) + } else { + None + }; + ("Landesrecht", "LrKons", additional) + } + }; + + let overview_id = format!("{overview_id}"); + let page = format!("{page}"); + let current_date = current_date(); + let mut form_params = vec![ + ("Applikation", application), + ("Gesetzesnummer", &overview_id), + ("DokumenteProSeite", "OneHundred"), + ("Seitennummer", &page), + ("Fassung.FassungVom", &current_date), + ]; + + if let Some(additional) = additional_params { + form_params.push(additional); + } + + let data = ureq::post(&format!("https://data.bka.gv.at/ris/api/v2.6/{path}")) + .send_form(&form_params)? 
.into_string()?; + let path = Path::new(&expected_filename); if let Some(parent) = path.parent() { // Try to create the directory (and any necessary parent directories) @@ -158,9 +183,9 @@ mod tests { for config in configs { let path = format!("{}", config.unwrap().path().display()); - let (law_id, _, _) = Config::load(&path).unwrap(); + let (typ, law_id, _, _) = Config::load(&path).unwrap(); - let actual = parse(law_id).unwrap(); + let actual = parse(&typ, law_id).unwrap(); let expected_path = format!("./data/expected/overview/{law_id}"); match fs::read_to_string(&expected_path) { Ok(expected) => { diff --git a/src/overview/ris_structure.rs b/src/overview/ris_structure.rs index be6676e..96da78b 100644 --- a/src/overview/ris_structure.rs +++ b/src/overview/ris_structure.rs @@ -111,8 +111,16 @@ pub(crate) struct Metadata { #[serde(rename = "Allgemein")] general: GeneralMetadata, + #[serde(flatten)] + law_type: LawType, +} + +#[derive(Deserialize)] +enum LawType { #[serde(rename = "Bundesrecht")] - fed: FedMetadata, + Federal(FedMetadata), + #[serde(rename = "Landesrecht")] + State(LocMetadata), } #[derive(Deserialize)] @@ -150,6 +158,18 @@ pub(crate) struct GeneralMetadata { document_url: String, } +// TODO: check if more data is given to me (nom nom nom) +#[derive(Deserialize)] +#[allow(dead_code)] +#[serde(rename_all = "PascalCase")] +pub(crate) struct LocMetadata { + #[serde(rename = "Kurztitel")] + short_title: String, + + #[serde(rename = "Titel")] + title: Option, +} + #[derive(Deserialize)] #[allow(dead_code)] #[serde(rename_all = "PascalCase")] diff --git a/src/paragraph/mod.rs b/src/paragraph/mod.rs index b02c747..d624814 100644 --- a/src/paragraph/mod.rs +++ b/src/paragraph/mod.rs @@ -105,7 +105,7 @@ impl Parser { /// use risp::{Config, law::{Law, Heading, Content, Section, HeadingContent}}; /// use std::path::Path; /// - /// let (_, mut builder, parser) = Config::load(Path::new("data/configs/abgb.toml")).unwrap(); + /// let (_, _, mut builder, parser) = 
Config::load(Path::new("data/configs/abgb.toml")).unwrap(); /// let result = parser.parse("https://www.ris.bka.gv.at/Dokumente/Bundesnormen/NOR12017691/NOR12017691.xml", &mut builder).unwrap(); /// /// let law: Law = builder.into(); @@ -223,7 +223,7 @@ mod tests { let path = format!("{}", config.unwrap().path().display()); println!("Testing {path}"); - let (law_id, mut builder, parser) = Config::load(&path).unwrap(); + let (_, law_id, mut builder, parser) = Config::load(&path).unwrap(); let paragraph_path = format!("./data/expected/overview/{law_id}"); let expected_path = format!("./data/expected/par/{law_id}");