Allow specifying applications, in preparation for also adding 'Landesgesetze' (state laws)

2024-08-22 10:46:59 +02:00 · 2024-08-22 10:46:59 +02:00 · 0154d00c58
commit 0154d00c58
parent f00d44f275
7 changed files with 88 additions and 29 deletions
--- a/src/config.rs
+++ b/src/config.rs
@ -19,6 +19,7 @@
 use crate::law::{self, responsible::*};
 use crate::misc::Error;
 use crate::paragraph::Parser;
 use crate::{default_type, Typ};
 use crate::{law::ClassifierApplicable, misc};
 use serde::Deserialize;
 use std::fs;
@ -90,13 +91,13 @@ impl Config {
    /// use risp::Config;
    /// use std::path::Path;
    ///
-    /// let (law_id, builder, parser) = Config::load(Path::new("data/configs/abgb.toml")).unwrap();
+    /// let (_, law_id, builder, parser) = Config::load(Path::new("data/configs/abgb.toml")).unwrap();
    ///
    /// assert_eq!(law_id, 10001622);
    /// ```    
    pub fn load<P: AsRef<Path> + std::fmt::Debug>(
        path: P,
-    ) -> Result<(usize, law::Builder, Parser), Error> {
+    ) -> Result<(Typ, usize, law::Builder, Parser), Error> {
        info!("Using cache dir: {}", misc::get_cache_dir().unwrap());
        let config_str = fs::read_to_string(path)?;
@ -152,7 +153,7 @@ impl Config {
            );
            parser.move_para_headers_into_content();
        }
-        Ok((config.law.id, builder, parser))
+        Ok((config.law.typ, config.law.id, builder, parser))
    }
 }
@ -160,6 +161,8 @@ impl Config {
 /// Law section of a configuration file; `Config::load` reads `typ` and `id` from it
 /// and returns them alongside the builder and the parser.
 struct Law {
    /// Numeric id of the law, returned by `Config::load` as the law id.
    id: usize,
    /// Name of the law.
    name: String,
    /// Jurisdiction of the law. When the key is missing from the configuration,
    /// serde calls `default_type` to fill it in.
    #[serde(default = "default_type")]
    typ: Typ,
    /// Optional paragraph sign.
    /// NOTE(review): how this value is consumed is not visible in this hunk — confirm.
    par_sign: Option<String>,
    /// Optional list of classifiers.
    /// NOTE(review): presumably registered on the law builder — confirm against `load`.
    classifiers: Option<Vec<Classifier>>,
 }
--- a/src/law/mod.rs
+++ b/src/law/mod.rs
@ -76,8 +76,8 @@ impl Law {
    pub fn from_config<P: AsRef<Path> + tracing::Value + std::fmt::Debug>(
        path: P,
    ) -> Result<Law, Error> {
-        let (law_id, mut builder, parser) = Config::load(path)?;
+        let (typ, law_id, mut builder, parser) = Config::load(path)?;
-        let pars = parse(law_id)?;
+        let pars = parse(&typ, law_id)?;
        for par in pars {
            let cont = parser.parse(&par, &mut builder)?;
@ -308,7 +308,7 @@ impl Builder {
    /// use risp::{Config, law::{Law, Heading}};
    /// use std::path::Path;
    ///
-    /// let (_, mut builder, _) = Config::load(Path::new("data/configs/abgb.toml")).unwrap();
+    /// let (_, _, mut builder, _) = Config::load(Path::new("data/configs/abgb.toml")).unwrap();
    ///
    /// builder.new_header("1. Theil");
    ///
@ -401,7 +401,7 @@ impl Builder {
    /// use risp::{Config, law::{Law, Heading}};
    /// use std::path::Path;
    ///
-    /// let (_, mut builder, _) = Config::load(Path::new("data/configs/abgb.toml")).unwrap();
+    /// let (_, _, mut builder, _) = Config::load(Path::new("data/configs/abgb.toml")).unwrap();
    ///
    /// builder.new_header("1. Theil");
    /// builder.new_desc("Description of the first header");
@ -460,7 +460,7 @@ impl Builder {
    /// use risp::{Config, law::{Law, Heading, Content, HeadingContent, Section}};
    /// use std::path::Path;
    ///
-    /// let (_, mut builder, _) = Config::load(Path::new("data/configs/abgb.toml")).unwrap();
+    /// let (_, _, mut builder, _) = Config::load(Path::new("data/configs/abgb.toml")).unwrap();
    /// builder.new_header("1. Theil");
    ///
    /// let par = "§ 1".to_string();
@ -736,7 +736,7 @@ mod tests {
        for config in configs {
            let path = format!("{}", config.unwrap().path().display());
            if path.contains("abgb") {
-                let (_law_id, mut builder, _) = Config::load(&path).unwrap();
+                let (_, _law_id, mut builder, _) = Config::load(&path).unwrap();
                builder.new_header("Nullter Theil. Einleitung");
                builder.new_par("§ 1.".into(), Content::Text("My test law text.".into()));
                builder.new_header("Erster Hauptstück. Einleitung");
--- a/src/lib.rs
+++ b/src/lib.rs
@ -20,7 +20,18 @@ pub use config::Config;
 mod misc;
 pub use misc::clear_cache;
 pub use misc::Error;
 use serde::Deserialize;
 pub mod law;
 pub mod overview;
 pub mod paragraph;
 /// Kind of law to fetch; the overview download uses it to choose the API path
 /// and the `Applikation` form parameter.
 #[derive(Debug, Deserialize)]
 pub enum Typ {
    /// Federal law: queried via the `Bundesrecht` path with application `BrKons`.
    Bund,
    /// State law: queried via the `Landesrecht` path with application `LrKons`.
    /// The string identifies the state; currently only `"OÖ"` adds an extra
    /// search parameter to the request.
    Land(String),
 }
 /// Serde default for a law's `typ` field: a configuration that does not
 /// specify a type is treated as federal law (`Typ::Bund`).
 fn default_type() -> Typ {
    Typ::Bund
 }
--- a/src/main.rs
+++ b/src/main.rs
@ -58,7 +58,7 @@ fn main() {
    } else {
        let config = &args.config.unwrap(); // ok, checked with clap
        if let Some(par_url) = &args.par_url {
-            let (_, mut builder, parser) = Config::load(config).unwrap();
+            let (_, _, mut builder, parser) = Config::load(config).unwrap();
            builder.add_classifier(Classifier::new("always-true", Arc::new(always_true)));
            // builder.new_header("initial");
            parser.parse(par_url, &mut builder).unwrap();
--- a/src/overview/mod.rs
+++ b/src/overview/mod.rs
@ -23,7 +23,10 @@ use std::path::Path;
 use serde::Deserialize;
 use tracing::{event, instrument, Level};
-use crate::misc::{current_date, get_cache_dir, Error};
+use crate::{
    misc::{current_date, get_cache_dir, Error},
    Typ,
 };
 use ris_structure::OgdSearchResult;
@ -57,20 +60,20 @@ use ris_structure::OgdSearchResult;
 ///
 /// # Example
 /// ```
-/// use risp::overview::parse;
+/// use risp::{Typ, overview::parse};
 ///
-/// let list_with_xml_links_to_paragraphs = parse(10001905).unwrap();
+/// let list_with_xml_links_to_paragraphs = parse(&Typ::Bund, 10001905).unwrap();
 /// assert_eq!(list_with_xml_links_to_paragraphs.len(), 31); // TEG has 31 paragraphs
 /// assert_eq!(list_with_xml_links_to_paragraphs[0], "https://www.ris.bka.gv.at/Dokumente/Bundesnormen/NOR12025190/NOR12025190.xml"); // Link to first paragraph
 /// ```
-pub fn parse(law_id: usize) -> Result<Vec<String>, Error> {
+pub fn parse(typ: &Typ, law_id: usize) -> Result<Vec<String>, Error> {
    let mut page = 1;
    let mut skip = true;
    let mut ret = Vec::new();
    loop {
        //info!("=== Fetching overview page #{page} ===");
        event!(Level::INFO, "Fetching over page #{page}");
-        let json = fetch_page(law_id, page)?;
+        let json = fetch_page(typ, law_id, page)?;
        let (cont, nodes) = parse_from_str(&json, skip)?;
        for n in nodes {
            ret.push(n.clone());
@ -111,7 +114,7 @@ fn parse_from_str(content: &str, skip_first: bool) -> Result<(bool, Vec<String>)
    Ok((true, ret))
 }
-fn fetch_page(overview_id: usize, page: usize) -> Result<String, Error> {
+fn fetch_page(typ: &Typ, overview_id: usize, page: usize) -> Result<String, Error> {
    use std::fs;
    let expected_filename = format!("{}law-{overview_id}-{page}", get_cache_dir()?);
@ -126,15 +129,37 @@ fn fetch_page(overview_id: usize, page: usize) -> Result<String, Error> {
            Level::INFO,
            "Not finding law_id {overview_id} (page {page}) in the cache, downloading..."
        );
-        let data = ureq::post("https://data.bka.gv.at/ris/api/v2.6/Bundesrecht")
+        let (path, application, additional_params) = match typ {
-            .send_form(&[
+            Typ::Bund => ("Bundesrecht", "BrKons", None),
-                ("Applikation", "BrKons"),
+            Typ::Land(land) => {
-                ("Gesetzesnummer", &format!("{overview_id}")),
+                let additional = if land == "OÖ" {
                    Some(("Bundesland.SucheInOberoesterreich", "true"))
                } else {
                    None
                };
                ("Landesrecht", "LrKons", additional)
            }
        };
        let overview_id = format!("{overview_id}");
        let page = format!("{page}");
        let current_date = current_date();
        let mut form_params = vec![
            ("Applikation", application),
            ("Gesetzesnummer", &overview_id),
            ("DokumenteProSeite", "OneHundred"),
-                ("Seitennummer", &format!("{page}")),
+            ("Seitennummer", &page),
-                ("Fassung.FassungVom", &current_date()),
+            ("Fassung.FassungVom", &current_date),
-            ])?
+        ];
        if let Some(additional) = additional_params {
            form_params.push(additional);
        }
        let data = ureq::post(&format!("https://data.bka.gv.at/ris/api/v2.6/{path}"))
            .send_form(&form_params)?
            .into_string()?;
        let path = Path::new(&expected_filename);
        if let Some(parent) = path.parent() {
            // Try to create the directory (and any necessary parent directories)
@ -158,9 +183,9 @@ mod tests {
        for config in configs {
            let path = format!("{}", config.unwrap().path().display());
-            let (law_id, _, _) = Config::load(&path).unwrap();
+            let (typ, law_id, _, _) = Config::load(&path).unwrap();
-            let actual = parse(law_id).unwrap();
+            let actual = parse(&typ, law_id).unwrap();
            let expected_path = format!("./data/expected/overview/{law_id}");
            match fs::read_to_string(&expected_path) {
                Ok(expected) => {
--- a/src/overview/ris_structure.rs
+++ b/src/overview/ris_structure.rs
@ -111,8 +111,16 @@ pub(crate) struct Metadata {
    #[serde(rename = "Allgemein")]
    general: GeneralMetadata,
    #[serde(flatten)]
    law_type: LawType,
 }
 /// Jurisdiction-specific part of the metadata, flattened into `Metadata`.
 /// NOTE(review): presumably the key present in the API response ("Bundesrecht"
 /// or "Landesrecht") selects the variant — confirm against real responses.
 #[derive(Deserialize)]
 enum LawType {
    // Federal-law metadata, stored under the "Bundesrecht" key.
    #[serde(rename = "Bundesrecht")]
-    fed: FedMetadata,
+    Federal(FedMetadata),
    // State-law metadata, stored under the "Landesrecht" key.
    #[serde(rename = "Landesrecht")]
    State(LocMetadata),
 }
 #[derive(Deserialize)]
@ -150,6 +158,18 @@ pub(crate) struct GeneralMetadata {
    document_url: String,
 }
 // TODO: check whether the API delivers more fields here that are worth capturing.
 /// Metadata carried by the `Landesrecht` (state law) variant of `LawType`.
 #[derive(Deserialize)]
 #[allow(dead_code)]
 #[serde(rename_all = "PascalCase")]
 pub(crate) struct LocMetadata {
    /// Short title, read from the `Kurztitel` key.
    #[serde(rename = "Kurztitel")]
    short_title: String,
    /// Full title, read from the `Titel` key; may be absent.
    #[serde(rename = "Titel")]
    title: Option<String>,
 }
 #[derive(Deserialize)]
 #[allow(dead_code)]
 #[serde(rename_all = "PascalCase")]
--- a/src/paragraph/mod.rs
+++ b/src/paragraph/mod.rs
@ -105,7 +105,7 @@ impl Parser {
    /// use risp::{Config, law::{Law, Heading, Content, Section, HeadingContent}};
    /// use std::path::Path;
    ///
-    /// let (_, mut builder, parser) = Config::load(Path::new("data/configs/abgb.toml")).unwrap();
+    /// let (_, _, mut builder, parser) = Config::load(Path::new("data/configs/abgb.toml")).unwrap();
    /// let result = parser.parse("https://www.ris.bka.gv.at/Dokumente/Bundesnormen/NOR12017691/NOR12017691.xml", &mut builder).unwrap();
    ///
    /// let law: Law = builder.into();
@ -223,7 +223,7 @@ mod tests {
            let path = format!("{}", config.unwrap().path().display());
            println!("Testing {path}");
-            let (law_id, mut builder, parser) = Config::load(&path).unwrap();
+            let (_, law_id, mut builder, parser) = Config::load(&path).unwrap();
            let paragraph_path = format!("./data/expected/overview/{law_id}");
            let expected_path = format!("./data/expected/par/{law_id}");