Allow specifying the application, in preparation for also adding 'Landesgesetze'
All checks were successful
CI/CD Pipeline / test (push) Successful in 3m3s

This commit is contained in:
philipp 2024-08-22 10:46:59 +02:00
parent f00d44f275
commit 0154d00c58
7 changed files with 88 additions and 29 deletions

View File

@ -19,6 +19,7 @@
use crate::law::{self, responsible::*}; use crate::law::{self, responsible::*};
use crate::misc::Error; use crate::misc::Error;
use crate::paragraph::Parser; use crate::paragraph::Parser;
use crate::{default_type, Typ};
use crate::{law::ClassifierApplicable, misc}; use crate::{law::ClassifierApplicable, misc};
use serde::Deserialize; use serde::Deserialize;
use std::fs; use std::fs;
@ -90,13 +91,13 @@ impl Config {
/// use risp::Config; /// use risp::Config;
/// use std::path::Path; /// use std::path::Path;
/// ///
/// let (law_id, builder, parser) = Config::load(Path::new("data/configs/abgb.toml")).unwrap(); /// let (_, law_id, builder, parser) = Config::load(Path::new("data/configs/abgb.toml")).unwrap();
/// ///
/// assert_eq!(law_id, 10001622); /// assert_eq!(law_id, 10001622);
/// ``` /// ```
pub fn load<P: AsRef<Path> + std::fmt::Debug>( pub fn load<P: AsRef<Path> + std::fmt::Debug>(
path: P, path: P,
) -> Result<(usize, law::Builder, Parser), Error> { ) -> Result<(Typ, usize, law::Builder, Parser), Error> {
info!("Using cache dir: {}", misc::get_cache_dir().unwrap()); info!("Using cache dir: {}", misc::get_cache_dir().unwrap());
let config_str = fs::read_to_string(path)?; let config_str = fs::read_to_string(path)?;
@ -152,7 +153,7 @@ impl Config {
); );
parser.move_para_headers_into_content(); parser.move_para_headers_into_content();
} }
Ok((config.law.id, builder, parser)) Ok((config.law.typ, config.law.id, builder, parser))
} }
} }
@ -160,6 +161,8 @@ impl Config {
struct Law { struct Law {
id: usize, id: usize,
name: String, name: String,
#[serde(default = "default_type")]
typ: Typ,
par_sign: Option<String>, par_sign: Option<String>,
classifiers: Option<Vec<Classifier>>, classifiers: Option<Vec<Classifier>>,
} }

View File

@ -76,8 +76,8 @@ impl Law {
pub fn from_config<P: AsRef<Path> + tracing::Value + std::fmt::Debug>( pub fn from_config<P: AsRef<Path> + tracing::Value + std::fmt::Debug>(
path: P, path: P,
) -> Result<Law, Error> { ) -> Result<Law, Error> {
let (law_id, mut builder, parser) = Config::load(path)?; let (typ, law_id, mut builder, parser) = Config::load(path)?;
let pars = parse(law_id)?; let pars = parse(&typ, law_id)?;
for par in pars { for par in pars {
let cont = parser.parse(&par, &mut builder)?; let cont = parser.parse(&par, &mut builder)?;
@ -308,7 +308,7 @@ impl Builder {
/// use risp::{Config, law::{Law, Heading}}; /// use risp::{Config, law::{Law, Heading}};
/// use std::path::Path; /// use std::path::Path;
/// ///
/// let (_, mut builder, _) = Config::load(Path::new("data/configs/abgb.toml")).unwrap(); /// let (_, _, mut builder, _) = Config::load(Path::new("data/configs/abgb.toml")).unwrap();
/// ///
/// builder.new_header("1. Theil"); /// builder.new_header("1. Theil");
/// ///
@ -401,7 +401,7 @@ impl Builder {
/// use risp::{Config, law::{Law, Heading}}; /// use risp::{Config, law::{Law, Heading}};
/// use std::path::Path; /// use std::path::Path;
/// ///
/// let (_, mut builder, _) = Config::load(Path::new("data/configs/abgb.toml")).unwrap(); /// let (_, _, mut builder, _) = Config::load(Path::new("data/configs/abgb.toml")).unwrap();
/// ///
/// builder.new_header("1. Theil"); /// builder.new_header("1. Theil");
/// builder.new_desc("Description of the first header"); /// builder.new_desc("Description of the first header");
@ -460,7 +460,7 @@ impl Builder {
/// use risp::{Config, law::{Law, Heading, Content, HeadingContent, Section}}; /// use risp::{Config, law::{Law, Heading, Content, HeadingContent, Section}};
/// use std::path::Path; /// use std::path::Path;
/// ///
/// let (_, mut builder, _) = Config::load(Path::new("data/configs/abgb.toml")).unwrap(); /// let (_, _, mut builder, _) = Config::load(Path::new("data/configs/abgb.toml")).unwrap();
/// builder.new_header("1. Theil"); /// builder.new_header("1. Theil");
/// ///
/// let par = "§ 1".to_string(); /// let par = "§ 1".to_string();
@ -736,7 +736,7 @@ mod tests {
for config in configs { for config in configs {
let path = format!("{}", config.unwrap().path().display()); let path = format!("{}", config.unwrap().path().display());
if path.contains("abgb") { if path.contains("abgb") {
let (_law_id, mut builder, _) = Config::load(&path).unwrap(); let (_, _law_id, mut builder, _) = Config::load(&path).unwrap();
builder.new_header("Nullter Theil. Einleitung"); builder.new_header("Nullter Theil. Einleitung");
builder.new_par("§ 1.".into(), Content::Text("My test law text.".into())); builder.new_par("§ 1.".into(), Content::Text("My test law text.".into()));
builder.new_header("Erster Hauptstück. Einleitung"); builder.new_header("Erster Hauptstück. Einleitung");

View File

@ -20,7 +20,18 @@ pub use config::Config;
mod misc; mod misc;
pub use misc::clear_cache; pub use misc::clear_cache;
pub use misc::Error; pub use misc::Error;
use serde::Deserialize;
pub mod law; pub mod law;
pub mod overview; pub mod overview;
pub mod paragraph; pub mod paragraph;
/// Kind of law a configuration refers to: federal or state.
///
/// The variant decides which endpoint and `Applikation` value are used when the
/// overview pages of a law are fetched.
#[derive(Debug, Clone, PartialEq, Eq, Deserialize)]
pub enum Typ {
    /// Federal law (queried as `Bundesrecht`).
    Bund,
    /// State law (queried as `Landesrecht`).
    ///
    /// NOTE(review): the payload presumably identifies the federal state — confirm
    /// the expected values against the code that consumes it.
    Land(String),
}
/// Returns the fallback law type, `Typ::Bund`.
///
/// Referenced by name from a `#[serde(default = "default_type")]` attribute, so
/// renaming this function requires updating that attribute string as well.
fn default_type() -> Typ {
Typ::Bund
}

View File

@ -58,7 +58,7 @@ fn main() {
} else { } else {
let config = &args.config.unwrap(); // ok, checked with clap let config = &args.config.unwrap(); // ok, checked with clap
if let Some(par_url) = &args.par_url { if let Some(par_url) = &args.par_url {
let (_, mut builder, parser) = Config::load(config).unwrap(); let (_, _, mut builder, parser) = Config::load(config).unwrap();
builder.add_classifier(Classifier::new("always-true", Arc::new(always_true))); builder.add_classifier(Classifier::new("always-true", Arc::new(always_true)));
// builder.new_header("initial"); // builder.new_header("initial");
parser.parse(par_url, &mut builder).unwrap(); parser.parse(par_url, &mut builder).unwrap();

View File

@ -23,7 +23,10 @@ use std::path::Path;
use serde::Deserialize; use serde::Deserialize;
use tracing::{event, instrument, Level}; use tracing::{event, instrument, Level};
use crate::misc::{current_date, get_cache_dir, Error}; use crate::{
misc::{current_date, get_cache_dir, Error},
Typ,
};
use ris_structure::OgdSearchResult; use ris_structure::OgdSearchResult;
@ -57,20 +60,20 @@ use ris_structure::OgdSearchResult;
/// ///
/// # Example /// # Example
/// ``` /// ```
/// use risp::overview::parse; /// use risp::{Typ, overview::parse};
/// ///
/// let list_with_xml_links_to_paragraphs = parse(10001905).unwrap(); /// let list_with_xml_links_to_paragraphs = parse(&Typ::Bund, 10001905).unwrap();
/// assert_eq!(list_with_xml_links_to_paragraphs.len(), 31); // TEG has 31 paragraphs /// assert_eq!(list_with_xml_links_to_paragraphs.len(), 31); // TEG has 31 paragraphs
/// assert_eq!(list_with_xml_links_to_paragraphs[0], "https://www.ris.bka.gv.at/Dokumente/Bundesnormen/NOR12025190/NOR12025190.xml"); // Link to first paragraph /// assert_eq!(list_with_xml_links_to_paragraphs[0], "https://www.ris.bka.gv.at/Dokumente/Bundesnormen/NOR12025190/NOR12025190.xml"); // Link to first paragraph
/// ``` /// ```
pub fn parse(law_id: usize) -> Result<Vec<String>, Error> { pub fn parse(typ: &Typ, law_id: usize) -> Result<Vec<String>, Error> {
let mut page = 1; let mut page = 1;
let mut skip = true; let mut skip = true;
let mut ret = Vec::new(); let mut ret = Vec::new();
loop { loop {
//info!("=== Fetching overview page #{page} ==="); //info!("=== Fetching overview page #{page} ===");
event!(Level::INFO, "Fetching over page #{page}"); event!(Level::INFO, "Fetching over page #{page}");
let json = fetch_page(law_id, page)?; let json = fetch_page(typ, law_id, page)?;
let (cont, nodes) = parse_from_str(&json, skip)?; let (cont, nodes) = parse_from_str(&json, skip)?;
for n in nodes { for n in nodes {
ret.push(n.clone()); ret.push(n.clone());
@ -111,7 +114,7 @@ fn parse_from_str(content: &str, skip_first: bool) -> Result<(bool, Vec<String>)
Ok((true, ret)) Ok((true, ret))
} }
fn fetch_page(overview_id: usize, page: usize) -> Result<String, Error> { fn fetch_page(typ: &Typ, overview_id: usize, page: usize) -> Result<String, Error> {
use std::fs; use std::fs;
let expected_filename = format!("{}law-{overview_id}-{page}", get_cache_dir()?); let expected_filename = format!("{}law-{overview_id}-{page}", get_cache_dir()?);
@ -126,15 +129,37 @@ fn fetch_page(overview_id: usize, page: usize) -> Result<String, Error> {
Level::INFO, Level::INFO,
"Not finding law_id {overview_id} (page {page}) in the cache, downloading..." "Not finding law_id {overview_id} (page {page}) in the cache, downloading..."
); );
let data = ureq::post("https://data.bka.gv.at/ris/api/v2.6/Bundesrecht") let (path, application, additional_params) = match typ {
.send_form(&[ Typ::Bund => ("Bundesrecht", "BrKons", None),
("Applikation", "BrKons"), Typ::Land(land) => {
("Gesetzesnummer", &format!("{overview_id}")), let additional = if land == "" {
("DokumenteProSeite", "OneHundred"), Some(("Bundesland.SucheInOberoesterreich", "true"))
("Seitennummer", &format!("{page}")), } else {
("Fassung.FassungVom", &current_date()), None
])? };
("Landesrecht", "LrKons", additional)
}
};
let overview_id = format!("{overview_id}");
let page = format!("{page}");
let current_date = current_date();
let mut form_params = vec![
("Applikation", application),
("Gesetzesnummer", &overview_id),
("DokumenteProSeite", "OneHundred"),
("Seitennummer", &page),
("Fassung.FassungVom", &current_date),
];
if let Some(additional) = additional_params {
form_params.push(additional);
}
let data = ureq::post(&format!("https://data.bka.gv.at/ris/api/v2.6/{path}"))
.send_form(&form_params)?
.into_string()?; .into_string()?;
let path = Path::new(&expected_filename); let path = Path::new(&expected_filename);
if let Some(parent) = path.parent() { if let Some(parent) = path.parent() {
// Try to create the directory (and any necessary parent directories) // Try to create the directory (and any necessary parent directories)
@ -158,9 +183,9 @@ mod tests {
for config in configs { for config in configs {
let path = format!("{}", config.unwrap().path().display()); let path = format!("{}", config.unwrap().path().display());
let (law_id, _, _) = Config::load(&path).unwrap(); let (typ, law_id, _, _) = Config::load(&path).unwrap();
let actual = parse(law_id).unwrap(); let actual = parse(&typ, law_id).unwrap();
let expected_path = format!("./data/expected/overview/{law_id}"); let expected_path = format!("./data/expected/overview/{law_id}");
match fs::read_to_string(&expected_path) { match fs::read_to_string(&expected_path) {
Ok(expected) => { Ok(expected) => {

View File

@ -111,8 +111,16 @@ pub(crate) struct Metadata {
#[serde(rename = "Allgemein")] #[serde(rename = "Allgemein")]
general: GeneralMetadata, general: GeneralMetadata,
#[serde(flatten)]
law_type: LawType,
}
#[derive(Deserialize)]
enum LawType {
#[serde(rename = "Bundesrecht")] #[serde(rename = "Bundesrecht")]
fed: FedMetadata, Federal(FedMetadata),
#[serde(rename = "Landesrecht")]
State(LocMetadata),
} }
#[derive(Deserialize)] #[derive(Deserialize)]
@ -150,6 +158,18 @@ pub(crate) struct GeneralMetadata {
document_url: String, document_url: String,
} }
// TODO: check whether the API response carries more fields than the two captured here.
/// Metadata specific to state law; used as the payload of the `Landesrecht`
/// variant of `LawType`.
#[derive(Deserialize)]
#[allow(dead_code)]
#[serde(rename_all = "PascalCase")]
pub(crate) struct LocMetadata {
/// Short title, read from the `Kurztitel` key.
#[serde(rename = "Kurztitel")]
short_title: String,
/// Title, read from the `Titel` key; optional in the input.
#[serde(rename = "Titel")]
title: Option<String>,
}
#[derive(Deserialize)] #[derive(Deserialize)]
#[allow(dead_code)] #[allow(dead_code)]
#[serde(rename_all = "PascalCase")] #[serde(rename_all = "PascalCase")]

View File

@ -105,7 +105,7 @@ impl Parser {
/// use risp::{Config, law::{Law, Heading, Content, Section, HeadingContent}}; /// use risp::{Config, law::{Law, Heading, Content, Section, HeadingContent}};
/// use std::path::Path; /// use std::path::Path;
/// ///
/// let (_, mut builder, parser) = Config::load(Path::new("data/configs/abgb.toml")).unwrap(); /// let (_, _, mut builder, parser) = Config::load(Path::new("data/configs/abgb.toml")).unwrap();
/// let result = parser.parse("https://www.ris.bka.gv.at/Dokumente/Bundesnormen/NOR12017691/NOR12017691.xml", &mut builder).unwrap(); /// let result = parser.parse("https://www.ris.bka.gv.at/Dokumente/Bundesnormen/NOR12017691/NOR12017691.xml", &mut builder).unwrap();
/// ///
/// let law: Law = builder.into(); /// let law: Law = builder.into();
@ -223,7 +223,7 @@ mod tests {
let path = format!("{}", config.unwrap().path().display()); let path = format!("{}", config.unwrap().path().display());
println!("Testing {path}"); println!("Testing {path}");
let (law_id, mut builder, parser) = Config::load(&path).unwrap(); let (_, law_id, mut builder, parser) = Config::load(&path).unwrap();
let paragraph_path = format!("./data/expected/overview/{law_id}"); let paragraph_path = format!("./data/expected/overview/{law_id}");
let expected_path = format!("./data/expected/par/{law_id}"); let expected_path = format!("./data/expected/par/{law_id}");