allow specifying applications, preparation to also add 'landesgesetze'
All checks were successful
CI/CD Pipeline / test (push) Successful in 3m3s
All checks were successful
CI/CD Pipeline / test (push) Successful in 3m3s
This commit is contained in:
parent
f00d44f275
commit
0154d00c58
@ -19,6 +19,7 @@
|
|||||||
use crate::law::{self, responsible::*};
|
use crate::law::{self, responsible::*};
|
||||||
use crate::misc::Error;
|
use crate::misc::Error;
|
||||||
use crate::paragraph::Parser;
|
use crate::paragraph::Parser;
|
||||||
|
use crate::{default_type, Typ};
|
||||||
use crate::{law::ClassifierApplicable, misc};
|
use crate::{law::ClassifierApplicable, misc};
|
||||||
use serde::Deserialize;
|
use serde::Deserialize;
|
||||||
use std::fs;
|
use std::fs;
|
||||||
@ -90,13 +91,13 @@ impl Config {
|
|||||||
/// use risp::Config;
|
/// use risp::Config;
|
||||||
/// use std::path::Path;
|
/// use std::path::Path;
|
||||||
///
|
///
|
||||||
/// let (law_id, builder, parser) = Config::load(Path::new("data/configs/abgb.toml")).unwrap();
|
/// let (_, law_id, builder, parser) = Config::load(Path::new("data/configs/abgb.toml")).unwrap();
|
||||||
///
|
///
|
||||||
/// assert_eq!(law_id, 10001622);
|
/// assert_eq!(law_id, 10001622);
|
||||||
/// ```
|
/// ```
|
||||||
pub fn load<P: AsRef<Path> + std::fmt::Debug>(
|
pub fn load<P: AsRef<Path> + std::fmt::Debug>(
|
||||||
path: P,
|
path: P,
|
||||||
) -> Result<(usize, law::Builder, Parser), Error> {
|
) -> Result<(Typ, usize, law::Builder, Parser), Error> {
|
||||||
info!("Using cache dir: {}", misc::get_cache_dir().unwrap());
|
info!("Using cache dir: {}", misc::get_cache_dir().unwrap());
|
||||||
|
|
||||||
let config_str = fs::read_to_string(path)?;
|
let config_str = fs::read_to_string(path)?;
|
||||||
@ -152,7 +153,7 @@ impl Config {
|
|||||||
);
|
);
|
||||||
parser.move_para_headers_into_content();
|
parser.move_para_headers_into_content();
|
||||||
}
|
}
|
||||||
Ok((config.law.id, builder, parser))
|
Ok((config.law.typ, config.law.id, builder, parser))
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -160,6 +161,8 @@ impl Config {
|
|||||||
struct Law {
|
struct Law {
|
||||||
id: usize,
|
id: usize,
|
||||||
name: String,
|
name: String,
|
||||||
|
#[serde(default = "default_type")]
|
||||||
|
typ: Typ,
|
||||||
par_sign: Option<String>,
|
par_sign: Option<String>,
|
||||||
classifiers: Option<Vec<Classifier>>,
|
classifiers: Option<Vec<Classifier>>,
|
||||||
}
|
}
|
||||||
|
@ -76,8 +76,8 @@ impl Law {
|
|||||||
pub fn from_config<P: AsRef<Path> + tracing::Value + std::fmt::Debug>(
|
pub fn from_config<P: AsRef<Path> + tracing::Value + std::fmt::Debug>(
|
||||||
path: P,
|
path: P,
|
||||||
) -> Result<Law, Error> {
|
) -> Result<Law, Error> {
|
||||||
let (law_id, mut builder, parser) = Config::load(path)?;
|
let (typ, law_id, mut builder, parser) = Config::load(path)?;
|
||||||
let pars = parse(law_id)?;
|
let pars = parse(&typ, law_id)?;
|
||||||
|
|
||||||
for par in pars {
|
for par in pars {
|
||||||
let cont = parser.parse(&par, &mut builder)?;
|
let cont = parser.parse(&par, &mut builder)?;
|
||||||
@ -308,7 +308,7 @@ impl Builder {
|
|||||||
/// use risp::{Config, law::{Law, Heading}};
|
/// use risp::{Config, law::{Law, Heading}};
|
||||||
/// use std::path::Path;
|
/// use std::path::Path;
|
||||||
///
|
///
|
||||||
/// let (_, mut builder, _) = Config::load(Path::new("data/configs/abgb.toml")).unwrap();
|
/// let (_, _, mut builder, _) = Config::load(Path::new("data/configs/abgb.toml")).unwrap();
|
||||||
///
|
///
|
||||||
/// builder.new_header("1. Theil");
|
/// builder.new_header("1. Theil");
|
||||||
///
|
///
|
||||||
@ -401,7 +401,7 @@ impl Builder {
|
|||||||
/// use risp::{Config, law::{Law, Heading}};
|
/// use risp::{Config, law::{Law, Heading}};
|
||||||
/// use std::path::Path;
|
/// use std::path::Path;
|
||||||
///
|
///
|
||||||
/// let (_, mut builder, _) = Config::load(Path::new("data/configs/abgb.toml")).unwrap();
|
/// let (_, _, mut builder, _) = Config::load(Path::new("data/configs/abgb.toml")).unwrap();
|
||||||
///
|
///
|
||||||
/// builder.new_header("1. Theil");
|
/// builder.new_header("1. Theil");
|
||||||
/// builder.new_desc("Description of the first header");
|
/// builder.new_desc("Description of the first header");
|
||||||
@ -460,7 +460,7 @@ impl Builder {
|
|||||||
/// use risp::{Config, law::{Law, Heading, Content, HeadingContent, Section}};
|
/// use risp::{Config, law::{Law, Heading, Content, HeadingContent, Section}};
|
||||||
/// use std::path::Path;
|
/// use std::path::Path;
|
||||||
///
|
///
|
||||||
/// let (_, mut builder, _) = Config::load(Path::new("data/configs/abgb.toml")).unwrap();
|
/// let (_, _, mut builder, _) = Config::load(Path::new("data/configs/abgb.toml")).unwrap();
|
||||||
/// builder.new_header("1. Theil");
|
/// builder.new_header("1. Theil");
|
||||||
///
|
///
|
||||||
/// let par = "§ 1".to_string();
|
/// let par = "§ 1".to_string();
|
||||||
@ -736,7 +736,7 @@ mod tests {
|
|||||||
for config in configs {
|
for config in configs {
|
||||||
let path = format!("{}", config.unwrap().path().display());
|
let path = format!("{}", config.unwrap().path().display());
|
||||||
if path.contains("abgb") {
|
if path.contains("abgb") {
|
||||||
let (_law_id, mut builder, _) = Config::load(&path).unwrap();
|
let (_, _law_id, mut builder, _) = Config::load(&path).unwrap();
|
||||||
builder.new_header("Nullter Theil. Einleitung");
|
builder.new_header("Nullter Theil. Einleitung");
|
||||||
builder.new_par("§ 1.".into(), Content::Text("My test law text.".into()));
|
builder.new_par("§ 1.".into(), Content::Text("My test law text.".into()));
|
||||||
builder.new_header("Erster Hauptstück. Einleitung");
|
builder.new_header("Erster Hauptstück. Einleitung");
|
||||||
|
11
src/lib.rs
11
src/lib.rs
@ -20,7 +20,18 @@ pub use config::Config;
|
|||||||
mod misc;
|
mod misc;
|
||||||
pub use misc::clear_cache;
|
pub use misc::clear_cache;
|
||||||
pub use misc::Error;
|
pub use misc::Error;
|
||||||
|
use serde::Deserialize;
|
||||||
|
|
||||||
pub mod law;
|
pub mod law;
|
||||||
pub mod overview;
|
pub mod overview;
|
||||||
pub mod paragraph;
|
pub mod paragraph;
|
||||||
|
|
||||||
|
#[derive(Debug, Deserialize)]
|
||||||
|
pub enum Typ {
|
||||||
|
Bund,
|
||||||
|
Land(String),
|
||||||
|
}
|
||||||
|
|
||||||
|
fn default_type() -> Typ {
|
||||||
|
Typ::Bund
|
||||||
|
}
|
||||||
|
@ -58,7 +58,7 @@ fn main() {
|
|||||||
} else {
|
} else {
|
||||||
let config = &args.config.unwrap(); // ok, checked with clap
|
let config = &args.config.unwrap(); // ok, checked with clap
|
||||||
if let Some(par_url) = &args.par_url {
|
if let Some(par_url) = &args.par_url {
|
||||||
let (_, mut builder, parser) = Config::load(config).unwrap();
|
let (_, _, mut builder, parser) = Config::load(config).unwrap();
|
||||||
builder.add_classifier(Classifier::new("always-true", Arc::new(always_true)));
|
builder.add_classifier(Classifier::new("always-true", Arc::new(always_true)));
|
||||||
// builder.new_header("initial");
|
// builder.new_header("initial");
|
||||||
parser.parse(par_url, &mut builder).unwrap();
|
parser.parse(par_url, &mut builder).unwrap();
|
||||||
|
@ -23,7 +23,10 @@ use std::path::Path;
|
|||||||
use serde::Deserialize;
|
use serde::Deserialize;
|
||||||
use tracing::{event, instrument, Level};
|
use tracing::{event, instrument, Level};
|
||||||
|
|
||||||
use crate::misc::{current_date, get_cache_dir, Error};
|
use crate::{
|
||||||
|
misc::{current_date, get_cache_dir, Error},
|
||||||
|
Typ,
|
||||||
|
};
|
||||||
|
|
||||||
use ris_structure::OgdSearchResult;
|
use ris_structure::OgdSearchResult;
|
||||||
|
|
||||||
@ -57,20 +60,20 @@ use ris_structure::OgdSearchResult;
|
|||||||
///
|
///
|
||||||
/// # Example
|
/// # Example
|
||||||
/// ```
|
/// ```
|
||||||
/// use risp::overview::parse;
|
/// use risp::{Typ, overview::parse};
|
||||||
///
|
///
|
||||||
/// let list_with_xml_links_to_paragraphs = parse(10001905).unwrap();
|
/// let list_with_xml_links_to_paragraphs = parse(&Typ::Bund, 10001905).unwrap();
|
||||||
/// assert_eq!(list_with_xml_links_to_paragraphs.len(), 31); // TEG has 31 paragraphs
|
/// assert_eq!(list_with_xml_links_to_paragraphs.len(), 31); // TEG has 31 paragraphs
|
||||||
/// assert_eq!(list_with_xml_links_to_paragraphs[0], "https://www.ris.bka.gv.at/Dokumente/Bundesnormen/NOR12025190/NOR12025190.xml"); // Link to first paragraph
|
/// assert_eq!(list_with_xml_links_to_paragraphs[0], "https://www.ris.bka.gv.at/Dokumente/Bundesnormen/NOR12025190/NOR12025190.xml"); // Link to first paragraph
|
||||||
/// ```
|
/// ```
|
||||||
pub fn parse(law_id: usize) -> Result<Vec<String>, Error> {
|
pub fn parse(typ: &Typ, law_id: usize) -> Result<Vec<String>, Error> {
|
||||||
let mut page = 1;
|
let mut page = 1;
|
||||||
let mut skip = true;
|
let mut skip = true;
|
||||||
let mut ret = Vec::new();
|
let mut ret = Vec::new();
|
||||||
loop {
|
loop {
|
||||||
//info!("=== Fetching overview page #{page} ===");
|
//info!("=== Fetching overview page #{page} ===");
|
||||||
event!(Level::INFO, "Fetching over page #{page}");
|
event!(Level::INFO, "Fetching over page #{page}");
|
||||||
let json = fetch_page(law_id, page)?;
|
let json = fetch_page(typ, law_id, page)?;
|
||||||
let (cont, nodes) = parse_from_str(&json, skip)?;
|
let (cont, nodes) = parse_from_str(&json, skip)?;
|
||||||
for n in nodes {
|
for n in nodes {
|
||||||
ret.push(n.clone());
|
ret.push(n.clone());
|
||||||
@ -111,7 +114,7 @@ fn parse_from_str(content: &str, skip_first: bool) -> Result<(bool, Vec<String>)
|
|||||||
Ok((true, ret))
|
Ok((true, ret))
|
||||||
}
|
}
|
||||||
|
|
||||||
fn fetch_page(overview_id: usize, page: usize) -> Result<String, Error> {
|
fn fetch_page(typ: &Typ, overview_id: usize, page: usize) -> Result<String, Error> {
|
||||||
use std::fs;
|
use std::fs;
|
||||||
|
|
||||||
let expected_filename = format!("{}law-{overview_id}-{page}", get_cache_dir()?);
|
let expected_filename = format!("{}law-{overview_id}-{page}", get_cache_dir()?);
|
||||||
@ -126,15 +129,37 @@ fn fetch_page(overview_id: usize, page: usize) -> Result<String, Error> {
|
|||||||
Level::INFO,
|
Level::INFO,
|
||||||
"Not finding law_id {overview_id} (page {page}) in the cache, downloading..."
|
"Not finding law_id {overview_id} (page {page}) in the cache, downloading..."
|
||||||
);
|
);
|
||||||
let data = ureq::post("https://data.bka.gv.at/ris/api/v2.6/Bundesrecht")
|
let (path, application, additional_params) = match typ {
|
||||||
.send_form(&[
|
Typ::Bund => ("Bundesrecht", "BrKons", None),
|
||||||
("Applikation", "BrKons"),
|
Typ::Land(land) => {
|
||||||
("Gesetzesnummer", &format!("{overview_id}")),
|
let additional = if land == "OÖ" {
|
||||||
|
Some(("Bundesland.SucheInOberoesterreich", "true"))
|
||||||
|
} else {
|
||||||
|
None
|
||||||
|
};
|
||||||
|
("Landesrecht", "LrKons", additional)
|
||||||
|
}
|
||||||
|
};
|
||||||
|
|
||||||
|
let overview_id = format!("{overview_id}");
|
||||||
|
let page = format!("{page}");
|
||||||
|
let current_date = current_date();
|
||||||
|
let mut form_params = vec![
|
||||||
|
("Applikation", application),
|
||||||
|
("Gesetzesnummer", &overview_id),
|
||||||
("DokumenteProSeite", "OneHundred"),
|
("DokumenteProSeite", "OneHundred"),
|
||||||
("Seitennummer", &format!("{page}")),
|
("Seitennummer", &page),
|
||||||
("Fassung.FassungVom", &current_date()),
|
("Fassung.FassungVom", &current_date),
|
||||||
])?
|
];
|
||||||
|
|
||||||
|
if let Some(additional) = additional_params {
|
||||||
|
form_params.push(additional);
|
||||||
|
}
|
||||||
|
|
||||||
|
let data = ureq::post(&format!("https://data.bka.gv.at/ris/api/v2.6/{path}"))
|
||||||
|
.send_form(&form_params)?
|
||||||
.into_string()?;
|
.into_string()?;
|
||||||
|
|
||||||
let path = Path::new(&expected_filename);
|
let path = Path::new(&expected_filename);
|
||||||
if let Some(parent) = path.parent() {
|
if let Some(parent) = path.parent() {
|
||||||
// Try to create the directory (and any necessary parent directories)
|
// Try to create the directory (and any necessary parent directories)
|
||||||
@ -158,9 +183,9 @@ mod tests {
|
|||||||
for config in configs {
|
for config in configs {
|
||||||
let path = format!("{}", config.unwrap().path().display());
|
let path = format!("{}", config.unwrap().path().display());
|
||||||
|
|
||||||
let (law_id, _, _) = Config::load(&path).unwrap();
|
let (typ, law_id, _, _) = Config::load(&path).unwrap();
|
||||||
|
|
||||||
let actual = parse(law_id).unwrap();
|
let actual = parse(&typ, law_id).unwrap();
|
||||||
let expected_path = format!("./data/expected/overview/{law_id}");
|
let expected_path = format!("./data/expected/overview/{law_id}");
|
||||||
match fs::read_to_string(&expected_path) {
|
match fs::read_to_string(&expected_path) {
|
||||||
Ok(expected) => {
|
Ok(expected) => {
|
||||||
|
@ -111,8 +111,16 @@ pub(crate) struct Metadata {
|
|||||||
#[serde(rename = "Allgemein")]
|
#[serde(rename = "Allgemein")]
|
||||||
general: GeneralMetadata,
|
general: GeneralMetadata,
|
||||||
|
|
||||||
|
#[serde(flatten)]
|
||||||
|
law_type: LawType,
|
||||||
|
}
|
||||||
|
|
||||||
|
#[derive(Deserialize)]
|
||||||
|
enum LawType {
|
||||||
#[serde(rename = "Bundesrecht")]
|
#[serde(rename = "Bundesrecht")]
|
||||||
fed: FedMetadata,
|
Federal(FedMetadata),
|
||||||
|
#[serde(rename = "Landesrecht")]
|
||||||
|
State(LocMetadata),
|
||||||
}
|
}
|
||||||
|
|
||||||
#[derive(Deserialize)]
|
#[derive(Deserialize)]
|
||||||
@ -150,6 +158,18 @@ pub(crate) struct GeneralMetadata {
|
|||||||
document_url: String,
|
document_url: String,
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// TODO: check if more data is given to me (nom nom nom)
|
||||||
|
#[derive(Deserialize)]
|
||||||
|
#[allow(dead_code)]
|
||||||
|
#[serde(rename_all = "PascalCase")]
|
||||||
|
pub(crate) struct LocMetadata {
|
||||||
|
#[serde(rename = "Kurztitel")]
|
||||||
|
short_title: String,
|
||||||
|
|
||||||
|
#[serde(rename = "Titel")]
|
||||||
|
title: Option<String>,
|
||||||
|
}
|
||||||
|
|
||||||
#[derive(Deserialize)]
|
#[derive(Deserialize)]
|
||||||
#[allow(dead_code)]
|
#[allow(dead_code)]
|
||||||
#[serde(rename_all = "PascalCase")]
|
#[serde(rename_all = "PascalCase")]
|
||||||
|
@ -105,7 +105,7 @@ impl Parser {
|
|||||||
/// use risp::{Config, law::{Law, Heading, Content, Section, HeadingContent}};
|
/// use risp::{Config, law::{Law, Heading, Content, Section, HeadingContent}};
|
||||||
/// use std::path::Path;
|
/// use std::path::Path;
|
||||||
///
|
///
|
||||||
/// let (_, mut builder, parser) = Config::load(Path::new("data/configs/abgb.toml")).unwrap();
|
/// let (_, _, mut builder, parser) = Config::load(Path::new("data/configs/abgb.toml")).unwrap();
|
||||||
/// let result = parser.parse("https://www.ris.bka.gv.at/Dokumente/Bundesnormen/NOR12017691/NOR12017691.xml", &mut builder).unwrap();
|
/// let result = parser.parse("https://www.ris.bka.gv.at/Dokumente/Bundesnormen/NOR12017691/NOR12017691.xml", &mut builder).unwrap();
|
||||||
///
|
///
|
||||||
/// let law: Law = builder.into();
|
/// let law: Law = builder.into();
|
||||||
@ -223,7 +223,7 @@ mod tests {
|
|||||||
let path = format!("{}", config.unwrap().path().display());
|
let path = format!("{}", config.unwrap().path().display());
|
||||||
println!("Testing {path}");
|
println!("Testing {path}");
|
||||||
|
|
||||||
let (law_id, mut builder, parser) = Config::load(&path).unwrap();
|
let (_, law_id, mut builder, parser) = Config::load(&path).unwrap();
|
||||||
|
|
||||||
let paragraph_path = format!("./data/expected/overview/{law_id}");
|
let paragraph_path = format!("./data/expected/overview/{law_id}");
|
||||||
let expected_path = format!("./data/expected/par/{law_id}");
|
let expected_path = format!("./data/expected/par/{law_id}");
|
||||||
|
Loading…
Reference in New Issue
Block a user