risp/src/overview/mod.rs
philipp 33b04234cb
All checks were successful
CI/CD Pipeline / test (push) Successful in 1m51s
use ascii ' in license text
2024-02-15 16:12:14 +01:00

163 lines
5.5 KiB
Rust

// Copyright (C) 2024 Philipp Hofer
//
// Licensed under the EUPL, Version 1.2 or - as soon they will be approved by
// the European Commission - subsequent versions of the EUPL (the "Licence").
// You may not use this work except in compliance with the Licence.
//
// You should have received a copy of the European Union Public License along
// with this program. If not, you may obtain a copy of the Licence at:
// <https://joinup.ec.europa.eu/software/page/eupl>
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the Licence is distributed on an "AS IS" basis,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the Licence for the specific language governing permissions and
// limitations under the Licence.
//! Deals with getting all paragraphs for a given law text
mod ris_structure;
use std::path::Path;
use log::info;
use serde::Deserialize;
use crate::misc::{current_date, get_cache_dir, Error};
use ris_structure::OgdSearchResult;
/// Parses a law text from the Austrian RIS (Rechtsinformationssystem) based on the given `law_id`.
///
/// This function iterates over all pages of the law text, with each page containing a maximum of 100
/// paragraphs in XML format. It extracts and returns the links to each paragraph as XML URLs.
///
/// The first section of the first page is skipped (`skip` is set to true) because it always
/// contains the table of contents. The function continues processing subsequent pages until there
/// are no more pages left to fetch.
///
/// # Parameters
///
/// - `law_id`: The unique identifier of the law in the RIS system.
///
/// # Returns
///
/// - `Ok(Vec<String>)`: A vector of XML file links representing paragraphs from the given law text.
/// - `Err(Error)`: An error if there was an issue fetching or parsing the law text.
///
/// # Errors
///
/// Stops at the first page that cannot be fetched or whose JSON cannot be parsed and returns
/// that error; links collected from earlier pages are discarded.
///
/// # Example
/// ```
/// use risp::overview::parse;
///
/// let list_with_xml_links_to_paragraphs = parse(10001905).unwrap();
/// assert_eq!(list_with_xml_links_to_paragraphs.len(), 31); // TEG has 31 paragraphs
/// assert_eq!(list_with_xml_links_to_paragraphs[0], "https://www.ris.bka.gv.at/Dokumente/Bundesnormen/NOR12025190/NOR12025190.xml"); // Link to first paragraph
/// ```
pub fn parse(law_id: usize) -> Result<Vec<String>, Error> {
    // Pages are requested starting at 1.
    let mut page = 1;
    // Only the first entry of the first page is skipped; reset after page 1.
    let mut skip = true;
    let mut ret = Vec::new();
    loop {
        info!("=== Fetching overview page #{page} ===");
        let json = fetch_page(law_id, page)?;
        let (cont, nodes) = parse_from_str(&json, skip)?;
        // `nodes` is owned, so move the links over instead of cloning each one.
        ret.extend(nodes);
        if !cont {
            break;
        }
        skip = false;
        page += 1;
    }
    Ok(ret)
}
/// Outermost object of an overview page's JSON response.
///
/// Only the `OgdSearchResult` member is deserialized; it carries the data used to list the
/// paragraph links and to decide whether another page follows.
#[derive(Deserialize)]
struct Overview {
    #[serde(rename = "OgdSearchResult")]
    ogd_search_result: OgdSearchResult,
}
/// Extracts the paragraph links from a single overview page.
///
/// # Parameters
///
/// - `content`: The JSON body of one overview page.
/// - `skip_first`: When `true`, the first entry of the page is dropped from the result.
///
/// # Returns
///
/// A tuple `(has_next_page, links)`: `has_next_page` tells the caller whether another page
/// should be fetched, and `links` holds the entries of this page in their original order.
///
/// # Errors
///
/// Returns an error if `content` cannot be deserialized into the expected structure.
fn parse_from_str(content: &str, skip_first: bool) -> Result<(bool, Vec<String>), Error> {
    let wrapper: Overview = serde_json::from_str(content)?;
    // `usize::from(bool)` is 0 or 1, so one statically dispatched chain covers both cases
    // without boxing the iterator.
    let links: Vec<String> = wrapper
        .ogd_search_result
        .get_par()
        .into_iter()
        .skip(usize::from(skip_first))
        .collect();
    Ok((wrapper.ogd_search_result.has_next_page(), links))
}
/// Returns the raw JSON of one overview page, using an on-disk cache.
///
/// The cache file name is built by appending `law-<overview_id>-<page>` directly to the value
/// returned by `get_cache_dir()` (which is therefore presumably expected to end with a path
/// separator). If that file can be read, its content is returned without any network access.
/// Otherwise the page is requested from the RIS API for the current date, written to the cache
/// file and returned.
///
/// # Parameters
///
/// - `overview_id`: The law number sent as `Gesetzesnummer`.
/// - `page`: The page number sent as `Seitennummer`.
///
/// # Errors
///
/// Returns an error if the cache directory cannot be determined, the HTTP request fails, the
/// response body cannot be read, or the downloaded page cannot be stored in the cache
/// (directory creation or file write). Cache I/O failures are reported to the caller instead
/// of aborting the process.
fn fetch_page(overview_id: usize, page: usize) -> Result<String, Error> {
    use std::fs;

    let expected_filename = format!("{}law-{overview_id}-{page}", get_cache_dir()?);

    // Any read failure (typically: the file does not exist yet) counts as a cache miss.
    if let Ok(data) = fs::read_to_string(&expected_filename) {
        return Ok(data);
    }

    info!("Not finding law_id {overview_id} (page {page}) in the cache, downloading...");
    let data = ureq::post("https://data.bka.gv.at/ris/api/v2.6/Bundesrecht")
        .send_form(&[
            ("Applikation", "BrKons"),
            ("Gesetzesnummer", &overview_id.to_string()),
            ("DokumenteProSeite", "OneHundred"),
            ("Seitennummer", &page.to_string()),
            ("Fassung.FassungVom", &current_date()),
        ])?
        .into_string()?;

    // Make sure the directory (and any missing parents) exists before writing the cache file.
    if let Some(parent) = Path::new(&expected_filename).parent() {
        fs::create_dir_all(parent)?;
    }
    fs::write(&expected_filename, &data)?;
    Ok(data)
}
#[cfg(test)]
mod tests {
    use crate::{config::Config, overview::parse};
    use pretty_assertions::assert_eq;
    use std::fs;

    /// Snapshot test over every config file in `./data/configs`.
    ///
    /// For each config the law is parsed and the resulting links are compared line by line
    /// with `./data/expected/overview/<law_id>`. If that file cannot be read, it is created
    /// from the current result instead of failing the test.
    #[test]
    fn overview() {
        let entries = fs::read_dir("./data/configs").expect("No folder with config files");
        for entry in entries {
            let config_path = entry.unwrap().path().display().to_string();
            let (law_id, _, _) = Config::load(&config_path).unwrap();
            let actual = parse(law_id).unwrap();

            let snapshot_path = format!("./data/expected/overview/{law_id}");
            if let Ok(snapshot) = fs::read_to_string(&snapshot_path) {
                let expected: Vec<&str> = snapshot.trim().split('\n').collect();
                assert_eq!(actual, expected);
            } else {
                // No snapshot yet: record the current output as the new expectation.
                fs::write(snapshot_path, actual.join("\n")).expect("Unable to write file");
            }
        }
    }
}