// Copyright (C) 2024 Philipp Hofer // // Licensed under the EUPL, Version 1.2 or - as soon they will be approved by // the European Commission - subsequent versions of the EUPL (the "Licence"). // You may not use this work except in compliance with the Licence. // // You should have received a copy of the European Union Public License along // with this program. If not, you may obtain a copy of the Licence at: // // // Unless required by applicable law or agreed to in writing, software // distributed under the Licence is distributed on an "AS IS" basis, // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. // See the Licence for the specific language governing permissions and // limitations under the Licence. //! Deals with getting all paragraphs for a given law text mod ris_structure; use std::path::Path; use log::info; use serde::Deserialize; use crate::misc::{current_date, get_cache_dir, Error}; use ris_structure::OgdSearchResult; /// Parses a law text from the Austrian RIS (Rechtsinformationssystem) based on the given `law_id`. /// /// This function iterates over all pages of the law text, with each page containing a maximum of 100 /// paragraphs in XML format. It extracts and returns the links to each paragraph as XML URLs. /// /// The first section of the first page is skipped (`skip` is set to true) because it always /// contains the table of contents. The function continues processing subsequent pages until there /// are no more pages left to fetch. /// /// # Parameters /// /// - `law_id`: The unique identifier of the law in the RIS system. /// /// # Returns /// /// - `Ok(Vec)`: A vector of XML file links representing paragraphs from the given law text. /// - `Err(Error)`: An error if there was an issue fetching or parsing the law text. /// /// # Example /// ``` /// use risp::overview::parse; /// /// let list_with_xml_links_to_paragraphs = parse(10001905).unwrap(); /// assert_eq!(list_with_xml_links_to_paragraphs.len(), 31); // TEG has 31 paragraphs /// assert_eq!(list_with_xml_links_to_paragraphs[0], "https://www.ris.bka.gv.at/Dokumente/Bundesnormen/NOR12025190/NOR12025190.xml"); // Link to first paragraph /// ``` pub fn parse(law_id: usize) -> Result, Error> { let mut page = 1; let mut skip = true; let mut ret = Vec::new(); loop { info!("=== Fetching overview page #{page} ==="); let json = fetch_page(law_id, page)?; let (cont, nodes) = parse_from_str(&json, skip)?; for n in nodes { ret.push(n.clone()); } if !cont { break; } skip = false; page += 1; } Ok(ret) } #[derive(Deserialize)] #[serde(rename_all = "PascalCase")] struct Overview { ogd_search_result: OgdSearchResult, } fn parse_from_str(content: &str, skip_first: bool) -> Result<(bool, Vec), Error> { let mut ret = Vec::new(); let wrapper: Overview = serde_json::from_str(content)?; let iter = wrapper.ogd_search_result.get_par().into_iter(); let boxed_iter: Box> = if skip_first { Box::new(iter.skip(1)) } else { Box::new(iter) }; for par in boxed_iter { ret.push(par); } if !wrapper.ogd_search_result.has_next_page() { return Ok((false, ret)); } Ok((true, ret)) } fn fetch_page(overview_id: usize, page: usize) -> Result { use std::fs; let expected_filename = format!("{}law-{overview_id}-{page}", get_cache_dir()?); if let Ok(data) = fs::read_to_string(&expected_filename) { Ok(data) } else { info!("Not finding law_id {overview_id} (page {page}) in the cache, downloading..."); let data = ureq::post("https://data.bka.gv.at/ris/api/v2.6/Bundesrecht") .send_form(&[ ("Applikation", "BrKons"), ("Gesetzesnummer", &format!("{overview_id}")), ("DokumenteProSeite", "OneHundred"), ("Seitennummer", &format!("{page}")), ("Fassung.FassungVom", ¤t_date()), ])? .into_string()?; let path = Path::new(&expected_filename); if let Some(parent) = path.parent() { // Try to create the directory (and any necessary parent directories) fs::create_dir_all(parent).expect("Unable to create directory"); } fs::write(expected_filename, &data).expect("Unable to write file"); Ok(data) } } #[cfg(test)] mod tests { use crate::{config::Config, overview::parse}; use pretty_assertions::assert_eq; use std::fs; #[test] fn overview() { let configs = fs::read_dir("./data/configs").expect("No folder with config files"); for config in configs { let path = format!("{}", config.unwrap().path().display()); let (law_id, _, _) = Config::load(&path).unwrap(); let actual = parse(law_id).unwrap(); let expected_path = format!("./data/expected/overview/{law_id}"); match fs::read_to_string(&expected_path) { Ok(expected) => { let expected = expected.trim().split('\n').collect::>(); assert_eq!(actual, expected); } Err(_) => { let to_write = actual.join("\n"); fs::write(expected_path, to_write).expect("Unable to write file"); } } } } }