diff --git a/src/law/mod.rs b/src/law/mod.rs index 5a68a76..edb2540 100644 --- a/src/law/mod.rs +++ b/src/law/mod.rs @@ -1,4 +1,5 @@ use log::{debug, info}; +use risp::risparser::overview::parse; use serde::{Deserialize, Serialize}; use std::{ cell::RefCell, @@ -7,7 +8,7 @@ use std::{ sync::Arc, }; -use crate::{overview, par}; +use crate::par; use self::responsible::{ contains, contains_at_start, contains_without_unter, starts_with_letter, starts_with_number, @@ -239,7 +240,7 @@ impl LawBuilder { history: Vec::new(), }; - let paragraphs = overview::parse(law_id.unwrap()).unwrap(); + let paragraphs = parse(law_id.unwrap()).unwrap(); for paragraph in tqdm::tqdm(paragraphs.into_iter()) { let cont = par::parse(&paragraph, &mut builder).unwrap(); diff --git a/src/lib.rs b/src/lib.rs new file mode 100644 index 0000000..7c40796 --- /dev/null +++ b/src/lib.rs @@ -0,0 +1,2 @@ +pub mod misc; +pub mod risparser; diff --git a/src/main.rs b/src/main.rs index ec16529..c352afd 100644 --- a/src/main.rs +++ b/src/main.rs @@ -1,10 +1,8 @@ use law::LawBuilder; mod law; -mod overview; mod par; -mod misc; fn main() { env_logger::init(); diff --git a/src/misc.rs b/src/misc.rs index 815f205..08fe867 100644 --- a/src/misc.rs +++ b/src/misc.rs @@ -1,5 +1,7 @@ use std::io; +use time::{format_description, OffsetDateTime}; + #[derive(Debug)] pub struct Error { msg: String, @@ -33,3 +35,10 @@ impl From for Error { } } } + +/// Returns the current date in YYYY-MM-DD format. Needed for RIS API query to get current version of the overview. 
+pub(crate) fn current_date() -> String { + let local_date = OffsetDateTime::now_utc(); + let format = format_description::parse("[year]-[month]-[day]").unwrap(); //unwrap okay, supplied format is fine + local_date.format(&format).expect("Failed to format date") +} diff --git a/src/par/mod.rs b/src/par/mod.rs index 8d8cc14..5030d6b 100644 --- a/src/par/mod.rs +++ b/src/par/mod.rs @@ -1,8 +1,8 @@ mod parser; +use crate::{law::LawBuilder, par::parser::Risdok}; use log::{debug, info}; - -use crate::{law::LawBuilder, misc::Error, par::parser::Risdok}; +use risp::misc::Error; fn fetch_page(url: &str) -> Result { Ok(ureq::get(url).call()?.into_string()?) diff --git a/src/par/parser.rs b/src/par/parser.rs index 882e514..60e9897 100644 --- a/src/par/parser.rs +++ b/src/par/parser.rs @@ -1,9 +1,7 @@ +use risp::misc::Error; use roxmltree::Node; -use crate::{ - law::{Content, LawBuilder}, - misc::Error, -}; +use crate::law::{Content, LawBuilder}; #[derive(Debug, PartialEq)] pub(crate) struct Risdok {} diff --git a/src/risparser/mod.rs b/src/risparser/mod.rs new file mode 100644 index 0000000..907bbfd --- /dev/null +++ b/src/risparser/mod.rs @@ -0,0 +1,4 @@ +//! This deals with accessing RIS data. + +pub mod overview; +mod parser; diff --git a/src/overview/mod.rs b/src/risparser/overview.rs similarity index 65% rename from src/overview/mod.rs rename to src/risparser/overview.rs index de1b4df..2b1d557 100644 --- a/src/overview/mod.rs +++ b/src/risparser/overview.rs @@ -1,17 +1,65 @@ -/// This module contains everything everything, to convert the given JSON file into Rust structs using serde. -mod parser; +//! Deals with getting all paragraphs for a given law text use log::info; use serde::Deserialize; -use time::{format_description, OffsetDateTime}; -use crate::{misc::Error, overview::parser::OgdSearchResult}; +use crate::misc::{current_date, Error}; -/// Returns the current date in YYYY-MM-DD format. Needed for RIS API query to get current version of the overview. 
-fn current_date() -> String { - let local_date = OffsetDateTime::now_utc(); - let format = format_description::parse("[year]-[month]-[day]").unwrap(); //unwrap okay, supplied format is fine - local_date.format(&format).expect("Failed to format date") +use super::parser::OgdSearchResult; + +pub fn parse(law_id: usize) -> Result, Error> { + let mut page = 1; + let mut skip = true; + let mut ret = Vec::new(); + loop { + info!("=== Fetching overview page #{page} ==="); + let json = fetch_page(law_id, page)?; + let (cont, nodes) = parse_from_str(&json, skip)?; + for n in nodes { + ret.push(n.clone()); + } + if !cont { + break; + } + skip = false; + page += 1; + } + + Ok(ret) +} + +#[derive(Deserialize)] +#[serde(rename_all = "PascalCase")] +struct Overview { + ogd_search_result: OgdSearchResult, +} + +#[cfg(test)] +pub(crate) fn parse_from_str_test( + content: &str, + skip_first: bool, +) -> Result<(bool, Vec), Error> { + parse_from_str(content, skip_first) +} + +fn parse_from_str(content: &str, skip_first: bool) -> Result<(bool, Vec), Error> { + let mut ret = Vec::new(); + let wrapper: Overview = serde_json::from_str(content)?; + + let iter = wrapper.ogd_search_result.get_par().into_iter(); + let boxed_iter: Box> = if skip_first { + Box::new(iter.skip(1)) + } else { + Box::new(iter) + }; + for par in boxed_iter { + ret.push(par); + } + + if !wrapper.ogd_search_result.has_next_page() { + return Ok((false, ret)); + } + Ok((true, ret)) } /// Fetches the json content of the given overview (`law_id`) from the RIS API. 
@@ -31,56 +79,3 @@ fn fetch_page(overview_id: usize, page: usize) -> Result { .into_string()?, ) } - -#[derive(Deserialize)] -#[serde(rename_all = "PascalCase")] -pub(crate) struct Wrapper { - ogd_search_result: OgdSearchResult, -} - -pub(crate) fn parse(overview_id: usize) -> Result, Error> { - let mut page = 1; - let mut skip = true; - let mut ret = Vec::new(); - loop { - info!("=== Fetching overview page #{page} ==="); - let json = fetch_page(overview_id, page)?; - let (cont, nodes) = parse_from_str(&json, skip)?; - for n in nodes { - ret.push(n.clone()); - } - if !cont { - break; - } - skip = false; - page += 1; - } - - Ok(ret) -} - -pub(crate) fn parse_from_str( - content: &str, - skip_first: bool, -) -> Result<(bool, Vec), Error> { - let mut ret = Vec::new(); - let wrapper: Wrapper = serde_json::from_str(content)?; - - let iter = wrapper.ogd_search_result.get_par().into_iter(); - let boxed_iter: Box> = if skip_first { - Box::new(iter.skip(1)) - } else { - Box::new(iter) - }; - for par in boxed_iter { - ret.push(par); - //if !crate::par::parse(&par).unwrap() { - // return Ok(false); - //} - } - - if !wrapper.ogd_search_result.has_next_page() { - return Ok((false, ret)); - } - Ok((true, ret)) -} diff --git a/src/overview/parser.rs b/src/risparser/parser.rs similarity index 98% rename from src/overview/parser.rs rename to src/risparser/parser.rs index 6fb911c..dbb84fe 100644 --- a/src/overview/parser.rs +++ b/src/risparser/parser.rs @@ -200,7 +200,7 @@ mod tests { use log::debug; - use crate::{law::LawBuilder, overview::parse_from_str}; + use crate::risparser::overview::parse_from_str_test; use super::*; @@ -283,7 +283,7 @@ mod tests { file.read_to_string(&mut json).unwrap(); let expected_continue = !(idx == last_index); - let (cont, cur_files) = parse_from_str(&json, skip).unwrap(); + let (cont, cur_files) = parse_from_str_test(&json, skip).unwrap(); assert_eq!(cont, expected_continue); for file in cur_files {