move 'overview' code (getting paragraphs of law text) into lib
All checks were successful
CI/CD Pipeline / test (push) Successful in 36s

This commit is contained in:
philipp 2024-02-04 21:15:15 +01:00
parent 0407a8bfbd
commit ab21651a87
9 changed files with 81 additions and 74 deletions

View File

@ -1,4 +1,5 @@
use log::{debug, info}; use log::{debug, info};
use risp::risparser::overview::parse;
use serde::{Deserialize, Serialize}; use serde::{Deserialize, Serialize};
use std::{ use std::{
cell::RefCell, cell::RefCell,
@ -7,7 +8,7 @@ use std::{
sync::Arc, sync::Arc,
}; };
use crate::{overview, par}; use crate::par;
use self::responsible::{ use self::responsible::{
contains, contains_at_start, contains_without_unter, starts_with_letter, starts_with_number, contains, contains_at_start, contains_without_unter, starts_with_letter, starts_with_number,
@ -239,7 +240,7 @@ impl LawBuilder {
history: Vec::new(), history: Vec::new(),
}; };
let paragraphs = overview::parse(law_id.unwrap()).unwrap(); let paragraphs = parse(law_id.unwrap()).unwrap();
for paragraph in tqdm::tqdm(paragraphs.into_iter()) { for paragraph in tqdm::tqdm(paragraphs.into_iter()) {
let cont = par::parse(&paragraph, &mut builder).unwrap(); let cont = par::parse(&paragraph, &mut builder).unwrap();

2
src/lib.rs Normal file
View File

@ -0,0 +1,2 @@
pub mod misc;
pub mod risparser;

View File

@ -1,10 +1,8 @@
use law::LawBuilder; use law::LawBuilder;
mod law; mod law;
mod overview;
mod par; mod par;
mod misc;
fn main() { fn main() {
env_logger::init(); env_logger::init();

View File

@ -1,5 +1,7 @@
use std::io; use std::io;
use time::{format_description, OffsetDateTime};
#[derive(Debug)] #[derive(Debug)]
pub struct Error { pub struct Error {
msg: String, msg: String,
@ -33,3 +35,10 @@ impl From<roxmltree::Error> for Error {
} }
} }
} }
/// Produces the current date, taken from the UTC clock, as a `YYYY-MM-DD` string.
///
/// Needed for the RIS API query to get the current version of the overview.
///
/// # Panics
///
/// Panics if the format description cannot be parsed or the date cannot be formatted.
pub(crate) fn current_date() -> String {
    // unwrap okay, the supplied format is a fixed literal
    let ymd = format_description::parse("[year]-[month]-[day]").unwrap();
    OffsetDateTime::now_utc()
        .format(&ymd)
        .expect("Failed to format date")
}

View File

@ -1,8 +1,8 @@
mod parser; mod parser;
use crate::{law::LawBuilder, par::parser::Risdok};
use log::{debug, info}; use log::{debug, info};
use risp::misc::Error;
use crate::{law::LawBuilder, misc::Error, par::parser::Risdok};
fn fetch_page(url: &str) -> Result<String, Error> { fn fetch_page(url: &str) -> Result<String, Error> {
Ok(ureq::get(url).call()?.into_string()?) Ok(ureq::get(url).call()?.into_string()?)

View File

@ -1,9 +1,7 @@
use risp::misc::Error;
use roxmltree::Node; use roxmltree::Node;
use crate::{ use crate::law::{Content, LawBuilder};
law::{Content, LawBuilder},
misc::Error,
};
#[derive(Debug, PartialEq)] #[derive(Debug, PartialEq)]
pub(crate) struct Risdok {} pub(crate) struct Risdok {}

4
src/risparser/mod.rs Normal file
View File

@ -0,0 +1,4 @@
//! This deals with accessing RIS data.
pub mod overview;
mod parser;

View File

@ -1,17 +1,65 @@
/// This module contains everything needed to convert the given JSON file into Rust structs using serde. //! Deals with getting all paragraphs for a given law text
mod parser;
use log::info; use log::info;
use serde::Deserialize; use serde::Deserialize;
use time::{format_description, OffsetDateTime};
use crate::{misc::Error, overview::parser::OgdSearchResult}; use crate::misc::{current_date, Error};
/// Returns the current date in YYYY-MM-DD format. Needed for RIS API query to get current version of the overview. use super::parser::OgdSearchResult;
fn current_date() -> String {
let local_date = OffsetDateTime::now_utc(); pub fn parse(law_id: usize) -> Result<Vec<String>, Error> {
let format = format_description::parse("[year]-[month]-[day]").unwrap(); //unwrap okay, supplied format is fine let mut page = 1;
local_date.format(&format).expect("Failed to format date") let mut skip = true;
let mut ret = Vec::new();
loop {
info!("=== Fetching overview page #{page} ===");
let json = fetch_page(law_id, page)?;
let (cont, nodes) = parse_from_str(&json, skip)?;
for n in nodes {
ret.push(n.clone());
}
if !cont {
break;
}
skip = false;
page += 1;
}
Ok(ret)
}
/// Top-level object of an overview response as deserialized by serde.
///
/// Because of `rename_all = "PascalCase"`, the field below is read from the
/// JSON key `OgdSearchResult`.
#[derive(Deserialize)]
#[serde(rename_all = "PascalCase")]
struct Overview {
/// The search result payload; queried for its paragraphs and for whether another page follows.
ogd_search_result: OgdSearchResult,
}
/// Test-only entry point that forwards unchanged to the private `parse_from_str`.
///
/// Only compiled under `cfg(test)`; it gives tests elsewhere in the crate access to
/// the parsing step without making `parse_from_str` itself visible.
///
/// Arguments and return value are exactly those of `parse_from_str`.
#[cfg(test)]
pub(crate) fn parse_from_str_test(
content: &str,
skip_first: bool,
) -> Result<(bool, Vec<String>), Error> {
parse_from_str(content, skip_first)
}
fn parse_from_str(content: &str, skip_first: bool) -> Result<(bool, Vec<String>), Error> {
let mut ret = Vec::new();
let wrapper: Overview = serde_json::from_str(content)?;
let iter = wrapper.ogd_search_result.get_par().into_iter();
let boxed_iter: Box<dyn Iterator<Item = String>> = if skip_first {
Box::new(iter.skip(1))
} else {
Box::new(iter)
};
for par in boxed_iter {
ret.push(par);
}
if !wrapper.ogd_search_result.has_next_page() {
return Ok((false, ret));
}
Ok((true, ret))
} }
/// Fetches the json content of the given overview (`law_id`) from the RIS API. /// Fetches the json content of the given overview (`law_id`) from the RIS API.
@ -31,56 +79,3 @@ fn fetch_page(overview_id: usize, page: usize) -> Result<String, Error> {
.into_string()?, .into_string()?,
) )
} }
/// Top-level object of an overview response as deserialized by serde.
///
/// Because of `rename_all = "PascalCase"`, the field below is read from the
/// JSON key `OgdSearchResult`.
#[derive(Deserialize)]
#[serde(rename_all = "PascalCase")]
pub(crate) struct Wrapper {
/// The search result payload; queried for its paragraphs and for whether another page follows.
ogd_search_result: OgdSearchResult,
}
/// Collects all paragraph entries of the overview `overview_id`, following pagination.
///
/// Pages are fetched one after another, starting at page 1, until a page reports that
/// no further page exists. The first entry of the first page is skipped; every later
/// page is taken in full.
///
/// # Errors
///
/// Returns the first error produced by `fetch_page` or `parse_from_str`.
pub(crate) fn parse(overview_id: usize) -> Result<Vec<String>, Error> {
    let mut ret = Vec::new();
    let mut page = 1;
    loop {
        info!("=== Fetching overview page #{page} ===");
        let json = fetch_page(overview_id, page)?;
        // Only the very first page has its leading entry skipped.
        let (cont, nodes) = parse_from_str(&json, page == 1)?;
        // `nodes` is already owned, so move the strings instead of cloning each one.
        ret.extend(nodes);
        if !cont {
            break;
        }
        page += 1;
    }
    Ok(ret)
}
/// Parses one overview page given as JSON text.
///
/// * `content` - the JSON document of a single overview page.
/// * `skip_first` - when `true`, the first paragraph entry of the page is dropped.
///
/// Returns `(has_next_page, paragraphs)`: whether the search result reports a further
/// page, and the paragraph entries of this page in their original order.
///
/// # Errors
///
/// Returns an error if `content` cannot be deserialized.
pub(crate) fn parse_from_str(
    content: &str,
    skip_first: bool,
) -> Result<(bool, Vec<String>), Error> {
    let wrapper: Wrapper = serde_json::from_str(content)?;
    // `skip(0)` is a no-op, so the flag maps directly onto the number of skipped entries
    // and no boxed trait object is needed to unify the two cases.
    let ret: Vec<String> = wrapper
        .ogd_search_result
        .get_par()
        .into_iter()
        .skip(usize::from(skip_first))
        .collect();
    Ok((wrapper.ogd_search_result.has_next_page(), ret))
}

View File

@ -200,7 +200,7 @@ mod tests {
use log::debug; use log::debug;
use crate::{law::LawBuilder, overview::parse_from_str}; use crate::risparser::overview::parse_from_str_test;
use super::*; use super::*;
@ -283,7 +283,7 @@ mod tests {
file.read_to_string(&mut json).unwrap(); file.read_to_string(&mut json).unwrap();
let expected_continue = !(idx == last_index); let expected_continue = !(idx == last_index);
let (cont, cur_files) = parse_from_str(&json, skip).unwrap(); let (cont, cur_files) = parse_from_str_test(&json, skip).unwrap();
assert_eq!(cont, expected_continue); assert_eq!(cont, expected_continue);
for file in cur_files { for file in cur_files {