move 'overview' code (getting paragraphs of law text) into lib
All checks were successful
CI/CD Pipeline / test (push) Successful in 36s
All checks were successful
CI/CD Pipeline / test (push) Successful in 36s
This commit is contained in:
parent
0407a8bfbd
commit
ab21651a87
@ -1,4 +1,5 @@
|
||||
use log::{debug, info};
|
||||
use risp::risparser::overview::parse;
|
||||
use serde::{Deserialize, Serialize};
|
||||
use std::{
|
||||
cell::RefCell,
|
||||
@ -7,7 +8,7 @@ use std::{
|
||||
sync::Arc,
|
||||
};
|
||||
|
||||
use crate::{overview, par};
|
||||
use crate::par;
|
||||
|
||||
use self::responsible::{
|
||||
contains, contains_at_start, contains_without_unter, starts_with_letter, starts_with_number,
|
||||
@ -239,7 +240,7 @@ impl LawBuilder {
|
||||
history: Vec::new(),
|
||||
};
|
||||
|
||||
let paragraphs = overview::parse(law_id.unwrap()).unwrap();
|
||||
let paragraphs = parse(law_id.unwrap()).unwrap();
|
||||
|
||||
for paragraph in tqdm::tqdm(paragraphs.into_iter()) {
|
||||
let cont = par::parse(&paragraph, &mut builder).unwrap();
|
||||
|
2
src/lib.rs
Normal file
2
src/lib.rs
Normal file
@ -0,0 +1,2 @@
|
||||
pub mod misc;
|
||||
pub mod risparser;
|
@ -1,10 +1,8 @@
|
||||
use law::LawBuilder;
|
||||
|
||||
mod law;
|
||||
mod overview;
|
||||
mod par;
|
||||
|
||||
mod misc;
|
||||
fn main() {
|
||||
env_logger::init();
|
||||
|
||||
|
@ -1,5 +1,7 @@
|
||||
use std::io;
|
||||
|
||||
use time::{format_description, OffsetDateTime};
|
||||
|
||||
#[derive(Debug)]
|
||||
pub struct Error {
|
||||
msg: String,
|
||||
@ -33,3 +35,10 @@ impl From<roxmltree::Error> for Error {
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// Returns the current date in YYYY-MM-DD format. Needed for RIS API query to get current version of the overview.
|
||||
pub(crate) fn current_date() -> String {
|
||||
let local_date = OffsetDateTime::now_utc();
|
||||
let format = format_description::parse("[year]-[month]-[day]").unwrap(); //unwrap okay, supplied format is fine
|
||||
local_date.format(&format).expect("Failed to format date")
|
||||
}
|
||||
|
@ -1,8 +1,8 @@
|
||||
mod parser;
|
||||
|
||||
use crate::{law::LawBuilder, par::parser::Risdok};
|
||||
use log::{debug, info};
|
||||
|
||||
use crate::{law::LawBuilder, misc::Error, par::parser::Risdok};
|
||||
use risp::misc::Error;
|
||||
|
||||
fn fetch_page(url: &str) -> Result<String, Error> {
|
||||
Ok(ureq::get(url).call()?.into_string()?)
|
||||
|
@ -1,9 +1,7 @@
|
||||
use risp::misc::Error;
|
||||
use roxmltree::Node;
|
||||
|
||||
use crate::{
|
||||
law::{Content, LawBuilder},
|
||||
misc::Error,
|
||||
};
|
||||
use crate::law::{Content, LawBuilder};
|
||||
|
||||
#[derive(Debug, PartialEq)]
|
||||
pub(crate) struct Risdok {}
|
||||
|
4
src/risparser/mod.rs
Normal file
4
src/risparser/mod.rs
Normal file
@ -0,0 +1,4 @@
|
||||
//! This deals with accessing RIS data.
|
||||
|
||||
pub mod overview;
|
||||
mod parser;
|
@ -1,17 +1,65 @@
|
||||
/// This module contains everything needed to convert the given JSON file into Rust structs using serde.
|
||||
mod parser;
|
||||
//! Deals with getting all paragraphs for a given law text
|
||||
|
||||
use log::info;
|
||||
use serde::Deserialize;
|
||||
use time::{format_description, OffsetDateTime};
|
||||
|
||||
use crate::{misc::Error, overview::parser::OgdSearchResult};
|
||||
use crate::misc::{current_date, Error};
|
||||
|
||||
/// Returns the current date in YYYY-MM-DD format. Needed for RIS API query to get current version of the overview.
|
||||
fn current_date() -> String {
|
||||
let local_date = OffsetDateTime::now_utc();
|
||||
let format = format_description::parse("[year]-[month]-[day]").unwrap(); //unwrap okay, supplied format is fine
|
||||
local_date.format(&format).expect("Failed to format date")
|
||||
use super::parser::OgdSearchResult;
|
||||
|
||||
pub fn parse(law_id: usize) -> Result<Vec<String>, Error> {
|
||||
let mut page = 1;
|
||||
let mut skip = true;
|
||||
let mut ret = Vec::new();
|
||||
loop {
|
||||
info!("=== Fetching overview page #{page} ===");
|
||||
let json = fetch_page(law_id, page)?;
|
||||
let (cont, nodes) = parse_from_str(&json, skip)?;
|
||||
for n in nodes {
|
||||
ret.push(n.clone());
|
||||
}
|
||||
if !cont {
|
||||
break;
|
||||
}
|
||||
skip = false;
|
||||
page += 1;
|
||||
}
|
||||
|
||||
Ok(ret)
|
||||
}
|
||||
|
||||
#[derive(Deserialize)]
|
||||
#[serde(rename_all = "PascalCase")]
|
||||
struct Overview {
|
||||
ogd_search_result: OgdSearchResult,
|
||||
}
|
||||
|
||||
#[cfg(test)]
|
||||
pub(crate) fn parse_from_str_test(
|
||||
content: &str,
|
||||
skip_first: bool,
|
||||
) -> Result<(bool, Vec<String>), Error> {
|
||||
parse_from_str(content, skip_first)
|
||||
}
|
||||
|
||||
fn parse_from_str(content: &str, skip_first: bool) -> Result<(bool, Vec<String>), Error> {
|
||||
let mut ret = Vec::new();
|
||||
let wrapper: Overview = serde_json::from_str(content)?;
|
||||
|
||||
let iter = wrapper.ogd_search_result.get_par().into_iter();
|
||||
let boxed_iter: Box<dyn Iterator<Item = String>> = if skip_first {
|
||||
Box::new(iter.skip(1))
|
||||
} else {
|
||||
Box::new(iter)
|
||||
};
|
||||
for par in boxed_iter {
|
||||
ret.push(par);
|
||||
}
|
||||
|
||||
if !wrapper.ogd_search_result.has_next_page() {
|
||||
return Ok((false, ret));
|
||||
}
|
||||
Ok((true, ret))
|
||||
}
|
||||
|
||||
/// Fetches the JSON content of the given overview (`overview_id`) from the RIS API.
|
||||
@ -31,56 +79,3 @@ fn fetch_page(overview_id: usize, page: usize) -> Result<String, Error> {
|
||||
.into_string()?,
|
||||
)
|
||||
}
|
||||
|
||||
#[derive(Deserialize)]
|
||||
#[serde(rename_all = "PascalCase")]
|
||||
pub(crate) struct Wrapper {
|
||||
ogd_search_result: OgdSearchResult,
|
||||
}
|
||||
|
||||
pub(crate) fn parse(overview_id: usize) -> Result<Vec<String>, Error> {
|
||||
let mut page = 1;
|
||||
let mut skip = true;
|
||||
let mut ret = Vec::new();
|
||||
loop {
|
||||
info!("=== Fetching overview page #{page} ===");
|
||||
let json = fetch_page(overview_id, page)?;
|
||||
let (cont, nodes) = parse_from_str(&json, skip)?;
|
||||
for n in nodes {
|
||||
ret.push(n.clone());
|
||||
}
|
||||
if !cont {
|
||||
break;
|
||||
}
|
||||
skip = false;
|
||||
page += 1;
|
||||
}
|
||||
|
||||
Ok(ret)
|
||||
}
|
||||
|
||||
pub(crate) fn parse_from_str(
|
||||
content: &str,
|
||||
skip_first: bool,
|
||||
) -> Result<(bool, Vec<String>), Error> {
|
||||
let mut ret = Vec::new();
|
||||
let wrapper: Wrapper = serde_json::from_str(content)?;
|
||||
|
||||
let iter = wrapper.ogd_search_result.get_par().into_iter();
|
||||
let boxed_iter: Box<dyn Iterator<Item = String>> = if skip_first {
|
||||
Box::new(iter.skip(1))
|
||||
} else {
|
||||
Box::new(iter)
|
||||
};
|
||||
for par in boxed_iter {
|
||||
ret.push(par);
|
||||
//if !crate::par::parse(&par).unwrap() {
|
||||
// return Ok(false);
|
||||
//}
|
||||
}
|
||||
|
||||
if !wrapper.ogd_search_result.has_next_page() {
|
||||
return Ok((false, ret));
|
||||
}
|
||||
Ok((true, ret))
|
||||
}
|
@ -200,7 +200,7 @@ mod tests {
|
||||
|
||||
use log::debug;
|
||||
|
||||
use crate::{law::LawBuilder, overview::parse_from_str};
|
||||
use crate::risparser::overview::parse_from_str_test;
|
||||
|
||||
use super::*;
|
||||
|
||||
@ -283,7 +283,7 @@ mod tests {
|
||||
file.read_to_string(&mut json).unwrap();
|
||||
|
||||
let expected_continue = !(idx == last_index);
|
||||
let (cont, cur_files) = parse_from_str(&json, skip).unwrap();
|
||||
let (cont, cur_files) = parse_from_str_test(&json, skip).unwrap();
|
||||
assert_eq!(cont, expected_continue);
|
||||
|
||||
for file in cur_files {
|
Loading…
Reference in New Issue
Block a user