move 'overview' code (getting paragraphs of law text) into lib
All checks were successful
CI/CD Pipeline / test (push) Successful in 36s
All checks were successful
CI/CD Pipeline / test (push) Successful in 36s
This commit is contained in:
parent
0407a8bfbd
commit
ab21651a87
@ -1,4 +1,5 @@
|
|||||||
use log::{debug, info};
|
use log::{debug, info};
|
||||||
|
use risp::risparser::overview::parse;
|
||||||
use serde::{Deserialize, Serialize};
|
use serde::{Deserialize, Serialize};
|
||||||
use std::{
|
use std::{
|
||||||
cell::RefCell,
|
cell::RefCell,
|
||||||
@ -7,7 +8,7 @@ use std::{
|
|||||||
sync::Arc,
|
sync::Arc,
|
||||||
};
|
};
|
||||||
|
|
||||||
use crate::{overview, par};
|
use crate::par;
|
||||||
|
|
||||||
use self::responsible::{
|
use self::responsible::{
|
||||||
contains, contains_at_start, contains_without_unter, starts_with_letter, starts_with_number,
|
contains, contains_at_start, contains_without_unter, starts_with_letter, starts_with_number,
|
||||||
@ -239,7 +240,7 @@ impl LawBuilder {
|
|||||||
history: Vec::new(),
|
history: Vec::new(),
|
||||||
};
|
};
|
||||||
|
|
||||||
let paragraphs = overview::parse(law_id.unwrap()).unwrap();
|
let paragraphs = parse(law_id.unwrap()).unwrap();
|
||||||
|
|
||||||
for paragraph in tqdm::tqdm(paragraphs.into_iter()) {
|
for paragraph in tqdm::tqdm(paragraphs.into_iter()) {
|
||||||
let cont = par::parse(&paragraph, &mut builder).unwrap();
|
let cont = par::parse(&paragraph, &mut builder).unwrap();
|
||||||
|
2
src/lib.rs
Normal file
2
src/lib.rs
Normal file
@ -0,0 +1,2 @@
|
|||||||
|
pub mod misc;
|
||||||
|
pub mod risparser;
|
@ -1,10 +1,8 @@
|
|||||||
use law::LawBuilder;
|
use law::LawBuilder;
|
||||||
|
|
||||||
mod law;
|
mod law;
|
||||||
mod overview;
|
|
||||||
mod par;
|
mod par;
|
||||||
|
|
||||||
mod misc;
|
|
||||||
fn main() {
|
fn main() {
|
||||||
env_logger::init();
|
env_logger::init();
|
||||||
|
|
||||||
|
@ -1,5 +1,7 @@
|
|||||||
use std::io;
|
use std::io;
|
||||||
|
|
||||||
|
use time::{format_description, OffsetDateTime};
|
||||||
|
|
||||||
#[derive(Debug)]
|
#[derive(Debug)]
|
||||||
pub struct Error {
|
pub struct Error {
|
||||||
msg: String,
|
msg: String,
|
||||||
@ -33,3 +35,10 @@ impl From<roxmltree::Error> for Error {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// Returns the current date in YYYY-MM-DD format. Needed for RIS API query to get current version of the overview.
|
||||||
|
pub(crate) fn current_date() -> String {
|
||||||
|
let local_date = OffsetDateTime::now_utc();
|
||||||
|
let format = format_description::parse("[year]-[month]-[day]").unwrap(); //unwrap okay, supplied format is fine
|
||||||
|
local_date.format(&format).expect("Failed to format date")
|
||||||
|
}
|
||||||
|
@ -1,8 +1,8 @@
|
|||||||
mod parser;
|
mod parser;
|
||||||
|
|
||||||
|
use crate::{law::LawBuilder, par::parser::Risdok};
|
||||||
use log::{debug, info};
|
use log::{debug, info};
|
||||||
|
use risp::misc::Error;
|
||||||
use crate::{law::LawBuilder, misc::Error, par::parser::Risdok};
|
|
||||||
|
|
||||||
fn fetch_page(url: &str) -> Result<String, Error> {
|
fn fetch_page(url: &str) -> Result<String, Error> {
|
||||||
Ok(ureq::get(url).call()?.into_string()?)
|
Ok(ureq::get(url).call()?.into_string()?)
|
||||||
|
@ -1,9 +1,7 @@
|
|||||||
|
use risp::misc::Error;
|
||||||
use roxmltree::Node;
|
use roxmltree::Node;
|
||||||
|
|
||||||
use crate::{
|
use crate::law::{Content, LawBuilder};
|
||||||
law::{Content, LawBuilder},
|
|
||||||
misc::Error,
|
|
||||||
};
|
|
||||||
|
|
||||||
#[derive(Debug, PartialEq)]
|
#[derive(Debug, PartialEq)]
|
||||||
pub(crate) struct Risdok {}
|
pub(crate) struct Risdok {}
|
||||||
|
4
src/risparser/mod.rs
Normal file
4
src/risparser/mod.rs
Normal file
@ -0,0 +1,4 @@
|
|||||||
|
//! This deals with accessing RIS data.
|
||||||
|
|
||||||
|
pub mod overview;
|
||||||
|
mod parser;
|
@ -1,17 +1,65 @@
|
|||||||
/// This module contains everything everything, to convert the given JSON file into Rust structs using serde.
|
//! Deals with getting all paragraphs for a given law text
|
||||||
mod parser;
|
|
||||||
|
|
||||||
use log::info;
|
use log::info;
|
||||||
use serde::Deserialize;
|
use serde::Deserialize;
|
||||||
use time::{format_description, OffsetDateTime};
|
|
||||||
|
|
||||||
use crate::{misc::Error, overview::parser::OgdSearchResult};
|
use crate::misc::{current_date, Error};
|
||||||
|
|
||||||
/// Returns the current date in YYYY-MM-DD format. Needed for RIS API query to get current version of the overview.
|
use super::parser::OgdSearchResult;
|
||||||
fn current_date() -> String {
|
|
||||||
let local_date = OffsetDateTime::now_utc();
|
pub fn parse(law_id: usize) -> Result<Vec<String>, Error> {
|
||||||
let format = format_description::parse("[year]-[month]-[day]").unwrap(); //unwrap okay, supplied format is fine
|
let mut page = 1;
|
||||||
local_date.format(&format).expect("Failed to format date")
|
let mut skip = true;
|
||||||
|
let mut ret = Vec::new();
|
||||||
|
loop {
|
||||||
|
info!("=== Fetching overview page #{page} ===");
|
||||||
|
let json = fetch_page(law_id, page)?;
|
||||||
|
let (cont, nodes) = parse_from_str(&json, skip)?;
|
||||||
|
for n in nodes {
|
||||||
|
ret.push(n.clone());
|
||||||
|
}
|
||||||
|
if !cont {
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
skip = false;
|
||||||
|
page += 1;
|
||||||
|
}
|
||||||
|
|
||||||
|
Ok(ret)
|
||||||
|
}
|
||||||
|
|
||||||
|
#[derive(Deserialize)]
|
||||||
|
#[serde(rename_all = "PascalCase")]
|
||||||
|
struct Overview {
|
||||||
|
ogd_search_result: OgdSearchResult,
|
||||||
|
}
|
||||||
|
|
||||||
|
#[cfg(test)]
|
||||||
|
pub(crate) fn parse_from_str_test(
|
||||||
|
content: &str,
|
||||||
|
skip_first: bool,
|
||||||
|
) -> Result<(bool, Vec<String>), Error> {
|
||||||
|
parse_from_str(content, skip_first)
|
||||||
|
}
|
||||||
|
|
||||||
|
fn parse_from_str(content: &str, skip_first: bool) -> Result<(bool, Vec<String>), Error> {
|
||||||
|
let mut ret = Vec::new();
|
||||||
|
let wrapper: Overview = serde_json::from_str(content)?;
|
||||||
|
|
||||||
|
let iter = wrapper.ogd_search_result.get_par().into_iter();
|
||||||
|
let boxed_iter: Box<dyn Iterator<Item = String>> = if skip_first {
|
||||||
|
Box::new(iter.skip(1))
|
||||||
|
} else {
|
||||||
|
Box::new(iter)
|
||||||
|
};
|
||||||
|
for par in boxed_iter {
|
||||||
|
ret.push(par);
|
||||||
|
}
|
||||||
|
|
||||||
|
if !wrapper.ogd_search_result.has_next_page() {
|
||||||
|
return Ok((false, ret));
|
||||||
|
}
|
||||||
|
Ok((true, ret))
|
||||||
}
|
}
|
||||||
|
|
||||||
/// Fetches the json content of the given overview (`law_id`) from the RIS API.
|
/// Fetches the json content of the given overview (`law_id`) from the RIS API.
|
||||||
@ -31,56 +79,3 @@ fn fetch_page(overview_id: usize, page: usize) -> Result<String, Error> {
|
|||||||
.into_string()?,
|
.into_string()?,
|
||||||
)
|
)
|
||||||
}
|
}
|
||||||
|
|
||||||
#[derive(Deserialize)]
|
|
||||||
#[serde(rename_all = "PascalCase")]
|
|
||||||
pub(crate) struct Wrapper {
|
|
||||||
ogd_search_result: OgdSearchResult,
|
|
||||||
}
|
|
||||||
|
|
||||||
pub(crate) fn parse(overview_id: usize) -> Result<Vec<String>, Error> {
|
|
||||||
let mut page = 1;
|
|
||||||
let mut skip = true;
|
|
||||||
let mut ret = Vec::new();
|
|
||||||
loop {
|
|
||||||
info!("=== Fetching overview page #{page} ===");
|
|
||||||
let json = fetch_page(overview_id, page)?;
|
|
||||||
let (cont, nodes) = parse_from_str(&json, skip)?;
|
|
||||||
for n in nodes {
|
|
||||||
ret.push(n.clone());
|
|
||||||
}
|
|
||||||
if !cont {
|
|
||||||
break;
|
|
||||||
}
|
|
||||||
skip = false;
|
|
||||||
page += 1;
|
|
||||||
}
|
|
||||||
|
|
||||||
Ok(ret)
|
|
||||||
}
|
|
||||||
|
|
||||||
pub(crate) fn parse_from_str(
|
|
||||||
content: &str,
|
|
||||||
skip_first: bool,
|
|
||||||
) -> Result<(bool, Vec<String>), Error> {
|
|
||||||
let mut ret = Vec::new();
|
|
||||||
let wrapper: Wrapper = serde_json::from_str(content)?;
|
|
||||||
|
|
||||||
let iter = wrapper.ogd_search_result.get_par().into_iter();
|
|
||||||
let boxed_iter: Box<dyn Iterator<Item = String>> = if skip_first {
|
|
||||||
Box::new(iter.skip(1))
|
|
||||||
} else {
|
|
||||||
Box::new(iter)
|
|
||||||
};
|
|
||||||
for par in boxed_iter {
|
|
||||||
ret.push(par);
|
|
||||||
//if !crate::par::parse(&par).unwrap() {
|
|
||||||
// return Ok(false);
|
|
||||||
//}
|
|
||||||
}
|
|
||||||
|
|
||||||
if !wrapper.ogd_search_result.has_next_page() {
|
|
||||||
return Ok((false, ret));
|
|
||||||
}
|
|
||||||
Ok((true, ret))
|
|
||||||
}
|
|
@ -200,7 +200,7 @@ mod tests {
|
|||||||
|
|
||||||
use log::debug;
|
use log::debug;
|
||||||
|
|
||||||
use crate::{law::LawBuilder, overview::parse_from_str};
|
use crate::risparser::overview::parse_from_str_test;
|
||||||
|
|
||||||
use super::*;
|
use super::*;
|
||||||
|
|
||||||
@ -283,7 +283,7 @@ mod tests {
|
|||||||
file.read_to_string(&mut json).unwrap();
|
file.read_to_string(&mut json).unwrap();
|
||||||
|
|
||||||
let expected_continue = !(idx == last_index);
|
let expected_continue = !(idx == last_index);
|
||||||
let (cont, cur_files) = parse_from_str(&json, skip).unwrap();
|
let (cont, cur_files) = parse_from_str_test(&json, skip).unwrap();
|
||||||
assert_eq!(cont, expected_continue);
|
assert_eq!(cont, expected_continue);
|
||||||
|
|
||||||
for file in cur_files {
|
for file in cur_files {
|
Loading…
Reference in New Issue
Block a user