This commit is contained in:
133
src/paragraph/mod.rs
Normal file
133
src/paragraph/mod.rs
Normal file
@@ -0,0 +1,133 @@
|
||||
//! Deals with getting all paragraphs for a given law text
|
||||
mod parser;
|
||||
|
||||
use log::info;
|
||||
|
||||
use crate::{law::LawBuilder, misc::Error};
|
||||
|
||||
use self::parser::Risdok;
|
||||
|
||||
/// Fetches a law document by URL and feeds it to a `LawBuilder`.
///
/// Before the text reaches the underlying parser, a configurable set of
/// textual clean-ups is applied: first every string in `remove` is deleted,
/// then every `(search, replacement)` pair in `replace` is substituted.
#[derive(Debug, Clone)]
pub struct Parser {
    /// Substrings that are deleted from the document text before parsing.
    remove: Vec<String>,
    /// `(search, replacement)` pairs applied, in insertion order, after the removals.
    replace: Vec<(String, String)>,
}
|
||||
|
||||
impl Default for Parser {
|
||||
fn default() -> Self {
|
||||
Self::new()
|
||||
}
|
||||
}
|
||||
|
||||
impl Parser {
|
||||
pub fn new() -> Self {
|
||||
Self {
|
||||
remove: Vec::new(),
|
||||
replace: Vec::new(),
|
||||
}
|
||||
}
|
||||
|
||||
pub fn add_string_to_remove(&mut self, data: &str) {
|
||||
self.remove.push(data.into());
|
||||
}
|
||||
|
||||
pub fn add_string_to_replace(&mut self, search: &str, replace: &str) {
|
||||
self.replace.push((search.into(), replace.into()));
|
||||
}
|
||||
|
||||
/// Parses the content available in `url`. Calls appropriate functions in supplied `LawBuilder`.
|
||||
pub fn parse(&self, url: &str, builder: &mut LawBuilder) -> Result<bool, Error> {
|
||||
info!("Parsing {url}");
|
||||
let xml = fetch(url)?;
|
||||
|
||||
let xml = xml.replace('\u{a0}', " ");
|
||||
|
||||
self.parse_from_str(&xml, builder)
|
||||
}
|
||||
|
||||
fn parse_from_str(&self, xml: &str, builder: &mut LawBuilder) -> Result<bool, Error> {
|
||||
let mut xml = String::from(xml);
|
||||
for r in &self.remove {
|
||||
xml = xml.replace(r, "");
|
||||
}
|
||||
for (search, replace) in &self.replace {
|
||||
xml = xml.replace(search, replace);
|
||||
}
|
||||
|
||||
Risdok::from_str(&xml, builder)
|
||||
}
|
||||
}
|
||||
|
||||
#[cfg(not(test))]
|
||||
fn fetch(url: &str) -> Result<String, Error> {
|
||||
Ok(ureq::get(url).call()?.into_string()?)
|
||||
}
|
||||
|
||||
/// Test-build variant of `fetch` that keeps downloaded documents in an on-disk cache.
///
/// The cache file lives at `./data/cache/par-<hash>`, where `<hash>` is the
/// lowercase hexadecimal `DefaultHasher` digest of `url`. When that file can be
/// read its content is returned; otherwise the URL is downloaded, stored under
/// that name and returned.
///
/// NOTE(review): the standard library does not guarantee that `DefaultHasher`
/// produces the same digests across Rust releases, so cache file names may
/// change after a toolchain upgrade.
///
/// # Errors
///
/// Returns an error if the download fails or the body cannot be read as text.
///
/// # Panics
///
/// Panics if the freshly downloaded document cannot be written to the cache.
#[cfg(test)]
fn fetch(url: &str) -> Result<String, Error> {
    use std::{
        collections::hash_map::DefaultHasher,
        fs,
        hash::{Hash, Hasher},
    };

    let digest = {
        let mut state = DefaultHasher::new();
        url.hash(&mut state);
        state.finish()
    };
    let cache_path = format!("./data/cache/par-{digest:x}");

    // Any read failure (most commonly a missing file) is treated as a cache miss.
    if let Ok(cached) = fs::read_to_string(&cache_path) {
        return Ok(cached);
    }

    info!("Not finding url {url} in the cache, downloading...");
    let body = ureq::get(url).call()?.into_string()?;
    fs::write(cache_path, &body).expect("Unable to write file");
    Ok(body)
}
|
||||
|
||||
#[cfg(test)]
mod tests {
    use std::fs;

    use crate::config::Config;
    use pretty_assertions::assert_eq;

    /// Runs every config in `./data/configs` through the parser and compares the
    /// builder's recorded history with the expected output stored on disk.
    ///
    /// For each config, the URLs to parse are read line by line from
    /// `./data/expected/overview/<law_id>` and the expected history from
    /// `./data/expected/par/<law_id>`.
    #[test]
    fn all_configs_produce_expected_output() {
        let configs = fs::read_dir("./data/configs").expect("No folder with config files");

        for config in configs {
            let path = config.unwrap().path().display().to_string();

            let (law_id, mut builder, parser) = Config::load(&path).unwrap();

            let paragraph_path = format!("./data/expected/overview/{law_id}");
            let expected_path = format!("./data/expected/par/{law_id}");

            // `expect` takes a plain string and does not interpolate `{…}`, so the
            // message is built with `panic!` to actually include the path.
            let pars = fs::read_to_string(&paragraph_path)
                .unwrap_or_else(|_| panic!("Could not read file {paragraph_path}."));

            for par in pars.trim().split('\n') {
                println!("{par}");
                let cont = parser.parse(par, &mut builder).unwrap();
                // The parser signals with `false` that no further URLs should be fed.
                if !cont {
                    break;
                }
            }

            let actual = &builder.history;

            let expected = fs::read_to_string(&expected_path)
                .unwrap_or_else(|_| panic!("Could not read file {expected_path}."));
            let expected = expected.trim().split('\n').collect::<Vec<&str>>();

            assert_eq!(actual, &expected);
        }
    }
}
|
Reference in New Issue
Block a user