127 lines
4.9 KiB
Rust
127 lines
4.9 KiB
Rust
//! Fetches and parses all paragraphs of a given law text.
|
||
mod parser;
|
||
|
||
use log::info;
|
||
|
||
use crate::{law::LawBuilder, misc::Error};
|
||
|
||
use self::parser::Risdok;
|
||
|
||
/// Parser for paragraph documents.
///
/// Holds a list of literal substrings that are stripped from the raw document text
/// before it is handed to the actual parser.
#[derive(Debug, Clone, Default)]
pub struct Parser {
    /// Literal substrings removed from the document text, applied in insertion order.
    remove: Vec<String>,
}
|
||
|
||
impl Parser {
|
||
pub fn new() -> Self {
|
||
Self { remove: Vec::new() }
|
||
}
|
||
|
||
pub fn add_string_to_remove(&mut self, data: &str) {
|
||
self.remove.push(data.into());
|
||
}
|
||
|
||
/// Parses the content available in `url`. Calls appropriate functions in supplied `LawBuilder`.
|
||
pub fn parse(&self, url: &str, builder: &mut LawBuilder) -> Result<bool, Error> {
|
||
info!("Parsing {url}");
|
||
let xml = fetch(url)?;
|
||
|
||
self.parse_from_str(&xml, builder)
|
||
}
|
||
|
||
fn parse_from_str(&self, xml: &str, builder: &mut LawBuilder) -> Result<bool, Error> {
|
||
let mut xml = String::from(xml);
|
||
for r in &self.remove {
|
||
xml = xml.replace(r, "");
|
||
}
|
||
|
||
Risdok::from_str(&xml, builder)
|
||
}
|
||
}
|
||
|
||
#[cfg(not(test))]
|
||
fn fetch(url: &str) -> Result<String, Error> {
|
||
Ok(ureq::get(url).call()?.into_string()?)
|
||
}
|
||
|
||
/// Test-build variant of `fetch` that caches downloads on disk.
///
/// The cache file lives at `./data/cache/par-<hash>`, where `<hash>` is the hex-formatted
/// `DefaultHasher` hash of `url`. If the file can be read, its content is returned without
/// touching the network; any read failure (not only a missing file) triggers a download.
///
/// # Errors
///
/// Propagates failures of the HTTP call and of reading the body into a string.
///
/// # Panics
///
/// Panics if the downloaded data cannot be written to the cache file.
#[cfg(test)]
fn fetch(url: &str) -> Result<String, Error> {
    use std::{
        fs,
        hash::{DefaultHasher, Hash, Hasher},
    };

    // NOTE(review): the std docs do not guarantee `DefaultHasher`'s algorithm across Rust
    // releases, so cache file names may change after a toolchain update.
    let digest = {
        let mut state = DefaultHasher::new();
        url.hash(&mut state);
        state.finish()
    };
    let cache_file = format!("./data/cache/par-{digest:x}");

    if let Ok(cached) = fs::read_to_string(&cache_file) {
        return Ok(cached);
    }

    info!("Not finding url {url} in the cache, downloading...");
    let body = ureq::get(url).call()?.into_string()?;
    fs::write(cache_file, &body).expect("Unable to write file");
    Ok(body)
}
|
||
|
||
#[cfg(test)]
mod tests {

    use std::{fs, sync::Arc};

    use crate::{
        law::{responsible::contains, Classifier, LawBuilder},
        risparser::paragraph::Parser,
    };
    use pretty_assertions::assert_eq;

    /// For each enabled law in `./data/expected/overview/`, parses every line of the overview
    /// file as a paragraph URL and compares the builder's recorded history with the expected
    /// lines stored in `./data/expected/par/<law_id>`.
    #[test]
    fn paragraph() {
        let laws = fs::read_dir("./data/expected/overview/")
            .expect("No folder with expected overview files");

        for law in laws {
            let entry = law.unwrap();
            let law_path = entry.path();
            let law_id = entry.file_name().into_string().unwrap();

            //TODO: Remove this guard once all law texts pass
            if law_id != "10001905" {
                continue;
            }

            let expected_path = format!("./data/expected/par/{law_id}");

            let pars = fs::read_to_string(&law_path).unwrap_or_else(|err| {
                panic!("Could not read file {}: {err}", law_path.display())
            });
            let pars = pars.trim().split('\n').collect::<Vec<&str>>();

            let mut builder = LawBuilder::new("law");
            builder.add_classifier(Classifier::new("Abschnitt", Arc::new(&contains)).root());

            let mut parser = Parser::new();
            // Order matters: removals are applied in registration order, and the long
            // fragments contain `<i>`/`</i>` themselves, so they must be registered before
            // the bare tags.
            let fragments = [
                r#"<absatz typ="abs" ct="text" halign="j"><i>(§§ 1, 2, 3, 4,6, § 8 Abs. 1 und 2 des Gesetzes vom 16. Februar 1883, RGBl. Nr. 20, betreffend das Verfahren zum Zwecke der Todeserklärung und der Beweisführung des Todes, in der Fassung des Gesetzes vom 31. März 1918, RGBl. Nr. 129:)</i></absatz>"#,
                r#"<absatz typ="abs" ct="text" halign="j"><i>(§§ 1 bis 11 des Gesetzes über die Verschollenheit, die Todeserklärung und die Feststellung der Todeszeit vom 4. Juli 1939, Deutsches RGBl. I S. 1186:)</i></absatz>"#,
                r#"<absatz typ="abs" ct="text" halign="j"><i>(§ 56 Abs. 3 des Gesetzes vom 4. Juli 1939, Deutsches RGBl. I S. 1186:)</i></absatz>"#,
                r#"<absatz typ="abs" ct="text" halign="j"><i>(§ 10 des Gesetzes vom 16. Februar 1883, RGBl. Nr. 20, betreffend das Verfahren zum Zwecke der Todeserklärung und der Beweisführung des Todes, in der Fassung des Gesetzes vom 31. März 1918, RGBl. Nr. 129:)</i></absatz>"#,
                r#"<absatz typ="abs" ct="text" halign="j"><i>(§§ 10a, 10b und 10c des Gesetzes vom 16. Februar 1883, RGBl. Nr. 20, betreffend das Verfahren zum Zwecke der Todeserklärung und der Beweisführung des Todes, in der Fassung des Gesetzes vom 31. März 1918, RGBl. Nr. 129:)</i></absatz>"#,
                "<i>",
                "</i>",
            ];
            for fragment in fragments {
                parser.add_string_to_remove(fragment);
            }

            for par in pars {
                let cont = parser.parse(par, &mut builder).unwrap();
                // The parser returns `false` to signal that no further paragraphs
                // should be processed for this law.
                if !cont {
                    break;
                }
            }

            let actual = builder.history;

            let expected = fs::read_to_string(&expected_path)
                .unwrap_or_else(|err| panic!("Could not read file {expected_path}: {err}"));
            let expected = expected.trim().split('\n').collect::<Vec<&str>>();

            assert_eq!(actual, expected);
        }
    }
}
|