This commit is contained in:
@ -102,7 +102,7 @@ impl Config {
|
||||
let config_str = fs::read_to_string(path)?;
|
||||
let config: Config = toml::from_str(&config_str)?;
|
||||
|
||||
let mut builder = law::Builder::new(config.law.name);
|
||||
let mut builder = law::Builder::new(config.law.name, config.law.par_sign);
|
||||
if let Some(classifiers) = config.law.classifiers {
|
||||
for classifier in &classifiers {
|
||||
let to_add = law::Classifier::new(
|
||||
@ -125,7 +125,7 @@ impl Config {
|
||||
event!(Level::INFO, "Assuming law text does not contain headers");
|
||||
}
|
||||
|
||||
let mut parser = Parser::new();
|
||||
let mut parser = Parser::new(builder.par_sign.clone());
|
||||
|
||||
for to_remove in &config.parser.remove_strings {
|
||||
parser.add_string_to_remove(to_remove);
|
||||
@ -160,6 +160,7 @@ impl Config {
|
||||
/// Law-related section of the configuration.
///
/// NOTE(review): presumably the type behind `config.law` in the `impl Config`
/// loader shown earlier in this diff — confirm against the `Config` struct.
struct Law {
|
||||
/// Identifier of the law.
id: usize,
|
||||
/// Name of the law; the loader hands `config.law.name` to `law::Builder::new`.
name: String,
|
||||
/// Optional paragraph sign. The loader passes it on to `law::Builder::new`,
/// where a missing value falls back to "§".
par_sign: Option<String>,
|
||||
/// Optional classifier definitions; the loader only iterates them when this is `Some`.
classifiers: Option<Vec<Classifier>>,
|
||||
}
|
||||
|
||||
|
@ -198,6 +198,8 @@ pub struct Builder {
|
||||
|
||||
next_para_note: Option<String>,
|
||||
|
||||
pub(crate) par_sign: String,
|
||||
|
||||
#[cfg(test)]
|
||||
pub history: Vec<String>,
|
||||
}
|
||||
@ -212,14 +214,15 @@ impl PartialEq for Builder {
|
||||
|
||||
// Default construction of a `Builder`: an empty law name and no explicit paragraph sign.
impl Default for Builder {
|
||||
/// Delegates to `Builder::new` with an empty `String` as the name.
fn default() -> Self {
|
||||
// Pre-change form of the call as rendered by the diff view (single-argument `new`).
Self::new(String::new())
|
||||
// Post-change form: `None` lets `Builder::new` pick its fallback paragraph sign ("§").
Self::new(String::new(), None)
|
||||
}
|
||||
}
|
||||
|
||||
impl Builder {
|
||||
#[must_use]
|
||||
/// Creates a new law builder. Adds classifier for known law texts.
|
||||
pub fn new(name: String) -> Self {
|
||||
pub fn new(name: String, par_sign: Option<String>) -> Self {
|
||||
let par_sign = par_sign.unwrap_or(String::from("§"));
|
||||
Self {
|
||||
name,
|
||||
classifiers: Vec::new(),
|
||||
@ -228,6 +231,7 @@ impl Builder {
|
||||
last_instance: None,
|
||||
next_para_note: None,
|
||||
no_headers: false,
|
||||
par_sign,
|
||||
#[cfg(test)]
|
||||
history: Vec::new(),
|
||||
}
|
||||
|
@ -24,7 +24,7 @@ use std::{
|
||||
hash::{DefaultHasher, Hash, Hasher},
|
||||
path::Path,
|
||||
};
|
||||
use tracing::info;
|
||||
use tracing::{debug, info};
|
||||
|
||||
use crate::{
|
||||
law,
|
||||
@ -37,21 +37,23 @@ pub struct Parser {
|
||||
remove: Vec<String>,
|
||||
replace: Vec<(String, String)>,
|
||||
move_para_headers_into_content: bool,
|
||||
pub(crate) par_sign: String,
|
||||
}
|
||||
|
||||
// Default construction of a `Parser`.
impl Default for Parser {
|
||||
/// Delegates to `Parser::new`.
fn default() -> Self {
|
||||
// Pre-change form of the call as rendered by the diff view (argument-less `new`).
Self::new()
|
||||
// Post-change form: the default parser uses "§" as its paragraph sign.
Self::new(String::from("§"))
|
||||
}
|
||||
}
|
||||
|
||||
impl Parser {
|
||||
/// Creates a parser with empty `remove` and `replace` lists and
/// `move_para_headers_into_content` switched off.
///
/// `par_sign` is stored unchanged in the `par_sign` field; it is later
/// interpolated into the heading regex built by
/// `do_move_para_headers_into_content`.
#[must_use]
|
||||
// Pre-change signature as rendered by the diff view.
pub fn new() -> Self {
|
||||
// Post-change signature: the caller supplies the paragraph sign.
pub fn new(par_sign: String) -> Self {
|
||||
Self {
|
||||
remove: Vec::new(),
|
||||
replace: Vec::new(),
|
||||
move_para_headers_into_content: false,
|
||||
par_sign,
|
||||
}
|
||||
}
|
||||
|
||||
@ -143,7 +145,7 @@ impl Parser {
|
||||
}
|
||||
|
||||
let xml = if self.move_para_headers_into_content {
|
||||
Self::do_move_para_headers_into_content(&xml)
|
||||
self.do_move_para_headers_into_content(&xml)
|
||||
} else {
|
||||
xml
|
||||
};
|
||||
@ -151,11 +153,12 @@ impl Parser {
|
||||
Risdok::from_str(&xml, builder)
|
||||
}
|
||||
|
||||
fn do_move_para_headers_into_content(xml: &str) -> String {
|
||||
fn do_move_para_headers_into_content(&self, xml: &str) -> String {
|
||||
let mut result = String::from(xml);
|
||||
let ueberschrift_regex = Regex::new(
|
||||
"<ueberschrift typ=\"[^\"]*\" ct=\"[^\"]*\" halign=\"[^\"]*\">(§.*?)</ueberschrift>",
|
||||
)
|
||||
let ueberschrift_regex = Regex::new(&format!(
|
||||
"<ueberschrift typ=\"[^\"]*\" ct=\"[^\"]*\" halign=\"[^\"]*\">({}.*?)</ueberschrift>",
|
||||
self.par_sign
|
||||
))
|
||||
.unwrap();
|
||||
let absatz_regex =
|
||||
Regex::new("<absatz typ=\"[^\"]*\" ct=\"[^\"]*\" halign=\"[^\"]*\">").unwrap();
|
||||
@ -179,6 +182,8 @@ impl Parser {
|
||||
// Remove the <ueberschrift> tag from the result string
|
||||
result.replace_range(cap.get(0).unwrap().range(), "");
|
||||
}
|
||||
|
||||
debug!("{result:#?}");
|
||||
result
|
||||
}
|
||||
}
|
||||
@ -216,6 +221,7 @@ mod tests {
|
||||
|
||||
for config in configs {
|
||||
let path = format!("{}", config.unwrap().path().display());
|
||||
println!("Testing {path}");
|
||||
|
||||
let (law_id, mut builder, parser) = Config::load(&path).unwrap();
|
||||
|
||||
|
@ -346,7 +346,8 @@ pub(crate) struct AbsatzAbs {
|
||||
}
|
||||
impl AbsatzAbs {
|
||||
pub(crate) fn test(n: &Node) -> bool {
|
||||
n.tag_name().name() == "absatz" && n.attribute("typ").unwrap() == "abs"
|
||||
n.tag_name().name() == "absatz"
|
||||
&& (n.attribute("typ").unwrap() == "abs" || n.attribute("typ").unwrap() == "erltext")
|
||||
}
|
||||
pub(crate) fn parse(n: Node) -> Self {
|
||||
assert!(Self::test(&n));
|
||||
|
Reference in New Issue
Block a user