risp/src/config.rs

146 lines
4.6 KiB
Rust
Raw Normal View History

// Copyright (C) 2024 Philipp Hofer
//
// Licensed under the EUPL, Version 1.2 or - as soon they will be approved by
// the European Commission - subsequent versions of the EUPL (the "Licence").
// You may not use this work except in compliance with the Licence.
//
// You should have received a copy of the European Union Public License along
// with this program. If not, you may obtain a copy of the Licence at:
// <https://joinup.ec.europa.eu/software/page/eupl>
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the Licence is distributed on an "AS IS" basis,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the Licence for the specific language governing permissions and
// limitations under the Licence.
#[allow(clippy::wildcard_imports)] // I use *-operator on purpose: I want to receive a compiler
// warning if I've not updated my `create_classifier` function
2024-02-06 10:17:14 +01:00
use crate::law::{self, responsible::*};
use crate::law::{ClassifierApplicable, LawBuilder};
use crate::misc::Error;
2024-02-06 11:45:44 +01:00
use crate::paragraph::Parser;
2024-02-06 14:18:03 +01:00
use serde::Deserialize;
use std::fs;
use std::path::Path;
use std::sync::Arc;
2024-02-06 10:17:14 +01:00
// TODO: more generic
fn create_classifier(match_function: &str) -> Result<ClassifierApplicable, Error> {
let func: ClassifierApplicable = match match_function {
"contains" => Arc::new(contains),
2024-02-06 13:20:38 +01:00
"containsCaseSensitive" => Arc::new(contains_case_sensitive),
2024-02-06 10:17:14 +01:00
"starts_with_roman_number" => Arc::new(starts_with_roman_number),
"contains_at_start" => Arc::new(contains_at_start),
"starts_with_number" => Arc::new(starts_with_number),
"starts_with_letter" => Arc::new(starts_with_letter),
2024-02-06 13:10:59 +01:00
"starts_with_dash" => Arc::new(starts_with_dash),
"starts_with_uppercaseletter" => Arc::new(starts_with_uppercaseletter),
2024-02-06 10:17:14 +01:00
"contains_without_unter" => Arc::new(contains_without_unter),
_ => {
return Err(Error::new(&format!(
2024-02-06 14:18:03 +01:00
"Unknown match function: {match_function}"
2024-02-06 10:17:14 +01:00
)))
}
};
Ok(func)
}
#[derive(Debug, Deserialize)]
pub struct Config {
law: Law,
2024-02-06 11:18:40 +01:00
#[serde(default)]
2024-02-06 10:17:14 +01:00
parser: ParserConfig,
}
impl Config {
    /// Reads the TOML config file at `path` and builds the components it describes.
    ///
    /// Returns a tuple of
    /// 1. the law id from the `law` section,
    /// 2. a [`LawBuilder`] with one classifier added per configured entry, in file order
    ///    (entries with `is_root = true` are added via `Classifier::root()`),
    /// 3. a [`Parser`] set up with the configured remove strings, replace rules and
    ///    the `move_para_headers_into_content` switch.
    ///
    /// # Errors
    ///
    /// Returns an [`Error`] if the file cannot be read, if its content cannot be
    /// deserialized into a [`Config`], or if a classifier names a `match_function`
    /// that `create_classifier` does not know.
    pub fn load<P: AsRef<Path>>(path: P) -> Result<(usize, LawBuilder, Parser), Error> {
        let config_str = fs::read_to_string(path)?;
        let config: Config = toml::from_str(&config_str)?;

        let mut builder = LawBuilder::new();
        for classifier in config.law.classifiers {
            let to_add = law::Classifier::new(
                &classifier.name,
                create_classifier(&classifier.match_function)?,
            );
            if classifier.is_root {
                builder.add_classifier(to_add.root());
            } else {
                builder.add_classifier(to_add);
            }
        }

        let mut parser = Parser::new();
        for to_remove in config.parser.remove_strings {
            parser.add_string_to_remove(&to_remove);
        }
        for to_replace in config.parser.replace_rules {
            parser.add_string_to_replace(&to_replace.find, &to_replace.replace_with);
        }
        if config.parser.move_para_headers_into_content {
            parser.move_para_headers_into_content();
        }

        // `classifiers` was moved out of `config.law` above; `id` is a plain
        // `usize` and can still be read afterwards.
        Ok((config.law.id, builder, parser))
    }
}
#[derive(Debug, Deserialize)]
struct Law {
id: usize,
2024-02-17 10:40:00 +01:00
name: String,
2024-02-06 10:17:14 +01:00
classifiers: Vec<Classifier>,
}
/// One classifier entry inside the `law` section of a config file.
#[derive(Deserialize, Debug)]
struct Classifier {
    /// Name handed to `law::Classifier::new`.
    name: String,
    /// When `true`, the classifier is registered through `law::Classifier::root()`.
    is_root: bool,
    /// Key of the predicate to use; resolved by `create_classifier`.
    match_function: String,
}
2024-02-06 11:18:40 +01:00
#[derive(Debug, Deserialize, Default)]
2024-02-06 10:17:14 +01:00
struct ParserConfig {
/// e.g. used in EheG to transform `<ueberschrift typ="para" ct="text" halign="c">§
/// 6</ueberschrift>` into
/// `
/// <absatz typ="abs" ct="text" halign="j">
/// <gldsym>§ 1.</gldsym>
/// text...
/// </absatz>
///`
#[serde(default)] //okay to not have this part in the config
move_para_headers_into_content: bool,
2024-02-06 11:18:40 +01:00
#[serde(default)] //okay to not have this part in the config
2024-02-06 10:17:14 +01:00
remove_strings: Vec<String>,
2024-02-06 11:18:40 +01:00
#[serde(default)] //okay to not have this part in the config
2024-02-06 10:17:14 +01:00
replace_rules: Vec<ReplaceRule>,
}
/// A single find/replace pair from the `replace_rules` list of the `parser` section.
#[derive(Deserialize, Debug)]
struct ReplaceRule {
    /// First argument handed to `Parser::add_string_to_replace`.
    find: String,
    /// Second argument handed to `Parser::add_string_to_replace`.
    replace_with: String,
}
2024-02-06 11:18:40 +01:00
#[cfg(test)]
mod tests {
    use super::Config;
    use std::fs;

    /// Every entry in `./data/configs` must be loadable through `Config::load`.
    #[test]
    fn all_configs_are_deserializable() {
        let configs = fs::read_dir("./data/configs").expect("No folder with config files");
        for config in configs {
            // Hand the `PathBuf` over as-is: going through `display()` would be
            // lossy for non-UTF-8 file names and allocates for no benefit.
            let path = config.expect("Unreadable entry in config folder").path();
            if let Err(err) = Config::load(&path) {
                // Name the offending file so a failure is immediately actionable.
                panic!("Config {} could not be loaded: {err:?}", path.display());
            }
        }
    }
}