// Copyright (C) 2024 Philipp Hofer // // Licensed under the EUPL, Version 1.2 or - as soon they will be approved by // the European Commission - subsequent versions of the EUPL (the "Licence"). // You may not use this work except in compliance with the Licence. // // You should have received a copy of the European Union Public License along // with this program. If not, you may obtain a copy of the Licence at: // // // Unless required by applicable law or agreed to in writing, software // distributed under the Licence is distributed on an "AS IS" basis, // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. // See the Licence for the specific language governing permissions and // limitations under the Licence. #[allow(clippy::wildcard_imports)] // I use *-operator on purpose: I want to receive a compiler // warning if I've not updated my `create_classifier` function use crate::law::{self, responsible::*}; use crate::law::{ClassifierApplicable, LawBuilder}; use crate::misc::Error; use crate::paragraph::Parser; use serde::Deserialize; use std::fs; use std::path::Path; use std::sync::Arc; // TODO: more generic fn create_classifier(match_function: &str) -> Result { let func: ClassifierApplicable = match match_function { "contains" => Arc::new(contains), "containsCaseSensitive" => Arc::new(contains_case_sensitive), "starts_with_roman_number" => Arc::new(starts_with_roman_number), "contains_at_start" => Arc::new(contains_at_start), "starts_with_number" => Arc::new(starts_with_number), "starts_with_letter" => Arc::new(starts_with_letter), "starts_with_dash" => Arc::new(starts_with_dash), "starts_with_uppercaseletter" => Arc::new(starts_with_uppercaseletter), "contains_without_unter" => Arc::new(contains_without_unter), _ => { return Err(Error::new(&format!( "Unknown match function: {match_function}" ))) } }; Ok(func) } #[derive(Debug, Deserialize)] pub struct Config { law: Law, #[serde(default)] parser: ParserConfig, } impl Config { pub fn load>(path: P) -> Result<(usize, LawBuilder, Parser), Error> { let config_str = fs::read_to_string(path)?; let config: Config = toml::from_str(&config_str)?; let mut builder = LawBuilder::new(); for classifier in config.law.classifiers { let to_add = law::Classifier::new( &classifier.name, create_classifier(&classifier.match_function)?, ); if classifier.is_root { builder.add_classifier(to_add.root()); } else { builder.add_classifier(to_add); } } let mut parser = Parser::new(); for to_remove in config.parser.remove_strings { parser.add_string_to_remove(&to_remove); } for to_replace in config.parser.replace_rules { parser.add_string_to_replace(&to_replace.find, &to_replace.replace_with); } if config.parser.move_para_headers_into_content { parser.move_para_headers_into_content(); } Ok((config.law.id, builder, parser)) } } #[derive(Debug, Deserialize)] struct Law { id: usize, name: String, classifiers: Vec, } #[derive(Debug, Deserialize)] struct Classifier { name: String, is_root: bool, match_function: String, } #[derive(Debug, Deserialize, Default)] struct ParserConfig { /// e.g. used in EheG to transform `§ /// 6` into /// ` /// /// § 1. /// text... /// ///` #[serde(default)] //okay to not have this part in the config move_para_headers_into_content: bool, #[serde(default)] //okay to not have this part in the config remove_strings: Vec, #[serde(default)] //okay to not have this part in the config replace_rules: Vec, } #[derive(Debug, Deserialize)] struct ReplaceRule { find: String, replace_with: String, } #[cfg(test)] mod tests { use std::fs; use super::Config; #[test] fn all_configs_are_deserializable() { let configs = fs::read_dir("./data/configs").expect("No folder with config files"); for config in configs { let path = format!("{}", config.unwrap().path().display()); Config::load(&path).unwrap(); } } }