// Copyright (C) 2024 Philipp Hofer // // Licensed under the EUPL, Version 1.2 or - as soon they will be approved by // the European Commission - subsequent versions of the EUPL (the "Licence"). // You may not use this work except in compliance with the Licence. // // You should have received a copy of the European Union Public License along // with this program. If not, you may obtain a copy of the Licence at: // // // Unless required by applicable law or agreed to in writing, software // distributed under the Licence is distributed on an "AS IS" basis, // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. // See the Licence for the specific language governing permissions and // limitations under the Licence. use crate::law::ClassifierApplicable; #[allow(clippy::wildcard_imports)] // I use *-operator on purpose: I want to receive a compiler // warning if I've not updated my `create_classifier` function use crate::law::{self, responsible::*}; use crate::misc::Error; use crate::paragraph::Parser; use serde::Deserialize; use std::fs; use std::path::Path; use std::sync::Arc; // TODO: more generic fn create_classifier(match_function: &str) -> Result { let func: ClassifierApplicable = match match_function { "contains" => Arc::new(contains), "contains_case_sensitive" => Arc::new(contains_case_sensitive), "starts_with_roman_number" => Arc::new(starts_with_roman_number), "contains_at_start" => Arc::new(contains_at_start), "starts_with_number" => Arc::new(starts_with_number), "starts_with_letter" => Arc::new(starts_with_letter), "starts_with_dash" => Arc::new(starts_with_dash), "starts_with_uppercaseletter" => Arc::new(starts_with_uppercaseletter), "contains_without_unter" => Arc::new(contains_without_unter), _ => { return Err(Error::new(&format!( "Unknown match function: {match_function}" ))) } }; Ok(func) } #[derive(Debug, Deserialize)] pub struct Config { law: Law, #[serde(default)] parser: ParserConfig, } impl Config { /// Loads a configuration from a specified path and constructs a `LawBuilder` and `Parser` based on it. /// /// This function reads a configuration file from the given path, expecting it to be in TOML format. It then /// parses the configuration to set up a `LawBuilder` with specified classifiers and a `Parser` with specific /// string manipulation rules (like removing or replacing strings). Additionally, it processes parser settings /// for moving paragraph headers into content if specified. /// /// # Parameters /// /// - `path`: A path to the configuration file. This can be any type that implements the `AsRef` trait, /// allowing for flexible path specifications (e.g., `&str`, `String`, `Path`, or `PathBuf`). /// /// # Returns /// /// Returns a `Result` containing a tuple of the law ID (`usize`), the constructed `LawBuilder`, and the `Parser` /// upon successful operation. If any error occurs during the process (e.g., file reading, TOML parsing, classifier /// creation), it returns an `Error`. /// /// # Errors /// /// This function can return an `Error` in several cases: /// /// - If the specified path does not exist or cannot be read. /// - If the configuration file content is not valid TOML or does not conform to the expected structure. /// - If there's an issue creating any of the classifiers specified in the configuration (e.g., if the `match_function` /// for a classifier fails). /// /// # Examples /// /// ``` /// use risp::Config; /// use std::path::Path; /// /// let (law_id, builder, parser) = Config::load(Path::new("data/configs/abgb.toml")).unwrap(); /// /// assert_eq!(law_id, 10001622); /// ``` pub fn load>(path: P) -> Result<(usize, law::Builder, Parser), Error> { let config_str = fs::read_to_string(path)?; let config: Config = toml::from_str(&config_str)?; let mut builder = law::Builder::new(config.law.name); for classifier in config.law.classifiers { let to_add = law::Classifier::new( &classifier.name, create_classifier(&classifier.match_function)?, ); if classifier.is_root { builder.add_classifier(to_add.root()); } else { builder.add_classifier(to_add); } } let mut parser = Parser::new(); for to_remove in config.parser.remove_strings { parser.add_string_to_remove(&to_remove); } for to_replace in config.parser.replace_rules { parser.add_string_to_replace(&to_replace.find, &to_replace.replace_with); } if config.parser.move_para_headers_into_content { parser.move_para_headers_into_content(); } Ok((config.law.id, builder, parser)) } } #[derive(Debug, Deserialize)] struct Law { id: usize, name: String, classifiers: Vec, } #[derive(Debug, Deserialize)] struct Classifier { name: String, is_root: bool, match_function: String, } #[derive(Debug, Deserialize, Default)] struct ParserConfig { /// e.g. used in EheG to transform `§ /// 6` into /// ` /// /// § 1. /// text... /// ///` #[serde(default)] //okay to not have this part in the config move_para_headers_into_content: bool, #[serde(default)] //okay to not have this part in the config remove_strings: Vec, #[serde(default)] //okay to not have this part in the config replace_rules: Vec, } #[derive(Debug, Deserialize)] struct ReplaceRule { find: String, replace_with: String, } #[cfg(test)] mod tests { use std::fs; use super::Config; #[test] fn all_configs_are_deserializable() { let configs = fs::read_dir("./data/configs").expect("No folder with config files"); for config in configs { let path = format!("{}", config.unwrap().path().display()); Config::load(&path).unwrap(); } } }