// Copyright (C) 2024 Philipp Hofer // // Licensed under the EUPL, Version 1.2 or - as soon they will be approved by // the European Commission - subsequent versions of the EUPL (the "Licence"). // You may not use this work except in compliance with the Licence. // // You should have received a copy of the European Union Public License along // with this program. If not, you may obtain a copy of the Licence at: // // // Unless required by applicable law or agreed to in writing, software // distributed under the Licence is distributed on an "AS IS" basis, // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. // See the Licence for the specific language governing permissions and // limitations under the Licence. #[allow(clippy::wildcard_imports)] // I use *-operator on purpose: I want to receive a compiler // warning if I've not updated my `create_classifier` function use crate::law::{self, responsible::*}; use crate::misc::Error; use crate::paragraph::Parser; use crate::{default_type, Typ}; use crate::{law::ClassifierApplicable, misc}; use serde::Deserialize; use std::fs; use std::path::Path; use std::sync::Arc; use tracing::{event, info, instrument, Level}; // TODO: more generic fn create_classifier(match_function: &str) -> Result { let func: ClassifierApplicable = match match_function { "contains" => Arc::new(contains), "contains_case_sensitive" => Arc::new(contains_case_sensitive), "starts_with_roman_number" => Arc::new(starts_with_roman_number), "contains_at_start" => Arc::new(contains_at_start), "starts_with_number" => Arc::new(starts_with_number), "starts_with_letter" => Arc::new(starts_with_letter), "starts_with_dash" => Arc::new(starts_with_dash), "starts_with_uppercaseletter" => Arc::new(starts_with_uppercaseletter), "contains_without_unter" => Arc::new(contains_without_unter), _ => { return Err(Error::new(&format!( "Unknown match function: {match_function}" ))) } }; Ok(func) } #[derive(Debug, Deserialize)] pub struct Config { law: Law, #[serde(default)] parser: ParserConfig, } impl Config { #[instrument(level = "trace", skip(path))] /// Loads a configuration from a specified path and constructs a `LawBuilder` and `Parser` based on it. /// /// This function reads a configuration file from the given path, expecting it to be in TOML format. It then /// parses the configuration to set up a `LawBuilder` with specified classifiers and a `Parser` with specific /// string manipulation rules (like removing or replacing strings). Additionally, it processes parser settings /// for moving paragraph headers into content if specified. /// /// # Parameters /// /// - `path`: A path to the configuration file. This can be any type that implements the `AsRef` trait, /// allowing for flexible path specifications (e.g., `&str`, `String`, `Path`, or `PathBuf`). /// /// # Returns /// /// Returns a `Result` containing a tuple of the law ID (`usize`), the constructed `LawBuilder`, and the `Parser` /// upon successful operation. If any error occurs during the process (e.g., file reading, TOML parsing, classifier /// creation), it returns an `Error`. /// /// # Errors /// /// This function can return an `Error` in several cases: /// /// - If the specified path does not exist or cannot be read. /// - If the configuration file content is not valid TOML or does not conform to the expected structure. /// - If there's an issue creating any of the classifiers specified in the configuration (e.g., if the `match_function` /// for a classifier fails). /// /// # Examples /// /// ``` /// use risp::Config; /// use std::path::Path; /// /// let (_, law_id, builder, parser) = Config::load(Path::new("data/configs/abgb.toml")).unwrap(); /// /// assert_eq!(law_id, 10001622); /// ``` pub fn load + std::fmt::Debug>( path: P, ) -> Result<(Typ, usize, law::Builder, Parser), Error> { info!("Using cache dir: {}", misc::get_cache_dir().unwrap()); let config_str = fs::read_to_string(path)?; let config: Config = toml::from_str(&config_str)?; let mut builder = law::Builder::new(config.law.name, config.law.par_sign); if let Some(classifiers) = config.law.classifiers { for classifier in &classifiers { let to_add = law::Classifier::new( &classifier.name, create_classifier(&classifier.match_function)?, ); if classifier.is_root { builder.add_classifier(to_add.root()); } else { builder.add_classifier(to_add); } } event!( Level::INFO, "Added {} classifiers from config", &classifiers.len() ); } else { builder.no_headers(); event!(Level::INFO, "Assuming law text does not contain headers"); } let mut parser = Parser::new(builder.par_sign.clone()); for to_remove in &config.parser.remove_strings { parser.add_string_to_remove(to_remove); } event!( Level::INFO, "Added {} strings to remove", &config.parser.remove_strings.len() ); for to_replace in &config.parser.replace_rules { parser.add_string_to_replace(&to_replace.find, &to_replace.replace_with); } event!( Level::INFO, "Added {} strings to replace", &config.parser.replace_rules.len() ); if config.parser.move_para_headers_into_content { event!( Level::WARN, "Move para headers into content. Make sure you know what you do!" ); parser.move_para_headers_into_content(); } Ok((config.law.typ, config.law.id, builder, parser)) } } #[derive(Debug, Deserialize)] struct Law { id: usize, name: String, #[serde(default = "default_type")] typ: Typ, par_sign: Option, classifiers: Option>, } #[derive(Debug, Deserialize)] struct Classifier { name: String, is_root: bool, match_function: String, } #[derive(Debug, Deserialize, Default)] struct ParserConfig { /// e.g. used in EheG to transform `§ /// 6` into /// ` /// /// § 1. /// text... /// ///` #[serde(default)] //okay to not have this part in the config move_para_headers_into_content: bool, #[serde(default)] //okay to not have this part in the config remove_strings: Vec, #[serde(default)] //okay to not have this part in the config replace_rules: Vec, } #[derive(Debug, Deserialize)] struct ReplaceRule { find: String, replace_with: String, } #[cfg(test)] mod tests { use std::fs; use super::Config; #[test] fn all_configs_are_deserializable() { let configs = fs::read_dir("./data/configs").expect("No folder with config files"); for config in configs { let path = format!("{}", config.unwrap().path().display()); Config::load(&path).unwrap(); } } }