// Listing metadata: 229 lines, 7.7 KiB, Rust
| // Copyright (C) 2024 Philipp Hofer
 | |
| //
 | |
| // Licensed under the EUPL, Version 1.2 or - as soon they will be approved by
 | |
| // the European Commission - subsequent versions of the EUPL (the "Licence").
 | |
| // You may not use this work except in compliance with the Licence.
 | |
| //
 | |
| // You should have received a copy of the European Union Public License along
 | |
| // with this program.  If not, you may obtain a copy of the Licence at:
 | |
| // <https://joinup.ec.europa.eu/software/page/eupl>
 | |
| //
 | |
| // Unless required by applicable law or agreed to in writing, software
 | |
| // distributed under the Licence is distributed on an "AS IS" basis,
 | |
| // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 | |
| // See the Licence for the specific language governing permissions and
 | |
| // limitations under the Licence.
 | |
| 
 | |
| #[allow(clippy::wildcard_imports)] // I use *-operator on purpose: I want to receive a compiler
 | |
| // warning if I've not updated my `create_classifier` function
 | |
| use crate::law::{self, responsible::*};
 | |
| use crate::misc::Error;
 | |
| use crate::paragraph::Parser;
 | |
| use crate::{default_type, Typ};
 | |
| use crate::{law::ClassifierApplicable, misc};
 | |
| use serde::Deserialize;
 | |
| use std::fs;
 | |
| use std::path::Path;
 | |
| use std::sync::Arc;
 | |
| use tracing::{event, info, instrument, Level};
 | |
| 
 | |
| // TODO: more generic
 | |
| fn create_classifier(match_function: &str) -> Result<ClassifierApplicable, Error> {
 | |
|     let func: ClassifierApplicable = match match_function {
 | |
|         "contains" => Arc::new(contains),
 | |
|         "contains_case_sensitive" => Arc::new(contains_case_sensitive),
 | |
|         "starts_with_roman_number" => Arc::new(starts_with_roman_number),
 | |
|         "contains_at_start" => Arc::new(contains_at_start),
 | |
|         "starts_with_number" => Arc::new(starts_with_number),
 | |
|         "starts_with_letter" => Arc::new(starts_with_letter),
 | |
|         "starts_with_dash" => Arc::new(starts_with_dash),
 | |
|         "starts_with_uppercaseletter" => Arc::new(starts_with_uppercaseletter),
 | |
|         "contains_without_unter" => Arc::new(contains_without_unter),
 | |
|         _ => {
 | |
|             return Err(Error::new(&format!(
 | |
|                 "Unknown match function: {match_function}"
 | |
|             )))
 | |
|         }
 | |
|     };
 | |
| 
 | |
|     Ok(func)
 | |
| }
 | |
| 
 | |
| #[derive(Debug, Deserialize)]
 | |
| pub struct Config {
 | |
|     law: Law,
 | |
|     #[serde(default)]
 | |
|     parser: ParserConfig,
 | |
| }
 | |
| 
 | |
| impl Config {
 | |
|     #[instrument(level = "trace", skip(path))]
 | |
|     /// Loads a configuration from a specified path and constructs a `LawBuilder` and `Parser` based on it.
 | |
|     ///
 | |
|     /// This function reads a configuration file from the given path, expecting it to be in TOML format. It then
 | |
|     /// parses the configuration to set up a `LawBuilder` with specified classifiers and a `Parser` with specific
 | |
|     /// string manipulation rules (like removing or replacing strings). Additionally, it processes parser settings
 | |
|     /// for moving paragraph headers into content if specified.
 | |
|     ///
 | |
|     /// # Parameters
 | |
|     ///
 | |
|     /// - `path`: A path to the configuration file. This can be any type that implements the `AsRef<Path>` trait,
 | |
|     ///   allowing for flexible path specifications (e.g., `&str`, `String`, `Path`, or `PathBuf`).
 | |
|     ///
 | |
|     /// # Returns
 | |
|     ///
 | |
|     /// Returns a `Result` containing a tuple of the law ID (`usize`), the constructed `LawBuilder`, and the `Parser`
 | |
|     /// upon successful operation. If any error occurs during the process (e.g., file reading, TOML parsing, classifier
 | |
|     /// creation), it returns an `Error`.
 | |
|     ///
 | |
|     /// # Errors
 | |
|     ///
 | |
|     /// This function can return an `Error` in several cases:
 | |
|     ///
 | |
|     /// - If the specified path does not exist or cannot be read.
 | |
|     /// - If the configuration file content is not valid TOML or does not conform to the expected structure.
 | |
|     /// - If there's an issue creating any of the classifiers specified in the configuration (e.g., if the `match_function`
 | |
|     ///   for a classifier fails).
 | |
|     ///
 | |
|     /// # Examples
 | |
|     ///
 | |
|     /// ```
 | |
|     /// use risp::Config;
 | |
|     /// use std::path::Path;
 | |
|     ///
 | |
|     /// let (_, law_id, _,builder, parser) = Config::load(Path::new("data/configs/abgb.toml")).unwrap();
 | |
|     ///
 | |
|     /// assert_eq!(law_id, 10001622);
 | |
|     /// ```    
 | |
|     pub fn load<P: AsRef<Path> + std::fmt::Debug>(
 | |
|         path: P,
 | |
|     ) -> Result<(Typ, usize, String, law::Builder, Parser), Error> {
 | |
|         info!("Using cache dir: {}", misc::get_cache_dir().unwrap());
 | |
| 
 | |
|         let config_str = fs::read_to_string(path)?;
 | |
|         let config: Config = toml::from_str(&config_str)?;
 | |
| 
 | |
|         let mut builder = law::Builder::new(config.law.name, config.law.par_sign);
 | |
|         if let Some(classifiers) = config.law.classifiers {
 | |
|             for classifier in &classifiers {
 | |
|                 let to_add = law::Classifier::new(
 | |
|                     &classifier.name,
 | |
|                     create_classifier(&classifier.match_function)?,
 | |
|                 );
 | |
|                 if classifier.is_root {
 | |
|                     builder.add_classifier(to_add.root());
 | |
|                 } else {
 | |
|                     builder.add_classifier(to_add);
 | |
|                 }
 | |
|             }
 | |
|             event!(
 | |
|                 Level::INFO,
 | |
|                 "Added {} classifiers from config",
 | |
|                 &classifiers.len()
 | |
|             );
 | |
|         } else {
 | |
|             builder.no_headers();
 | |
|             event!(Level::INFO, "Assuming law text does not contain headers");
 | |
|         }
 | |
| 
 | |
|         let mut parser = Parser::new(builder.par_sign.clone());
 | |
| 
 | |
|         for to_remove in &config.parser.remove_strings {
 | |
|             parser.add_string_to_remove(to_remove);
 | |
|         }
 | |
|         event!(
 | |
|             Level::INFO,
 | |
|             "Added {} strings to remove",
 | |
|             &config.parser.remove_strings.len()
 | |
|         );
 | |
| 
 | |
|         for to_replace in &config.parser.replace_rules {
 | |
|             parser.add_string_to_replace(&to_replace.find, &to_replace.replace_with);
 | |
|         }
 | |
|         event!(
 | |
|             Level::INFO,
 | |
|             "Added {} strings to replace",
 | |
|             &config.parser.replace_rules.len()
 | |
|         );
 | |
| 
 | |
|         if config.parser.move_para_headers_into_content {
 | |
|             event!(
 | |
|                 Level::WARN,
 | |
|                 "Move para headers into content. Make sure you know what you do!"
 | |
|             );
 | |
|             parser.move_para_headers_into_content();
 | |
|         }
 | |
|         Ok((
 | |
|             config.law.typ,
 | |
|             config.law.id,
 | |
|             config.law.fassung,
 | |
|             builder,
 | |
|             parser,
 | |
|         ))
 | |
|     }
 | |
| }
 | |
| 
 | |
| #[derive(Debug, Deserialize)]
 | |
| struct Law {
 | |
|     id: usize,
 | |
|     name: String,
 | |
|     #[serde(default = "default_type")]
 | |
|     typ: Typ,
 | |
|     #[serde(default = "today")]
 | |
|     fassung: String,
 | |
|     par_sign: Option<String>,
 | |
|     classifiers: Option<Vec<Classifier>>,
 | |
| }
 | |
| 
 | |
/// Serde default for `Law::fassung`: the literal string `"today"`.
fn today() -> String {
    "today".to_owned()
}
 | |
| 
 | |
| #[derive(Debug, Deserialize)]
 | |
| struct Classifier {
 | |
|     name: String,
 | |
|     is_root: bool,
 | |
|     match_function: String,
 | |
| }
 | |
| 
 | |
| #[derive(Debug, Deserialize, Default)]
 | |
| struct ParserConfig {
 | |
|     /// e.g. used in EheG to transform `<ueberschrift typ="para" ct="text" halign="c">§
 | |
|     /// 6</ueberschrift>` into
 | |
|     /// `
 | |
|     /// <absatz typ="abs" ct="text" halign="j">
 | |
|     /// <gldsym>§ 1.</gldsym>
 | |
|     /// text...
 | |
|     /// </absatz>
 | |
|     ///`
 | |
|     #[serde(default)] //okay to not have this part in the config
 | |
|     move_para_headers_into_content: bool,
 | |
|     #[serde(default)] //okay to not have this part in the config
 | |
|     remove_strings: Vec<String>,
 | |
|     #[serde(default)] //okay to not have this part in the config
 | |
|     replace_rules: Vec<ReplaceRule>,
 | |
| }
 | |
| 
 | |
| #[derive(Debug, Deserialize)]
 | |
| struct ReplaceRule {
 | |
|     find: String,
 | |
|     replace_with: String,
 | |
| }
 | |
| 
 | |
#[cfg(test)]
mod tests {
    use std::fs;

    use super::Config;

    /// Every entry in `./data/configs` must be loadable by `Config::load`.
    #[test]
    fn all_configs_are_deserializable() {
        let configs = fs::read_dir("./data/configs").expect("No folder with config files");

        for config in configs {
            // Pass the `PathBuf` itself: going through `display()` and a `String` would be
            // lossy for file names that are not valid UTF-8.
            let path = config.unwrap().path();
            if let Err(err) = Config::load(&path) {
                // Name the offending file so a failure is actionable.
                panic!("Failed to load config {}: {err:?}", path.display());
            }
        }
    }
}
 |