use log::debug; use serde::{Deserialize, Serialize}; use std::sync::Arc; use crate::{overview, par}; #[derive(Debug, Serialize, Deserialize, PartialEq)] pub(crate) struct Law { name: String, //ABGB, UrhG header: Vec<Heading>, } #[derive(Debug, Serialize, Deserialize, PartialEq)] struct Heading { name: String, //1. Hauptstück; 3. Theil; ... desc: Option<String>, content: HeadingContent, // 1. Theil; 1. Subtheil; ... } #[derive(Debug, Serialize, Deserialize, PartialEq)] enum HeadingContent { Paragraph(Vec<Section>), Heading(Vec<Heading>), } fn add_from_node(cur: &ClassifierInstance, builder: &LawBuilder) -> Heading { let children = builder.get_by_parent(&cur.name); if !children.is_empty() { let mut ret = Vec::new(); for child in children { ret.push(add_from_node(&child, builder)); } Heading { name: cur.name.clone(), desc: cur.desc.clone(), content: HeadingContent::Heading(ret), } } else { Heading { name: cur.name.clone(), desc: cur.desc.clone(), content: HeadingContent::Paragraph(cur.sections.clone()), } } } impl From<LawBuilder> for Law { fn from(builder: LawBuilder) -> Self { let cur: Vec<Classifier> = builder .classifiers .clone() .into_iter() .filter(|c| c.parent_index.is_none()) .collect(); let mut ret = Vec::new(); for class in cur { for child in class.instances { ret.push(add_from_node(&child, &builder)); } } Self { name: builder.name, header: ret, } } } pub(crate) fn contains(classifier_name: &str, instance_name: &str) -> bool { instance_name .to_lowercase() .contains(&classifier_name.to_lowercase()) } fn starts_with_number(_classifier_name: &str, instance_name: &str) -> bool { matches!(instance_name.trim().as_bytes().first(), Some(c) if c.is_ascii_digit()) } /// Is used to generate a law struct. It's organized mainly by classifier. 
#[derive(Debug, PartialEq)]
pub(crate) struct LawBuilder {
    /// Name of the law
    pub(crate) name: String, //ABGB, UrhG
    /// Structure of the law text
    pub(crate) classifiers: Vec<Classifier>,
    /// Index into `classifiers` of the classifier that received the most
    /// recent header, if any
    pub(crate) last_header_index: Option<usize>,
    /// Stores the header of the next paragraph
    pub(crate) next_para_header: Option<String>,
    /// Test-only log of the builder calls, one formatted entry per call
    #[cfg(test)]
    pub(crate) history: Vec<String>,
}

impl LawBuilder {
    /// Test-only constructor: sets up the classifiers for one of the fixture
    /// names `"UrhG"`, `"test"` or `"no-headers"`. Any other name yields a
    /// builder without classifiers.
    #[cfg(test)]
    pub(crate) fn test(name: &str) -> Self {
        let mut last_header_index = None;

        let classifiers = match name {
            "UrhG" => {
                let hauptstueck = Classifier::new("Hauptstück", Arc::new(&contains));

                let mut abschnitt = Classifier::new("Abschnitt", Arc::new(&contains));
                abschnitt.set_parent(0);

                let mut numbered_header =
                    Classifier::new("Numbered Header", Arc::new(&starts_with_number));
                // 9999 is the marker `new_header` checks to attach a header
                // relative to the previously seen header.
                numbered_header.set_parent(9999);

                vec![hauptstueck, abschnitt, numbered_header]
            }
            "test" => {
                let h1 = Classifier::new("h1", Arc::new(&contains));

                let mut h2 = Classifier::new("h2", Arc::new(&contains));
                h2.set_parent(0);

                let mut h3 = Classifier::new("h3", Arc::new(&contains));
                h3.set_parent(1);

                vec![h1, h2, h3]
            }
            "no-headers" => {
                // A single unnamed classifier with one unnamed instance, so
                // paragraphs can be added without any header.
                let mut h1 = Classifier::new("", Arc::new(&contains));
                h1.add_instance(ClassifierInstance::new("", 0));
                last_header_index = Some(0);

                vec![h1]
            }
            _ => Vec::new(),
        };

        Self {
            name: name.into(),
            classifiers,
            next_para_header: None,
            last_header_index,
            #[cfg(test)]
            history: Vec::new(),
        }
    }

    /// Creates a new law builder. Adds classifier for known law texts.
pub(crate) fn new(name: &str) -> Law { //TODO: return Law (not LawBuilder) let mut classifiers = Vec::new(); let mut law_id = None; if name == "UrhG" { law_id = Some(10001848); let hauptstueck = Classifier::new("Hauptstück", Arc::new(&contains)); classifiers.push(hauptstueck.clone()); let mut abschnitt = Classifier::new("Abschnitt", Arc::new(&contains)); abschnitt.set_parent(0); classifiers.push(abschnitt); let mut numbered_header = Classifier::new("Numbered Header", Arc::new(&starts_with_number)); numbered_header.set_parent(9999); classifiers.push(numbered_header); } else if name == "MSchG" { law_id = Some(10002180); let abschnitt = Classifier::new("Abschnitt", Arc::new(&contains)); classifiers.push(abschnitt.clone()); let mut numbered_header = Classifier::new("Numbered Header", Arc::new(&starts_with_number)); numbered_header.set_parent(0); classifiers.push(numbered_header); } let mut builder = Self { name: name.into(), classifiers, next_para_header: None, last_header_index: None, #[cfg(test)] history: Vec::new(), }; let paragraphs = overview::parse(law_id.unwrap()).unwrap(); for paragraph in paragraphs { let cont = par::parse(¶graph, &mut builder).unwrap(); if !cont { break; } } builder.into() } /// Sets a new header. 
pub(crate) fn new_header(&mut self, name: &str) {
    #[cfg(test)]
    self.history.push(format!("New_header: {name}"));
    debug!("new_header={name}");

    // The first classifier that accepts the name wins.
    let Some(index) = self
        .classifiers
        .iter()
        .position(|class| class.used_for(name))
    else {
        panic!("No classifier for {name}");
    };

    let mut instance = ClassifierInstance::new(name.trim(), index);

    match self.classifiers[index].parent_index {
        // Marker 9999: the parent is derived from the previous header.
        Some(9999) => {
            let last = &self.classifiers[self.last_header_index.unwrap()];
            let parent = if last.parent_index == Some(9999) {
                // The previous header uses the marker too: attach to the
                // newest instance of the classifier its parent belongs to.
                let parent_classifier = last
                    .instances
                    .last()
                    .unwrap()
                    .parent
                    .as_ref()
                    .unwrap()
                    .idx;
                self.classifiers[parent_classifier].instances.last().unwrap()
            } else {
                // Otherwise the previous header itself becomes the parent.
                last.instances.last().unwrap()
            };
            instance.add_parent(parent);
        }
        // Fixed parent classifier: attach to its newest instance.
        Some(parent) => {
            instance.add_parent(self.classifiers[parent].instances.last().unwrap());
        }
        None => {}
    }

    self.classifiers[index].add_instance(instance);
    self.last_header_index = Some(index);
}

/// Sets a new description for the last classifier.
pub(crate) fn new_desc(&mut self, desc: &str) {
    #[cfg(test)]
    self.history.push(format!("New desc: {desc}"));
    debug!("new_desc={desc}");

    match self.last_header_index {
        Some(index) => self.classifiers[index].set_desc(desc),
        None => panic!("Not possible"),
    }
}

/// Adds a new paragraph.
pub(crate) fn new_par(&mut self, par: String, content: Content) {
    #[cfg(test)]
    self.history.push(format!(
        "New_par: {par};{}",
        serde_json::to_string(&content).unwrap()
    ));
    debug!("new_par=par:{par};content:{content:#?}");

    let Some(index) = self.last_header_index else {
        panic!("Expected at least one classifier");
    };

    // A pending paragraph header is consumed by the paragraph it precedes.
    let par_header = self.next_para_header.take();
    self.classifiers[index].add_section(Section {
        symb: par,
        par_header,
        content,
    });
}

/// Next paragraph has a header, store its name.
pub(crate) fn new_next_para_header(&mut self, header: &str) {
    #[cfg(test)]
    self.history
        .push(format!("New_new_para_header: {header}"));
    debug!("new_next_para_header={header}");
    self.next_para_header = Some(header.trim().into());
}

/// Returns clones of all instances, across every classifier, whose parent
/// has exactly the given name.
fn get_by_parent(&self, name: &str) -> Vec<ClassifierInstance> {
    self.classifiers
        .iter()
        .flat_map(|class| class.instances.iter())
        .filter(|inst| {
            inst.parent
                .as_ref()
                .is_some_and(|parent| parent.name == name)
        })
        .cloned()
        .collect()
}
}

/// A single paragraph of a law.
#[derive(Clone, Debug, PartialEq, Serialize, Deserialize)]
pub(crate) struct Section {
    pub(crate) symb: String, // §"1", §"2", ...
    pub(crate) par_header: Option<String>,
    pub(crate) content: Content,
}

//impl fmt::Debug for Section {
//    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
//        let par_header = self.par_header.as_ref().map(String::as_str).unwrap_or("");
//        write!(f, "{} ({})", self.symb, par_header)
//    }
//}

/// One concrete heading matched by a [`Classifier`].
#[derive(Clone, Debug, PartialEq)]
pub(crate) struct ClassifierInstance {
    pub(crate) name: String,
    pub(crate) desc: Option<String>,
    pub(crate) sections: Vec<Section>,
    /// Copy of the parent instance as it was when the parent was attached.
    pub(crate) parent: Option<Box<ClassifierInstance>>,
    /// Value passed to `new`; `LawBuilder::new_header` passes the index of
    /// the owning classifier.
    pub(crate) idx: usize,
}

impl ClassifierInstance {
    /// Creates an instance without description, sections or parent.
    fn new(name: &str, idx: usize) -> Self {
        Self {
            name: name.into(),
            desc: None,
            sections: Vec::new(),
            parent: None,
            idx,
        }
    }

    /// Replaces the description.
    fn set_desc(&mut self, desc: &str) {
        self.desc = Some(desc.into());
    }

    /// Appends a section.
    fn add_section(&mut self, section: Section) {
        self.sections.push(section);
    }

    /// Stores a clone of `parent` as this instance's parent.
    fn add_parent(&mut self, parent: &ClassifierInstance) {
        self.parent = Some(Box::new(parent.clone()));
    }
}

/// A kind of heading together with all instances found so far.
#[derive(Clone)]
pub(crate) struct Classifier {
    pub(crate) name: String, // Hauptstück, Theil, Abschnitt, ol
    pub(crate) parent_index: Option<usize>,
    pub(crate) instances: Vec<ClassifierInstance>,
    /// Called as `(classifier_name, candidate_name)`; decides whether the
    /// candidate belongs to this classifier.
    pub(crate) used_for_fn: Arc<dyn Fn(&str, &str) -> bool>,
}

impl PartialEq for Classifier {
    /// Compares name, parent index and instances; the predicate is ignored.
    fn eq(&self, other: &Self) -> bool {
        self.name == other.name
            && self.parent_index == other.parent_index
            && self.instances == other.instances
    }
}

impl std::fmt::Debug for Classifier {
    /// Prints every field except the predicate.
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        f.debug_struct("Classifier")
            .field("name", &self.name)
            .field("parent_index", &self.parent_index)
            .field("instances", &self.instances)
            .finish()
    }
}

impl Classifier {
    /// Creates a classifier without parent and without instances.
    fn new(name: &str, used_for_fn: Arc<dyn Fn(&str, &str) -> bool>) -> Self {
        Self {
            name: name.into(),
            parent_index: None,
            instances: Vec::new(),
            used_for_fn,
        }
    }

    /// Sets the parent index.
    fn set_parent(&mut self, parent: usize) {
        self.parent_index = Some(parent);
    }

    /// Appends an instance.
    fn add_instance(&mut self, name: ClassifierInstance) {
        self.instances.push(name);
    }

    /// Sets the trimmed description on the newest instance.
    ///
    /// Panics if the classifier has no instance yet.
    fn set_desc(&mut self, desc: &str) {
        self.instances.last_mut().unwrap().set_desc(desc.trim());
    }

    /// Applies the predicate to this classifier's name and `name`.
    fn used_for(&self, name: &str) -> bool {
        (self.used_for_fn)(&self.name, name)
    }

    /// Appends a section to the newest instance.
    ///
    /// Panics if the classifier has no instance yet.
    fn add_section(&mut self, section: Section) {
        self.instances.last_mut().unwrap().add_section(section);
    }
}

/// Content of a paragraph.
#[derive(Clone, Debug, PartialEq, Serialize, Deserialize)]
pub(crate) enum Content {
    Text(String),       //This is my direct law text
    Item(Vec<Content>), //(1) This is general law. (2) This is more specific law
    List(Vec<Content>),
}

#[cfg(test)]
mod tests {
    use pretty_assertions::assert_eq;
    use std::{
        fs::File,
        io::{self, BufRead, Read},
        path::Path,
    };

    use super::*;

    /// Reads a file into a vector of its lines.
    fn read_lines<P>(filename: P) -> io::Result<Vec<String>>
    where
        P: AsRef<Path>,
    {
        let file = File::open(filename)?;
        let buf_reader = io::BufReader::new(file);
        buf_reader.lines().collect()
    }

    #[ignore]
    #[test]
    fn test_with_live_data() {
        let law = LawBuilder::new("UrhG");

        let path = Path::new("./data/urhg/builder.result");
        let mut file = File::open(path).unwrap();
        let mut json = String::new();
        file.read_to_string(&mut json).unwrap();

        let expected: Law = serde_json::from_str(&json).unwrap();

        assert_eq!(law, expected);
    }

    #[test]
    fn test_builder_full_urhg() {
        let mut builder = LawBuilder::test("UrhG");

        let path = Path::new("./data/urhg/par");
        let input = read_lines(path.join("../par.result")).unwrap();

        // Replay the recorded builder calls, one per line.
        for i in input {
            let (command, content) = i.split_once(':').unwrap();
            match command {
                "New_header" => builder.new_header(content),
                "New desc" => builder.new_desc(content),
                "New_new_para_header" => builder.new_next_para_header(content),
                "New_par" => {
                    // Format: `New_par: <symbol>;<content as JSON>`
                    let (par, real_content) = i.split_once(';').unwrap();
                    let (_, real_par) = par.split_once(':').unwrap();
                    let real_content: Content = serde_json::from_str(real_content).unwrap();
                    builder.new_par(real_par.trim().into(), real_content);
                }
                _ => {
                    panic!("Don't know command '{command}'");
                }
            }
        }

        let actual: Law = builder.into();

        //println!("{}", serde_json::to_string(&law).unwrap());

        let mut file = File::open(path.join("../builder.result")).unwrap();
        let mut json = String::new();
        file.read_to_string(&mut json).unwrap();

        let expected = serde_json::from_str(&json).unwrap();

        assert_eq!(actual, expected);
    }
}