use log::{debug, info}; use risp::risparser::overview::parse; use serde::{Deserialize, Serialize}; use std::{ cell::RefCell, fmt::{self, Display}, rc::Rc, sync::Arc, }; use crate::par; use self::responsible::{ contains, contains_at_start, contains_without_unter, starts_with_letter, starts_with_number, starts_with_roman_number, starts_with_uppercaseletter, }; mod responsible; #[derive(Debug, Serialize, Deserialize, PartialEq)] pub(crate) struct Law { name: String, //ABGB, UrhG header: Vec, } impl Law { pub(crate) fn to_md(&self) { println!("# {}", self.name); for header in &self.header { Self::print_md(header, 2); } } fn print_md(header: &Heading, level: usize) { println!("{} {}", "#".repeat(level), header); match &header.content { HeadingContent::Heading(h) => { for child in h { Self::print_md(child, level + 1); } } HeadingContent::Paragraph(p) => { for par in p { println!("{} {par}", "#".repeat(level + 1)); } } } } } impl From for Law { fn from(builder: LawBuilder) -> Self { let mut ret = Vec::new(); for header in builder.header { ret.push(Heading { name: header.borrow().name.clone(), desc: header.borrow().desc.clone(), content: header.borrow().clone().into(), }); } Self { name: builder.name, header: ret, } } } #[derive(Debug, Serialize, Deserialize, PartialEq)] struct Heading { name: String, //1. Hauptstück; 3. Theil; ... desc: Option, content: HeadingContent, // 1. Theil; 1. Subtheil; ... } impl Display for Heading { fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { if let Some(desc) = &self.desc { f.write_str(&format!("{} ({desc})\n", self.name)) } else { f.write_str(&format!("{}\n", self.name)) } } } #[derive(Debug, Serialize, Deserialize, PartialEq)] enum HeadingContent { Paragraph(Vec
), Heading(Vec), } impl From for HeadingContent { fn from(value: ClassifierInstance) -> Self { if value.sections.is_empty() { let mut ret = Vec::new(); for child in value.children { ret.push(Heading { name: child.borrow().name.clone(), desc: child.borrow().desc.clone(), content: child.borrow().clone().into(), }); } Self::Heading(ret) } else { Self::Paragraph(value.sections) } } } /// Is used to generate a law struct. It's organized mainly by classifier. #[derive(Debug)] pub(crate) struct LawBuilder { /// Name of the law pub(crate) name: String, //ABGB, UrhG /// Structure of the law text pub(crate) classifiers: Vec, /// Instances pub(crate) header: Vec>>, last_instance: Option>>, /// Stores the header of the next paragraph pub(crate) next_para_header: Option, #[cfg(test)] pub(crate) history: Vec, } impl PartialEq for LawBuilder { fn eq(&self, other: &Self) -> bool { self.name == other.name && self.classifiers == other.classifiers && self.header == other.header && self.next_para_header == other.next_para_header } } impl LawBuilder { #[cfg(test)] pub(crate) fn test(name: &str) -> Self { let mut classifiers = Vec::new(); if name == "new" { classifiers.push(Classifier::new("a", Arc::new(&contains)).root()); classifiers.push(Classifier::new("b", Arc::new(&contains))); classifiers.push(Classifier::new("c", Arc::new(&contains))); classifiers.push(Classifier::new("d", Arc::new(&contains))); } else if name == "UrhG" { classifiers.push(Classifier::new("Hauptstück", Arc::new(&contains)).root()); classifiers.push(Classifier::new("Abschnitt", Arc::new(&contains))); classifiers.push(Classifier::new("Number", Arc::new(&starts_with_number))); } Self { name: name.into(), classifiers, header: Vec::new(), next_para_header: None, last_instance: None, #[cfg(test)] history: Vec::new(), } } /// Creates a new law builder. Adds classifier for known law texts. 
pub(crate) fn new(name: &str) -> Self { let mut classifiers = Vec::new(); let mut law_id = None; if name == "UrhG" { law_id = Some(10_001_848); classifiers.push(Classifier::new("Hauptstück", Arc::new(&contains)).root()); classifiers.push(Classifier::new("Abschnitt", Arc::new(&contains))); classifiers.push(Classifier::new("Number", Arc::new(&starts_with_number))); } else if name == "MSchG" { law_id = Some(10_002_180); classifiers.push(Classifier::new("Abschnitt", Arc::new(&contains)).root()); classifiers.push(Classifier::new("Number", Arc::new(&starts_with_number))); } else if name == "ABGB" { law_id = Some(10_001_622); classifiers.push(Classifier::new("Einleitung", Arc::new(&contains)).root()); classifiers.push(Classifier::new("Theil", Arc::new(&contains)).root()); classifiers.push(Classifier::new("Hauptstück", Arc::new(&contains))); classifiers.push(Classifier::new("Abschnitt", Arc::new(&contains))); classifiers.push(Classifier::new("Abtheilung", Arc::new(&contains))); classifiers.push(Classifier::new("heading", Arc::new(&contains_at_start))); classifiers.push(Classifier::new("letter", Arc::new(&starts_with_letter))); classifiers.push(Classifier::new("num", Arc::new(&starts_with_number))); classifiers.push(Classifier::new("rom", Arc::new(&starts_with_roman_number))); } else if name == "FSG" { law_id = Some(10_003_898); classifiers.push(Classifier::new("Artikel", Arc::new(&contains)).root()); classifiers.push(Classifier::new( "Abschnitt", Arc::new(&contains_without_unter), )); classifiers.push(Classifier::new("Hauptstück", Arc::new(&contains))); classifiers.push(Classifier::new("Unterabschnitt", Arc::new(&contains))); classifiers.push(Classifier::new( "uppercase letter", Arc::new(&starts_with_uppercaseletter), )); classifiers.push(Classifier::new("num", Arc::new(&starts_with_number))); } else if name == "VVG" { law_id = Some(20_004_425); classifiers.push(Classifier::new("Abschnitt", Arc::new(&contains)).root()); } else if name == "KSchG" { law_id = 
Some(10_002_462); classifiers.push(Classifier::new("Hauptstück", Arc::new(&contains)).root()); classifiers.push(Classifier::new("Abschnitt", Arc::new(&contains))); } else if name == "StGB" { law_id = Some(10_002_296); classifiers.push(Classifier::new("Teil", Arc::new(&contains)).root()); classifiers.push(Classifier::new("Abschnitt", Arc::new(&contains))); } let mut builder = Self { name: name.into(), classifiers, header: Vec::new(), next_para_header: None, last_instance: None, #[cfg(test)] history: Vec::new(), }; let paragraphs = parse(law_id.unwrap()).unwrap(); for paragraph in tqdm::tqdm(paragraphs.into_iter()) { let cont = par::parse(¶graph, &mut builder).unwrap(); if !cont { break; } } builder } fn responsible_classifier(&self, name: &str) -> Option<&Classifier> { self.classifiers.iter().find(|&c| c.used_for(name)) } fn find_parent( &self, cur: Option>>, class: &Classifier, ) -> Option>> { let mut cur = cur; while let Some(c) = cur { let (cur_name, cur_parent) = { let c_borrow = c.borrow(); (c_borrow.name.clone(), c_borrow.parent.clone()) }; let cur_responsible_class = self.responsible_classifier(&cur_name).unwrap(); if cur_responsible_class == class { return c.borrow_mut().get_parent(); } cur = cur_parent; } None } /// Sets a new header. 
///
/// The trimmed `name` becomes a new instance, placed as follows:
/// * if this is the first header, it becomes a top-level entry;
/// * if `find_parent` finds a parent (starting at the last instance), it is attached there;
/// * otherwise a root classifier starts a new top-level entry, and any other
///   classifier is attached below the last instance.
///
/// The new instance becomes the target of subsequent `new_desc` / `new_par` calls.
///
/// # Panics
///
/// Panics if no classifier accepts `name`.
pub(crate) fn new_header(&mut self, name: &str) {
    let name = name.trim();
    #[cfg(test)]
    self.history.push(format!("New_header: {name}"));
    info!("new_header={name}");

    let responsible_class = self
        .responsible_classifier(name)
        .unwrap_or_else(|| panic!("No classifier for '{name}'"));

    // Decide once where the new instance belongs; `None` means top level.
    let parent = self.last_instance.as_ref().and_then(|last| {
        self.find_parent(Some(Rc::clone(last)), responsible_class)
            .or_else(|| (!responsible_class.root).then(|| Rc::clone(last)))
    });

    let mut heading = ClassifierInstance::from(name);
    if let Some(parent) = &parent {
        heading.set_parent(Rc::clone(parent));
    }
    let node = Rc::new(RefCell::new(heading));

    match &parent {
        Some(parent) => {
            parent.borrow_mut().add_child(Rc::clone(&node));
        }
        None => {
            self.header.push(Rc::clone(&node));
        }
    }
    self.last_instance = Some(node);
}

/// Sets a new description for the last classifier.
///
/// # Panics
///
/// Panics if no header has been created yet.
pub(crate) fn new_desc(&mut self, desc: &str) {
    let desc = desc.trim();
    #[cfg(test)]
    self.history.push(format!("New desc: {desc}"));
    debug!("new_desc={desc}");

    self.last_instance
        .as_ref()
        .expect("Expect at least one classifier")
        .borrow_mut()
        .set_desc(desc);
}

/// Adds a new paragraph.
///
/// The paragraph is stored as a `Section` on the last instance. A pending paragraph
/// header (see `new_next_para_header`) is moved into the section and cleared.
///
/// # Panics
///
/// Panics if no header has been created yet.
pub(crate) fn new_par(&mut self, par: String, content: Content) {
    #[cfg(test)]
    self.history.push(format!(
        "New_par: {par};{}",
        serde_json::to_string(&content).unwrap()
    ));
    debug!("new_par=par:{par};content:{content:#?}");

    let par_header = self.next_para_header.take();

    self.last_instance
        .as_ref()
        .expect("Expect at least one classifier")
        .borrow_mut()
        .add_section(Section {
            symb: par,
            par_header,
            content,
        });
}

/// Next paragraph has a header, store its name.
pub(crate) fn new_next_para_header(&mut self, header: &str) { #[cfg(test)] self.history.push(format!("New_new_para_header: {header}")); if let Some(next_para_header) = &self.next_para_header { self.new_header(&next_para_header.clone()); // promote to bigger header :-) } debug!("new_next_para_header={header}"); self.next_para_header = Some(header.trim().into()); } } #[derive(Clone, PartialEq, Serialize, Deserialize)] pub(crate) struct Section { pub(crate) symb: String, // §"1", §"2", ... pub(crate) par_header: Option, pub(crate) content: Content, } impl fmt::Debug for Section { fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { let par_header = self.par_header.as_deref().unwrap_or(""); write!(f, "{} ({})", self.symb, par_header) } } impl fmt::Display for Section { fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { if let Some(header) = &self.par_header { f.write_str(&format!("{} ({})\n{}", self.symb, header, self.content)) } else { f.write_str(&format!("{}\n{}", self.symb, self.content)) } } } #[derive(Clone, PartialEq)] pub(crate) struct ClassifierInstance { pub(crate) name: String, //e.g. 1 Theilstück pub(crate) desc: Option, pub(crate) sections: Vec
, pub(crate) children: Vec>>, pub(crate) parent: Option>>, } impl ClassifierInstance { fn new(name: &str) -> Self { Self { name: name.into(), desc: None, parent: None, sections: Vec::new(), children: Vec::new(), } } fn set_parent(&mut self, parent: Rc>) { self.parent = Some(parent); } fn get_parent(&self) -> Option>> { self.parent.clone() } fn add_child(&mut self, child: Rc>) { self.children.push(child); } fn set_desc(&mut self, desc: &str) { self.desc = Some(desc.into()); } fn add_section(&mut self, section: Section) { self.sections.push(section); } } impl std::fmt::Debug for ClassifierInstance { fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { f.debug_struct("Classifier") .field("name", &self.name) .field("desc", &self.desc) .field("sections", &self.sections) .field("children", &self.children) .finish_non_exhaustive() } } impl From<&str> for ClassifierInstance { fn from(value: &str) -> Self { Self::new(value) } } type ClassifierApplicable = Arc bool>; #[derive(Clone)] pub(crate) struct Classifier { pub(crate) name: String, // Hauptstück, Theil, Abschnitt, ol pub(crate) used_for_fn: ClassifierApplicable, pub(crate) instances: Vec, pub(crate) child: Vec>>, pub(crate) root: bool, } impl PartialEq for Classifier { fn eq(&self, other: &Self) -> bool { self.name == other.name } } impl Classifier { fn new(name: &str, used_for_fn: ClassifierApplicable) -> Self { Self { name: name.into(), used_for_fn, child: Vec::new(), instances: Vec::new(), root: false, } } fn root(self) -> Self { Self { root: true, ..self } } fn used_for(&self, name: &str) -> bool { (self.used_for_fn)(&self.name, name) } } impl std::fmt::Debug for Classifier { fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { f.debug_struct("Classifier") .field("name", &self.name) .field("instances", &self.instances) .field("child", &self.child) .finish_non_exhaustive() } } #[derive(Clone, Debug, PartialEq, Serialize, Deserialize)] pub(crate) enum Content { Text(String), //This is 
my direct law text Item(Vec), //(1) This is general law. (2) This is more specific law List(Vec), } impl Display for Content { fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { match self { Self::Text(a) => f.write_str(&format!("{a}\n")), Self::Item(a) | Self::List(a) => { let mut ret = String::new(); for aa in a { ret.push_str(&format!("{aa}")); } f.write_str(&ret) } } } } #[cfg(test)] mod tests { use pretty_assertions::assert_eq; use std::{ fs::File, io::{self, BufRead, Read}, path::Path, }; use super::*; fn read_lines

(filename: P) -> io::Result> where P: AsRef, { let file = File::open(filename)?; let buf_reader = io::BufReader::new(file); buf_reader.lines().collect() } #[ignore] #[test] fn test_with_live_data() { let law: Law = LawBuilder::new("UrhG").into(); let path = Path::new("./data/urhg/builder.result"); let mut file = File::open(path).unwrap(); let mut json = String::new(); file.read_to_string(&mut json).unwrap(); let expected: Law = serde_json::from_str(&json).unwrap(); assert_eq!(law, expected); } #[ignore] #[test] fn test_stgb_with_live_data() { let law: Law = LawBuilder::new("StGB").into(); let path = Path::new("./data/stgb/builder.result"); let mut file = File::open(path).unwrap(); let mut json = String::new(); file.read_to_string(&mut json).unwrap(); let expected: Law = serde_json::from_str(&json).unwrap(); //println!("{}", serde_json::to_string(&law).unwrap()); assert_eq!(law, expected); } #[test] fn test_builder_full_urhg() { let mut builder = LawBuilder::test("UrhG"); let path = Path::new("./data/urhg/par"); let input = read_lines(path.join("../par.result")).unwrap(); for i in input { let (command, content) = i.split_once(":").unwrap(); match command { "New_header" => builder.new_header(content), "New desc" => builder.new_desc(content), "New_new_para_header" => builder.new_next_para_header(content), "New_par" => { let (par, real_content) = i.split_once(";").unwrap(); let (_, real_par) = par.split_once(":").unwrap(); let real_content: Content = serde_json::from_str(real_content).unwrap(); builder.new_par(real_par.trim().into(), real_content); } _ => { panic!("Don't know command '{command}'"); } } } let actual: Law = builder.into(); //println!("{}", serde_json::to_string(&law).unwrap()); let mut file = File::open(path.join("../builder.result")).unwrap(); let mut json = String::new(); file.read_to_string(&mut json).unwrap(); let expected = serde_json::from_str(&json).unwrap(); assert_eq!(actual, expected); } }