not needing to specify header levels

This commit is contained in:
philipp 2023-11-06 23:45:29 +01:00
parent 780ea74c2c
commit ec91de2eae
3 changed files with 265 additions and 258 deletions

View File

@ -1,7 +1,9 @@
use log::debug; use log::{debug, error, info};
use serde::{Deserialize, Serialize}; use serde::{Deserialize, Serialize};
use std::{ use std::{
cell::RefCell,
fmt::{self, Display}, fmt::{self, Display},
rc::Rc,
sync::Arc, sync::Arc,
}; };
@ -13,73 +15,6 @@ pub(crate) struct Law {
header: Vec<Heading>, header: Vec<Heading>,
} }
/// One heading in the outline of a law.
///
/// A heading carries a label, an optional description and the content below it
/// (either nested headings or sections, see `HeadingContent`).
#[derive(Debug, Serialize, Deserialize, PartialEq)]
struct Heading {
/// Label of the heading, e.g. "1. Hauptstück" or "3. Theil".
name: String, //1. Hauptstück; 3. Theil; ...
/// Optional description; the `Display` impl prints it in parentheses after the name.
desc: Option<String>,
/// Whatever sits directly below this heading, e.g. "1. Theil" / "1. Subtheil" sub-headings.
content: HeadingContent, // 1. Theil; 1. Subtheil; ...
}
impl Display for Heading {
    /// Writes the heading name, followed by ` (desc)` when a description is set,
    /// and terminates the line with a newline.
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        // Write straight into the formatter; building a temporary `String` with
        // `format!` first only adds an allocation.
        match &self.desc {
            Some(desc) => writeln!(f, "{} ({desc})", self.name),
            None => writeln!(f, "{}", self.name),
        }
    }
}
/// Content found directly below a `Heading`.
#[derive(Debug, Serialize, Deserialize, PartialEq)]
enum HeadingContent {
/// The heading directly contains sections.
Paragraph(Vec<Section>),
/// The heading contains further, nested headings.
Heading(Vec<Heading>),
}
/// Recursively turns `cur` into a `Heading`.
///
/// Instances for which `builder.get_by_parent` reports children become nested
/// headings; every other instance carries a copy of its own sections.
fn add_from_node(cur: &ClassifierInstance, builder: &LawBuilder) -> Heading {
    let children = builder.get_by_parent(&cur.name);
    let content = if children.is_empty() {
        HeadingContent::Paragraph(cur.sections.clone())
    } else {
        HeadingContent::Heading(
            children
                .iter()
                .map(|child| add_from_node(child, builder))
                .collect(),
        )
    };
    Heading {
        name: cur.name.clone(),
        desc: cur.desc.clone(),
        content,
    }
}
impl From<LawBuilder> for Law {
fn from(builder: LawBuilder) -> Self {
let cur: Vec<Classifier> = builder
.classifiers
.clone()
.into_iter()
.filter(|c| c.parent_index.is_none())
.collect();
let mut ret = Vec::new();
for class in cur {
for child in class.instances {
ret.push(add_from_node(&child, &builder));
}
}
Self {
name: builder.name,
header: ret,
}
}
}
impl Law { impl Law {
pub(crate) fn to_md(&self) { pub(crate) fn to_md(&self) {
println!("# {}", self.name); println!("# {}", self.name);
@ -106,6 +41,88 @@ impl Law {
} }
} }
impl From<LawBuilder> for Law {
    /// Converts every top-level instance collected in `builder.header` into a
    /// `Heading` and keeps the builder's name.
    fn from(builder: LawBuilder) -> Self {
        let header = builder
            .header
            .into_iter()
            .map(|instance| {
                let node = instance.borrow();
                Heading {
                    name: node.name.clone(),
                    desc: node.desc.clone(),
                    // The conversion consumes its argument, so hand it a copy.
                    content: node.clone().into(),
                }
            })
            .collect();
        Self {
            name: builder.name,
            header,
        }
    }
}
/// One heading in the outline of a law.
///
/// A heading carries a label, an optional description and the content below it
/// (either nested headings or sections, see `HeadingContent`).
#[derive(Debug, Serialize, Deserialize, PartialEq)]
struct Heading {
/// Label of the heading, e.g. "1. Hauptstück" or "3. Theil".
name: String, //1. Hauptstück; 3. Theil; ...
/// Optional description; the `Display` impl prints it in parentheses after the name.
desc: Option<String>,
/// Whatever sits directly below this heading, e.g. "1. Theil" / "1. Subtheil" sub-headings.
content: HeadingContent, // 1. Theil; 1. Subtheil; ...
}
impl Display for Heading {
    /// Writes the heading name, followed by ` (desc)` when a description is set,
    /// and terminates the line with a newline.
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        // Write straight into the formatter; building a temporary `String` with
        // `format!` first only adds an allocation.
        match &self.desc {
            Some(desc) => writeln!(f, "{} ({desc})", self.name),
            None => writeln!(f, "{}", self.name),
        }
    }
}
/// Content found directly below a `Heading`.
#[derive(Debug, Serialize, Deserialize, PartialEq)]
enum HeadingContent {
/// The heading directly contains sections.
Paragraph(Vec<Section>),
/// The heading contains further, nested headings.
Heading(Vec<Heading>),
}
impl From<ClassifierInstance> for HeadingContent {
    /// Converts an instance into the content of its heading.
    ///
    /// An instance that owns at least one section becomes `Paragraph` (its
    /// children are not looked at in that case); otherwise every child is
    /// converted recursively and the result is `Heading`.
    fn from(value: ClassifierInstance) -> Self {
        if !value.sections.is_empty() {
            return Self::Paragraph(value.sections);
        }

        let nested = value
            .children
            .into_iter()
            .map(|child| {
                let node = child.borrow();
                Heading {
                    name: node.name.clone(),
                    desc: node.desc.clone(),
                    // The conversion consumes its argument, so hand it a copy.
                    content: node.clone().into(),
                }
            })
            .collect();
        Self::Heading(nested)
    }
}
/// Recursively turns `cur` into a `Heading`.
///
/// Instances for which `builder.get_by_parent` reports children become nested
/// headings; every other instance carries a copy of its own sections.
fn add_from_node(cur: &ClassifierInstance, builder: &LawBuilder) -> Heading {
    let children = builder.get_by_parent(&cur.name);
    let content = if children.is_empty() {
        HeadingContent::Paragraph(cur.sections.clone())
    } else {
        HeadingContent::Heading(
            children
                .iter()
                .map(|child| add_from_node(child, builder))
                .collect(),
        )
    };
    Heading {
        name: cur.name.clone(),
        desc: cur.desc.clone(),
        content,
    }
}
pub(crate) fn contains(classifier_name: &str, instance_name: &str) -> bool { pub(crate) fn contains(classifier_name: &str, instance_name: &str) -> bool {
instance_name instance_name
.to_lowercase() .to_lowercase()
@ -127,7 +144,7 @@ fn starts_with_uppercaseletter(_classifier_name: &str, instance_name: &str) -> b
} }
/// Is used to generate a law struct. It's organized mainly by classifier. /// Is used to generate a law struct. It's organized mainly by classifier.
#[derive(Debug, PartialEq)] #[derive(Debug)]
pub(crate) struct LawBuilder { pub(crate) struct LawBuilder {
/// Name of the law /// Name of the law
pub(crate) name: String, //ABGB, UrhG pub(crate) name: String, //ABGB, UrhG
@ -135,7 +152,10 @@ pub(crate) struct LawBuilder {
/// Structure of the law text /// Structure of the law text
pub(crate) classifiers: Vec<Classifier>, pub(crate) classifiers: Vec<Classifier>,
pub(crate) last_header_index: Option<usize>, /// Instances
pub(crate) header: Vec<Rc<RefCell<ClassifierInstance>>>,
last_instance: Option<Rc<RefCell<ClassifierInstance>>>,
/// Stores the header of the next paragraph /// Stores the header of the next paragraph
pub(crate) next_para_header: Option<String>, pub(crate) next_para_header: Option<String>,
@ -144,45 +164,36 @@ pub(crate) struct LawBuilder {
pub(crate) history: Vec<String>, pub(crate) history: Vec<String>,
} }
impl LawBuilder { impl PartialEq for LawBuilder {
#[cfg(test)] fn eq(&self, other: &Self) -> bool {
pub(crate) fn test(name: &str) -> Self { self.name == other.name
let mut last_header_index = None; && self.classifiers == other.classifiers
let mut classifiers = Vec::new(); && self.header == other.header
if name == "UrhG" { && self.next_para_header == other.next_para_header
let hauptstueck = Classifier::new("Hauptstück", Arc::new(&contains));
classifiers.push(hauptstueck.clone());
let mut abschnitt = Classifier::new("Abschnitt", Arc::new(&contains));
abschnitt.set_parent(0);
classifiers.push(abschnitt);
let mut numbered_header =
Classifier::new("Numbered Header", Arc::new(&starts_with_number));
numbered_header.set_parent(9999);
classifiers.push(numbered_header);
} else if name == "test" {
let h1 = Classifier::new("h1", Arc::new(&contains));
classifiers.push(h1);
let mut h2 = Classifier::new("h2", Arc::new(&contains));
h2.set_parent(0);
classifiers.push(h2);
let mut h3 = Classifier::new("h3", Arc::new(&contains));
h3.set_parent(1);
classifiers.push(h3);
} else if name == "no-headers" {
let mut h1 = Classifier::new("", Arc::new(&contains));
h1.add_instance(ClassifierInstance::new("", 0));
last_header_index = Some(0);
classifiers.push(h1);
} }
}
impl LawBuilder {
pub(crate) fn test(name: &str) -> Self {
let mut classifiers = Vec::new();
if name == "new" {
classifiers.push(Classifier::new("a", Arc::new(&contains)).root());
classifiers.push(Classifier::new("b", Arc::new(&contains)));
classifiers.push(Classifier::new("c", Arc::new(&contains)));
classifiers.push(Classifier::new("d", Arc::new(&contains)));
} else if name == "UrhG" {
classifiers.push(Classifier::new("Hauptstück", Arc::new(&contains)).root());
classifiers.push(Classifier::new("Abschnitt", Arc::new(&contains)));
classifiers.push(Classifier::new("Number", Arc::new(&starts_with_number)));
}
Self { Self {
name: name.into(), name: name.into(),
classifiers, classifiers,
header: Vec::new(),
next_para_header: None, next_para_header: None,
last_header_index, last_instance: None,
#[cfg(test)] #[cfg(test)]
history: Vec::new(), history: Vec::new(),
} }
@ -190,73 +201,23 @@ impl LawBuilder {
/// Creates a new law builder. Adds classifier for known law texts. /// Creates a new law builder. Adds classifier for known law texts.
pub(crate) fn new(name: &str) -> Law { pub(crate) fn new(name: &str) -> Law {
//TODO: return Law (not LawBuilder)
let mut classifiers = Vec::new(); let mut classifiers = Vec::new();
let mut law_id = None; let mut law_id = None;
if name == "UrhG" { if name == "UrhG" {
law_id = Some(10001848); law_id = Some(10001848);
let hauptstueck = Classifier::new("Hauptstück", Arc::new(&contains)); classifiers.push(Classifier::new("Hauptstück", Arc::new(&contains)).root());
classifiers.push(hauptstueck.clone()); classifiers.push(Classifier::new("Abschnitt", Arc::new(&contains)));
classifiers.push(Classifier::new("Number", Arc::new(&starts_with_number)));
let mut abschnitt = Classifier::new("Abschnitt", Arc::new(&contains));
abschnitt.set_parent(0);
classifiers.push(abschnitt);
let mut numbered_header =
Classifier::new("Numbered Header", Arc::new(&starts_with_number));
numbered_header.set_parent(9999);
classifiers.push(numbered_header);
} else if name == "MSchG" {
law_id = Some(10002180);
let abschnitt = Classifier::new("Abschnitt", Arc::new(&contains));
classifiers.push(abschnitt.clone());
let mut numbered_header =
Classifier::new("Numbered Header", Arc::new(&starts_with_number));
numbered_header.set_parent(0);
classifiers.push(numbered_header);
} else if name == "FSG" {
law_id = Some(10003898);
let artikel = Classifier::new("Artikel", Arc::new(&contains));
classifiers.push(artikel);
let mut abschnitt = Classifier::new(" Abschnitt", Arc::new(&contains));
abschnitt.set_parent(0);
classifiers.push(abschnitt);
let mut unterabschnitt = Classifier::new("Unterabschnitt", Arc::new(&contains));
unterabschnitt.set_parent(1);
classifiers.push(unterabschnitt);
let mut hauptstueck = Classifier::new("Hauptstück", Arc::new(&contains));
hauptstueck.set_parent(9999);
classifiers.push(hauptstueck);
let mut numbered_header =
Classifier::new("Numbered Header", Arc::new(&starts_with_letter));
numbered_header.set_parent(9999);
classifiers.push(numbered_header);
let mut uppercase_headers =
Classifier::new("Numbered Header", Arc::new(&starts_with_uppercaseletter));
uppercase_headers.set_parent(9999);
classifiers.push(uppercase_headers);
let mut number_header =
Classifier::new("Numbered Header", Arc::new(&starts_with_number));
number_header.set_parent(9999);
classifiers.push(number_header);
} }
let mut builder = Self { let mut builder = Self {
name: name.into(), name: name.into(),
classifiers, classifiers,
header: Vec::new(),
next_para_header: None, next_para_header: None,
last_header_index: None, last_instance: None,
#[cfg(test)] #[cfg(test)]
history: Vec::new(), history: Vec::new(),
}; };
@ -273,71 +234,95 @@ impl LawBuilder {
builder.into() builder.into()
} }
/// Returns the first classifier, in the order they are stored in
/// `self.classifiers`, whose `used_for` predicate accepts `name`, or `None`
/// when no classifier accepts it.
fn responsible_classifier(&self, name: &str) -> Option<&Classifier> {
    self.classifiers.iter().find(|class| class.used_for(name))
}
/// Walks from `cur` upwards along the `parent` links and looks for the first
/// instance whose responsible classifier equals `class`.
///
/// Returns the parent of that instance (which may be `None` when the instance
/// has no parent), or `None` when the chain ends without a match.
///
/// # Panics
///
/// Panics when the name of an instance on the chain is not accepted by any
/// classifier.
fn find_parent(
    &self,
    cur: Option<Rc<RefCell<ClassifierInstance>>>,
    class: &Classifier,
) -> Option<Rc<RefCell<ClassifierInstance>>> {
    let mut cur = cur;
    while let Some(node) = cur {
        // Read everything needed under one shared borrow and release it before
        // moving on; a mutable borrow is not required just to read the parent.
        let (same_class, parent) = {
            let instance = node.borrow();
            let responsible = self
                .responsible_classifier(&instance.name)
                .expect("instance name is not accepted by any classifier");
            (responsible == class, instance.parent.clone())
        };
        if same_class {
            return parent;
        }
        cur = parent;
    }
    None
}
/// Sets a new header. /// Sets a new header.
pub(crate) fn new_header(&mut self, name: &str) { pub(crate) fn new_header(&mut self, name: &str) {
let name = name.trim();
#[cfg(test)] #[cfg(test)]
self.history.push(format!("New_header: {name}")); self.history.push(format!("New_header: {name}"));
debug!("new_header={name}");
let classifier_index = self
.classifiers
.iter()
.position(|class| class.used_for(name));
match classifier_index { info!("new_header={name}");
Some(index) => {
let mut class = ClassifierInstance::new(name.trim(), index);
if self.classifiers[index] let responsible_class = self
.parent_index .responsible_classifier(name)
.is_some_and(|x| x == 9999) .expect(&format!("No classifier for '{name}'"));
{
if self.classifiers[self.last_header_index.unwrap()] let mut heading: ClassifierInstance = name.into();
.parent_index
.is_some_and(|x| x == 9999) if let Some(last_instance) = &self.last_instance {
{ let cur = Some(last_instance.clone());
class.add_parent(
self.classifiers[self.classifiers[self.last_header_index.unwrap()] let parent = &self.find_parent(cur, responsible_class);
.instances println!("parnet={parent:#?}");
.last() match parent {
.unwrap() None => {
.parent if responsible_class.root {
.clone() let c = Rc::new(RefCell::new(heading));
.unwrap() self.header.push(c.clone());
.idx] self.last_instance = Some(c.clone());
.instances
.last()
.unwrap(),
)
} else { } else {
class.add_parent( heading.set_parent(last_instance.clone());
self.classifiers[self.last_header_index.unwrap()] let c = Rc::new(RefCell::new(heading));
.instances last_instance.borrow_mut().add_child(c.clone());
.last() self.last_instance = Some(c.clone());
.unwrap(),
);
} }
} else if let Some(parent) = self.classifiers[index].parent_index {
class.add_parent(self.classifiers[parent].instances.last().unwrap());
} }
Some(parent) => {
self.classifiers[index].add_instance(class); heading.set_parent(parent.clone());
self.last_header_index = Some(index); let c = Rc::new(RefCell::new(heading));
parent.borrow_mut().add_child(c.clone());
self.last_instance = Some(c.clone());
} }
None => panic!("No classifier for {name}"), }
} else {
let c = Rc::new(RefCell::new(heading));
self.header.push(c.clone());
self.last_instance = Some(c.clone());
} }
} }
/// Sets a new description for the last classifier. /// Sets a new description for the last classifier.
pub(crate) fn new_desc(&mut self, desc: &str) { pub(crate) fn new_desc(&mut self, desc: &str) {
let desc = desc.trim();
#[cfg(test)] #[cfg(test)]
self.history.push(format!("New desc: {desc}")); self.history.push(format!("New desc: {desc}"));
debug!("new_desc={desc}"); debug!("new_desc={desc}");
if let Some(index) = self.last_header_index { self.last_instance
self.classifiers[index].set_desc(desc); .clone()
} else { .unwrap()
panic!("Not possible"); .borrow_mut()
} .set_desc(desc);
} }
/// Adds a new paragraph. /// Adds a new paragraph.
@ -349,17 +334,19 @@ impl LawBuilder {
)); ));
debug!("new_par=par:{par};content:{content:#?}"); debug!("new_par=par:{par};content:{content:#?}");
if let Some(index) = self.last_header_index {
let section = Section { let par_header = self.next_para_header.clone();
symb: par,
content,
par_header: self.next_para_header.clone(),
};
self.next_para_header = None; self.next_para_header = None;
self.classifiers[index].add_section(section);
} else { self.last_instance
panic!("Expected at least one classifier"); .clone()
} .expect("Expect at least one classifier")
.borrow_mut()
.add_section(Section {
symb: par,
par_header,
content,
})
} }
/// Next paragraph has a header, store its name. /// Next paragraph has a header, store its name.
@ -373,15 +360,15 @@ impl LawBuilder {
fn get_by_parent(&self, name: &String) -> Vec<ClassifierInstance> { fn get_by_parent(&self, name: &String) -> Vec<ClassifierInstance> {
let mut ret = Vec::new(); let mut ret = Vec::new();
for class in &self.classifiers { // for class in &self.classifiers {
for inst in &class.instances { // for inst in &class.instances {
if let Some(parent) = &inst.parent { // if let Some(parent) = &inst.parent {
if &parent.name == name { // if &parent.name == name {
ret.push(inst.clone()); // ret.push(inst.clone());
} // }
} // }
} // }
} // }
ret ret
} }
@ -411,26 +398,38 @@ impl fmt::Display for Section {
} }
} }
#[derive(Clone, Debug, PartialEq)] #[derive(Clone, PartialEq)]
pub(crate) struct ClassifierInstance { pub(crate) struct ClassifierInstance {
pub(crate) name: String, pub(crate) name: String, //e.g. 1 Theilstück
pub(crate) desc: Option<String>, pub(crate) desc: Option<String>,
pub(crate) sections: Vec<Section>, pub(crate) sections: Vec<Section>,
pub(crate) parent: Option<Box<ClassifierInstance>>, pub(crate) children: Vec<Rc<RefCell<ClassifierInstance>>>,
pub(crate) idx: usize, pub(crate) parent: Option<Rc<RefCell<ClassifierInstance>>>,
} }
impl ClassifierInstance { impl ClassifierInstance {
fn new(name: &str, idx: usize) -> Self { fn new(name: &str) -> Self {
Self { Self {
name: name.into(), name: name.into(),
desc: None, desc: None,
sections: Vec::new(),
parent: None, parent: None,
idx, sections: Vec::new(),
children: Vec::new(),
} }
} }
/// Stores `parent` as the parent of this instance, replacing any previous one.
// NOTE(review): `children` holds `Rc`s as well, so a parent and its child keep
// each other alive once both links are set — consider `Weak` for this direction.
fn set_parent(&mut self, parent: Rc<RefCell<ClassifierInstance>>) {
self.parent = Some(parent);
}
/// Returns another handle to the parent instance, or `None` when no parent is set.
/// Only the `Rc` is cloned; the parent instance itself is not copied.
fn get_parent(&self) -> Option<Rc<RefCell<ClassifierInstance>>> {
self.parent.clone()
}
/// Appends `child` to the end of this instance's list of children.
/// The child's own `parent` field is not touched here.
fn add_child(&mut self, child: Rc<RefCell<ClassifierInstance>>) {
self.children.push(child);
}
fn set_desc(&mut self, desc: &str) { fn set_desc(&mut self, desc: &str) {
self.desc = Some(desc.into()); self.desc = Some(desc.into());
} }
@ -438,35 +437,37 @@ impl ClassifierInstance {
fn add_section(&mut self, section: Section) { fn add_section(&mut self, section: Section) {
self.sections.push(section); self.sections.push(section);
} }
}
fn add_parent(&mut self, parent: &ClassifierInstance) { impl std::fmt::Debug for ClassifierInstance {
self.parent = Some(Box::new(parent.clone())); fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
f.debug_struct("Classifier")
.field("name", &self.name)
.field("desc", &self.desc)
.field("sections", &self.sections)
.field("children", &self.children)
.finish()
}
}
impl From<&str> for ClassifierInstance {
fn from(value: &str) -> Self {
Self::new(value)
} }
} }
#[derive(Clone)] #[derive(Clone)]
pub(crate) struct Classifier { pub(crate) struct Classifier {
pub(crate) name: String, // Hauptstück, Theil, Abschnitt, ol pub(crate) name: String, // Hauptstück, Theil, Abschnitt, ol
pub(crate) parent_index: Option<usize>,
pub(crate) instances: Vec<ClassifierInstance>,
pub(crate) used_for_fn: Arc<dyn Fn(&str, &str) -> bool>, pub(crate) used_for_fn: Arc<dyn Fn(&str, &str) -> bool>,
pub(crate) instances: Vec<ClassifierInstance>,
pub(crate) child: Vec<Rc<RefCell<Classifier>>>,
pub(crate) root: bool,
} }
impl PartialEq for Classifier { impl PartialEq for Classifier {
fn eq(&self, other: &Self) -> bool { fn eq(&self, other: &Self) -> bool {
self.name == other.name self.name == other.name
&& self.parent_index == other.parent_index
&& self.instances == other.instances
}
}
impl std::fmt::Debug for Classifier {
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
f.debug_struct("Classifier")
.field("name", &self.name)
.field("parent_index", &self.parent_index)
.field("instances", &self.instances)
.finish()
} }
} }
@ -474,30 +475,37 @@ impl Classifier {
fn new(name: &str, used_for_fn: Arc<dyn Fn(&str, &str) -> bool>) -> Self { fn new(name: &str, used_for_fn: Arc<dyn Fn(&str, &str) -> bool>) -> Self {
Self { Self {
name: name.into(), name: name.into(),
parent_index: None,
instances: Vec::new(),
used_for_fn, used_for_fn,
child: Vec::new(),
instances: Vec::new(),
root: false,
} }
} }
fn set_parent(&mut self, parent: usize) { fn root(self) -> Self {
self.parent_index = Some(parent); Self { root: true, ..self }
}
fn add_instance(&mut self, name: ClassifierInstance) {
self.instances.push(name);
}
fn set_desc(&mut self, desc: &str) {
self.instances.last_mut().unwrap().set_desc(desc.trim());
} }
fn used_for(&self, name: &str) -> bool { fn used_for(&self, name: &str) -> bool {
(self.used_for_fn)(&self.name, name) (self.used_for_fn)(&self.name, name)
} }
fn add_section(&mut self, section: Section) { pub(crate) fn add_child(&mut self, child: Rc<RefCell<Classifier>>) {
self.instances.last_mut().unwrap().add_section(section); self.child.push(child);
}
/// Appends `instance` to the end of this classifier's list of instances.
pub(crate) fn add_instance(&mut self, instance: ClassifierInstance) {
self.instances.push(instance);
}
}
impl std::fmt::Debug for Classifier {
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
f.debug_struct("Classifier")
.field("name", &self.name)
.field("instances", &self.instances)
.field("child", &self.child)
.finish()
} }
} }

View File

@ -42,7 +42,7 @@ impl From<roxmltree::Error> for Error {
fn main() { fn main() {
env_logger::init(); env_logger::init();
let law = LawBuilder::new("FSG"); let law = LawBuilder::new("UrhG");
law.to_md(); law.to_md();
} }

View File

@ -119,7 +119,6 @@ mod tests {
entries.sort_by_key(|entry| entry.file_name()); entries.sort_by_key(|entry| entry.file_name());
for entry in entries { for entry in entries {
println!("{entry:?}");
let mut file = File::open(path.join(entry.file_name())).unwrap(); let mut file = File::open(path.join(entry.file_name())).unwrap();
let mut json = String::new(); let mut json = String::new();
file.read_to_string(&mut json).unwrap(); file.read_to_string(&mut json).unwrap();