not needing to specify header levels

This commit is contained in:
philipp 2023-11-06 23:45:29 +01:00
parent 780ea74c2c
commit ec91de2eae
3 changed files with 265 additions and 258 deletions

View File

@ -1,7 +1,9 @@
use log::debug;
use log::{debug, error, info};
use serde::{Deserialize, Serialize};
use std::{
cell::RefCell,
fmt::{self, Display},
rc::Rc,
sync::Arc,
};
@ -13,73 +15,6 @@ pub(crate) struct Law {
header: Vec<Heading>,
}
/// One heading of a law together with whatever is nested beneath it.
#[derive(Debug, Serialize, Deserialize, PartialEq)]
struct Heading {
/// Label of the heading as it appears in the law text.
name: String, //1. Hauptstück; 3. Theil; ...
/// Optional description; `Display` prints it in parentheses after `name`.
desc: Option<String>,
/// Either the sections directly below this heading or further sub-headings.
content: HeadingContent, // 1. Theil; 1. Subtheil; ...
}
/// Renders the heading on a single line terminated by a newline.
///
/// The output is `"<name> (<desc>)\n"` when a description is present and
/// `"<name>\n"` otherwise. The nested `content` is not printed.
impl Display for Heading {
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        // Write straight into the formatter; building a temporary `String`
        // with `format!` first would allocate for no benefit.
        match &self.desc {
            Some(desc) => writeln!(f, "{} ({desc})", self.name),
            None => writeln!(f, "{}", self.name),
        }
    }
}
/// What a `Heading` holds: either sections or nested headings.
#[derive(Debug, Serialize, Deserialize, PartialEq)]
enum HeadingContent {
/// The sections that sit directly under the heading.
Paragraph(Vec<Section>),
/// The sub-headings nested under the heading.
Heading(Vec<Heading>),
}
/// Recursively turns a classifier instance into a `Heading`.
///
/// The instances that `builder.get_by_parent` reports for `cur.name` become
/// nested headings. When there are none, the heading carries a copy of
/// `cur`'s own sections instead.
fn add_from_node(cur: &ClassifierInstance, builder: &LawBuilder) -> Heading {
    let nested = builder.get_by_parent(&cur.name);
    let content = if nested.is_empty() {
        HeadingContent::Paragraph(cur.sections.clone())
    } else {
        HeadingContent::Heading(
            nested
                .iter()
                .map(|child| add_from_node(child, builder))
                .collect(),
        )
    };
    Heading {
        name: cur.name.clone(),
        desc: cur.desc.clone(),
        content,
    }
}
impl From<LawBuilder> for Law {
fn from(builder: LawBuilder) -> Self {
let cur: Vec<Classifier> = builder
.classifiers
.clone()
.into_iter()
.filter(|c| c.parent_index.is_none())
.collect();
let mut ret = Vec::new();
for class in cur {
for child in class.instances {
ret.push(add_from_node(&child, &builder));
}
}
Self {
name: builder.name,
header: ret,
}
}
}
impl Law {
pub(crate) fn to_md(&self) {
println!("# {}", self.name);
@ -106,6 +41,88 @@ impl Law {
}
}
impl From<LawBuilder> for Law {
fn from(builder: LawBuilder) -> Self {
let mut ret = Vec::new();
for header in builder.header {
ret.push(Heading {
name: header.borrow().name.clone(),
desc: header.borrow().desc.clone(),
content: header.borrow().clone().into(),
})
}
Self {
name: builder.name,
header: ret,
}
}
}
/// One heading of a law together with whatever is nested beneath it.
#[derive(Debug, Serialize, Deserialize, PartialEq)]
struct Heading {
/// Label of the heading as it appears in the law text.
name: String, //1. Hauptstück; 3. Theil; ...
/// Optional description; `Display` prints it in parentheses after `name`.
desc: Option<String>,
/// Either the sections directly below this heading or further sub-headings.
content: HeadingContent, // 1. Theil; 1. Subtheil; ...
}
/// Renders the heading on a single line terminated by a newline.
///
/// The output is `"<name> (<desc>)\n"` when a description is present and
/// `"<name>\n"` otherwise. The nested `content` is not printed.
impl Display for Heading {
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        // Write straight into the formatter; building a temporary `String`
        // with `format!` first would allocate for no benefit.
        match &self.desc {
            Some(desc) => writeln!(f, "{} ({desc})", self.name),
            None => writeln!(f, "{}", self.name),
        }
    }
}
/// What a `Heading` holds: either sections or nested headings.
#[derive(Debug, Serialize, Deserialize, PartialEq)]
enum HeadingContent {
/// The sections that sit directly under the heading.
Paragraph(Vec<Section>),
/// The sub-headings nested under the heading.
Heading(Vec<Heading>),
}
impl From<ClassifierInstance> for HeadingContent {
fn from(value: ClassifierInstance) -> Self {
if value.sections.is_empty() {
let mut ret = Vec::new();
for child in value.children {
ret.push(Heading {
name: child.borrow().name.clone(),
desc: child.borrow().desc.clone(),
content: child.borrow().clone().into(),
})
}
Self::Heading(ret)
} else {
Self::Paragraph(value.sections)
}
}
}
/// Recursively turns a classifier instance into a `Heading`.
///
/// The instances that `builder.get_by_parent` reports for `cur.name` become
/// nested headings. When there are none, the heading carries a copy of
/// `cur`'s own sections instead.
fn add_from_node(cur: &ClassifierInstance, builder: &LawBuilder) -> Heading {
    let nested = builder.get_by_parent(&cur.name);
    let content = if nested.is_empty() {
        HeadingContent::Paragraph(cur.sections.clone())
    } else {
        HeadingContent::Heading(
            nested
                .iter()
                .map(|child| add_from_node(child, builder))
                .collect(),
        )
    };
    Heading {
        name: cur.name.clone(),
        desc: cur.desc.clone(),
        content,
    }
}
pub(crate) fn contains(classifier_name: &str, instance_name: &str) -> bool {
instance_name
.to_lowercase()
@ -127,7 +144,7 @@ fn starts_with_uppercaseletter(_classifier_name: &str, instance_name: &str) -> b
}
/// Is used to generate a law struct. It's organized mainly by classifier.
#[derive(Debug, PartialEq)]
#[derive(Debug)]
pub(crate) struct LawBuilder {
/// Name of the law
pub(crate) name: String, //ABGB, UrhG
@ -135,7 +152,10 @@ pub(crate) struct LawBuilder {
/// Structure of the law text
pub(crate) classifiers: Vec<Classifier>,
pub(crate) last_header_index: Option<usize>,
/// Instances
pub(crate) header: Vec<Rc<RefCell<ClassifierInstance>>>,
last_instance: Option<Rc<RefCell<ClassifierInstance>>>,
/// Stores the header of the next paragraph
pub(crate) next_para_header: Option<String>,
@ -144,45 +164,36 @@ pub(crate) struct LawBuilder {
pub(crate) history: Vec<String>,
}
impl LawBuilder {
#[cfg(test)]
pub(crate) fn test(name: &str) -> Self {
let mut last_header_index = None;
let mut classifiers = Vec::new();
if name == "UrhG" {
let hauptstueck = Classifier::new("Hauptstück", Arc::new(&contains));
classifiers.push(hauptstueck.clone());
let mut abschnitt = Classifier::new("Abschnitt", Arc::new(&contains));
abschnitt.set_parent(0);
classifiers.push(abschnitt);
let mut numbered_header =
Classifier::new("Numbered Header", Arc::new(&starts_with_number));
numbered_header.set_parent(9999);
classifiers.push(numbered_header);
} else if name == "test" {
let h1 = Classifier::new("h1", Arc::new(&contains));
classifiers.push(h1);
let mut h2 = Classifier::new("h2", Arc::new(&contains));
h2.set_parent(0);
classifiers.push(h2);
let mut h3 = Classifier::new("h3", Arc::new(&contains));
h3.set_parent(1);
classifiers.push(h3);
} else if name == "no-headers" {
let mut h1 = Classifier::new("", Arc::new(&contains));
h1.add_instance(ClassifierInstance::new("", 0));
last_header_index = Some(0);
classifiers.push(h1);
/// Two builders are equal when their name, classifiers, headers and pending
/// paragraph header agree. No other field takes part in the comparison.
impl PartialEq for LawBuilder {
    fn eq(&self, other: &Self) -> bool {
        let lhs = (
            &self.name,
            &self.classifiers,
            &self.header,
            &self.next_para_header,
        );
        let rhs = (
            &other.name,
            &other.classifiers,
            &other.header,
            &other.next_para_header,
        );
        lhs == rhs
    }
}
impl LawBuilder {
pub(crate) fn test(name: &str) -> Self {
let mut classifiers = Vec::new();
if name == "new" {
classifiers.push(Classifier::new("a", Arc::new(&contains)).root());
classifiers.push(Classifier::new("b", Arc::new(&contains)));
classifiers.push(Classifier::new("c", Arc::new(&contains)));
classifiers.push(Classifier::new("d", Arc::new(&contains)));
} else if name == "UrhG" {
classifiers.push(Classifier::new("Hauptstück", Arc::new(&contains)).root());
classifiers.push(Classifier::new("Abschnitt", Arc::new(&contains)));
classifiers.push(Classifier::new("Number", Arc::new(&starts_with_number)));
}
Self {
name: name.into(),
classifiers,
header: Vec::new(),
next_para_header: None,
last_header_index,
last_instance: None,
#[cfg(test)]
history: Vec::new(),
}
@ -190,73 +201,23 @@ impl LawBuilder {
/// Creates a new law builder. Adds classifier for known law texts.
pub(crate) fn new(name: &str) -> Law {
//TODO: return Law (not LawBuilder)
let mut classifiers = Vec::new();
let mut law_id = None;
if name == "UrhG" {
law_id = Some(10001848);
let hauptstueck = Classifier::new("Hauptstück", Arc::new(&contains));
classifiers.push(hauptstueck.clone());
let mut abschnitt = Classifier::new("Abschnitt", Arc::new(&contains));
abschnitt.set_parent(0);
classifiers.push(abschnitt);
let mut numbered_header =
Classifier::new("Numbered Header", Arc::new(&starts_with_number));
numbered_header.set_parent(9999);
classifiers.push(numbered_header);
} else if name == "MSchG" {
law_id = Some(10002180);
let abschnitt = Classifier::new("Abschnitt", Arc::new(&contains));
classifiers.push(abschnitt.clone());
let mut numbered_header =
Classifier::new("Numbered Header", Arc::new(&starts_with_number));
numbered_header.set_parent(0);
classifiers.push(numbered_header);
} else if name == "FSG" {
law_id = Some(10003898);
let artikel = Classifier::new("Artikel", Arc::new(&contains));
classifiers.push(artikel);
let mut abschnitt = Classifier::new(" Abschnitt", Arc::new(&contains));
abschnitt.set_parent(0);
classifiers.push(abschnitt);
let mut unterabschnitt = Classifier::new("Unterabschnitt", Arc::new(&contains));
unterabschnitt.set_parent(1);
classifiers.push(unterabschnitt);
let mut hauptstueck = Classifier::new("Hauptstück", Arc::new(&contains));
hauptstueck.set_parent(9999);
classifiers.push(hauptstueck);
let mut numbered_header =
Classifier::new("Numbered Header", Arc::new(&starts_with_letter));
numbered_header.set_parent(9999);
classifiers.push(numbered_header);
let mut uppercase_headers =
Classifier::new("Numbered Header", Arc::new(&starts_with_uppercaseletter));
uppercase_headers.set_parent(9999);
classifiers.push(uppercase_headers);
let mut number_header =
Classifier::new("Numbered Header", Arc::new(&starts_with_number));
number_header.set_parent(9999);
classifiers.push(number_header);
classifiers.push(Classifier::new("Hauptstück", Arc::new(&contains)).root());
classifiers.push(Classifier::new("Abschnitt", Arc::new(&contains)));
classifiers.push(Classifier::new("Number", Arc::new(&starts_with_number)));
}
let mut builder = Self {
name: name.into(),
classifiers,
header: Vec::new(),
next_para_header: None,
last_header_index: None,
last_instance: None,
#[cfg(test)]
history: Vec::new(),
};
@ -273,71 +234,95 @@ impl LawBuilder {
builder.into()
}
/// Returns the first classifier, in declaration order, whose `used_for`
/// predicate accepts `name`, or `None` when no classifier accepts it.
fn responsible_classifier(&self, name: &str) -> Option<&Classifier> {
    self.classifiers.iter().find(|class| class.used_for(name))
}
/// Walks up the parent chain starting at `cur` and returns the parent of the
/// first instance whose responsible classifier equals `class`.
///
/// Returns `None` when the chain ends without such an instance, or when the
/// matching instance itself has no parent.
///
/// # Panics
///
/// Panics if an instance on the chain has a name that no classifier accepts.
fn find_parent(
    &self,
    cur: Option<Rc<RefCell<ClassifierInstance>>>,
    class: &Classifier,
) -> Option<Rc<RefCell<ClassifierInstance>>> {
    let mut cur = cur;
    while let Some(node) = cur {
        // Copy out what is needed so the `RefCell` borrow ends right here.
        let (name, parent) = {
            let instance = node.borrow();
            (instance.name.clone(), instance.parent.clone())
        };
        let responsible = self
            .responsible_classifier(&name)
            .unwrap_or_else(|| panic!("No classifier for '{name}'"));
        if responsible == class {
            // `parent` already holds exactly what `get_parent()` returns, so
            // no second (mutable) borrow of the node is needed.
            return parent;
        }
        cur = parent;
    }
    None
}
/// Sets a new header.
pub(crate) fn new_header(&mut self, name: &str) {
let name = name.trim();
#[cfg(test)]
self.history.push(format!("New_header: {name}"));
debug!("new_header={name}");
let classifier_index = self
.classifiers
.iter()
.position(|class| class.used_for(name));
match classifier_index {
Some(index) => {
let mut class = ClassifierInstance::new(name.trim(), index);
info!("new_header={name}");
if self.classifiers[index]
.parent_index
.is_some_and(|x| x == 9999)
{
if self.classifiers[self.last_header_index.unwrap()]
.parent_index
.is_some_and(|x| x == 9999)
{
class.add_parent(
self.classifiers[self.classifiers[self.last_header_index.unwrap()]
.instances
.last()
.unwrap()
.parent
.clone()
.unwrap()
.idx]
.instances
.last()
.unwrap(),
)
let responsible_class = self
.responsible_classifier(name)
.expect(&format!("No classifier for '{name}'"));
let mut heading: ClassifierInstance = name.into();
if let Some(last_instance) = &self.last_instance {
let cur = Some(last_instance.clone());
let parent = &self.find_parent(cur, responsible_class);
println!("parnet={parent:#?}");
match parent {
None => {
if responsible_class.root {
let c = Rc::new(RefCell::new(heading));
self.header.push(c.clone());
self.last_instance = Some(c.clone());
} else {
class.add_parent(
self.classifiers[self.last_header_index.unwrap()]
.instances
.last()
.unwrap(),
);
heading.set_parent(last_instance.clone());
let c = Rc::new(RefCell::new(heading));
last_instance.borrow_mut().add_child(c.clone());
self.last_instance = Some(c.clone());
}
} else if let Some(parent) = self.classifiers[index].parent_index {
class.add_parent(self.classifiers[parent].instances.last().unwrap());
}
self.classifiers[index].add_instance(class);
self.last_header_index = Some(index);
Some(parent) => {
heading.set_parent(parent.clone());
let c = Rc::new(RefCell::new(heading));
parent.borrow_mut().add_child(c.clone());
self.last_instance = Some(c.clone());
}
None => panic!("No classifier for {name}"),
}
} else {
let c = Rc::new(RefCell::new(heading));
self.header.push(c.clone());
self.last_instance = Some(c.clone());
}
}
/// Sets a new description for the last classifier.
pub(crate) fn new_desc(&mut self, desc: &str) {
let desc = desc.trim();
#[cfg(test)]
self.history.push(format!("New desc: {desc}"));
debug!("new_desc={desc}");
if let Some(index) = self.last_header_index {
self.classifiers[index].set_desc(desc);
} else {
panic!("Not possible");
}
self.last_instance
.clone()
.unwrap()
.borrow_mut()
.set_desc(desc);
}
/// Adds a new paragraph.
@ -349,17 +334,19 @@ impl LawBuilder {
));
debug!("new_par=par:{par};content:{content:#?}");
if let Some(index) = self.last_header_index {
let section = Section {
symb: par,
content,
par_header: self.next_para_header.clone(),
};
let par_header = self.next_para_header.clone();
self.next_para_header = None;
self.classifiers[index].add_section(section);
} else {
panic!("Expected at least one classifier");
}
self.last_instance
.clone()
.expect("Expect at least one classifier")
.borrow_mut()
.add_section(Section {
symb: par,
par_header,
content,
})
}
/// Next paragraph has a header, store its name.
@ -373,15 +360,15 @@ impl LawBuilder {
fn get_by_parent(&self, name: &String) -> Vec<ClassifierInstance> {
let mut ret = Vec::new();
for class in &self.classifiers {
for inst in &class.instances {
if let Some(parent) = &inst.parent {
if &parent.name == name {
ret.push(inst.clone());
}
}
}
}
// for class in &self.classifiers {
// for inst in &class.instances {
// if let Some(parent) = &inst.parent {
// if &parent.name == name {
// ret.push(inst.clone());
// }
// }
// }
// }
ret
}
@ -411,26 +398,38 @@ impl fmt::Display for Section {
}
}
#[derive(Clone, Debug, PartialEq)]
#[derive(Clone, PartialEq)]
pub(crate) struct ClassifierInstance {
pub(crate) name: String,
pub(crate) name: String, //e.g. 1 Theilstück
pub(crate) desc: Option<String>,
pub(crate) sections: Vec<Section>,
pub(crate) parent: Option<Box<ClassifierInstance>>,
pub(crate) idx: usize,
pub(crate) children: Vec<Rc<RefCell<ClassifierInstance>>>,
pub(crate) parent: Option<Rc<RefCell<ClassifierInstance>>>,
}
impl ClassifierInstance {
fn new(name: &str, idx: usize) -> Self {
fn new(name: &str) -> Self {
Self {
name: name.into(),
desc: None,
sections: Vec::new(),
parent: None,
idx,
sections: Vec::new(),
children: Vec::new(),
}
}
/// Records `parent` as this instance's parent, replacing any earlier one.
fn set_parent(&mut self, parent: Rc<RefCell<ClassifierInstance>>) {
self.parent = Some(parent);
}
/// Returns another handle to the parent (a clone of the `Rc`), or `None` when
/// no parent has been set.
fn get_parent(&self) -> Option<Rc<RefCell<ClassifierInstance>>> {
self.parent.clone()
}
/// Appends `child` to the end of this instance's children.
fn add_child(&mut self, child: Rc<RefCell<ClassifierInstance>>) {
self.children.push(child);
}
/// Stores an owned copy of `desc` as the description, overwriting any earlier one.
fn set_desc(&mut self, desc: &str) {
self.desc = Some(desc.into());
}
@ -438,35 +437,37 @@ impl ClassifierInstance {
/// Appends `section` to the end of this instance's sections.
fn add_section(&mut self, section: Section) {
self.sections.push(section);
}
}
fn add_parent(&mut self, parent: &ClassifierInstance) {
self.parent = Some(Box::new(parent.clone()));
/// Debug output listing `name`, `desc`, `sections` and `children`.
///
/// `parent` is not printed. A child is reachable from its parent's `children`,
/// so printing `parent` as well would recurse without end once a parent/child
/// pair is linked — presumably the reason it is left out.
impl std::fmt::Debug for ClassifierInstance {
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        // Label the output with this type's own name; "Classifier" is a
        // different type whose Debug output embeds these instances.
        f.debug_struct("ClassifierInstance")
            .field("name", &self.name)
            .field("desc", &self.desc)
            .field("sections", &self.sections)
            .field("children", &self.children)
            .finish()
    }
}
impl From<&str> for ClassifierInstance {
fn from(value: &str) -> Self {
Self::new(value)
}
}
#[derive(Clone)]
pub(crate) struct Classifier {
pub(crate) name: String, // Hauptstück, Theil, Abschnitt, ol
pub(crate) parent_index: Option<usize>,
pub(crate) instances: Vec<ClassifierInstance>,
pub(crate) used_for_fn: Arc<dyn Fn(&str, &str) -> bool>,
pub(crate) instances: Vec<ClassifierInstance>,
pub(crate) child: Vec<Rc<RefCell<Classifier>>>,
pub(crate) root: bool,
}
/// Equality over the data fields listed below; the `used_for_fn` predicate is
/// not part of the comparison.
impl PartialEq for Classifier {
fn eq(&self, other: &Self) -> bool {
self.name == other.name
&& self.parent_index == other.parent_index
&& self.instances == other.instances
}
}
/// Debug output listing `name`, `parent_index` and `instances`. The
/// `used_for_fn` predicate is not printed.
impl std::fmt::Debug for Classifier {
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        let mut out = f.debug_struct("Classifier");
        out.field("name", &self.name);
        out.field("parent_index", &self.parent_index);
        out.field("instances", &self.instances);
        out.finish()
    }
}
@ -474,30 +475,37 @@ impl Classifier {
fn new(name: &str, used_for_fn: Arc<dyn Fn(&str, &str) -> bool>) -> Self {
Self {
name: name.into(),
parent_index: None,
instances: Vec::new(),
used_for_fn,
child: Vec::new(),
instances: Vec::new(),
root: false,
}
}
/// Stores `parent` as this classifier's `parent_index`, replacing any earlier value.
fn set_parent(&mut self, parent: usize) {
self.parent_index = Some(parent);
}
/// Appends an instance to this classifier. Despite its name, the `name`
/// parameter is the whole instance, not just its label.
fn add_instance(&mut self, name: ClassifierInstance) {
self.instances.push(name);
}
fn set_desc(&mut self, desc: &str) {
self.instances.last_mut().unwrap().set_desc(desc.trim());
/// Builder-style helper: consumes the classifier and hands it back with its
/// `root` flag set; every other field is carried over unchanged.
fn root(mut self) -> Self {
    self.root = true;
    self
}
/// Runs the stored predicate with this classifier's own name as the first
/// argument and `name` as the second, and returns its verdict.
fn used_for(&self, name: &str) -> bool {
(self.used_for_fn)(&self.name, name)
}
fn add_section(&mut self, section: Section) {
self.instances.last_mut().unwrap().add_section(section);
/// Appends `child` to the end of this classifier's `child` list.
pub(crate) fn add_child(&mut self, child: Rc<RefCell<Classifier>>) {
self.child.push(child);
}
/// Appends `instance` to the end of this classifier's instances.
pub(crate) fn add_instance(&mut self, instance: ClassifierInstance) {
self.instances.push(instance);
}
}
/// Debug output listing `name`, `instances` and `child`. The `used_for_fn`
/// predicate and the `root` flag are not printed.
impl std::fmt::Debug for Classifier {
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        let mut out = f.debug_struct("Classifier");
        out.field("name", &self.name);
        out.field("instances", &self.instances);
        out.field("child", &self.child);
        out.finish()
    }
}

View File

@ -42,7 +42,7 @@ impl From<roxmltree::Error> for Error {
/// Program entry point: initialises `env_logger`, builds a law through
/// `LawBuilder::new` and calls `to_md` on it.
fn main() {
env_logger::init();
// NOTE(review): this binding is shadowed by the next line, so only the
// "UrhG" law reaches `to_md` — confirm whether the "FSG" call is still wanted.
let law = LawBuilder::new("FSG");
let law = LawBuilder::new("UrhG");
law.to_md();
}

View File

@ -119,7 +119,6 @@ mod tests {
entries.sort_by_key(|entry| entry.file_name());
for entry in entries {
println!("{entry:?}");
let mut file = File::open(path.join(entry.file_name())).unwrap();
let mut json = String::new();
file.read_to_string(&mut json).unwrap();