risp/src/law.rs
2023-11-06 14:10:08 +01:00

492 lines
15 KiB
Rust

use log::debug;
use serde::{Deserialize, Serialize};
use std::sync::Arc;
use crate::{overview, par};
/// The finished, serializable representation of one law.
///
/// Values of this type are produced from a [`LawBuilder`] through the
/// `From<LawBuilder>` implementation in this file.
#[derive(Debug, Serialize, Deserialize, PartialEq)]
pub(crate) struct Law {
/// Short name of the law.
name: String, //ABGB, UrhG
/// Headings built from the instances of every classifier that has no parent.
header: Vec<Heading>,
}
/// One node of the heading tree of a [`Law`].
#[derive(Debug, Serialize, Deserialize, PartialEq)]
struct Heading {
/// Name of the heading as it was passed to the builder (trimmed).
name: String, //1. Hauptstück; 3. Theil; ...
/// Optional description attached to the heading.
desc: Option<String>,
/// Either nested headings or the sections directly below this heading.
content: HeadingContent, // 1. Theil; 1. Subtheil; ...
}
/// What a [`Heading`] contains: leaf sections or further headings, never both.
#[derive(Debug, Serialize, Deserialize, PartialEq)]
enum HeadingContent {
/// The heading has no child headings; these are its own sections.
Paragraph(Vec<Section>),
/// The heading has child headings; its own sections are not carried over.
Heading(Vec<Heading>),
}
/// Recursively converts a classifier instance into a [`Heading`].
///
/// Child instances are looked up in `builder` by the *name* of `cur`. If any
/// exist, the heading contains one sub-heading per child (and the sections of
/// `cur` itself are not emitted); otherwise it contains a copy of `cur`'s
/// sections.
fn add_from_node(cur: &ClassifierInstance, builder: &LawBuilder) -> Heading {
    let sub_instances = builder.get_by_parent(&cur.name);

    let content = if sub_instances.is_empty() {
        HeadingContent::Paragraph(cur.sections.clone())
    } else {
        HeadingContent::Heading(
            sub_instances
                .iter()
                .map(|sub| add_from_node(sub, builder))
                .collect(),
        )
    };

    Heading {
        name: cur.name.clone(),
        desc: cur.desc.clone(),
        content,
    }
}
impl From<LawBuilder> for Law {
fn from(builder: LawBuilder) -> Self {
let cur: Vec<Classifier> = builder
.classifiers
.clone()
.into_iter()
.filter(|c| c.parent_index.is_none())
.collect();
let mut ret = Vec::new();
for class in cur {
for child in class.instances {
ret.push(add_from_node(&child, &builder));
}
}
Self {
name: builder.name,
header: ret,
}
}
}
/// Classifier predicate: does `instance_name` contain `classifier_name`,
/// ignoring case?
///
/// An empty `classifier_name` matches every instance name.
pub(crate) fn contains(classifier_name: &str, instance_name: &str) -> bool {
    let needle = classifier_name.to_lowercase();
    let haystack = instance_name.to_lowercase();
    haystack.contains(needle.as_str())
}
/// Classifier predicate: after trimming surrounding whitespace, does
/// `instance_name` begin with an ASCII digit?
///
/// The classifier name is irrelevant for this predicate and is ignored.
fn starts_with_number(_classifier_name: &str, instance_name: &str) -> bool {
    instance_name
        .trim()
        .starts_with(|first: char| first.is_ascii_digit())
}
/// Is used to generate a law struct. It's organized mainly by classifier.
///
/// Headers, descriptions and paragraphs are fed in one at a time through the
/// `new_*` methods; the finished builder is converted with `Law::from`.
#[derive(Debug, PartialEq)]
pub(crate) struct LawBuilder {
/// Name of the law
pub(crate) name: String, //ABGB, UrhG
/// Structure of the law text
pub(crate) classifiers: Vec<Classifier>,
/// Index into `classifiers` of the classifier that received the most recent
/// header, or `None` while no header has been added yet.
pub(crate) last_header_index: Option<usize>,
/// Stores the header of the next paragraph
pub(crate) next_para_header: Option<String>,
/// Test-only log of every builder call, one formatted line per call.
#[cfg(test)]
pub(crate) history: Vec<String>,
}
impl LawBuilder {
#[cfg(test)]
pub(crate) fn test(name: &str) -> Self {
let mut last_header_index = None;
let mut classifiers = Vec::new();
if name == "UrhG" {
let hauptstueck = Classifier::new("Hauptstück", Arc::new(&contains));
classifiers.push(hauptstueck.clone());
let mut abschnitt = Classifier::new("Abschnitt", Arc::new(&contains));
abschnitt.set_parent(0);
classifiers.push(abschnitt);
let mut numbered_header =
Classifier::new("Numbered Header", Arc::new(&starts_with_number));
numbered_header.set_parent(9999);
classifiers.push(numbered_header);
} else if name == "test" {
let h1 = Classifier::new("h1", Arc::new(&contains));
classifiers.push(h1);
let mut h2 = Classifier::new("h2", Arc::new(&contains));
h2.set_parent(0);
classifiers.push(h2);
let mut h3 = Classifier::new("h3", Arc::new(&contains));
h3.set_parent(1);
classifiers.push(h3);
} else if name == "no-headers" {
let mut h1 = Classifier::new("", Arc::new(&contains));
h1.add_instance(ClassifierInstance::new("", 0));
last_header_index = Some(0);
classifiers.push(h1);
}
Self {
name: name.into(),
classifiers,
next_para_header: None,
last_header_index,
#[cfg(test)]
history: Vec::new(),
}
}
/// Creates a new law builder. Adds classifier for known law texts.
pub(crate) fn new(name: &str) -> Law {
//TODO: return Law (not LawBuilder)
let mut classifiers = Vec::new();
let mut law_id = None;
if name == "UrhG" {
law_id = Some(10001848);
let hauptstueck = Classifier::new("Hauptstück", Arc::new(&contains));
classifiers.push(hauptstueck.clone());
let mut abschnitt = Classifier::new("Abschnitt", Arc::new(&contains));
abschnitt.set_parent(0);
classifiers.push(abschnitt);
let mut numbered_header =
Classifier::new("Numbered Header", Arc::new(&starts_with_number));
numbered_header.set_parent(9999);
classifiers.push(numbered_header);
} else if name == "MSchG" {
law_id = Some(10002180);
let abschnitt = Classifier::new("Abschnitt", Arc::new(&contains));
classifiers.push(abschnitt.clone());
let mut numbered_header =
Classifier::new("Numbered Header", Arc::new(&starts_with_number));
numbered_header.set_parent(0);
classifiers.push(numbered_header);
}
let mut builder = Self {
name: name.into(),
classifiers,
next_para_header: None,
last_header_index: None,
#[cfg(test)]
history: Vec::new(),
};
let paragraphs = overview::parse(law_id.unwrap()).unwrap();
for paragraph in paragraphs {
let cont = par::parse(&paragraph, &mut builder).unwrap();
if !cont {
break;
}
}
builder.into()
}
/// Sets a new header.
pub(crate) fn new_header(&mut self, name: &str) {
#[cfg(test)]
self.history.push(format!("New_header: {name}"));
debug!("new_header={name}");
let classifier_index = self
.classifiers
.iter()
.position(|class| class.used_for(name));
match classifier_index {
Some(index) => {
let mut class = ClassifierInstance::new(name.trim(), index);
if self.classifiers[index]
.parent_index
.is_some_and(|x| x == 9999)
{
if self.classifiers[self.last_header_index.unwrap()]
.parent_index
.is_some_and(|x| x == 9999)
{
class.add_parent(
self.classifiers[self.classifiers[self.last_header_index.unwrap()]
.instances
.last()
.unwrap()
.parent
.clone()
.unwrap()
.idx]
.instances
.last()
.unwrap(),
)
} else {
class.add_parent(
self.classifiers[self.last_header_index.unwrap()]
.instances
.last()
.unwrap(),
);
}
} else if let Some(parent) = self.classifiers[index].parent_index {
class.add_parent(self.classifiers[parent].instances.last().unwrap());
}
self.classifiers[index].add_instance(class);
self.last_header_index = Some(index);
}
None => panic!("No classifier for {name}"),
}
}
/// Sets a new description for the last classifier.
pub(crate) fn new_desc(&mut self, desc: &str) {
#[cfg(test)]
self.history.push(format!("New desc: {desc}"));
debug!("new_desc={desc}");
if let Some(index) = self.last_header_index {
self.classifiers[index].set_desc(desc);
} else {
panic!("Not possible");
}
}
/// Adds a new paragraph.
pub(crate) fn new_par(&mut self, par: String, content: Content) {
#[cfg(test)]
self.history.push(format!(
"New_par: {par};{}",
serde_json::to_string(&content).unwrap()
));
debug!("new_par=par:{par};content:{content:#?}");
if let Some(index) = self.last_header_index {
let section = Section {
symb: par,
content,
par_header: self.next_para_header.clone(),
};
self.next_para_header = None;
self.classifiers[index].add_section(section);
} else {
panic!("Expected at least one classifier");
}
}
/// Next paragraph has a header, store its name.
pub(crate) fn new_next_para_header(&mut self, header: &str) {
#[cfg(test)]
self.history.push(format!("New_new_para_header: {header}"));
debug!("new_next_para_header={header}");
self.next_para_header = Some(header.trim().into());
}
fn get_by_parent(&self, name: &String) -> Vec<ClassifierInstance> {
let mut ret = Vec::new();
for class in &self.classifiers {
for inst in &class.instances {
if let Some(parent) = &inst.parent {
if &parent.name == name {
ret.push(inst.clone());
}
}
}
}
ret
}
}
/// A single paragraph of a law together with its optional header.
#[derive(Clone, Debug, PartialEq, Serialize, Deserialize)]
pub(crate) struct Section {
/// Paragraph symbol as passed to `LawBuilder::new_par`.
pub(crate) symb: String, // §"1", §"2", ...
/// Header that was announced for this paragraph, if any.
pub(crate) par_header: Option<String>,
/// The paragraph's content.
pub(crate) content: Content,
}
//impl fmt::Debug for Section {
// fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
// let par_header = self.par_header.as_ref().map(String::as_str).unwrap_or("");
// write!(f, "{} ({})", self.symb, par_header)
// }
//}
/// One concrete header found in a law text, owned by the [`Classifier`] that
/// matched it.
#[derive(Clone, Debug, PartialEq)]
pub(crate) struct ClassifierInstance {
/// Name of the header.
pub(crate) name: String,
/// Optional description of the header.
pub(crate) desc: Option<String>,
/// Paragraphs added while this instance was the newest one of its classifier.
pub(crate) sections: Vec<Section>,
/// Copy of the parent instance, taken at the moment the parent was assigned;
/// later changes to the parent are not reflected in this copy.
pub(crate) parent: Option<Box<ClassifierInstance>>,
/// Index of the owning classifier within `LawBuilder::classifiers`.
pub(crate) idx: usize,
}
impl ClassifierInstance {
    /// Creates an instance called `name` that belongs to the classifier at
    /// index `idx`; it starts without description, sections or parent.
    fn new(name: &str, idx: usize) -> Self {
        Self {
            idx,
            name: String::from(name),
            parent: None,
            desc: None,
            sections: Vec::new(),
        }
    }

    /// Replaces the description with a copy of `desc`.
    fn set_desc(&mut self, desc: &str) {
        let owned = String::from(desc);
        self.desc = Some(owned);
    }

    /// Appends `section` to this instance.
    fn add_section(&mut self, section: Section) {
        self.sections.push(section);
    }

    /// Stores a copy of `parent` as this instance's parent.
    fn add_parent(&mut self, parent: &ClassifierInstance) {
        let snapshot = Box::new(parent.clone());
        self.parent = Some(snapshot);
    }
}
/// A kind of header (one level of a law's structure) together with all
/// instances of it that have been found so far.
#[derive(Clone)]
pub(crate) struct Classifier {
/// Name of the classifier; passed as first argument to `used_for_fn`.
pub(crate) name: String, // Hauptstück, Theil, Abschnitt, ol
/// Index of the parent classifier within `LawBuilder::classifiers`, if any.
/// The value 9999 is treated specially by `LawBuilder::new_header`.
pub(crate) parent_index: Option<usize>,
/// Instances of this classifier in the order they were added.
pub(crate) instances: Vec<ClassifierInstance>,
/// Predicate called with (classifier name, header name) to decide whether
/// a header belongs to this classifier.
pub(crate) used_for_fn: Arc<dyn Fn(&str, &str) -> bool>,
}
impl PartialEq for Classifier {
    /// Two classifiers are equal when name, parent index and instances match.
    /// The predicate `used_for_fn` is not part of the comparison.
    fn eq(&self, other: &Self) -> bool {
        if self.name != other.name {
            return false;
        }
        if self.parent_index != other.parent_index {
            return false;
        }
        self.instances == other.instances
    }
}
impl std::fmt::Debug for Classifier {
    /// Formats every field except the predicate `used_for_fn`, which has no
    /// `Debug` representation.
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        let Self {
            name,
            parent_index,
            instances,
            used_for_fn: _,
        } = self;

        let mut out = f.debug_struct("Classifier");
        out.field("name", name);
        out.field("parent_index", parent_index);
        out.field("instances", instances);
        out.finish()
    }
}
impl Classifier {
    /// Creates a classifier called `name` without parent and without
    /// instances; `used_for_fn` decides which headers belong to it.
    fn new(name: &str, used_for_fn: Arc<dyn Fn(&str, &str) -> bool>) -> Self {
        Self {
            used_for_fn,
            name: name.to_owned(),
            instances: Vec::new(),
            parent_index: None,
        }
    }

    /// Records `parent` as the index of the parent classifier.
    fn set_parent(&mut self, parent: usize) {
        self.parent_index = Some(parent);
    }

    /// Appends a new instance; it becomes the "newest" instance that
    /// `set_desc` and `add_section` operate on.
    fn add_instance(&mut self, name: ClassifierInstance) {
        self.instances.push(name);
    }

    /// Sets the trimmed `desc` on the newest instance.
    ///
    /// Panics if the classifier has no instance yet.
    fn set_desc(&mut self, desc: &str) {
        let newest = self.instances.last_mut().unwrap();
        newest.set_desc(desc.trim());
    }

    /// Asks the predicate whether the header `name` belongs to this classifier.
    fn used_for(&self, name: &str) -> bool {
        let matcher = &*self.used_for_fn;
        matcher(&self.name, name)
    }

    /// Appends `section` to the newest instance.
    ///
    /// Panics if the classifier has no instance yet.
    fn add_section(&mut self, section: Section) {
        let newest = self.instances.last_mut().unwrap();
        newest.add_section(section);
    }
}
/// Content of a paragraph; `Item` and `List` nest further `Content` values.
#[derive(Clone, Debug, PartialEq, Serialize, Deserialize)]
pub(crate) enum Content {
/// Plain law text.
Text(String), //This is my direct law text
/// A sequence of sub-items of a paragraph.
Item(Vec<Content>), //(1) This is general law. (2) This is more specific law
/// A list of nested entries.
/// NOTE(review): how the parser distinguishes `List` from `Item` is not
/// visible in this file - confirm against `par::parse`.
List(Vec<Content>),
}
#[cfg(test)]
mod tests {
    use pretty_assertions::assert_eq;
    use std::{
        fs,
        io::{self, BufRead},
        path::Path,
    };

    use super::*;

    /// Reads the file at `filename` and returns its lines, failing on the
    /// first I/O error.
    fn read_lines<P>(filename: P) -> io::Result<Vec<String>>
    where
        P: AsRef<Path>,
    {
        let reader = io::BufReader::new(fs::File::open(filename)?);
        reader.lines().collect()
    }

    /// Builds the UrhG through `LawBuilder::new` and compares the result with
    /// the stored fixture. Ignored by default.
    #[ignore]
    #[test]
    fn test_with_live_data() {
        let law = LawBuilder::new("UrhG");

        let json = fs::read_to_string(Path::new("./data/urhg/builder.result")).unwrap();
        let expected: Law = serde_json::from_str(&json).unwrap();

        assert_eq!(law, expected);
    }

    /// Replays a recorded sequence of builder calls (one per line, in the
    /// format written to `LawBuilder::history`) and compares the resulting
    /// law with the stored fixture.
    #[test]
    fn test_builder_full_urhg() {
        let mut builder = LawBuilder::test("UrhG");
        let path = Path::new("./data/urhg/par");

        for line in read_lines(path.join("../par.result")).unwrap() {
            let (command, content) = line.split_once(':').unwrap();
            match command {
                "New_header" => builder.new_header(content),
                "New desc" => builder.new_desc(content),
                "New_new_para_header" => builder.new_next_para_header(content),
                "New_par" => {
                    // Line layout: "New_par: <symbol>;<content as JSON>".
                    let (head, content_json) = line.split_once(';').unwrap();
                    let (_, symbol) = head.split_once(':').unwrap();
                    let parsed: Content = serde_json::from_str(content_json).unwrap();
                    builder.new_par(symbol.trim().into(), parsed);
                }
                _ => panic!("Don't know command '{command}'"),
            }
        }

        let actual: Law = builder.into();
        // To regenerate the fixture, print `serde_json::to_string(&actual)`.

        let json = fs::read_to_string(path.join("../builder.result")).unwrap();
        let expected: Law = serde_json::from_str(&json).unwrap();

        assert_eq!(actual, expected);
    }
}
}