492 lines
15 KiB
Rust
492 lines
15 KiB
Rust
use log::debug;
|
|
use serde::{Deserialize, Serialize};
|
|
use std::sync::Arc;
|
|
|
|
use crate::{overview, par};
|
|
|
|
#[derive(Debug, Serialize, Deserialize, PartialEq)]
|
|
pub(crate) struct Law {
|
|
name: String, //ABGB, UrhG
|
|
header: Vec<Heading>,
|
|
}
|
|
|
|
#[derive(Debug, Serialize, Deserialize, PartialEq)]
|
|
struct Heading {
|
|
name: String, //1. Hauptstück; 3. Theil; ...
|
|
desc: Option<String>,
|
|
content: HeadingContent, // 1. Theil; 1. Subtheil; ...
|
|
}
|
|
|
|
#[derive(Debug, Serialize, Deserialize, PartialEq)]
|
|
enum HeadingContent {
|
|
Paragraph(Vec<Section>),
|
|
Heading(Vec<Heading>),
|
|
}
|
|
|
|
fn add_from_node(cur: &ClassifierInstance, builder: &LawBuilder) -> Heading {
|
|
let children = builder.get_by_parent(&cur.name);
|
|
if !children.is_empty() {
|
|
let mut ret = Vec::new();
|
|
for child in children {
|
|
ret.push(add_from_node(&child, builder));
|
|
}
|
|
Heading {
|
|
name: cur.name.clone(),
|
|
desc: cur.desc.clone(),
|
|
content: HeadingContent::Heading(ret),
|
|
}
|
|
} else {
|
|
Heading {
|
|
name: cur.name.clone(),
|
|
desc: cur.desc.clone(),
|
|
content: HeadingContent::Paragraph(cur.sections.clone()),
|
|
}
|
|
}
|
|
}
|
|
|
|
impl From<LawBuilder> for Law {
|
|
fn from(builder: LawBuilder) -> Self {
|
|
let cur: Vec<Classifier> = builder
|
|
.classifiers
|
|
.clone()
|
|
.into_iter()
|
|
.filter(|c| c.parent_index.is_none())
|
|
.collect();
|
|
|
|
let mut ret = Vec::new();
|
|
for class in cur {
|
|
for child in class.instances {
|
|
ret.push(add_from_node(&child, &builder));
|
|
}
|
|
}
|
|
|
|
Self {
|
|
name: builder.name,
|
|
header: ret,
|
|
}
|
|
}
|
|
}
|
|
|
|
/// Returns `true` if `instance_name` contains `classifier_name`, ignoring case.
///
/// Both strings are lowercased before the substring search.
pub(crate) fn contains(classifier_name: &str, instance_name: &str) -> bool {
    let needle = classifier_name.to_lowercase();
    let haystack = instance_name.to_lowercase();
    haystack.contains(needle.as_str())
}
|
|
|
|
/// Returns `true` if `instance_name`, after trimming surrounding whitespace, begins
/// with an ASCII digit.
///
/// The classifier name is ignored; the parameter exists so the function has the same
/// shape as [`contains`].
fn starts_with_number(_classifier_name: &str, instance_name: &str) -> bool {
    instance_name
        .trim()
        .bytes()
        .next()
        .is_some_and(|byte| byte.is_ascii_digit())
}
|
|
|
|
/// Is used to generate a law struct. It's organized mainly by classifier.
|
|
#[derive(Debug, PartialEq)]
|
|
pub(crate) struct LawBuilder {
|
|
/// Name of the law
|
|
pub(crate) name: String, //ABGB, UrhG
|
|
|
|
/// Structure of the law text
|
|
pub(crate) classifiers: Vec<Classifier>,
|
|
|
|
pub(crate) last_header_index: Option<usize>,
|
|
|
|
/// Stores the header of the next paragraph
|
|
pub(crate) next_para_header: Option<String>,
|
|
|
|
#[cfg(test)]
|
|
pub(crate) history: Vec<String>,
|
|
}
|
|
|
|
impl LawBuilder {
|
|
#[cfg(test)]
|
|
pub(crate) fn test(name: &str) -> Self {
|
|
let mut last_header_index = None;
|
|
let mut classifiers = Vec::new();
|
|
if name == "UrhG" {
|
|
let hauptstueck = Classifier::new("Hauptstück", Arc::new(&contains));
|
|
classifiers.push(hauptstueck.clone());
|
|
|
|
let mut abschnitt = Classifier::new("Abschnitt", Arc::new(&contains));
|
|
abschnitt.set_parent(0);
|
|
classifiers.push(abschnitt);
|
|
|
|
let mut numbered_header =
|
|
Classifier::new("Numbered Header", Arc::new(&starts_with_number));
|
|
numbered_header.set_parent(9999);
|
|
classifiers.push(numbered_header);
|
|
} else if name == "test" {
|
|
let h1 = Classifier::new("h1", Arc::new(&contains));
|
|
classifiers.push(h1);
|
|
|
|
let mut h2 = Classifier::new("h2", Arc::new(&contains));
|
|
h2.set_parent(0);
|
|
classifiers.push(h2);
|
|
|
|
let mut h3 = Classifier::new("h3", Arc::new(&contains));
|
|
h3.set_parent(1);
|
|
classifiers.push(h3);
|
|
} else if name == "no-headers" {
|
|
let mut h1 = Classifier::new("", Arc::new(&contains));
|
|
h1.add_instance(ClassifierInstance::new("", 0));
|
|
last_header_index = Some(0);
|
|
classifiers.push(h1);
|
|
}
|
|
Self {
|
|
name: name.into(),
|
|
classifiers,
|
|
next_para_header: None,
|
|
last_header_index,
|
|
#[cfg(test)]
|
|
history: Vec::new(),
|
|
}
|
|
}
|
|
|
|
/// Creates a new law builder. Adds classifier for known law texts.
|
|
pub(crate) fn new(name: &str) -> Law {
|
|
//TODO: return Law (not LawBuilder)
|
|
let mut classifiers = Vec::new();
|
|
|
|
let mut law_id = None;
|
|
if name == "UrhG" {
|
|
law_id = Some(10001848);
|
|
let hauptstueck = Classifier::new("Hauptstück", Arc::new(&contains));
|
|
classifiers.push(hauptstueck.clone());
|
|
|
|
let mut abschnitt = Classifier::new("Abschnitt", Arc::new(&contains));
|
|
abschnitt.set_parent(0);
|
|
classifiers.push(abschnitt);
|
|
|
|
let mut numbered_header =
|
|
Classifier::new("Numbered Header", Arc::new(&starts_with_number));
|
|
numbered_header.set_parent(9999);
|
|
classifiers.push(numbered_header);
|
|
} else if name == "MSchG" {
|
|
law_id = Some(10002180);
|
|
|
|
let abschnitt = Classifier::new("Abschnitt", Arc::new(&contains));
|
|
classifiers.push(abschnitt.clone());
|
|
|
|
let mut numbered_header =
|
|
Classifier::new("Numbered Header", Arc::new(&starts_with_number));
|
|
numbered_header.set_parent(0);
|
|
classifiers.push(numbered_header);
|
|
}
|
|
|
|
let mut builder = Self {
|
|
name: name.into(),
|
|
classifiers,
|
|
next_para_header: None,
|
|
last_header_index: None,
|
|
#[cfg(test)]
|
|
history: Vec::new(),
|
|
};
|
|
|
|
let paragraphs = overview::parse(law_id.unwrap()).unwrap();
|
|
|
|
for paragraph in paragraphs {
|
|
let cont = par::parse(¶graph, &mut builder).unwrap();
|
|
if !cont {
|
|
break;
|
|
}
|
|
}
|
|
|
|
builder.into()
|
|
}
|
|
|
|
/// Sets a new header.
|
|
pub(crate) fn new_header(&mut self, name: &str) {
|
|
#[cfg(test)]
|
|
self.history.push(format!("New_header: {name}"));
|
|
debug!("new_header={name}");
|
|
let classifier_index = self
|
|
.classifiers
|
|
.iter()
|
|
.position(|class| class.used_for(name));
|
|
|
|
match classifier_index {
|
|
Some(index) => {
|
|
let mut class = ClassifierInstance::new(name.trim(), index);
|
|
|
|
if self.classifiers[index]
|
|
.parent_index
|
|
.is_some_and(|x| x == 9999)
|
|
{
|
|
if self.classifiers[self.last_header_index.unwrap()]
|
|
.parent_index
|
|
.is_some_and(|x| x == 9999)
|
|
{
|
|
class.add_parent(
|
|
self.classifiers[self.classifiers[self.last_header_index.unwrap()]
|
|
.instances
|
|
.last()
|
|
.unwrap()
|
|
.parent
|
|
.clone()
|
|
.unwrap()
|
|
.idx]
|
|
.instances
|
|
.last()
|
|
.unwrap(),
|
|
)
|
|
} else {
|
|
class.add_parent(
|
|
self.classifiers[self.last_header_index.unwrap()]
|
|
.instances
|
|
.last()
|
|
.unwrap(),
|
|
);
|
|
}
|
|
} else if let Some(parent) = self.classifiers[index].parent_index {
|
|
class.add_parent(self.classifiers[parent].instances.last().unwrap());
|
|
}
|
|
|
|
self.classifiers[index].add_instance(class);
|
|
self.last_header_index = Some(index);
|
|
}
|
|
None => panic!("No classifier for {name}"),
|
|
}
|
|
}
|
|
|
|
/// Sets a new description for the last classifier.
|
|
pub(crate) fn new_desc(&mut self, desc: &str) {
|
|
#[cfg(test)]
|
|
self.history.push(format!("New desc: {desc}"));
|
|
|
|
debug!("new_desc={desc}");
|
|
if let Some(index) = self.last_header_index {
|
|
self.classifiers[index].set_desc(desc);
|
|
} else {
|
|
panic!("Not possible");
|
|
}
|
|
}
|
|
|
|
/// Adds a new paragraph.
|
|
pub(crate) fn new_par(&mut self, par: String, content: Content) {
|
|
#[cfg(test)]
|
|
self.history.push(format!(
|
|
"New_par: {par};{}",
|
|
serde_json::to_string(&content).unwrap()
|
|
));
|
|
debug!("new_par=par:{par};content:{content:#?}");
|
|
if let Some(index) = self.last_header_index {
|
|
let section = Section {
|
|
symb: par,
|
|
content,
|
|
par_header: self.next_para_header.clone(),
|
|
};
|
|
self.next_para_header = None;
|
|
self.classifiers[index].add_section(section);
|
|
} else {
|
|
panic!("Expected at least one classifier");
|
|
}
|
|
}
|
|
|
|
/// Next paragraph has a header, store its name.
|
|
pub(crate) fn new_next_para_header(&mut self, header: &str) {
|
|
#[cfg(test)]
|
|
self.history.push(format!("New_new_para_header: {header}"));
|
|
debug!("new_next_para_header={header}");
|
|
self.next_para_header = Some(header.trim().into());
|
|
}
|
|
|
|
fn get_by_parent(&self, name: &String) -> Vec<ClassifierInstance> {
|
|
let mut ret = Vec::new();
|
|
|
|
for class in &self.classifiers {
|
|
for inst in &class.instances {
|
|
if let Some(parent) = &inst.parent {
|
|
if &parent.name == name {
|
|
ret.push(inst.clone());
|
|
}
|
|
}
|
|
}
|
|
}
|
|
|
|
ret
|
|
}
|
|
}
|
|
|
|
#[derive(Clone, Debug, PartialEq, Serialize, Deserialize)]
|
|
pub(crate) struct Section {
|
|
pub(crate) symb: String, // §"1", §"2", ...
|
|
pub(crate) par_header: Option<String>,
|
|
pub(crate) content: Content,
|
|
}
|
|
|
|
//impl fmt::Debug for Section {
|
|
// fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
|
|
// let par_header = self.par_header.as_ref().map(String::as_str).unwrap_or("");
|
|
// write!(f, "{} ({})", self.symb, par_header)
|
|
// }
|
|
//}
|
|
|
|
#[derive(Clone, Debug, PartialEq)]
|
|
pub(crate) struct ClassifierInstance {
|
|
pub(crate) name: String,
|
|
pub(crate) desc: Option<String>,
|
|
pub(crate) sections: Vec<Section>,
|
|
pub(crate) parent: Option<Box<ClassifierInstance>>,
|
|
pub(crate) idx: usize,
|
|
}
|
|
|
|
impl ClassifierInstance {
|
|
fn new(name: &str, idx: usize) -> Self {
|
|
Self {
|
|
name: name.into(),
|
|
desc: None,
|
|
sections: Vec::new(),
|
|
parent: None,
|
|
idx,
|
|
}
|
|
}
|
|
|
|
fn set_desc(&mut self, desc: &str) {
|
|
self.desc = Some(desc.into());
|
|
}
|
|
|
|
fn add_section(&mut self, section: Section) {
|
|
self.sections.push(section);
|
|
}
|
|
|
|
fn add_parent(&mut self, parent: &ClassifierInstance) {
|
|
self.parent = Some(Box::new(parent.clone()));
|
|
}
|
|
}
|
|
|
|
#[derive(Clone)]
|
|
pub(crate) struct Classifier {
|
|
pub(crate) name: String, // Hauptstück, Theil, Abschnitt, ol
|
|
pub(crate) parent_index: Option<usize>,
|
|
pub(crate) instances: Vec<ClassifierInstance>,
|
|
pub(crate) used_for_fn: Arc<dyn Fn(&str, &str) -> bool>,
|
|
}
|
|
|
|
impl PartialEq for Classifier {
|
|
fn eq(&self, other: &Self) -> bool {
|
|
self.name == other.name
|
|
&& self.parent_index == other.parent_index
|
|
&& self.instances == other.instances
|
|
}
|
|
}
|
|
|
|
impl std::fmt::Debug for Classifier {
|
|
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
|
|
f.debug_struct("Classifier")
|
|
.field("name", &self.name)
|
|
.field("parent_index", &self.parent_index)
|
|
.field("instances", &self.instances)
|
|
.finish()
|
|
}
|
|
}
|
|
|
|
impl Classifier {
|
|
fn new(name: &str, used_for_fn: Arc<dyn Fn(&str, &str) -> bool>) -> Self {
|
|
Self {
|
|
name: name.into(),
|
|
parent_index: None,
|
|
instances: Vec::new(),
|
|
used_for_fn,
|
|
}
|
|
}
|
|
|
|
fn set_parent(&mut self, parent: usize) {
|
|
self.parent_index = Some(parent);
|
|
}
|
|
|
|
fn add_instance(&mut self, name: ClassifierInstance) {
|
|
self.instances.push(name);
|
|
}
|
|
|
|
fn set_desc(&mut self, desc: &str) {
|
|
self.instances.last_mut().unwrap().set_desc(desc.trim());
|
|
}
|
|
|
|
fn used_for(&self, name: &str) -> bool {
|
|
(self.used_for_fn)(&self.name, name)
|
|
}
|
|
|
|
fn add_section(&mut self, section: Section) {
|
|
self.instances.last_mut().unwrap().add_section(section);
|
|
}
|
|
}
|
|
|
|
#[derive(Clone, Debug, PartialEq, Serialize, Deserialize)]
|
|
pub(crate) enum Content {
|
|
Text(String), //This is my direct law text
|
|
Item(Vec<Content>), //(1) This is general law. (2) This is more specific law
|
|
List(Vec<Content>),
|
|
}
|
|
|
|
#[cfg(test)]
mod tests {
    use pretty_assertions::assert_eq;
    use std::{
        fs::{self, File},
        io::{self, BufRead},
        path::Path,
    };

    use super::*;

    /// Reads the file `filename` and returns all of its lines.
    fn read_lines<P>(filename: P) -> io::Result<Vec<String>>
    where
        P: AsRef<Path>,
    {
        io::BufReader::new(File::open(filename)?).lines().collect()
    }

    /// Builds the UrhG through `LawBuilder::new` and compares it with the stored
    /// result. Ignored by default.
    #[ignore]
    #[test]
    fn test_with_live_data() {
        let law = LawBuilder::new("UrhG");

        let json = fs::read_to_string(Path::new("./data/urhg/builder.result")).unwrap();
        let expected: Law = serde_json::from_str(&json).unwrap();

        assert_eq!(law, expected);
    }

    /// Replays the recorded builder calls for the UrhG and compares the resulting
    /// law with the stored result.
    #[test]
    fn test_builder_full_urhg() {
        let mut builder = LawBuilder::test("UrhG");

        let path = Path::new("./data/urhg/par");
        let recorded = read_lines(path.join("../par.result")).unwrap();

        for line in recorded {
            // Every line has the form "<command>:<payload>".
            let (command, content) = line.split_once(':').unwrap();

            match command {
                "New_header" => builder.new_header(content),
                "New desc" => builder.new_desc(content),
                "New_new_para_header" => builder.new_next_para_header(content),
                "New_par" => {
                    // Payload is "<paragraph>;<content as JSON>".
                    let (par, real_content) = line.split_once(';').unwrap();
                    let (_, real_par) = par.split_once(':').unwrap();
                    let real_content: Content = serde_json::from_str(real_content).unwrap();
                    builder.new_par(real_par.trim().into(), real_content);
                }
                _ => panic!("Don't know command '{command}'"),
            }
        }

        let actual: Law = builder.into();

        // To inspect the result, print `serde_json::to_string(&actual).unwrap()`.

        let json = fs::read_to_string(path.join("../builder.result")).unwrap();
        let expected: Law = serde_json::from_str(&json).unwrap();

        assert_eq!(actual, expected);
    }
}
|