risp/src/law/mod.rs
philipp 443807d2b8
All checks were successful
CI/CD Pipeline / test (push) Successful in 36s
more clean up w/ pedclippy
2024-02-04 21:58:47 +01:00

620 lines
19 KiB
Rust

use log::{debug, info};
use risp::risparser::overview::parse;
use serde::{Deserialize, Serialize};
use std::{
cell::RefCell,
fmt::{self, Display},
rc::Rc,
sync::Arc,
};
use crate::par;
use self::responsible::{
contains, contains_at_start, contains_without_unter, starts_with_letter, starts_with_number,
starts_with_roman_number, starts_with_uppercaseletter,
};
mod responsible;
/// A finished law text: its short name plus the tree of headings built by
/// `LawBuilder`. Serializable so that it can be compared against stored
/// JSON fixtures.
#[derive(Debug, Serialize, Deserialize, PartialEq)]
pub(crate) struct Law {
/// Short name of the law, e.g. "ABGB" or "UrhG".
name: String, //ABGB, UrhG
/// Top-level headings of the law, in the order they were added to the builder.
header: Vec<Heading>,
}
impl Law {
    /// Prints the law to stdout as Markdown: the law name as a level-1
    /// title, followed by every top-level heading starting at level 2.
    pub(crate) fn to_md(&self) {
        println!("# {}", self.name);
        self.header
            .iter()
            .for_each(|top_level| Self::print_md(top_level, 2));
    }

    /// Prints `header` at Markdown depth `level` and then everything below
    /// it one level deeper: nested headings recursively, paragraphs directly.
    fn print_md(header: &Heading, level: usize) {
        let marker = "#".repeat(level);
        println!("{marker} {header}");

        let nested_level = level + 1;
        match &header.content {
            HeadingContent::Paragraph(sections) => {
                let nested_marker = "#".repeat(nested_level);
                for section in sections {
                    println!("{nested_marker} {section}");
                }
            }
            HeadingContent::Heading(children) => {
                for child in children {
                    Self::print_md(child, nested_level);
                }
            }
        }
    }
}
impl From<LawBuilder> for Law {
    /// Converts a filled builder into the final [`Law`]: every top-level
    /// instance of the builder becomes one top-level [`Heading`], and its
    /// content is converted recursively via `HeadingContent::from`.
    fn from(builder: LawBuilder) -> Self {
        let header = builder
            .header
            .into_iter()
            .map(|shared| {
                // Borrow once per node instead of once per field.
                let node = shared.borrow();
                Heading {
                    name: node.name.clone(),
                    desc: node.desc.clone(),
                    content: node.clone().into(),
                }
            })
            .collect();

        Self {
            name: builder.name,
            header,
        }
    }
}
/// One node of the heading tree of a [`Law`].
#[derive(Debug, Serialize, Deserialize, PartialEq)]
struct Heading {
/// Title of the heading as it appears in the law text.
name: String, //1. Hauptstück; 3. Theil; ...
/// Optional description; `Display` renders it in parentheses after the name.
desc: Option<String>,
/// What sits below this heading: either further headings or paragraphs.
content: HeadingContent, // 1. Theil; 1. Subtheil; ...
}
impl Display for Heading {
    /// Writes `name (desc)` when a description is present, otherwise just
    /// `name`; the output always ends with a newline.
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        match self.desc.as_deref() {
            Some(desc) => writeln!(f, "{} ({desc})", self.name),
            None => writeln!(f, "{}", self.name),
        }
    }
}
/// The content directly below a [`Heading`]: a heading holds either
/// paragraphs or nested headings, never both.
#[derive(Debug, Serialize, Deserialize, PartialEq)]
enum HeadingContent {
/// The heading directly contains paragraphs (sections).
Paragraph(Vec<Section>),
/// The heading contains further, nested headings.
Heading(Vec<Heading>),
}
impl From<ClassifierInstance> for HeadingContent {
    /// Converts a builder node into heading content.
    ///
    /// A node that owns at least one section becomes `Paragraph`; in that
    /// case its children are not converted. A node without sections becomes
    /// `Heading`, with every child converted recursively.
    fn from(value: ClassifierInstance) -> Self {
        if !value.sections.is_empty() {
            return Self::Paragraph(value.sections);
        }

        let nested = value
            .children
            .into_iter()
            .map(|shared| {
                let node = shared.borrow();
                Heading {
                    name: node.name.clone(),
                    desc: node.desc.clone(),
                    content: node.clone().into(),
                }
            })
            .collect();
        Self::Heading(nested)
    }
}
/// Is used to generate a law struct. It's organized mainly by classifier.
///
/// The builder is fed through `new_header`, `new_desc`, `new_par` and
/// `new_next_para_header` and is finally converted into a `Law` via `From`.
#[derive(Debug)]
pub(crate) struct LawBuilder {
/// Name of the law
pub(crate) name: String, //ABGB, UrhG
/// Structure of the law text.
/// Order matters: the first classifier whose predicate accepts a heading
/// name is the one used for it.
pub(crate) classifiers: Vec<Classifier>,
/// Instances (the top-level nodes of the heading tree)
pub(crate) header: Vec<Rc<RefCell<ClassifierInstance>>>,
/// The instance created by the most recent `new_header` call; new
/// descriptions and paragraphs are attached to it. Not part of `PartialEq`.
last_instance: Option<Rc<RefCell<ClassifierInstance>>>,
/// Stores the header of the next paragraph
pub(crate) next_para_header: Option<String>,
/// Test-only log of every builder call, one formatted line per call.
#[cfg(test)]
pub(crate) history: Vec<String>,
}
impl PartialEq for LawBuilder {
    /// Two builders are equal when name, classifiers, the instance tree and
    /// the pending paragraph header agree. `last_instance` (and the
    /// test-only `history`) are deliberately not compared.
    fn eq(&self, other: &Self) -> bool {
        if self.name != other.name {
            return false;
        }
        if self.classifiers != other.classifiers {
            return false;
        }
        if self.header != other.header {
            return false;
        }
        self.next_para_header == other.next_para_header
    }
}
impl LawBuilder {
/// Test-only constructor: sets up the classifiers for the fixture named
/// `name` ("new" or "UrhG") without fetching or parsing any law text.
/// Any other name yields a builder without classifiers.
#[cfg(test)]
pub(crate) fn test(name: &str) -> Self {
    let classifiers = match name {
        "new" => vec![
            Classifier::new("a", Arc::new(&contains)).root(),
            Classifier::new("b", Arc::new(&contains)),
            Classifier::new("c", Arc::new(&contains)),
            Classifier::new("d", Arc::new(&contains)),
        ],
        "UrhG" => vec![
            Classifier::new("Hauptstück", Arc::new(&contains)).root(),
            Classifier::new("Abschnitt", Arc::new(&contains)),
            Classifier::new("Number", Arc::new(&starts_with_number)),
        ],
        _ => Vec::new(),
    };

    Self {
        name: name.into(),
        classifiers,
        header: Vec::new(),
        next_para_header: None,
        last_instance: None,
        #[cfg(test)]
        history: Vec::new(),
    }
}
/// Creates a new law builder. Adds classifier for known law texts.
///
/// Looks up the law id and the classifier set registered for `name`,
/// fetches the paragraph overview for that id and feeds every paragraph
/// through `par::parse` until it reports that parsing should stop.
///
/// Known names: "UrhG", "MSchG", "ABGB", "FSG", "VVG", "KSchG", "StGB".
///
/// # Panics
/// Panics if `name` is not one of the known laws, if the overview cannot
/// be fetched, or if a paragraph cannot be parsed.
pub(crate) fn new(name: &str) -> Self {
    // Classifier order matters: the first classifier accepting a heading wins.
    let (law_id, classifiers) = match name {
        "UrhG" => (
            10_001_848,
            vec![
                Classifier::new("Hauptstück", Arc::new(&contains)).root(),
                Classifier::new("Abschnitt", Arc::new(&contains)),
                Classifier::new("Number", Arc::new(&starts_with_number)),
            ],
        ),
        "MSchG" => (
            10_002_180,
            vec![
                Classifier::new("Abschnitt", Arc::new(&contains)).root(),
                Classifier::new("Number", Arc::new(&starts_with_number)),
            ],
        ),
        "ABGB" => (
            10_001_622,
            vec![
                Classifier::new("Einleitung", Arc::new(&contains)).root(),
                Classifier::new("Theil", Arc::new(&contains)).root(),
                Classifier::new("Hauptstück", Arc::new(&contains)),
                Classifier::new("Abschnitt", Arc::new(&contains)),
                Classifier::new("Abtheilung", Arc::new(&contains)),
                Classifier::new("heading", Arc::new(&contains_at_start)),
                Classifier::new("letter", Arc::new(&starts_with_letter)),
                Classifier::new("num", Arc::new(&starts_with_number)),
                Classifier::new("rom", Arc::new(&starts_with_roman_number)),
            ],
        ),
        "FSG" => (
            10_003_898,
            vec![
                Classifier::new("Artikel", Arc::new(&contains)).root(),
                Classifier::new("Abschnitt", Arc::new(&contains_without_unter)),
                Classifier::new("Hauptstück", Arc::new(&contains)),
                Classifier::new("Unterabschnitt", Arc::new(&contains)),
                Classifier::new(
                    "uppercase letter",
                    Arc::new(&starts_with_uppercaseletter),
                ),
                Classifier::new("num", Arc::new(&starts_with_number)),
            ],
        ),
        "VVG" => (
            20_004_425,
            vec![Classifier::new("Abschnitt", Arc::new(&contains)).root()],
        ),
        "KSchG" => (
            10_002_462,
            vec![
                Classifier::new("Hauptstück", Arc::new(&contains)).root(),
                Classifier::new("Abschnitt", Arc::new(&contains)),
            ],
        ),
        "StGB" => (
            10_002_296,
            vec![
                Classifier::new("Teil", Arc::new(&contains)).root(),
                Classifier::new("Abschnitt", Arc::new(&contains)),
            ],
        ),
        // Previously this surfaced as an anonymous `Option::unwrap` panic.
        _ => panic!("Unknown law '{name}': no law id and no classifiers are registered for it"),
    };

    let mut builder = Self {
        name: name.into(),
        classifiers,
        header: Vec::new(),
        next_para_header: None,
        last_instance: None,
        #[cfg(test)]
        history: Vec::new(),
    };

    let paragraphs = parse(law_id).expect("Could not fetch the paragraph overview of the law");
    for paragraph in tqdm::tqdm(paragraphs.into_iter()) {
        let cont = par::parse(&paragraph, &mut builder)
            .expect("Could not parse a paragraph of the law");
        // `par::parse` signals with `false` that no further paragraph should be read.
        if !cont {
            break;
        }
    }
    builder
}
/// Returns the first classifier (in registration order) whose predicate
/// accepts the heading `name`, or `None` if no classifier applies.
fn responsible_classifier(&self, name: &str) -> Option<&Classifier> {
    for candidate in &self.classifiers {
        if candidate.used_for(name) {
            return Some(candidate);
        }
    }
    None
}
/// Finds the node a new heading of classifier `class` should be attached to.
///
/// Starting at `cur`, walks up the parent chain until it reaches an
/// instance whose responsible classifier equals `class`, and returns the
/// parent of that instance (the new heading becomes its sibling).
///
/// Returns `None` when no instance on the chain belongs to `class`, or
/// when the matching instance has no parent.
///
/// # Panics
/// Panics if an instance on the chain has a name no classifier accepts.
/// Every instance is created by `new_header`, which already requires a
/// responsible classifier, so this indicates a broken invariant.
fn find_parent(
    &self,
    cur: Option<Rc<RefCell<ClassifierInstance>>>,
    class: &Classifier,
) -> Option<Rc<RefCell<ClassifierInstance>>> {
    let mut cursor = cur;
    while let Some(node) = cursor {
        // Copy what is needed so the `RefCell` borrow ends right away.
        let (node_name, node_parent) = {
            let node_ref = node.borrow();
            (node_ref.name.clone(), node_ref.parent.clone())
        };
        let node_class = self
            .responsible_classifier(&node_name)
            .unwrap_or_else(|| panic!("No classifier for '{node_name}'"));
        if node_class == class {
            // The parent was already read above; no second (mutable) borrow needed.
            return node_parent;
        }
        cursor = node_parent;
    }
    None
}
/// Sets a new header.
///
/// Creates an instance for the (trimmed) heading `name` and places it in
/// the tree:
/// * if `find_parent` yields a node, the new instance becomes its child;
/// * otherwise, if there is no previous instance or the responsible
///   classifier is a root classifier, it is appended to the top-level list;
/// * otherwise it becomes a child of the previously created instance.
///
/// The new instance becomes the target of subsequent `new_desc` /
/// `new_par` calls.
///
/// # Panics
/// Panics if no classifier accepts `name`.
pub(crate) fn new_header(&mut self, name: &str) {
    let name = name.trim();
    #[cfg(test)]
    self.history.push(format!("New_header: {name}"));
    info!("new_header={name}");

    let responsible_class = self
        .responsible_classifier(name)
        .unwrap_or_else(|| panic!("No classifier for '{name}'"));

    // Decide first where the new node goes (`None` = top level) ...
    let attach_to = match &self.last_instance {
        None => None,
        Some(previous) => {
            match self.find_parent(Some(previous.clone()), responsible_class) {
                Some(parent) => Some(parent),
                None if responsible_class.root => None,
                None => Some(previous.clone()),
            }
        }
    };

    // ... then build and insert it in a single place.
    let mut heading: ClassifierInstance = name.into();
    if let Some(parent) = &attach_to {
        heading.set_parent(parent.clone());
    }
    let node = Rc::new(RefCell::new(heading));
    match attach_to {
        Some(parent) => {
            parent.borrow_mut().add_child(node.clone());
        }
        None => {
            self.header.push(node.clone());
        }
    }
    self.last_instance = Some(node);
}
/// Sets a new description for the last classifier.
///
/// `desc` is trimmed and stored on the instance created by the most recent
/// `new_header` call, replacing any earlier description.
///
/// # Panics
/// Panics if no header has been created yet.
pub(crate) fn new_desc(&mut self, desc: &str) {
    let desc = desc.trim();
    #[cfg(test)]
    self.history.push(format!("New desc: {desc}"));
    debug!("new_desc={desc}");
    // Borrow the shared node in place (no `Rc` clone needed) and fail with
    // the same message `new_par` uses for a missing instance.
    self.last_instance
        .as_ref()
        .expect("Expect at least one classifier")
        .borrow_mut()
        .set_desc(desc);
}
/// Adds a new paragraph.
///
/// The paragraph with symbol `par` and body `content` is appended to the
/// instance created by the most recent `new_header` call. A pending
/// paragraph header (see `new_next_para_header`) is consumed and attached
/// to this paragraph.
///
/// # Panics
/// Panics if no header has been created yet.
pub(crate) fn new_par(&mut self, par: String, content: Content) {
    #[cfg(test)]
    self.history.push(format!(
        "New_par: {par};{}",
        serde_json::to_string(&content).unwrap()
    ));
    debug!("new_par=par:{par};content:{content:#?}");

    let section = Section {
        symb: par,
        // `take` hands over the pending header and resets it in one step.
        par_header: self.next_para_header.take(),
        content,
    };
    let target = self
        .last_instance
        .as_ref()
        .expect("Expect at least one classifier");
    target.borrow_mut().add_section(section);
}
/// Next paragraph has a header, store its name.
///
/// If an earlier pending header was never consumed by `new_par`, it is
/// promoted to a regular heading through `new_header` before the new one
/// (trimmed) is stored.
pub(crate) fn new_next_para_header(&mut self, header: &str) {
    #[cfg(test)]
    self.history.push(format!("New_new_para_header: {header}"));
    if let Some(unclaimed) = self.next_para_header.take() {
        self.new_header(&unclaimed); // promote to bigger header :-)
    }
    debug!("new_next_para_header={header}");
    self.next_para_header = Some(header.trim().into());
}
}
/// A single paragraph of a law: its symbol, an optional header and its text.
#[derive(Clone, PartialEq, Serialize, Deserialize)]
pub(crate) struct Section {
/// Symbol identifying the paragraph.
pub(crate) symb: String, // §"1", §"2", ...
/// Header that was announced for this paragraph, if any.
pub(crate) par_header: Option<String>,
/// The paragraph's content.
pub(crate) content: Content,
}
impl fmt::Debug for Section {
    /// Compact debug form `symb (header)`; the content is omitted and a
    /// missing header is rendered as an empty pair of parentheses.
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        let header: &str = match &self.par_header {
            Some(text) => text,
            None => "",
        };
        write!(f, "{} ({header})", self.symb)
    }
}
impl fmt::Display for Section {
    /// Writes the symbol (followed by the header in parentheses, if any)
    /// on the first line and the content after it.
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        match &self.par_header {
            Some(header) => write!(f, "{} ({header})\n{}", self.symb, self.content),
            None => write!(f, "{}\n{}", self.symb, self.content),
        }
    }
}
/// One concrete heading found in a law text while it is being built.
///
/// Instances form a tree: a node lists its `children` and points back to
/// its `parent`.
///
/// NOTE(review): both directions are strong `Rc`s, so a linked parent/child
/// pair keeps each other alive — confirm whether `parent` should be a `Weak`.
#[derive(Clone)]
pub(crate) struct ClassifierInstance {
    /// Heading text of this instance.
    pub(crate) name: String, //e.g. 1 Theilstück
    /// Optional description of the heading.
    pub(crate) desc: Option<String>,
    /// Paragraphs attached directly to this heading.
    pub(crate) sections: Vec<Section>,
    /// Headings nested below this one.
    pub(crate) children: Vec<Rc<RefCell<ClassifierInstance>>>,
    /// The heading this one is nested in; `None` for top-level headings.
    pub(crate) parent: Option<Rc<RefCell<ClassifierInstance>>>,
}

impl PartialEq for ClassifierInstance {
    /// Structural equality of the subtree rooted at this node.
    ///
    /// `parent` is intentionally not compared: a child's `parent` leads back
    /// to a node whose `children` contain that child again, so a derived
    /// comparison of two equal, nested trees would recurse without end.
    /// This mirrors the `Debug` impl, which leaves out `parent` as well.
    fn eq(&self, other: &Self) -> bool {
        self.name == other.name
            && self.desc == other.desc
            && self.sections == other.sections
            && self.children == other.children
    }
}
impl ClassifierInstance {
    /// Creates an instance called `name` without description, sections,
    /// children or parent.
    fn new(name: &str) -> Self {
        Self {
            name: String::from(name),
            desc: None,
            sections: Vec::new(),
            children: Vec::new(),
            parent: None,
        }
    }

    /// Records `parent` as the node this instance is nested in.
    fn set_parent(&mut self, parent: Rc<RefCell<ClassifierInstance>>) {
        self.parent = Some(parent);
    }

    /// Returns another handle to the parent node, if there is one.
    fn get_parent(&self) -> Option<Rc<RefCell<ClassifierInstance>>> {
        self.parent.as_ref().map(Rc::clone)
    }

    /// Appends `child` to the nested headings of this instance.
    fn add_child(&mut self, child: Rc<RefCell<ClassifierInstance>>) {
        self.children.push(child);
    }

    /// Sets (or replaces) the description.
    fn set_desc(&mut self, desc: &str) {
        self.desc = Some(desc.to_owned());
    }

    /// Appends `section` to the paragraphs of this instance.
    fn add_section(&mut self, section: Section) {
        self.sections.push(section);
    }
}
impl std::fmt::Debug for ClassifierInstance {
    /// Debug output of the subtree rooted at this instance.
    ///
    /// `parent` is left out (hence `finish_non_exhaustive`): printing it
    /// would lead back to this node through the parent's `children`.
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        // Use the type's own name; it used to be printed as "Classifier",
        // which made the output indistinguishable from `Classifier`'s.
        f.debug_struct("ClassifierInstance")
            .field("name", &self.name)
            .field("desc", &self.desc)
            .field("sections", &self.sections)
            .field("children", &self.children)
            .finish_non_exhaustive()
    }
}
impl From<&str> for ClassifierInstance {
fn from(value: &str) -> Self {
Self::new(value)
}
}
/// Predicate deciding whether a classifier is responsible for a heading.
/// It is called with the classifier's name first and the heading name second.
type ClassifierApplicable = Arc<dyn Fn(&str, &str) -> bool>;
/// A kind of heading that can occur in a law text, together with the
/// predicate that recognises headings of this kind.
#[derive(Clone)]
pub(crate) struct Classifier {
/// Name of the classifier; passed to the predicate as its first argument
/// and the only field considered by `PartialEq`.
pub(crate) name: String, // Hauptstück, Theil, Abschnitt, ol
/// Predicate used by `used_for` to test a heading name.
pub(crate) used_for_fn: ClassifierApplicable,
/// Initialised empty by `new`; not filled by any code visible in this file.
pub(crate) instances: Vec<ClassifierInstance>,
/// Initialised empty by `new`; not filled by any code visible in this file.
pub(crate) child: Vec<Rc<RefCell<Classifier>>>,
/// Whether headings of this kind may be placed at the top level of a law.
pub(crate) root: bool,
}
impl PartialEq for Classifier {
    /// Classifiers are identified by name alone; the predicate and all
    /// other fields are ignored.
    fn eq(&self, other: &Self) -> bool {
        let (left, right) = (self.name.as_str(), other.name.as_str());
        left == right
    }
}
impl Classifier {
    /// Creates a non-root classifier called `name` that recognises headings
    /// with the predicate `used_for_fn`.
    fn new(name: &str, used_for_fn: ClassifierApplicable) -> Self {
        Self {
            name: String::from(name),
            used_for_fn,
            instances: Vec::new(),
            child: Vec::new(),
            root: false,
        }
    }

    /// Builder-style helper: returns the classifier marked as a root
    /// classifier.
    fn root(mut self) -> Self {
        self.root = true;
        self
    }

    /// Tells whether this classifier is responsible for the heading `name`.
    fn used_for(&self, name: &str) -> bool {
        let predicate = &*self.used_for_fn;
        predicate(&self.name, name)
    }
}
impl std::fmt::Debug for Classifier {
    /// Debug output with name, instances and children; the remaining
    /// fields (among them the predicate) are elided, which is why the
    /// output is marked as non-exhaustive.
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        let mut out = f.debug_struct("Classifier");
        out.field("name", &self.name);
        out.field("instances", &self.instances);
        out.field("child", &self.child);
        out.finish_non_exhaustive()
    }
}
/// The body of a paragraph: plain text or a nested group of further content.
#[derive(Clone, Debug, PartialEq, Serialize, Deserialize)]
pub(crate) enum Content {
/// A piece of plain text; `Display` writes it followed by a newline.
Text(String), //This is my direct law text
/// A sequence of content parts.
Item(Vec<Content>), //(1) This is general law. (2) This is more specific law
/// A list of content parts; `Display` renders it exactly like `Item`.
List(Vec<Content>),
}
impl Display for Content {
    /// Writes a text entry followed by a newline; `Item` and `List` write
    /// all of their parts one after another without any separator.
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        match self {
            Self::Text(text) => writeln!(f, "{text}"),
            Self::Item(parts) | Self::List(parts) => {
                parts.iter().try_for_each(|part| write!(f, "{part}"))
            }
        }
    }
}
#[cfg(test)]
mod tests {
    use pretty_assertions::assert_eq;
    use std::{
        fs::File,
        io::{self, BufRead, Read},
        path::Path,
    };

    use super::*;

    /// Reads `filename` and returns its lines; fails on the first I/O error.
    fn read_lines<P>(filename: P) -> io::Result<Vec<String>>
    where
        P: AsRef<Path>,
    {
        io::BufReader::new(File::open(filename)?).lines().collect()
    }

    /// Loads a JSON-serialized [`Law`] fixture from `path`.
    fn load_law_fixture<P: AsRef<Path>>(path: P) -> Law {
        let mut json = String::new();
        File::open(path)
            .unwrap()
            .read_to_string(&mut json)
            .unwrap();
        serde_json::from_str(&json).unwrap()
    }

    /// Builds the UrhG from live data and compares it with the stored
    /// result. Ignored by default.
    #[ignore]
    #[test]
    fn test_with_live_data() {
        let law: Law = LawBuilder::new("UrhG").into();
        let expected = load_law_fixture(Path::new("./data/urhg/builder.result"));
        assert_eq!(law, expected);
    }

    /// Builds the StGB from live data and compares it with the stored
    /// result. Ignored by default.
    #[ignore]
    #[test]
    fn test_stgb_with_live_data() {
        let law: Law = LawBuilder::new("StGB").into();
        // To refresh the fixture, print `serde_json::to_string(&law).unwrap()`.
        let expected = load_law_fixture(Path::new("./data/stgb/builder.result"));
        assert_eq!(law, expected);
    }

    /// Replays the recorded builder calls for the UrhG and checks that the
    /// resulting law matches the stored result.
    #[test]
    fn test_builder_full_urhg() {
        let mut builder = LawBuilder::test("UrhG");
        let path = Path::new("./data/urhg/par");

        // Each line has the form `<command>:<payload>`, as written to
        // `LawBuilder::history`.
        for line in read_lines(path.join("../par.result")).unwrap() {
            let (command, payload) = line.split_once(":").unwrap();
            match command {
                "New_header" => builder.new_header(payload),
                "New desc" => builder.new_desc(payload),
                "New_new_para_header" => builder.new_next_para_header(payload),
                "New_par" => {
                    // `New_par: <symbol>;<content as JSON>`
                    let (head, json) = line.split_once(";").unwrap();
                    let (_, symbol) = head.split_once(":").unwrap();
                    let content: Content = serde_json::from_str(json).unwrap();
                    builder.new_par(symbol.trim().into(), content);
                }
                _ => {
                    panic!("Don't know command '{command}'");
                }
            }
        }

        let actual: Law = builder.into();
        // To refresh the fixture, print `serde_json::to_string(&actual).unwrap()`.
        let expected = load_law_fixture(path.join("../builder.result"));
        assert_eq!(actual, expected);
    }
}