620 lines
19 KiB
Rust
620 lines
19 KiB
Rust
use log::{debug, info};
|
|
use risp::risparser::overview::parse;
|
|
use serde::{Deserialize, Serialize};
|
|
use std::{
|
|
cell::RefCell,
|
|
fmt::{self, Display},
|
|
rc::Rc,
|
|
sync::Arc,
|
|
};
|
|
|
|
use crate::par;
|
|
|
|
use self::responsible::{
|
|
contains, contains_at_start, contains_without_unter, starts_with_letter, starts_with_number,
|
|
starts_with_roman_number, starts_with_uppercaseletter,
|
|
};
|
|
|
|
mod responsible;
|
|
|
|
#[derive(Debug, Serialize, Deserialize, PartialEq)]
|
|
pub(crate) struct Law {
|
|
name: String, //ABGB, UrhG
|
|
header: Vec<Heading>,
|
|
}
|
|
|
|
impl Law {
|
|
pub(crate) fn to_md(&self) {
|
|
println!("# {}", self.name);
|
|
|
|
for header in &self.header {
|
|
Self::print_md(header, 2);
|
|
}
|
|
}
|
|
|
|
fn print_md(header: &Heading, level: usize) {
|
|
println!("{} {}", "#".repeat(level), header);
|
|
match &header.content {
|
|
HeadingContent::Heading(h) => {
|
|
for child in h {
|
|
Self::print_md(child, level + 1);
|
|
}
|
|
}
|
|
HeadingContent::Paragraph(p) => {
|
|
for par in p {
|
|
println!("{} {par}", "#".repeat(level + 1));
|
|
}
|
|
}
|
|
}
|
|
}
|
|
}
|
|
|
|
impl From<LawBuilder> for Law {
|
|
fn from(builder: LawBuilder) -> Self {
|
|
let mut ret = Vec::new();
|
|
|
|
for header in builder.header {
|
|
ret.push(Heading {
|
|
name: header.borrow().name.clone(),
|
|
desc: header.borrow().desc.clone(),
|
|
content: header.borrow().clone().into(),
|
|
});
|
|
}
|
|
|
|
Self {
|
|
name: builder.name,
|
|
header: ret,
|
|
}
|
|
}
|
|
}
|
|
|
|
#[derive(Debug, Serialize, Deserialize, PartialEq)]
|
|
struct Heading {
|
|
name: String, //1. Hauptstück; 3. Theil; ...
|
|
desc: Option<String>,
|
|
content: HeadingContent, // 1. Theil; 1. Subtheil; ...
|
|
}
|
|
|
|
impl Display for Heading {
|
|
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
|
|
if let Some(desc) = &self.desc {
|
|
f.write_str(&format!("{} ({desc})\n", self.name))
|
|
} else {
|
|
f.write_str(&format!("{}\n", self.name))
|
|
}
|
|
}
|
|
}
|
|
|
|
#[derive(Debug, Serialize, Deserialize, PartialEq)]
|
|
enum HeadingContent {
|
|
Paragraph(Vec<Section>),
|
|
Heading(Vec<Heading>),
|
|
}
|
|
|
|
impl From<ClassifierInstance> for HeadingContent {
|
|
fn from(value: ClassifierInstance) -> Self {
|
|
if value.sections.is_empty() {
|
|
let mut ret = Vec::new();
|
|
for child in value.children {
|
|
ret.push(Heading {
|
|
name: child.borrow().name.clone(),
|
|
desc: child.borrow().desc.clone(),
|
|
content: child.borrow().clone().into(),
|
|
});
|
|
}
|
|
|
|
Self::Heading(ret)
|
|
} else {
|
|
Self::Paragraph(value.sections)
|
|
}
|
|
}
|
|
}
|
|
|
|
/// Is used to generate a law struct. It's organized mainly by classifier.
|
|
#[derive(Debug)]
|
|
pub(crate) struct LawBuilder {
|
|
/// Name of the law
|
|
pub(crate) name: String, //ABGB, UrhG
|
|
|
|
/// Structure of the law text
|
|
pub(crate) classifiers: Vec<Classifier>,
|
|
|
|
/// Instances
|
|
pub(crate) header: Vec<Rc<RefCell<ClassifierInstance>>>,
|
|
|
|
last_instance: Option<Rc<RefCell<ClassifierInstance>>>,
|
|
|
|
/// Stores the header of the next paragraph
|
|
pub(crate) next_para_header: Option<String>,
|
|
|
|
#[cfg(test)]
|
|
pub(crate) history: Vec<String>,
|
|
}
|
|
|
|
impl PartialEq for LawBuilder {
|
|
fn eq(&self, other: &Self) -> bool {
|
|
self.name == other.name
|
|
&& self.classifiers == other.classifiers
|
|
&& self.header == other.header
|
|
&& self.next_para_header == other.next_para_header
|
|
}
|
|
}
|
|
|
|
impl LawBuilder {
|
|
/// Creates a builder with a fixed classifier set for unit tests; no law
/// text is parsed.
///
/// `"new"` registers the classifiers `a` (root), `b`, `c` and `d`, `"UrhG"`
/// registers the UrhG classifiers, any other name registers none.
#[cfg(test)]
pub(crate) fn test(name: &str) -> Self {
    let classifiers = match name {
        "new" => vec![
            Classifier::new("a", Arc::new(&contains)).root(),
            Classifier::new("b", Arc::new(&contains)),
            Classifier::new("c", Arc::new(&contains)),
            Classifier::new("d", Arc::new(&contains)),
        ],
        "UrhG" => vec![
            Classifier::new("Hauptstück", Arc::new(&contains)).root(),
            Classifier::new("Abschnitt", Arc::new(&contains)),
            Classifier::new("Number", Arc::new(&starts_with_number)),
        ],
        _ => Vec::new(),
    };

    Self {
        name: name.into(),
        classifiers,
        header: Vec::new(),
        next_para_header: None,
        last_instance: None,
        #[cfg(test)]
        history: Vec::new(),
    }
}
|
|
|
|
/// Creates a new law builder. Adds classifier for known law texts.
|
|
pub(crate) fn new(name: &str) -> Self {
|
|
let mut classifiers = Vec::new();
|
|
|
|
let mut law_id = None;
|
|
if name == "UrhG" {
|
|
law_id = Some(10_001_848);
|
|
|
|
classifiers.push(Classifier::new("Hauptstück", Arc::new(&contains)).root());
|
|
classifiers.push(Classifier::new("Abschnitt", Arc::new(&contains)));
|
|
classifiers.push(Classifier::new("Number", Arc::new(&starts_with_number)));
|
|
} else if name == "MSchG" {
|
|
law_id = Some(10_002_180);
|
|
|
|
classifiers.push(Classifier::new("Abschnitt", Arc::new(&contains)).root());
|
|
classifiers.push(Classifier::new("Number", Arc::new(&starts_with_number)));
|
|
} else if name == "ABGB" {
|
|
law_id = Some(10_001_622);
|
|
|
|
classifiers.push(Classifier::new("Einleitung", Arc::new(&contains)).root());
|
|
classifiers.push(Classifier::new("Theil", Arc::new(&contains)).root());
|
|
|
|
classifiers.push(Classifier::new("Hauptstück", Arc::new(&contains)));
|
|
classifiers.push(Classifier::new("Abschnitt", Arc::new(&contains)));
|
|
classifiers.push(Classifier::new("Abtheilung", Arc::new(&contains)));
|
|
|
|
classifiers.push(Classifier::new("heading", Arc::new(&contains_at_start)));
|
|
classifiers.push(Classifier::new("letter", Arc::new(&starts_with_letter)));
|
|
classifiers.push(Classifier::new("num", Arc::new(&starts_with_number)));
|
|
classifiers.push(Classifier::new("rom", Arc::new(&starts_with_roman_number)));
|
|
} else if name == "FSG" {
|
|
law_id = Some(10_003_898);
|
|
|
|
classifiers.push(Classifier::new("Artikel", Arc::new(&contains)).root());
|
|
|
|
classifiers.push(Classifier::new(
|
|
"Abschnitt",
|
|
Arc::new(&contains_without_unter),
|
|
));
|
|
classifiers.push(Classifier::new("Hauptstück", Arc::new(&contains)));
|
|
classifiers.push(Classifier::new("Unterabschnitt", Arc::new(&contains)));
|
|
|
|
classifiers.push(Classifier::new(
|
|
"uppercase letter",
|
|
Arc::new(&starts_with_uppercaseletter),
|
|
));
|
|
classifiers.push(Classifier::new("num", Arc::new(&starts_with_number)));
|
|
} else if name == "VVG" {
|
|
law_id = Some(20_004_425);
|
|
|
|
classifiers.push(Classifier::new("Abschnitt", Arc::new(&contains)).root());
|
|
} else if name == "KSchG" {
|
|
law_id = Some(10_002_462);
|
|
|
|
classifiers.push(Classifier::new("Hauptstück", Arc::new(&contains)).root());
|
|
classifiers.push(Classifier::new("Abschnitt", Arc::new(&contains)));
|
|
} else if name == "StGB" {
|
|
law_id = Some(10_002_296);
|
|
|
|
classifiers.push(Classifier::new("Teil", Arc::new(&contains)).root());
|
|
classifiers.push(Classifier::new("Abschnitt", Arc::new(&contains)));
|
|
}
|
|
|
|
let mut builder = Self {
|
|
name: name.into(),
|
|
classifiers,
|
|
header: Vec::new(),
|
|
next_para_header: None,
|
|
last_instance: None,
|
|
#[cfg(test)]
|
|
history: Vec::new(),
|
|
};
|
|
|
|
let paragraphs = parse(law_id.unwrap()).unwrap();
|
|
|
|
for paragraph in tqdm::tqdm(paragraphs.into_iter()) {
|
|
let cont = par::parse(¶graph, &mut builder).unwrap();
|
|
if !cont {
|
|
break;
|
|
}
|
|
}
|
|
|
|
builder
|
|
}
|
|
|
|
/// Returns the first registered classifier that claims `name`, or `None`
/// when no classifier applies.
fn responsible_classifier(&self, name: &str) -> Option<&Classifier> {
    for candidate in &self.classifiers {
        if candidate.used_for(name) {
            return Some(candidate);
        }
    }
    None
}
|
|
|
|
fn find_parent(
|
|
&self,
|
|
cur: Option<Rc<RefCell<ClassifierInstance>>>,
|
|
class: &Classifier,
|
|
) -> Option<Rc<RefCell<ClassifierInstance>>> {
|
|
let mut cur = cur;
|
|
while let Some(c) = cur {
|
|
let (cur_name, cur_parent) = {
|
|
let c_borrow = c.borrow();
|
|
(c_borrow.name.clone(), c_borrow.parent.clone())
|
|
};
|
|
|
|
let cur_responsible_class = self.responsible_classifier(&cur_name).unwrap();
|
|
if cur_responsible_class == class {
|
|
return c.borrow_mut().get_parent();
|
|
}
|
|
|
|
cur = cur_parent;
|
|
}
|
|
None
|
|
}
|
|
|
|
/// Sets a new header.
|
|
pub(crate) fn new_header(&mut self, name: &str) {
|
|
let name = name.trim();
|
|
#[cfg(test)]
|
|
self.history.push(format!("New_header: {name}"));
|
|
|
|
info!("new_header={name}");
|
|
|
|
let responsible_class = self
|
|
.responsible_classifier(name)
|
|
.unwrap_or_else(|| panic!("No classifier for '{name}'"));
|
|
|
|
let mut heading: ClassifierInstance = name.into();
|
|
|
|
if let Some(last_instance) = &self.last_instance {
|
|
let cur = Some(last_instance.clone());
|
|
|
|
let parent = &self.find_parent(cur, responsible_class);
|
|
match parent {
|
|
None => {
|
|
if responsible_class.root {
|
|
let c = Rc::new(RefCell::new(heading));
|
|
self.header.push(c.clone());
|
|
self.last_instance = Some(c.clone());
|
|
} else {
|
|
heading.set_parent(last_instance.clone());
|
|
let c = Rc::new(RefCell::new(heading));
|
|
last_instance.borrow_mut().add_child(c.clone());
|
|
self.last_instance = Some(c.clone());
|
|
}
|
|
}
|
|
Some(parent) => {
|
|
heading.set_parent(parent.clone());
|
|
let c = Rc::new(RefCell::new(heading));
|
|
parent.borrow_mut().add_child(c.clone());
|
|
self.last_instance = Some(c.clone());
|
|
}
|
|
}
|
|
} else {
|
|
let c = Rc::new(RefCell::new(heading));
|
|
self.header.push(c.clone());
|
|
self.last_instance = Some(c.clone());
|
|
}
|
|
}
|
|
|
|
/// Sets a new description for the last classifier.
|
|
pub(crate) fn new_desc(&mut self, desc: &str) {
|
|
let desc = desc.trim();
|
|
#[cfg(test)]
|
|
self.history.push(format!("New desc: {desc}"));
|
|
|
|
debug!("new_desc={desc}");
|
|
self.last_instance
|
|
.clone()
|
|
.unwrap()
|
|
.borrow_mut()
|
|
.set_desc(desc);
|
|
}
|
|
|
|
/// Adds a new paragraph.
|
|
pub(crate) fn new_par(&mut self, par: String, content: Content) {
|
|
#[cfg(test)]
|
|
self.history.push(format!(
|
|
"New_par: {par};{}",
|
|
serde_json::to_string(&content).unwrap()
|
|
));
|
|
|
|
debug!("new_par=par:{par};content:{content:#?}");
|
|
|
|
let par_header = self.next_para_header.clone();
|
|
self.next_para_header = None;
|
|
|
|
self.last_instance
|
|
.clone()
|
|
.expect("Expect at least one classifier")
|
|
.borrow_mut()
|
|
.add_section(Section {
|
|
symb: par,
|
|
par_header,
|
|
content,
|
|
});
|
|
}
|
|
|
|
/// Next paragraph has a header, store its name.
|
|
pub(crate) fn new_next_para_header(&mut self, header: &str) {
|
|
#[cfg(test)]
|
|
self.history.push(format!("New_new_para_header: {header}"));
|
|
|
|
if let Some(next_para_header) = &self.next_para_header {
|
|
self.new_header(&next_para_header.clone()); // promote to bigger header :-)
|
|
}
|
|
|
|
debug!("new_next_para_header={header}");
|
|
self.next_para_header = Some(header.trim().into());
|
|
}
|
|
}
|
|
|
|
#[derive(Clone, PartialEq, Serialize, Deserialize)]
|
|
pub(crate) struct Section {
|
|
pub(crate) symb: String, // §"1", §"2", ...
|
|
pub(crate) par_header: Option<String>,
|
|
pub(crate) content: Content,
|
|
}
|
|
|
|
impl fmt::Debug for Section {
|
|
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
|
|
let par_header = self.par_header.as_deref().unwrap_or("");
|
|
write!(f, "{} ({})", self.symb, par_header)
|
|
}
|
|
}
|
|
|
|
impl fmt::Display for Section {
|
|
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
|
|
if let Some(header) = &self.par_header {
|
|
f.write_str(&format!("{} ({})\n{}", self.symb, header, self.content))
|
|
} else {
|
|
f.write_str(&format!("{}\n{}", self.symb, self.content))
|
|
}
|
|
}
|
|
}
|
|
|
|
#[derive(Clone, PartialEq)]
|
|
pub(crate) struct ClassifierInstance {
|
|
pub(crate) name: String, //e.g. 1 Theilstück
|
|
pub(crate) desc: Option<String>,
|
|
pub(crate) sections: Vec<Section>,
|
|
pub(crate) children: Vec<Rc<RefCell<ClassifierInstance>>>,
|
|
pub(crate) parent: Option<Rc<RefCell<ClassifierInstance>>>,
|
|
}
|
|
|
|
impl ClassifierInstance {
|
|
fn new(name: &str) -> Self {
|
|
Self {
|
|
name: name.into(),
|
|
desc: None,
|
|
parent: None,
|
|
sections: Vec::new(),
|
|
children: Vec::new(),
|
|
}
|
|
}
|
|
|
|
fn set_parent(&mut self, parent: Rc<RefCell<ClassifierInstance>>) {
|
|
self.parent = Some(parent);
|
|
}
|
|
|
|
fn get_parent(&self) -> Option<Rc<RefCell<ClassifierInstance>>> {
|
|
self.parent.clone()
|
|
}
|
|
|
|
fn add_child(&mut self, child: Rc<RefCell<ClassifierInstance>>) {
|
|
self.children.push(child);
|
|
}
|
|
|
|
fn set_desc(&mut self, desc: &str) {
|
|
self.desc = Some(desc.into());
|
|
}
|
|
|
|
fn add_section(&mut self, section: Section) {
|
|
self.sections.push(section);
|
|
}
|
|
}
|
|
|
|
impl std::fmt::Debug for ClassifierInstance {
|
|
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
|
|
f.debug_struct("Classifier")
|
|
.field("name", &self.name)
|
|
.field("desc", &self.desc)
|
|
.field("sections", &self.sections)
|
|
.field("children", &self.children)
|
|
.finish_non_exhaustive()
|
|
}
|
|
}
|
|
|
|
impl From<&str> for ClassifierInstance {
|
|
fn from(value: &str) -> Self {
|
|
Self::new(value)
|
|
}
|
|
}
|
|
|
|
/// Predicate deciding whether a classifier applies to a heading name. It is
/// called with the classifier's own name first and the heading name second.
type ClassifierApplicable = Arc<dyn Fn(&str, &str) -> bool>;
|
|
|
|
#[derive(Clone)]
|
|
pub(crate) struct Classifier {
|
|
pub(crate) name: String, // Hauptstück, Theil, Abschnitt, ol
|
|
pub(crate) used_for_fn: ClassifierApplicable,
|
|
pub(crate) instances: Vec<ClassifierInstance>,
|
|
pub(crate) child: Vec<Rc<RefCell<Classifier>>>,
|
|
pub(crate) root: bool,
|
|
}
|
|
|
|
impl PartialEq for Classifier {
|
|
fn eq(&self, other: &Self) -> bool {
|
|
self.name == other.name
|
|
}
|
|
}
|
|
|
|
impl Classifier {
|
|
fn new(name: &str, used_for_fn: ClassifierApplicable) -> Self {
|
|
Self {
|
|
name: name.into(),
|
|
used_for_fn,
|
|
child: Vec::new(),
|
|
instances: Vec::new(),
|
|
root: false,
|
|
}
|
|
}
|
|
|
|
fn root(self) -> Self {
|
|
Self { root: true, ..self }
|
|
}
|
|
|
|
fn used_for(&self, name: &str) -> bool {
|
|
(self.used_for_fn)(&self.name, name)
|
|
}
|
|
}
|
|
|
|
impl std::fmt::Debug for Classifier {
|
|
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
|
|
f.debug_struct("Classifier")
|
|
.field("name", &self.name)
|
|
.field("instances", &self.instances)
|
|
.field("child", &self.child)
|
|
.finish_non_exhaustive()
|
|
}
|
|
}
|
|
|
|
#[derive(Clone, Debug, PartialEq, Serialize, Deserialize)]
|
|
pub(crate) enum Content {
|
|
Text(String), //This is my direct law text
|
|
Item(Vec<Content>), //(1) This is general law. (2) This is more specific law
|
|
List(Vec<Content>),
|
|
}
|
|
|
|
impl Display for Content {
|
|
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
|
|
match self {
|
|
Self::Text(a) => f.write_str(&format!("{a}\n")),
|
|
Self::Item(a) | Self::List(a) => {
|
|
let mut ret = String::new();
|
|
for aa in a {
|
|
ret.push_str(&format!("{aa}"));
|
|
}
|
|
f.write_str(&ret)
|
|
}
|
|
}
|
|
}
|
|
}
|
|
|
|
#[cfg(test)]
|
|
mod tests {
|
|
use pretty_assertions::assert_eq;
|
|
use std::{
|
|
fs::File,
|
|
io::{self, BufRead, Read},
|
|
path::Path,
|
|
};
|
|
|
|
use super::*;
|
|
|
|
/// Reads the file at `filename` and returns its lines without line endings.
///
/// Fails with the first I/O error hit while opening or reading the file.
fn read_lines<P>(filename: P) -> io::Result<Vec<String>>
where
    P: AsRef<Path>,
{
    let mut lines = Vec::new();
    for line in io::BufReader::new(File::open(filename)?).lines() {
        lines.push(line?);
    }
    Ok(lines)
}
|
|
|
|
/// Builds the UrhG through `LawBuilder::new` and compares it with the stored
/// JSON result. Ignored by default.
#[ignore]
#[test]
fn test_with_live_data() {
    let actual = Law::from(LawBuilder::new("UrhG"));

    let mut json = String::new();
    File::open(Path::new("./data/urhg/builder.result"))
        .unwrap()
        .read_to_string(&mut json)
        .unwrap();
    let expected = serde_json::from_str::<Law>(&json).unwrap();

    assert_eq!(actual, expected);
}
|
|
|
|
/// Builds the StGB through `LawBuilder::new` and compares it with the stored
/// JSON result. Ignored by default.
#[ignore]
#[test]
fn test_stgb_with_live_data() {
    let actual = Law::from(LawBuilder::new("StGB"));

    let mut json = String::new();
    File::open(Path::new("./data/stgb/builder.result"))
        .unwrap()
        .read_to_string(&mut json)
        .unwrap();
    let expected = serde_json::from_str::<Law>(&json).unwrap();

    assert_eq!(actual, expected);
}
|
|
|
|
/// Replays the recorded builder commands for the UrhG and checks that the
/// resulting law equals the stored JSON result.
///
/// Every line of the command file has the form `<command>:<argument>`; for
/// `New_par` the argument is `<symbol>;<content as JSON>`.
#[test]
fn test_builder_full_urhg() {
    let mut builder = LawBuilder::test("UrhG");
    let path = Path::new("./data/urhg/par");

    for line in read_lines(path.join("../par.result")).unwrap() {
        let (command, content) = line.split_once(":").unwrap();

        match command {
            "New_header" => builder.new_header(content),
            "New desc" => builder.new_desc(content),
            "New_new_para_header" => builder.new_next_para_header(content),
            "New_par" => {
                // Split the whole line again: the JSON part may contain ':'.
                let (prefix, json) = line.split_once(";").unwrap();
                let (_, symbol) = prefix.split_once(":").unwrap();
                let parsed: Content = serde_json::from_str(json).unwrap();
                builder.new_par(symbol.trim().into(), parsed);
            }
            unknown => panic!("Don't know command '{unknown}'"),
        }
    }

    let actual = Law::from(builder);

    let mut json = String::new();
    File::open(path.join("../builder.result"))
        .unwrap()
        .read_to_string(&mut json)
        .unwrap();
    let expected: Law = serde_json::from_str(&json).unwrap();

    assert_eq!(actual, expected);
}
|
|
}
|