diff --git a/Cargo.lock b/Cargo.lock
index 92cf46f..b34b955 100644
--- a/Cargo.lock
+++ b/Cargo.lock
@@ -396,6 +396,7 @@ dependencies = [
"serde",
"serde_json",
"time",
+ "toml",
"tqdm",
"ureq",
]
@@ -603,6 +604,15 @@ version = "0.1.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "1f3ccbac311fea05f86f61904b462b55fb3df8837a366dfc601a0161d0532f20"
+[[package]]
+name = "toml"
+version = "0.5.11"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "f4f7f0dd8d50a853a531c426359045b1998f04219d88799810762cd4ad314234"
+dependencies = [
+ "serde",
+]
+
[[package]]
name = "tqdm"
version = "0.6.0"
diff --git a/Cargo.toml b/Cargo.toml
index 4db3db1..e0e355a 100644
--- a/Cargo.toml
+++ b/Cargo.toml
@@ -14,6 +14,8 @@ roxmltree = "0.18"
env_logger = "0.10"
log = "0.4"
tqdm = "0.6"
+toml = "0.5"
+
[dev-dependencies]
pretty_assertions = "1.4"
diff --git a/data/configs/mschg.toml b/data/configs/mschg.toml
new file mode 100644
index 0000000..db3f0f0
--- /dev/null
+++ b/data/configs/mschg.toml
@@ -0,0 +1,34 @@
+[law]
+# Must be a TOML integer: it is deserialized into the `usize` field `Law::id`.
+id = 10002180
+
+[[law.classifiers]]
+name = "Abschnitt"
+# Root classifier, matching the hand-written `mschg` test for this law id.
+is_root = true
+match_function = "contains"
+
+[[law.classifiers]]
+name = "Number"
+is_root = false
+match_function = "starts_with_number"
+
+[parser]
+# NOTE(review): both entries are empty strings as written; confirm which strings should be removed.
+remove_strings = ["", ""]
+
+# Turns a non-breaking space into a plain space. TOML spells U+00A0 as `\u00A0`;
+# writing `\\u{a0}` would instead match the six literal characters `\u{a0}`.
+[[parser.replace_rules]]
+find = "\u00A0"
+replace_with = " "
+
+# NOTE(review): `find` and `replace_with` are identical, so this rule has no effect as written;
+# confirm the intended pattern.
+[[parser.replace_rules]]
+find = "bis"
+replace_with = "bis"
+
+# NOTE(review): `find` and `replace_with` are identical here as well; confirm the intended pattern.
+[[parser.replace_rules]]
+find = "ter"
+replace_with = "ter"
+
+# NOTE(review): the pattern is an empty string as written; confirm what should be replaced by "-".
+[[parser.replace_rules]]
+find = ""
+replace_with = "-"
+
+# Prefixes the repeal note of § 69 with "§ 69.".
+[[parser.replace_rules]]
+find = "(Anm.: § 69 aufgehoben durch Art. 1 Z 12, BGBl. I Nr. 124/2017)"
+replace_with = "§ 69.(Anm.: § 69 aufgehoben durch Art. 1 Z 12, BGBl. I Nr. 124/2017)"
+
diff --git a/src/config.rs b/src/config.rs
new file mode 100644
index 0000000..a0dc3d4
--- /dev/null
+++ b/src/config.rs
@@ -0,0 +1,93 @@
+use serde::Deserialize;
+use std::fs;
+use std::path::Path;
+use std::sync::Arc;
+
+use crate::law::{self, responsible::*};
+use crate::law::{ClassifierApplicable, LawBuilder};
+use crate::misc::Error;
+use crate::risparser::paragraph::Parser;
+
+/// Maps the `match_function` name used in a config file to the predicate of the
+/// same name brought into scope by the `responsible::*` glob import.
+///
+/// # Errors
+///
+/// Returns an [`Error`] with the message `Unknown match function: <name>` when
+/// `match_function` is not one of the names listed in the `match` below.
+// TODO: more generic
+fn create_classifier(match_function: &str) -> Result<ClassifierApplicable, Error> {
+    let func: ClassifierApplicable = match match_function {
+        "contains" => Arc::new(contains),
+        "starts_with_roman_number" => Arc::new(starts_with_roman_number),
+        "contains_at_start" => Arc::new(contains_at_start),
+        "starts_with_number" => Arc::new(starts_with_number),
+        "starts_with_letter" => Arc::new(starts_with_letter),
+        // NOTE(review): this name resolves to `starts_with_letter`, the same predicate as the
+        // arm above; confirm whether a dedicated uppercase predicate was intended.
+        "starts_with_uppercaseletter" => Arc::new(starts_with_letter),
+        "contains_without_unter" => Arc::new(contains_without_unter),
+        unknown => {
+            return Err(Error::new(&format!(
+                "Unknown match function: {}",
+                unknown
+            )))
+        }
+    };
+
+    Ok(func)
+}
+
+/// In-memory form of a law configuration file written in TOML.
+#[derive(Debug, Deserialize)]
+pub struct Config {
+    /// The `[law]` table.
+    law: Law,
+    /// The `[parser]` table.
+    parser: ParserConfig,
+}
+
+impl Config {
+    /// Reads the TOML file at `path` and turns it into the pieces needed to parse a law.
+    ///
+    /// Returns the configured law id, a [`LawBuilder`] holding one classifier per
+    /// `[[law.classifiers]]` entry, and a [`Parser`] holding the configured
+    /// remove and replace rules.
+    ///
+    /// # Errors
+    ///
+    /// Fails when the file cannot be read, when its content cannot be deserialized
+    /// into a [`Config`], or when a classifier names an unknown `match_function`.
+    pub fn load<P: AsRef<Path>>(path: P) -> Result<(usize, LawBuilder, Parser), Error> {
+        let config_str = fs::read_to_string(path)?;
+        let config: Config = toml::from_str(&config_str)?;
+
+        let mut builder = LawBuilder::new();
+        for classifier in config.law.classifiers {
+            let matcher = create_classifier(&classifier.match_function)?;
+            let to_add = law::Classifier::new(&classifier.name, matcher);
+            let to_add = if classifier.is_root {
+                to_add.root()
+            } else {
+                to_add
+            };
+            builder.add_classifier(to_add);
+        }
+
+        let mut parser = Parser::new();
+
+        for to_remove in config.parser.remove_strings {
+            parser.add_string_to_remove(&to_remove);
+        }
+
+        for rule in config.parser.replace_rules {
+            parser.add_string_to_replace(&rule.find, &rule.replace_with);
+        }
+
+        Ok((config.law.id, builder, parser))
+    }
+}
+
+/// The `[law]` table of a config file.
+#[derive(Debug, Deserialize)]
+struct Law {
+    /// Identifier of the law; `Config::load` returns it as the first tuple element.
+    id: usize,
+    /// The `[[law.classifiers]]` entries.
+    classifiers: Vec<Classifier>,
+}
+
+/// One `[[law.classifiers]]` entry of a config file.
+#[derive(Debug, Deserialize)]
+struct Classifier {
+    /// Name given to the `law::Classifier` built from this entry.
+    name: String,
+    /// Whether `.root()` is applied to the built classifier. Defaults to `false`
+    /// when the key is absent, so config files may omit it.
+    #[serde(default)]
+    is_root: bool,
+    /// Name of the predicate; resolved by `create_classifier`.
+    match_function: String,
+}
+
+/// The `[parser]` table of a config file.
+#[derive(Debug, Deserialize)]
+struct ParserConfig {
+    /// Strings handed one by one to `Parser::add_string_to_remove`.
+    remove_strings: Vec<String>,
+    /// The `[[parser.replace_rules]]` entries, handed to `Parser::add_string_to_replace`.
+    replace_rules: Vec<ReplaceRule>,
+}
+
+/// One `[[parser.replace_rules]]` entry of a config file.
+#[derive(Debug, Deserialize)]
+struct ReplaceRule {
+ /// Text to look for; passed to `Parser::add_string_to_replace` as its first argument.
+ find: String,
+ /// Replacement text; passed to `Parser::add_string_to_replace` as its second argument.
+ replace_with: String,
+}
diff --git a/src/law/mod.rs b/src/law/mod.rs
index e9147d0..4fe41fd 100644
--- a/src/law/mod.rs
+++ b/src/law/mod.rs
@@ -7,20 +7,33 @@ use std::{
sync::Arc,
};
+use crate::{config::Config, misc::Error, risparser::overview::parse};
+
pub mod responsible;
/// That's our struct, holding all the information of the law text.
#[derive(Debug, Serialize, Deserialize, PartialEq)]
pub struct Law {
- pub name: String, //ABGB, UrhG
pub header: Vec,
}
impl Law {
+    /// Builds a [`Law`] from the TOML configuration file at `path`.
+    ///
+    /// Loads the configuration, passes the configured law id to
+    /// `risparser::overview::parse`, and feeds every returned item to the
+    /// configured paragraph parser, stopping as soon as the parser returns `false`.
+    ///
+    /// # Errors
+    ///
+    /// Returns an error when the configuration cannot be loaded or when either
+    /// parsing step fails; failures are propagated instead of panicking.
+    pub fn from_config(path: &str) -> Result<Self, Error> {
+        let (law_id, mut builder, parser) = Config::load(path)?;
+
+        for par in parse(law_id)? {
+            let keep_going = parser.parse(&par, &mut builder)?;
+            if !keep_going {
+                break;
+            }
+        }
+
+        Ok(builder.into())
+    }
+
//TODO: add test
pub fn to_md(&self) {
- println!("# {}", self.name);
-
for header in &self.header {
Self::print_md(header, 2);
}
@@ -55,10 +68,7 @@ impl From for Law {
});
}
- Self {
- name: builder.name,
- header: ret,
- }
+ Self { header: ret }
}
}
@@ -107,9 +117,6 @@ impl From for HeadingContent {
/// Is used to generate a law struct. It's organized mainly by classifier.
#[derive(Debug)]
pub struct LawBuilder {
- /// Name of the law
- name: String, //ABGB, UrhG
-
/// Structure of the law text
classifiers: Vec,
@@ -127,8 +134,7 @@ pub struct LawBuilder {
impl PartialEq for LawBuilder {
fn eq(&self, other: &Self) -> bool {
- self.name == other.name
- && self.classifiers == other.classifiers
+ self.classifiers == other.classifiers
&& self.header == other.header
&& self.next_para_header == other.next_para_header
}
@@ -136,9 +142,8 @@ impl PartialEq for LawBuilder {
impl LawBuilder {
/// Creates a new law builder. Adds classifier for known law texts.
- pub fn new(name: &str) -> Self {
+ pub fn new() -> Self {
Self {
- name: name.into(),
classifiers: Vec::new(),
header: Vec::new(),
next_para_header: None,
@@ -357,7 +362,7 @@ impl From<&str> for ClassifierInstance {
}
}
-type ClassifierApplicable = Arc bool>;
+pub(crate) type ClassifierApplicable = Arc bool>;
#[derive(Clone)]
pub struct Classifier {
diff --git a/src/lib.rs b/src/lib.rs
index 8de8ebe..4479647 100644
--- a/src/lib.rs
+++ b/src/lib.rs
@@ -1,3 +1,4 @@
+pub mod config;
pub mod law;
pub mod misc;
pub mod risparser;
diff --git a/src/main.rs b/src/main.rs
index bf7f331..caa8f05 100644
--- a/src/main.rs
+++ b/src/main.rs
@@ -1,8 +1,11 @@
-use risp::law::{Law, LawBuilder};
+use risp::law::Law;
fn main() {
env_logger::init();
- let law: Law = LawBuilder::new("StGB").into();
+ let config_path = "./data/config/mschg.toml";
+
+ let law = Law::from_config(config_path).unwrap();
+
law.to_md();
}
diff --git a/src/misc.rs b/src/misc.rs
index da10d14..c4d0734 100644
--- a/src/misc.rs
+++ b/src/misc.rs
@@ -8,6 +8,12 @@ pub struct Error {
msg: String,
}
+impl Error {
+    /// Creates an [`Error`] that carries a copy of `msg` as its message.
+    pub fn new(msg: &str) -> Self {
+        let msg = String::from(msg);
+        Self { msg }
+    }
+}
+
impl From for Error {
fn from(value: ureq::Error) -> Self {
Self {
@@ -29,6 +35,14 @@ impl From for Error {
}
}
}
+impl From for Error {
+ fn from(value: toml::de::Error) -> Self {
+ Self {
+ msg: value.to_string(),
+ }
+ }
+}
+
impl From for Error {
fn from(value: roxmltree::Error) -> Self {
Self {
diff --git a/src/risparser/paragraph/mod.rs b/src/risparser/paragraph/mod.rs
index 517b5b2..8560a6f 100644
--- a/src/risparser/paragraph/mod.rs
+++ b/src/risparser/paragraph/mod.rs
@@ -172,7 +172,7 @@ mod tests {
#[test]
fn teg() {
let law_id = "10001905";
- let mut builder = LawBuilder::new("law");
+ let mut builder = LawBuilder::new();
builder.add_classifier(Classifier::new("Abschnitt", Arc::new(&contains)).root());
let mut parser = Parser::new();
@@ -190,7 +190,7 @@ mod tests {
#[test]
fn mschg() {
let law_id = "10002180";
- let mut builder = LawBuilder::new("law");
+ let mut builder = LawBuilder::new();
builder.add_classifier(Classifier::new("Abschnitt", Arc::new(&contains)).root());
builder.add_classifier(Classifier::new("Number", Arc::new(&starts_with_number)));
@@ -213,7 +213,7 @@ mod tests {
#[test]
fn stgb() {
let law_id = "10002296";
- let mut builder = LawBuilder::new("law");
+ let mut builder = LawBuilder::new();
builder.add_classifier(Classifier::new("Teil", Arc::new(&contains)).root());
builder.add_classifier(Classifier::new("Abschnitt", Arc::new(&contains)));
@@ -259,7 +259,7 @@ mod tests {
#[test]
fn kschg() {
let law_id = "10002462";
- let mut builder = LawBuilder::new("law");
+ let mut builder = LawBuilder::new();
builder.add_classifier(Classifier::new("Hauptstück", Arc::new(&contains)).root());
builder.add_classifier(Classifier::new("Abschnitt", Arc::new(&contains)));
@@ -273,7 +273,7 @@ mod tests {
#[test]
fn vvg() {
let law_id = "20011654";
- let mut builder = LawBuilder::new("law");
+ let mut builder = LawBuilder::new();
builder.add_classifier(Classifier::new("Abschnitt", Arc::new(&contains)).root());
let parser = Parser::new();
@@ -283,7 +283,7 @@ mod tests {
#[test]
fn urhg() {
let law_id = "10001848";
- let mut builder = LawBuilder::new("law");
+ let mut builder = LawBuilder::new();
builder.add_classifier(Classifier::new("Hauptstück", Arc::new(&contains)).root());
builder.add_classifier(Classifier::new("Abschnitt", Arc::new(&contains)));
builder.add_classifier(Classifier::new("Number", Arc::new(&starts_with_number)));