add config for law texts, fixes #3
All checks were successful
CI/CD Pipeline / test (push) Successful in 9m17s
All checks were successful
CI/CD Pipeline / test (push) Successful in 9m17s
This commit is contained in:
parent
54a8c7a0fe
commit
15e0e485c4
10
Cargo.lock
generated
10
Cargo.lock
generated
@ -396,6 +396,7 @@ dependencies = [
|
|||||||
"serde",
|
"serde",
|
||||||
"serde_json",
|
"serde_json",
|
||||||
"time",
|
"time",
|
||||||
|
"toml",
|
||||||
"tqdm",
|
"tqdm",
|
||||||
"ureq",
|
"ureq",
|
||||||
]
|
]
|
||||||
@ -603,6 +604,15 @@ version = "0.1.1"
|
|||||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
checksum = "1f3ccbac311fea05f86f61904b462b55fb3df8837a366dfc601a0161d0532f20"
|
checksum = "1f3ccbac311fea05f86f61904b462b55fb3df8837a366dfc601a0161d0532f20"
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "toml"
|
||||||
|
version = "0.5.11"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "f4f7f0dd8d50a853a531c426359045b1998f04219d88799810762cd4ad314234"
|
||||||
|
dependencies = [
|
||||||
|
"serde",
|
||||||
|
]
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "tqdm"
|
name = "tqdm"
|
||||||
version = "0.6.0"
|
version = "0.6.0"
|
||||||
|
@ -14,6 +14,8 @@ roxmltree = "0.18"
|
|||||||
env_logger = "0.10"
|
env_logger = "0.10"
|
||||||
log = "0.4"
|
log = "0.4"
|
||||||
tqdm = "0.6"
|
tqdm = "0.6"
|
||||||
|
toml = "0.5"
|
||||||
|
|
||||||
|
|
||||||
[dev-dependencies]
|
[dev-dependencies]
|
||||||
pretty_assertions = "1.4"
|
pretty_assertions = "1.4"
|
||||||
|
34
data/configs/mschg.toml
Normal file
34
data/configs/mschg.toml
Normal file
@ -0,0 +1,34 @@
|
|||||||
|
# Law configuration consumed by `Config::load` in `src/config.rs`.

[law]
# Must be a TOML integer: the field is deserialized into a `usize`.
id = 10002180

[[law.classifiers]]
name = "Abschnitt"
# `is_root` is a required boolean of every classifier entry.
is_root = true
match_function = "contains"

[[law.classifiers]]
name = "Number"
is_root = false
match_function = "starts_with_number"

[parser]
remove_strings = ["<i>", "</i>"]

# Replace no-break spaces (U+00A0) with ordinary spaces.
# TOML spells the escape as `\u00A0`; `\\u{a0}` would be the literal
# six-character text backslash-u-{-a-0-}.
[[parser.replace_rules]]
find = "\u00A0"
replace_with = " "

[[parser.replace_rules]]
find = "<super>bis</super>"
replace_with = "bis"

[[parser.replace_rules]]
find = "<super>ter</super>"
replace_with = "ter"

[[parser.replace_rules]]
find = "<gdash />"
replace_with = "-"

# Prefixes the repeal note of § 69 with a `<gldsym>` marker.
[[parser.replace_rules]]
find = "(Anm.: § 69 aufgehoben durch Art. 1 Z 12, BGBl. I Nr. 124/2017)"
replace_with = "<gldsym>§ 69.</gldsym>(Anm.: § 69 aufgehoben durch Art. 1 Z 12, BGBl. I Nr. 124/2017)"
|
||||||
|
|
93
src/config.rs
Normal file
93
src/config.rs
Normal file
@ -0,0 +1,93 @@
|
|||||||
|
use serde::Deserialize;
|
||||||
|
use std::fs;
|
||||||
|
use std::path::Path;
|
||||||
|
use std::sync::Arc;
|
||||||
|
|
||||||
|
use crate::law::{self, responsible::*};
|
||||||
|
use crate::law::{ClassifierApplicable, LawBuilder};
|
||||||
|
use crate::misc::Error;
|
||||||
|
use crate::risparser::paragraph::Parser;
|
||||||
|
|
||||||
|
// TODO: more generic
|
||||||
|
fn create_classifier(match_function: &str) -> Result<ClassifierApplicable, Error> {
|
||||||
|
let func: ClassifierApplicable = match match_function {
|
||||||
|
"contains" => Arc::new(contains),
|
||||||
|
"starts_with_roman_number" => Arc::new(starts_with_roman_number),
|
||||||
|
"contains_at_start" => Arc::new(contains_at_start),
|
||||||
|
"starts_with_number" => Arc::new(starts_with_number),
|
||||||
|
"starts_with_letter" => Arc::new(starts_with_letter),
|
||||||
|
"starts_with_uppercaseletter" => Arc::new(starts_with_letter),
|
||||||
|
"contains_without_unter" => Arc::new(contains_without_unter),
|
||||||
|
_ => {
|
||||||
|
return Err(Error::new(&format!(
|
||||||
|
"Unknown match function: {}",
|
||||||
|
match_function
|
||||||
|
)))
|
||||||
|
}
|
||||||
|
};
|
||||||
|
|
||||||
|
Ok(func)
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Top-level layout of a law configuration file, filled in by serde.
#[derive(Debug, Deserialize)]
pub struct Config {
    /// Contents of the `law` table: the law id and its classifiers.
    law: Law,
    /// Contents of the `parser` table: strings to remove and replace rules.
    parser: ParserConfig,
}
|
||||||
|
|
||||||
|
impl Config {
|
||||||
|
pub fn load<P: AsRef<Path>>(path: P) -> Result<(usize, LawBuilder, Parser), Error> {
|
||||||
|
let config_str = fs::read_to_string(path)?;
|
||||||
|
let config: Config = toml::from_str(&config_str)?;
|
||||||
|
|
||||||
|
let mut builder = LawBuilder::new();
|
||||||
|
for classifier in config.law.classifiers {
|
||||||
|
let to_add = law::Classifier::new(
|
||||||
|
&classifier.name,
|
||||||
|
create_classifier(&classifier.match_function)?,
|
||||||
|
);
|
||||||
|
if classifier.is_root {
|
||||||
|
builder.add_classifier(to_add.root());
|
||||||
|
} else {
|
||||||
|
builder.add_classifier(to_add);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
let mut parser = Parser::new();
|
||||||
|
|
||||||
|
for to_remove in config.parser.remove_strings {
|
||||||
|
parser.add_string_to_remove(&to_remove);
|
||||||
|
}
|
||||||
|
|
||||||
|
for to_replace in config.parser.replace_rules {
|
||||||
|
parser.add_string_to_replace(&to_replace.find, &to_replace.replace_with);
|
||||||
|
}
|
||||||
|
|
||||||
|
Ok((config.law.id, builder, parser))
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/// The `law` table of a configuration file.
#[derive(Debug, Deserialize)]
struct Law {
    /// Id of the law; `Config::load` returns it as the first tuple element.
    id: usize,
    /// Classifier entries, added to the `LawBuilder` in the order listed.
    classifiers: Vec<Classifier>,
}
|
||||||
|
|
||||||
|
#[derive(Debug, Deserialize)]
|
||||||
|
struct Classifier {
|
||||||
|
name: String,
|
||||||
|
is_root: bool,
|
||||||
|
match_function: String,
|
||||||
|
}
|
||||||
|
|
||||||
|
/// The `parser` table of a configuration file.
#[derive(Debug, Deserialize)]
struct ParserConfig {
    /// Strings registered on the parser via `add_string_to_remove`.
    remove_strings: Vec<String>,
    /// Find/replace pairs registered on the parser via `add_string_to_replace`.
    replace_rules: Vec<ReplaceRule>,
}
|
||||||
|
|
||||||
|
/// A single entry of the `parser.replace_rules` array.
#[derive(Debug, Deserialize)]
struct ReplaceRule {
    /// Text to look for (first argument of `add_string_to_replace`).
    find: String,
    /// Text that takes its place (second argument of `add_string_to_replace`).
    replace_with: String,
}
|
@ -7,20 +7,33 @@ use std::{
|
|||||||
sync::Arc,
|
sync::Arc,
|
||||||
};
|
};
|
||||||
|
|
||||||
|
use crate::{config::Config, misc::Error, risparser::overview::parse};
|
||||||
|
|
||||||
pub mod responsible;
|
pub mod responsible;
|
||||||
|
|
||||||
/// That's our struct, holding all the information of the law text.
#[derive(Debug, Serialize, Deserialize, PartialEq)]
pub struct Law {
    /// Headings of the law text; `to_md` prints each of them in order.
    pub header: Vec<Heading>,
}
|
||||||
|
|
||||||
impl Law {
|
impl Law {
|
||||||
|
pub fn from_config(path: &str) -> Result<Law, Error> {
|
||||||
|
let (law_id, mut builder, parser) = Config::load(path)?;
|
||||||
|
let pars = parse(law_id).unwrap();
|
||||||
|
|
||||||
|
for par in pars {
|
||||||
|
let cont = parser.parse(&par, &mut builder).unwrap();
|
||||||
|
if !cont {
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
Ok(builder.into())
|
||||||
|
}
|
||||||
|
|
||||||
//TODO: add test
|
//TODO: add test
|
||||||
pub fn to_md(&self) {
|
pub fn to_md(&self) {
|
||||||
println!("# {}", self.name);
|
|
||||||
|
|
||||||
for header in &self.header {
|
for header in &self.header {
|
||||||
Self::print_md(header, 2);
|
Self::print_md(header, 2);
|
||||||
}
|
}
|
||||||
@ -55,10 +68,7 @@ impl From<LawBuilder> for Law {
|
|||||||
});
|
});
|
||||||
}
|
}
|
||||||
|
|
||||||
Self {
|
Self { header: ret }
|
||||||
name: builder.name,
|
|
||||||
header: ret,
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -107,9 +117,6 @@ impl From<ClassifierInstance> for HeadingContent {
|
|||||||
/// Is used to generate a law struct. It's organized mainly by classifier.
|
/// Is used to generate a law struct. It's organized mainly by classifier.
|
||||||
#[derive(Debug)]
|
#[derive(Debug)]
|
||||||
pub struct LawBuilder {
|
pub struct LawBuilder {
|
||||||
/// Name of the law
|
|
||||||
name: String, //ABGB, UrhG
|
|
||||||
|
|
||||||
/// Structure of the law text
|
/// Structure of the law text
|
||||||
classifiers: Vec<Classifier>,
|
classifiers: Vec<Classifier>,
|
||||||
|
|
||||||
@ -127,8 +134,7 @@ pub struct LawBuilder {
|
|||||||
|
|
||||||
impl PartialEq for LawBuilder {
|
impl PartialEq for LawBuilder {
|
||||||
fn eq(&self, other: &Self) -> bool {
|
fn eq(&self, other: &Self) -> bool {
|
||||||
self.name == other.name
|
self.classifiers == other.classifiers
|
||||||
&& self.classifiers == other.classifiers
|
|
||||||
&& self.header == other.header
|
&& self.header == other.header
|
||||||
&& self.next_para_header == other.next_para_header
|
&& self.next_para_header == other.next_para_header
|
||||||
}
|
}
|
||||||
@ -136,9 +142,8 @@ impl PartialEq for LawBuilder {
|
|||||||
|
|
||||||
impl LawBuilder {
|
impl LawBuilder {
|
||||||
/// Creates a new law builder. Adds classifier for known law texts.
|
/// Creates a new law builder. Adds classifier for known law texts.
|
||||||
pub fn new(name: &str) -> Self {
|
pub fn new() -> Self {
|
||||||
Self {
|
Self {
|
||||||
name: name.into(),
|
|
||||||
classifiers: Vec::new(),
|
classifiers: Vec::new(),
|
||||||
header: Vec::new(),
|
header: Vec::new(),
|
||||||
next_para_header: None,
|
next_para_header: None,
|
||||||
@ -357,7 +362,7 @@ impl From<&str> for ClassifierInstance {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
/// Shared, reference-counted predicate used by classifiers: takes two string
/// slices and returns a `bool`.
// NOTE(review): the meaning of the two `&str` arguments is not visible here — confirm.
pub(crate) type ClassifierApplicable = Arc<dyn Fn(&str, &str) -> bool>;
|
||||||
|
|
||||||
#[derive(Clone)]
|
#[derive(Clone)]
|
||||||
pub struct Classifier {
|
pub struct Classifier {
|
||||||
|
@ -1,3 +1,4 @@
|
|||||||
|
pub mod config;
|
||||||
pub mod law;
|
pub mod law;
|
||||||
pub mod misc;
|
pub mod misc;
|
||||||
pub mod risparser;
|
pub mod risparser;
|
||||||
|
@ -1,8 +1,11 @@
|
|||||||
use risp::law::{Law, LawBuilder};
|
use risp::law::Law;
|
||||||
|
|
||||||
fn main() {
|
fn main() {
|
||||||
env_logger::init();
|
env_logger::init();
|
||||||
|
|
||||||
let law: Law = LawBuilder::new("StGB").into();
|
let config_path = "./data/config/mschg.toml";
|
||||||
|
|
||||||
|
let law = Law::from_config(config_path).unwrap();
|
||||||
|
|
||||||
law.to_md();
|
law.to_md();
|
||||||
}
|
}
|
||||||
|
14
src/misc.rs
14
src/misc.rs
@ -8,6 +8,12 @@ pub struct Error {
|
|||||||
msg: String,
|
msg: String,
|
||||||
}
|
}
|
||||||
|
|
||||||
|
impl Error {
|
||||||
|
pub fn new(msg: &str) -> Self {
|
||||||
|
Self { msg: msg.into() }
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
impl From<ureq::Error> for Error {
|
impl From<ureq::Error> for Error {
|
||||||
fn from(value: ureq::Error) -> Self {
|
fn from(value: ureq::Error) -> Self {
|
||||||
Self {
|
Self {
|
||||||
@ -29,6 +35,14 @@ impl From<serde_json::Error> for Error {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
impl From<toml::de::Error> for Error {
|
||||||
|
fn from(value: toml::de::Error) -> Self {
|
||||||
|
Self {
|
||||||
|
msg: value.to_string(),
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
impl From<roxmltree::Error> for Error {
|
impl From<roxmltree::Error> for Error {
|
||||||
fn from(value: roxmltree::Error) -> Self {
|
fn from(value: roxmltree::Error) -> Self {
|
||||||
Self {
|
Self {
|
||||||
|
@ -172,7 +172,7 @@ mod tests {
|
|||||||
#[test]
|
#[test]
|
||||||
fn teg() {
|
fn teg() {
|
||||||
let law_id = "10001905";
|
let law_id = "10001905";
|
||||||
let mut builder = LawBuilder::new("law");
|
let mut builder = LawBuilder::new();
|
||||||
builder.add_classifier(Classifier::new("Abschnitt", Arc::new(&contains)).root());
|
builder.add_classifier(Classifier::new("Abschnitt", Arc::new(&contains)).root());
|
||||||
|
|
||||||
let mut parser = Parser::new();
|
let mut parser = Parser::new();
|
||||||
@ -190,7 +190,7 @@ mod tests {
|
|||||||
#[test]
|
#[test]
|
||||||
fn mschg() {
|
fn mschg() {
|
||||||
let law_id = "10002180";
|
let law_id = "10002180";
|
||||||
let mut builder = LawBuilder::new("law");
|
let mut builder = LawBuilder::new();
|
||||||
builder.add_classifier(Classifier::new("Abschnitt", Arc::new(&contains)).root());
|
builder.add_classifier(Classifier::new("Abschnitt", Arc::new(&contains)).root());
|
||||||
builder.add_classifier(Classifier::new("Number", Arc::new(&starts_with_number)));
|
builder.add_classifier(Classifier::new("Number", Arc::new(&starts_with_number)));
|
||||||
|
|
||||||
@ -213,7 +213,7 @@ mod tests {
|
|||||||
#[test]
|
#[test]
|
||||||
fn stgb() {
|
fn stgb() {
|
||||||
let law_id = "10002296";
|
let law_id = "10002296";
|
||||||
let mut builder = LawBuilder::new("law");
|
let mut builder = LawBuilder::new();
|
||||||
builder.add_classifier(Classifier::new("Teil", Arc::new(&contains)).root());
|
builder.add_classifier(Classifier::new("Teil", Arc::new(&contains)).root());
|
||||||
builder.add_classifier(Classifier::new("Abschnitt", Arc::new(&contains)));
|
builder.add_classifier(Classifier::new("Abschnitt", Arc::new(&contains)));
|
||||||
|
|
||||||
@ -259,7 +259,7 @@ mod tests {
|
|||||||
#[test]
|
#[test]
|
||||||
fn kschg() {
|
fn kschg() {
|
||||||
let law_id = "10002462";
|
let law_id = "10002462";
|
||||||
let mut builder = LawBuilder::new("law");
|
let mut builder = LawBuilder::new();
|
||||||
builder.add_classifier(Classifier::new("Hauptstück", Arc::new(&contains)).root());
|
builder.add_classifier(Classifier::new("Hauptstück", Arc::new(&contains)).root());
|
||||||
builder.add_classifier(Classifier::new("Abschnitt", Arc::new(&contains)));
|
builder.add_classifier(Classifier::new("Abschnitt", Arc::new(&contains)));
|
||||||
|
|
||||||
@ -273,7 +273,7 @@ mod tests {
|
|||||||
#[test]
|
#[test]
|
||||||
fn vvg() {
|
fn vvg() {
|
||||||
let law_id = "20011654";
|
let law_id = "20011654";
|
||||||
let mut builder = LawBuilder::new("law");
|
let mut builder = LawBuilder::new();
|
||||||
builder.add_classifier(Classifier::new("Abschnitt", Arc::new(&contains)).root());
|
builder.add_classifier(Classifier::new("Abschnitt", Arc::new(&contains)).root());
|
||||||
|
|
||||||
let parser = Parser::new();
|
let parser = Parser::new();
|
||||||
@ -283,7 +283,7 @@ mod tests {
|
|||||||
#[test]
|
#[test]
|
||||||
fn urhg() {
|
fn urhg() {
|
||||||
let law_id = "10001848";
|
let law_id = "10001848";
|
||||||
let mut builder = LawBuilder::new("law");
|
let mut builder = LawBuilder::new();
|
||||||
builder.add_classifier(Classifier::new("Hauptstück", Arc::new(&contains)).root());
|
builder.add_classifier(Classifier::new("Hauptstück", Arc::new(&contains)).root());
|
||||||
builder.add_classifier(Classifier::new("Abschnitt", Arc::new(&contains)));
|
builder.add_classifier(Classifier::new("Abschnitt", Arc::new(&contains)));
|
||||||
builder.add_classifier(Classifier::new("Number", Arc::new(&starts_with_number)));
|
builder.add_classifier(Classifier::new("Number", Arc::new(&starts_with_number)));
|
||||||
|
Loading…
Reference in New Issue
Block a user