add -p argument to debug single paragraph, fix issues, sorry for the large commit...
Some checks failed
CI/CD Pipeline / test (push) Failing after 1m31s
Some checks failed
CI/CD Pipeline / test (push) Failing after 1m31s
This commit is contained in:
@ -5,6 +5,10 @@ pub fn contains_without_unter(classifier_name: &str, instance_name: &str) -> boo
|
||||
&& !instance_name.to_lowercase().contains("unter")
|
||||
}
|
||||
|
||||
/// Predicate that ignores both string arguments and unconditionally returns `true`.
///
/// NOTE(review): the two parameters presumably mirror the
/// `(classifier_name, instance_name)` pair taken by the neighbouring
/// predicates in this module — confirm against the classifier callback type.
pub fn always_true(_: &str, _: &str) -> bool {
|
||||
true
|
||||
}
|
||||
|
||||
pub fn contains(classifier_name: &str, instance_name: &str) -> bool {
|
||||
instance_name
|
||||
.to_lowercase()
|
||||
|
27
src/main.rs
27
src/main.rs
@ -1,5 +1,11 @@
|
||||
use std::sync::Arc;
|
||||
|
||||
use clap::{command, Parser};
|
||||
use risp::{law::Law, misc::clear_cache};
|
||||
use risp::{
|
||||
config::Config,
|
||||
law::{responsible::always_true, Classifier, Law},
|
||||
misc::clear_cache,
|
||||
};
|
||||
|
||||
#[derive(Parser, Debug)]
|
||||
#[command(version, about, long_about = None)]
|
||||
@ -8,7 +14,13 @@ struct Args {
|
||||
#[arg(short, long)]
|
||||
config: String,
|
||||
|
||||
/// Parses a single paragraph (for debugging, e.g. https://www.ris.bka.gv.at/Dokumente/Bundesnormen/NOR40217849/NOR40217849.xml)
|
||||
/// Does not replace `config`: the config is still loaded and its parser is used, but only this paragraph is parsed instead of the full law
|
||||
#[arg(short, long)]
|
||||
par_url: Option<String>,
|
||||
|
||||
/// Clears the cache (downloaded laws + paragraphs)
|
||||
#[arg(long)]
|
||||
clear_cache: bool,
|
||||
}
|
||||
|
||||
@ -23,7 +35,16 @@ fn main() {
|
||||
}
|
||||
}
|
||||
|
||||
let law = Law::from_config(&args.config).unwrap();
|
||||
if let Some(par_url) = &args.par_url {
|
||||
let (_, mut builder, parser) = Config::load(&args.config).unwrap();
|
||||
builder.add_classifier(Classifier::new("always-true", Arc::new(always_true)));
|
||||
builder.new_header("initial");
|
||||
parser.parse(&par_url, &mut builder).unwrap();
|
||||
let law: Law = builder.into();
|
||||
law.to_md();
|
||||
} else {
|
||||
let law = Law::from_config(&args.config).unwrap();
|
||||
|
||||
law.to_md();
|
||||
law.to_md();
|
||||
}
|
||||
}
|
||||
|
@ -122,11 +122,16 @@ mod tests {
|
||||
|
||||
let actual = &builder.history;
|
||||
|
||||
let expected = fs::read_to_string(&expected_path)
|
||||
.unwrap_or_else(|_| panic!("Could not read file {expected_path}."));
|
||||
let expected = expected.trim().split('\n').collect::<Vec<&str>>();
|
||||
|
||||
assert_eq!(actual, &expected);
|
||||
match fs::read_to_string(&expected_path) {
|
||||
Ok(expected) => {
|
||||
let e = expected.trim().split('\n').collect::<Vec<&str>>();
|
||||
assert_eq!(actual, &e);
|
||||
}
|
||||
Err(_) => {
|
||||
let to_write = actual.join("\n");
|
||||
fs::write(expected_path, to_write).expect("Unable to write file");
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
@ -140,10 +140,18 @@ impl Abschnitt {
|
||||
if let Some(child) = c.peek() {
|
||||
if Liste::test(child) {
|
||||
let liste = Liste::parse(c.next().unwrap());
|
||||
absatze.push(Content::List(vec![
|
||||
let mut to_add = vec![
|
||||
Content::Text(absatz.content.replace('\u{a0}', " ")),
|
||||
liste.get_content(),
|
||||
]));
|
||||
];
|
||||
if let Some(subchild) = c.peek() {
|
||||
if Absatz::test_with_typ(&subchild, "satz") {
|
||||
// After a 'liste' there can be a '<absatz typ="satz"' which should be part of the list
|
||||
// (e.g. 85 StGB)
|
||||
to_add.push(Content::Text(Absatz::parse(c.next().unwrap()).content));
|
||||
}
|
||||
}
|
||||
absatze.push(Content::List(to_add));
|
||||
} else if Table::test(child) {
|
||||
// If there's a "table" after an "absatz", the "table" should be part of the "absatz"
|
||||
let table = Table::parse(c.next().unwrap());
|
||||
@ -162,6 +170,13 @@ impl Abschnitt {
|
||||
]));
|
||||
}
|
||||
}
|
||||
} else if Absatz::test_with_typ(&child, "satz") {
|
||||
// An 'absatz' can be directly followed by an '<absatz typ="satz">' which should be grouped with it into one list
|
||||
// (e.g. 1209 ABGB)
|
||||
absatze.push(Content::List(vec![
|
||||
Content::Text(absatz.content.replace('\u{a0}', " ").clone()),
|
||||
Content::Text(Absatz::parse(c.next().unwrap()).content),
|
||||
]));
|
||||
} else {
|
||||
absatze.push(Content::Text(absatz.content.replace('\u{a0}', " ").clone()));
|
||||
}
|
||||
@ -178,12 +193,30 @@ impl Abschnitt {
|
||||
if let Some(child) = c.peek() {
|
||||
if Liste::test(child) {
|
||||
let liste = Liste::parse(c.next().unwrap());
|
||||
absatze.push(Content::List(vec![
|
||||
let mut to_add = vec![
|
||||
Content::Text(abs.content.replace('\u{a0}', " ")),
|
||||
liste.get_content(),
|
||||
]));
|
||||
];
|
||||
println!("{to_add:#?}");
|
||||
if let Some(subchild) = c.peek() {
|
||||
if Absatz::test_with_typ(&subchild, "satz") {
|
||||
// After a 'liste' there can be a '<absatz typ="satz"' which should be part of the list
|
||||
// (e.g. 85 StGB)
|
||||
to_add
|
||||
.push(Content::Text(Absatz::parse(c.next().unwrap()).content));
|
||||
}
|
||||
}
|
||||
absatze.push(Content::List(to_add));
|
||||
} else {
|
||||
absatze.push(Content::Text(abs.content.replace('\u{a0}', " ")));
|
||||
let mut content = abs.content.replace('\u{a0}', " ");
|
||||
while let Some(subchild) = c.peek() {
|
||||
if Absatz::test_with_typ(&subchild, "erltext") {
|
||||
content += &Absatz::parse(c.next().unwrap()).content;
|
||||
} else {
|
||||
break;
|
||||
}
|
||||
}
|
||||
absatze.push(Content::Text(content));
|
||||
}
|
||||
} else {
|
||||
absatze.push(Content::Text(abs.content.replace('\u{a0}', " ")));
|
||||
@ -209,7 +242,7 @@ impl Abschnitt {
|
||||
c.next();
|
||||
continue;
|
||||
}
|
||||
if Absatz::test(child) {
|
||||
if Absatz::test_with_typ(child, "erltext") {
|
||||
c.next();
|
||||
continue;
|
||||
}
|
||||
@ -409,11 +442,7 @@ impl Liste {
|
||||
if Ziffernliste::test(child) {
|
||||
content.push(Ziffernliste::parse(c.next().unwrap()).get_content());
|
||||
} else if Schlussteil::test(child) {
|
||||
content.push(Content::Text(
|
||||
Schlussteil::parse(c.next().unwrap())
|
||||
.content
|
||||
.replace('\u{a0}', " "),
|
||||
));
|
||||
content.push(Content::Text(Schlussteil::parse(c.next().unwrap()).content));
|
||||
} else {
|
||||
break;
|
||||
}
|
||||
@ -439,8 +468,7 @@ impl AbsatzAbs {
|
||||
n.tag_name().name() == "absatz" && n.attribute("typ").unwrap() == "abs"
|
||||
}
|
||||
pub(crate) fn parse(n: Node) -> Self {
|
||||
assert!(n.tag_name().name() == "absatz");
|
||||
assert_eq!(n.attribute("typ").unwrap(), "abs");
|
||||
assert!(Self::test(&n));
|
||||
|
||||
let mut c = n.children().peekable();
|
||||
|
||||
|
Reference in New Issue
Block a user