add -p argument to debug single paragraph, fix issues, sorry for the large commit...
Some checks failed
CI/CD Pipeline / test (push) Failing after 1m31s

This commit is contained in:
2024-02-15 11:40:02 +01:00
parent ddb2ebe5b7
commit 91fc2da6f7
11 changed files with 93 additions and 35 deletions

View File

@ -5,6 +5,10 @@ pub fn contains_without_unter(classifier_name: &str, instance_name: &str) -> boo
&& !instance_name.to_lowercase().contains("unter")
}
/// Ignores both arguments and returns `true` unconditionally.
///
/// The parameters exist only so the function has the same
/// `(&str, &str) -> bool` shape as the other predicates in this module.
pub fn always_true(_classifier_name: &str, _instance_name: &str) -> bool {
    true
}
pub fn contains(classifier_name: &str, instance_name: &str) -> bool {
instance_name
.to_lowercase()

View File

@ -1,5 +1,11 @@
use std::sync::Arc;
use clap::{command, Parser};
use risp::{law::Law, misc::clear_cache};
use risp::{
config::Config,
law::{responsible::always_true, Classifier, Law},
misc::clear_cache,
};
#[derive(Parser, Debug)]
#[command(version, about, long_about = None)]
@ -8,7 +14,13 @@ struct Args {
#[arg(short, long)]
config: String,
/// Parses a single paragraph (for debugging, e.g. https://www.ris.bka.gv.at/Dokumente/Bundesnormen/NOR40217849/NOR40217849.xml)
/// Still requires `config` (its parser and builder are used); only this paragraph is parsed instead of the full law
#[arg(short, long)]
par_url: Option<String>,
/// Clears the cache (downloaded laws + paragraphs)
#[arg(long)]
clear_cache: bool,
}
@ -23,7 +35,16 @@ fn main() {
}
}
let law = Law::from_config(&args.config).unwrap();
if let Some(par_url) = &args.par_url {
let (_, mut builder, parser) = Config::load(&args.config).unwrap();
builder.add_classifier(Classifier::new("always-true", Arc::new(always_true)));
builder.new_header("initial");
parser.parse(&par_url, &mut builder).unwrap();
let law: Law = builder.into();
law.to_md();
} else {
let law = Law::from_config(&args.config).unwrap();
law.to_md();
law.to_md();
}
}

View File

@ -122,11 +122,16 @@ mod tests {
let actual = &builder.history;
let expected = fs::read_to_string(&expected_path)
.unwrap_or_else(|_| panic!("Could not read file {expected_path}."));
let expected = expected.trim().split('\n').collect::<Vec<&str>>();
assert_eq!(actual, &expected);
match fs::read_to_string(&expected_path) {
Ok(expected) => {
let e = expected.trim().split('\n').collect::<Vec<&str>>();
assert_eq!(actual, &e);
}
Err(_) => {
let to_write = actual.join("\n");
fs::write(expected_path, to_write).expect("Unable to write file");
}
}
}
}
}

View File

@ -140,10 +140,18 @@ impl Abschnitt {
if let Some(child) = c.peek() {
if Liste::test(child) {
let liste = Liste::parse(c.next().unwrap());
absatze.push(Content::List(vec![
let mut to_add = vec![
Content::Text(absatz.content.replace('\u{a0}', " ")),
liste.get_content(),
]));
];
if let Some(subchild) = c.peek() {
if Absatz::test_with_typ(&subchild, "satz") {
// After a 'liste' there can be a '<absatz typ="satz"' which should be part of the list
// (e.g. 85 StGB)
to_add.push(Content::Text(Absatz::parse(c.next().unwrap()).content));
}
}
absatze.push(Content::List(to_add));
} else if Table::test(child) {
// If there's a "table" after an "absatz", the "table" should be part of the "absatz"
let table = Table::parse(c.next().unwrap());
@ -162,6 +170,13 @@ impl Abschnitt {
]));
}
}
} else if Absatz::test_with_typ(&child, "satz") {
// After a 'liste' there can be a '<absatz typ="satz"' which should be part of the list
// (e.g. 1209 ABGB)
absatze.push(Content::List(vec![
Content::Text(absatz.content.replace('\u{a0}', " ").clone()),
Content::Text(Absatz::parse(c.next().unwrap()).content),
]));
} else {
absatze.push(Content::Text(absatz.content.replace('\u{a0}', " ").clone()));
}
@ -178,12 +193,30 @@ impl Abschnitt {
if let Some(child) = c.peek() {
if Liste::test(child) {
let liste = Liste::parse(c.next().unwrap());
absatze.push(Content::List(vec![
let mut to_add = vec![
Content::Text(abs.content.replace('\u{a0}', " ")),
liste.get_content(),
]));
];
println!("{to_add:#?}");
if let Some(subchild) = c.peek() {
if Absatz::test_with_typ(&subchild, "satz") {
// After a 'liste' there can be a '<absatz typ="satz"' which should be part of the list
// (e.g. 85 StGB)
to_add
.push(Content::Text(Absatz::parse(c.next().unwrap()).content));
}
}
absatze.push(Content::List(to_add));
} else {
absatze.push(Content::Text(abs.content.replace('\u{a0}', " ")));
let mut content = abs.content.replace('\u{a0}', " ");
while let Some(subchild) = c.peek() {
if Absatz::test_with_typ(&subchild, "erltext") {
content += &Absatz::parse(c.next().unwrap()).content;
} else {
break;
}
}
absatze.push(Content::Text(content));
}
} else {
absatze.push(Content::Text(abs.content.replace('\u{a0}', " ")));
@ -209,7 +242,7 @@ impl Abschnitt {
c.next();
continue;
}
if Absatz::test(child) {
if Absatz::test_with_typ(child, "erltext") {
c.next();
continue;
}
@ -409,11 +442,7 @@ impl Liste {
if Ziffernliste::test(child) {
content.push(Ziffernliste::parse(c.next().unwrap()).get_content());
} else if Schlussteil::test(child) {
content.push(Content::Text(
Schlussteil::parse(c.next().unwrap())
.content
.replace('\u{a0}', " "),
));
content.push(Content::Text(Schlussteil::parse(c.next().unwrap()).content));
} else {
break;
}
@ -439,8 +468,7 @@ impl AbsatzAbs {
n.tag_name().name() == "absatz" && n.attribute("typ").unwrap() == "abs"
}
pub(crate) fn parse(n: Node) -> Self {
assert!(n.tag_name().name() == "absatz");
assert_eq!(n.attribute("typ").unwrap(), "abs");
assert!(Self::test(&n));
let mut c = n.children().peekable();