Deal with HTML non-breaking whitespace in a single location
Some checks failed
CI/CD Pipeline / test (push) Failing after 1m30s

This commit is contained in:
philipp 2024-02-15 11:44:58 +01:00
parent 91fc2da6f7
commit 2f631abf3e
2 changed files with 20 additions and 25 deletions

View File

@ -39,7 +39,7 @@ fn main() {
let (_, mut builder, parser) = Config::load(&args.config).unwrap();
builder.add_classifier(Classifier::new("always-true", Arc::new(always_true)));
builder.new_header("initial");
parser.parse(&par_url, &mut builder).unwrap();
parser.parse(par_url, &mut builder).unwrap();
let law: Law = builder.into();
law.to_md();
} else {

View File

@ -140,12 +140,9 @@ impl Abschnitt {
if let Some(child) = c.peek() {
if Liste::test(child) {
let liste = Liste::parse(c.next().unwrap());
let mut to_add = vec![
Content::Text(absatz.content.replace('\u{a0}', " ")),
liste.get_content(),
];
let mut to_add = vec![Content::Text(absatz.content), liste.get_content()];
if let Some(subchild) = c.peek() {
if Absatz::test_with_typ(&subchild, "satz") {
if Absatz::test_with_typ(subchild, "satz") {
// After a 'liste' there can be an '<absatz typ="satz">' which should be part of the list
// (e.g. 85 StGB)
to_add.push(Content::Text(Absatz::parse(c.next().unwrap()).content));
@ -159,29 +156,29 @@ impl Abschnitt {
if Absatz::test_with_typ(child, "erltext") {
let after_absatz = Absatz::parse(c.next().unwrap());
absatze.push(Content::List(vec![
Content::Text(absatz.content.replace('\u{a0}', " ")),
Content::Text(absatz.content),
Content::List(table.get_list()),
Content::Text(after_absatz.content),
]));
} else {
absatze.push(Content::List(vec![
Content::Text(absatz.content.replace('\u{a0}', " ")),
Content::Text(absatz.content),
Content::List(table.get_list()),
]));
}
}
} else if Absatz::test_with_typ(&child, "satz") {
} else if Absatz::test_with_typ(child, "satz") {
// After a 'liste' there can be an '<absatz typ="satz">' which should be part of the list
// (e.g. 1209 ABGB)
absatze.push(Content::List(vec![
Content::Text(absatz.content.replace('\u{a0}', " ").clone()),
Content::Text(absatz.content.clone()),
Content::Text(Absatz::parse(c.next().unwrap()).content),
]));
} else {
absatze.push(Content::Text(absatz.content.replace('\u{a0}', " ").clone()));
absatze.push(Content::Text(absatz.content.clone()));
}
} else {
absatze.push(Content::Text(absatz.content.replace('\u{a0}', " ").clone()));
absatze.push(Content::Text(absatz.content.clone()));
}
// There can be as many 'Absätze' as our lovely legislator wants
@ -193,13 +190,10 @@ impl Abschnitt {
if let Some(child) = c.peek() {
if Liste::test(child) {
let liste = Liste::parse(c.next().unwrap());
let mut to_add = vec![
Content::Text(abs.content.replace('\u{a0}', " ")),
liste.get_content(),
];
let mut to_add = vec![Content::Text(abs.content), liste.get_content()];
println!("{to_add:#?}");
if let Some(subchild) = c.peek() {
if Absatz::test_with_typ(&subchild, "satz") {
if Absatz::test_with_typ(subchild, "satz") {
// After a 'liste' there can be an '<absatz typ="satz">' which should be part of the list
// (e.g. 85 StGB)
to_add
@ -208,9 +202,9 @@ impl Abschnitt {
}
absatze.push(Content::List(to_add));
} else {
let mut content = abs.content.replace('\u{a0}', " ");
let mut content = abs.content;
while let Some(subchild) = c.peek() {
if Absatz::test_with_typ(&subchild, "erltext") {
if Absatz::test_with_typ(subchild, "erltext") {
content += &Absatz::parse(c.next().unwrap()).content;
} else {
break;
@ -219,7 +213,7 @@ impl Abschnitt {
absatze.push(Content::Text(content));
}
} else {
absatze.push(Content::Text(abs.content.replace('\u{a0}', " ")));
absatze.push(Content::Text(abs.content));
}
continue;
}
@ -322,9 +316,10 @@ impl Ziffernliste {
let mut elems = Vec::new();
for elem in &self.listelems {
elems.push(Content::Text(
format!("{} {}", elem.symbol.content, elem.text).replace('\u{a0}', " "),
));
elems.push(Content::Text(format!(
"{} {}",
elem.symbol.content, elem.text
)));
}
Content::List(elems)
@ -394,7 +389,7 @@ impl Table {
txt.push_str(&format!("{} ", td.absatz.content));
}
ret.push(Content::Text(format!("- {txt}",).replace('\u{a0}', " ")));
ret.push(Content::Text(format!("- {txt}",)));
}
ret
@ -475,7 +470,7 @@ impl AbsatzAbs {
let gldsym = match c.peek() {
Some(child) => {
if Leaf::test(child, "gldsym") {
Some(Leaf::parse(c.next().unwrap(), "gldsym").replace('\u{a0}', " "))
Some(Leaf::parse(c.next().unwrap(), "gldsym"))
} else {
None
}