Deal with HTML non-breaking whitespace in a single location
Some checks failed
CI/CD Pipeline / test (push) Failing after 1m30s

This commit is contained in:
philipp 2024-02-15 11:44:58 +01:00
parent 91fc2da6f7
commit 2f631abf3e
2 changed files with 20 additions and 25 deletions

View File

@ -39,7 +39,7 @@ fn main() {
let (_, mut builder, parser) = Config::load(&args.config).unwrap(); let (_, mut builder, parser) = Config::load(&args.config).unwrap();
builder.add_classifier(Classifier::new("always-true", Arc::new(always_true))); builder.add_classifier(Classifier::new("always-true", Arc::new(always_true)));
builder.new_header("initial"); builder.new_header("initial");
parser.parse(&par_url, &mut builder).unwrap(); parser.parse(par_url, &mut builder).unwrap();
let law: Law = builder.into(); let law: Law = builder.into();
law.to_md(); law.to_md();
} else { } else {

View File

@ -140,12 +140,9 @@ impl Abschnitt {
if let Some(child) = c.peek() { if let Some(child) = c.peek() {
if Liste::test(child) { if Liste::test(child) {
let liste = Liste::parse(c.next().unwrap()); let liste = Liste::parse(c.next().unwrap());
let mut to_add = vec![ let mut to_add = vec![Content::Text(absatz.content), liste.get_content()];
Content::Text(absatz.content.replace('\u{a0}', " ")),
liste.get_content(),
];
if let Some(subchild) = c.peek() { if let Some(subchild) = c.peek() {
if Absatz::test_with_typ(&subchild, "satz") { if Absatz::test_with_typ(subchild, "satz") {
// After a 'liste' there can be a '<absatz typ="satz"' which should be part of the list // After a 'liste' there can be a '<absatz typ="satz"' which should be part of the list
// (e.g. 85 StGB) // (e.g. 85 StGB)
to_add.push(Content::Text(Absatz::parse(c.next().unwrap()).content)); to_add.push(Content::Text(Absatz::parse(c.next().unwrap()).content));
@ -159,29 +156,29 @@ impl Abschnitt {
if Absatz::test_with_typ(child, "erltext") { if Absatz::test_with_typ(child, "erltext") {
let after_absatz = Absatz::parse(c.next().unwrap()); let after_absatz = Absatz::parse(c.next().unwrap());
absatze.push(Content::List(vec![ absatze.push(Content::List(vec![
Content::Text(absatz.content.replace('\u{a0}', " ")), Content::Text(absatz.content),
Content::List(table.get_list()), Content::List(table.get_list()),
Content::Text(after_absatz.content), Content::Text(after_absatz.content),
])); ]));
} else { } else {
absatze.push(Content::List(vec![ absatze.push(Content::List(vec![
Content::Text(absatz.content.replace('\u{a0}', " ")), Content::Text(absatz.content),
Content::List(table.get_list()), Content::List(table.get_list()),
])); ]));
} }
} }
} else if Absatz::test_with_typ(&child, "satz") { } else if Absatz::test_with_typ(child, "satz") {
// After a 'liste' there can be a '<absatz typ="satz"' which should be part of the list // After a 'liste' there can be a '<absatz typ="satz"' which should be part of the list
// (e.g. 1209 ABGB) // (e.g. 1209 ABGB)
absatze.push(Content::List(vec![ absatze.push(Content::List(vec![
Content::Text(absatz.content.replace('\u{a0}', " ").clone()), Content::Text(absatz.content.clone()),
Content::Text(Absatz::parse(c.next().unwrap()).content), Content::Text(Absatz::parse(c.next().unwrap()).content),
])); ]));
} else { } else {
absatze.push(Content::Text(absatz.content.replace('\u{a0}', " ").clone())); absatze.push(Content::Text(absatz.content.clone()));
} }
} else { } else {
absatze.push(Content::Text(absatz.content.replace('\u{a0}', " ").clone())); absatze.push(Content::Text(absatz.content.clone()));
} }
//There can be as many 'Absätze' as our lovely lawsetter wants //There can be as many 'Absätze' as our lovely lawsetter wants
@ -193,13 +190,10 @@ impl Abschnitt {
if let Some(child) = c.peek() { if let Some(child) = c.peek() {
if Liste::test(child) { if Liste::test(child) {
let liste = Liste::parse(c.next().unwrap()); let liste = Liste::parse(c.next().unwrap());
let mut to_add = vec![ let mut to_add = vec![Content::Text(abs.content), liste.get_content()];
Content::Text(abs.content.replace('\u{a0}', " ")),
liste.get_content(),
];
println!("{to_add:#?}"); println!("{to_add:#?}");
if let Some(subchild) = c.peek() { if let Some(subchild) = c.peek() {
if Absatz::test_with_typ(&subchild, "satz") { if Absatz::test_with_typ(subchild, "satz") {
// After a 'liste' there can be a '<absatz typ="satz"' which should be part of the list // After a 'liste' there can be a '<absatz typ="satz"' which should be part of the list
// (e.g. 85 StGB) // (e.g. 85 StGB)
to_add to_add
@ -208,9 +202,9 @@ impl Abschnitt {
} }
absatze.push(Content::List(to_add)); absatze.push(Content::List(to_add));
} else { } else {
let mut content = abs.content.replace('\u{a0}', " "); let mut content = abs.content;
while let Some(subchild) = c.peek() { while let Some(subchild) = c.peek() {
if Absatz::test_with_typ(&subchild, "erltext") { if Absatz::test_with_typ(subchild, "erltext") {
content += &Absatz::parse(c.next().unwrap()).content; content += &Absatz::parse(c.next().unwrap()).content;
} else { } else {
break; break;
@ -219,7 +213,7 @@ impl Abschnitt {
absatze.push(Content::Text(content)); absatze.push(Content::Text(content));
} }
} else { } else {
absatze.push(Content::Text(abs.content.replace('\u{a0}', " "))); absatze.push(Content::Text(abs.content));
} }
continue; continue;
} }
@ -322,9 +316,10 @@ impl Ziffernliste {
let mut elems = Vec::new(); let mut elems = Vec::new();
for elem in &self.listelems { for elem in &self.listelems {
elems.push(Content::Text( elems.push(Content::Text(format!(
format!("{} {}", elem.symbol.content, elem.text).replace('\u{a0}', " "), "{} {}",
)); elem.symbol.content, elem.text
)));
} }
Content::List(elems) Content::List(elems)
@ -394,7 +389,7 @@ impl Table {
txt.push_str(&format!("{} ", td.absatz.content)); txt.push_str(&format!("{} ", td.absatz.content));
} }
ret.push(Content::Text(format!("- {txt}",).replace('\u{a0}', " "))); ret.push(Content::Text(format!("- {txt}",)));
} }
ret ret
@ -475,7 +470,7 @@ impl AbsatzAbs {
let gldsym = match c.peek() { let gldsym = match c.peek() {
Some(child) => { Some(child) => {
if Leaf::test(child, "gldsym") { if Leaf::test(child, "gldsym") {
Some(Leaf::parse(c.next().unwrap(), "gldsym").replace('\u{a0}', " ")) Some(Leaf::parse(c.next().unwrap(), "gldsym"))
} else { } else {
None None
} }