From 33e10185c01e388f616ae3c42b7e8662c0c4bf01 Mon Sep 17 00:00:00 2001
From: philipp <philipp@hofer.link>
Date: Fri, 16 Feb 2024 10:29:30 +0100
Subject: [PATCH] clean code, parse footer metadata

---
 src/paragraph/parser/abschnitt.rs | 36 +++++++++++++++++++------------
 src/paragraph/parser/liste.rs     | 25 ---------------------
 src/paragraph/parser/mod.rs       | 18 ++++++++--------
 3 files changed, 31 insertions(+), 48 deletions(-)
diff --git a/src/paragraph/parser/abschnitt.rs b/src/paragraph/parser/abschnitt.rs
index f270a56..2a63cd5 100644
--- a/src/paragraph/parser/abschnitt.rs
+++ b/src/paragraph/parser/abschnitt.rs
@@ -54,12 +54,12 @@ impl Abschnitt {
             builder.new_par(par_id, absatze[0].clone());
         } else {
             let mut contents = Vec::new();
-            for a in &absatze {
-                contents.push(a.clone());
-            }
+            contents.extend(absatze.iter().cloned());
             builder.new_par(par_id, Content::Item(contents));
         }
 
+        ret.handle_metadata(&mut c, builder);
+
         // Skip all UeberschriftTitle and Absatz
         while let Some(child) = c.peek() {
             if Ueberschrift::test(child, "titel") {
@@ -73,29 +73,38 @@ impl Abschnitt {
             break;
         }
 
-        // assert_eq!(c.next(), None);
+        assert_eq!(c.next(), None);
 
         ret.cont = true;
         ret
     }
 
-    // There are paragraph-specific meta-data at the top of each xml file. We parse those. When we
-    // encounter the title "Text" the real content starts, we stop parsing meta data.
+    // Each XML file carries paragraph-specific metadata at its top and bottom, which we parse
+    // here. The title "Text" marks the start of the real content, so we stop parsing metadata
+    // once we encounter it.
     fn handle_metadata(&mut self, c: &mut Peekable<Children>, builder: &mut LawBuilder) {
-        loop {
+        while c.peek().is_some() {
             let key = Ueberschrift::parse(c.next().unwrap(), "titel").content;
+            println!("{key}");
 
             // We are done with meta-data parsing
             if key == "Text" {
                 break;
             }
 
-            let absatz = Absatz::parse(
-                c.next()
-                    .expect("Expected absatz after title in par headers"),
-            );
-
-            let value = absatz.content;
+            let mut value = String::new();
+            while let Some(child) = c.peek() {
+                if Absatz::test_with_typ(child, "erltext") {
+                    let absatz = Absatz::parse(c.next().unwrap());
+                    value.push_str(&format!("{}\n", absatz.content));
+                } else {
+                    break;
+                }
+            }
+            value = value.trim().into();
+            if value == "" {
+                panic!("Expected at least on erltext-absatz after title meta-data");
+            }
 
             // We want ot use this information in our markdown output.
             // TODO: Use all metadata, instead of this specific call
@@ -104,7 +113,6 @@ impl Abschnitt {
             }
 
             self.metadata.insert(key, value);
-            continue;
         }
     }
 
diff --git a/src/paragraph/parser/liste.rs b/src/paragraph/parser/liste.rs
index e8b632f..412a61d 100644
--- a/src/paragraph/parser/liste.rs
+++ b/src/paragraph/parser/liste.rs
@@ -46,31 +46,6 @@ impl Liste {
         Self { content }
     }
 
-    pub(crate) fn parse(n: Node) -> Self {
-        Expect::from(&n).tag("liste");
-
-        let mut content = Vec::new();
-
-        let mut c = n.children().peekable();
-
-        while let Some(child) = c.peek() {
-            if Ziffernliste::test(child) {
-                content.push(Ziffernliste::parse(c.next().unwrap()).get_content());
-            } else if Schlussteil::test(child) {
-                // 162 Schifffahrtsgesetz show use that a 'schlussteil' can be at the start of a list
-                content.push(Content::Text(Schlussteil::parse(c.next().unwrap()).content));
-            } else if Absatz::test_with_typ(child, "satz") {
-                content.push(Content::Text(Absatz::parse(c.next().unwrap()).content));
-            } else {
-                break;
-            }
-        }
-
-        assert_eq!(c.next(), None);
-
-        Self { content }
-    }
-
     pub(crate) fn get_content(&self) -> Content {
         Content::List(self.content.clone())
     }
diff --git a/src/paragraph/parser/mod.rs b/src/paragraph/parser/mod.rs
index a7869af..806c474 100644
--- a/src/paragraph/parser/mod.rs
+++ b/src/paragraph/parser/mod.rs
@@ -303,17 +303,17 @@ impl Absatz {
     pub(crate) fn parse(n: Node) -> Self {
         Expect::from(&n).tag("absatz");
 
-        if let Some(text) = n.text() {
-            Self {
-                content: text.into(),
-                typ: n.attribute("typ").unwrap().into(),
-            }
-        } else {
-            Self {
-                content: String::new(),
-                typ: n.attribute("typ").unwrap().into(),
+        let typ = n.attribute("typ").unwrap().into();
+
+        let mut content = String::new();
+        // Get text from this element + all direct children
+        for c in n.children() {
+            if let Some(text) = c.text() {
+                content.push_str(text);
             }
         }
+
+        Self { content, typ }
     }
 }