From a78ba95775669ec43e2712a0a8d56d33266f2663 Mon Sep 17 00:00:00 2001 From: philipp Date: Sat, 4 Nov 2023 18:41:17 +0100 Subject: [PATCH] push --- src/law.rs | 32 +++++---- src/main.rs | 3 +- src/overview/mod.rs | 4 +- src/par/mod.rs | 1 + src/par/parser.rs | 168 ++++++++++++++++++++++++++++++++++++++++---- 5 files changed, 180 insertions(+), 28 deletions(-) diff --git a/src/law.rs b/src/law.rs index 2392c67..6838c6c 100644 --- a/src/law.rs +++ b/src/law.rs @@ -76,11 +76,12 @@ impl LawBuilder { last_header_index: None, }; - overview::parse(law_id.unwrap(), &mut builder); + overview::parse(law_id.unwrap(), &mut builder).unwrap(); } /// Sets a new header. pub(crate) fn new_header(&mut self, name: &str) { + println!("new_header={name}"); let classifier_index = self .classifiers .iter() @@ -97,6 +98,7 @@ impl LawBuilder { /// Sets a new description for the last classifier. pub(crate) fn new_desc(&mut self, desc: &str) { + println!("new_desc={desc}"); if let Some(index) = self.last_header_index { self.classifiers[index].set_desc(desc); } else { @@ -105,9 +107,11 @@ impl LawBuilder { } /// Adds a new paragraph. - pub(crate) fn new_par(&mut self, par: Content) { + pub(crate) fn new_par(&mut self, par: String, content: Content) { + println!("new_par=par:{par};content:{content:#?}"); if let Some(class) = self.classifiers.last_mut() { - class.add_par(par); + let section = Section { symb: par, content }; + class.add_section(section); } else { panic!("Expected at least one classifier"); } @@ -115,14 +119,16 @@ impl LawBuilder { /// Next paragraph has a header, store its name. pub(crate) fn new_next_para_header(&mut self, header: &str) { + println!("new_next_para_header={header}"); self.next_para_header = Some(header.into()); } } +#[derive(Debug, PartialEq, Clone)] pub(crate) struct Section { symb: String, // §"1", §"2", ... content: Content, - header: Option
, + //header: Option
, } #[derive(Clone)] @@ -150,7 +156,7 @@ impl Header { pub(crate) struct ClassifierInstance { name: String, desc: Option, - content: Vec, + sections: Vec
, } impl ClassifierInstance { @@ -158,7 +164,7 @@ impl ClassifierInstance { Self { name: name.into(), desc: None, - content: Vec::new(), + sections: Vec::new(), } } @@ -166,8 +172,8 @@ impl ClassifierInstance { self.desc = Some(desc.into()); } - fn add_par(&mut self, content: Content) { - self.content.push(content); + fn add_section(&mut self, section: Section) { + self.sections.push(section); } } @@ -199,9 +205,6 @@ impl Classifier { self.instances.push(name); } - fn add_par(&mut self, content: Content) { - self.instances.last_mut().unwrap().add_par(content); - } fn set_desc(&mut self, desc: &str) { self.instances.last_mut().unwrap().set_desc(desc); } @@ -209,11 +212,16 @@ impl Classifier { fn used_for(&self, name: &str) -> bool { name.contains(&self.name) } + + fn add_section(&mut self, section: Section) { + self.instances.last_mut().unwrap().add_section(section); + } } #[derive(Clone, Debug, PartialEq)] pub(crate) enum Content { - Text(String), //This is my direct law text + Text(String), //This is my direct law text + TextWithList(String, Vec>), Item(Vec>), //(1) This is general law. (2) This is more specific law List(Vec>), //1. my first item } diff --git a/src/main.rs b/src/main.rs index 54071d1..0d247ea 100644 --- a/src/main.rs +++ b/src/main.rs @@ -41,6 +41,5 @@ impl From for Error { } fn main() { - let mut law = LawBuilder::new("UrhG"); - //overview::parse(10001899).unwrap(); //TEG + LawBuilder::new("UrhG"); } diff --git a/src/overview/mod.rs b/src/overview/mod.rs index 58619f1..0c5a119 100644 --- a/src/overview/mod.rs +++ b/src/overview/mod.rs @@ -43,8 +43,10 @@ pub(crate) fn parse(overview_id: usize, builder: &mut LawBuilder) -> Result<(), let wrapper: Wrapper = serde_json::from_str(&json)?; for par in wrapper.ogd_search_result.get_par().into_iter().skip(1) { + // skip bc. first one is + // always not relevant for + // me :-) crate::par::parse(&par, builder).unwrap(); - break; } Ok(()) diff --git a/src/par/mod.rs b/src/par/mod.rs index b6b4079..9b3a3b6 100644 --- a/src/par/mod.rs +++ b/src/par/mod.rs @@ -9,6 +9,7 @@ fn fetch_page(url: &str) -> Result { pub(crate) fn parse(url: &str, builder: &mut LawBuilder) -> Result<(), Error> { println!("{url}"); let xml = fetch_page(url)?; + let xml = xml.replace("", "-"); // used e.g. in §11 Abs. 3 UrhG let risdok = Risdok::from_str(&xml, builder)?; println!("{builder:#?}"); diff --git a/src/par/parser.rs b/src/par/parser.rs index 69c1cb5..4e62335 100644 --- a/src/par/parser.rs +++ b/src/par/parser.rs @@ -144,12 +144,40 @@ impl Abschnitt { } } + // TODO: Continue here: We want to create a `Section`. + // + // We have 2 tasks + // 1) Get paragraph id + // 2) Get content + let mut absatze = Vec::new(); + let absatz = AbsatzAbs::parse(c.next().expect("We need at least one 'Absatz'")); + let par_id = absatz + .gldsym + .clone() + .expect("First 'Absatz' needs to have § id"); + absatze.push(absatz); + // If there's a "liste" after an "absatz", the "liste" should be part of the "absatz" + if let Some(child) = c.peek() { + if Liste::test(child) { + let liste = Liste::parse(c.next().unwrap()); + //TODO do something with list + } + } + + //There can be as many 'Absätze' as our lovely lawsetter wants loop { match c.peek() { Some(child) => { if AbsatzAbs::test(child) { absatze.push(AbsatzAbs::parse(c.next().unwrap())); + // If there's a "liste" after an "absatz", the "liste" should be part of the "absatz" + if let Some(child) = c.peek() { + if Liste::test(child) { + let liste = Liste::parse(c.next().unwrap()); + //TODO do something with list + } + } continue; } break; @@ -159,24 +187,38 @@ impl Abschnitt { } if absatze.len() == 1 { - builder.new_par(Content::Text(format!( - "{} {}", - absatze[0].gldsym.clone().unwrap(), - absatze[0].content - ))); + builder.new_par(par_id, Content::Text(absatze[0].content.clone())); } else { - let mut content = Vec::new(); + let mut contents = Vec::new(); for a in &absatze { - let mut txt = String::new(); - if let Some(sym) = &a.gldsym { - txt.push_str(&format!("{sym} ")); - } - txt.push_str(&a.content); - content.push(Box::new(Content::Text(txt))); + contents.push(Box::new(Content::Text(a.content.clone()))); } - builder.new_par(Content::Item(content)); + builder.new_par(par_id, Content::Item(contents)); } + //if absatze.len() == 1 { + // builder.new_par(Content::Text(format!( + // "{} {}", + // absatze[0].gldsym.clone().unwrap(), + // absatze[0].content + // ))); + //} else { + // let mut content = Vec::new(); + // for a in &absatze { + // let mut txt = String::new(); + // if let Some(sym) = &a.gldsym { + // if symb.is_some() { + // panic!("Two (or more) § symbols in single paragraph ?!?"); + // } else { + // symb = Some(sym); + // } + // } + // txt.push_str(&a.content); + // content.push(Box::new(Content::Text(txt))); + // } + // builder.new_par(Content::Item(content)); + //} + // Skip all UeberschriftTitle and Absatz loop { match c.peek() { @@ -195,12 +237,112 @@ impl Abschnitt { } } + println!("===="); + println!("{c:#?}"); assert_eq!(c.next(), None); Self { absatze } } } +#[derive(Debug, PartialEq)] +pub(crate) struct Symbol { + stellen: String, + content: String, +} +impl Symbol { + pub(crate) fn parse(n: Node) -> Self { + assert!(n.tag_name().name() == "symbol"); + assert_eq!(n.children().count(), 1); + + let stellen = n.attribute("stellen").unwrap().into(); + let content = n.text().unwrap().into(); + + Self { stellen, content } + } +} + +#[derive(Debug, PartialEq)] +pub(crate) struct Listelem { + symbol: Symbol, + text: String, +} +impl Listelem { + pub(crate) fn test(n: &Node) -> bool { + n.tag_name().name() == "listelem" + } + + pub(crate) fn parse(n: Node) -> Self { + assert!(n.tag_name().name() == "listelem"); + + let mut c = n.children(); + + let symbol = Symbol::parse(c.next().unwrap()); + + let text = c.next().unwrap().text().unwrap().into(); + + assert_eq!(c.next(), None); + + Self { symbol, text } + } +} + +#[derive(Debug, PartialEq)] +pub(crate) struct Ziffernliste { + ebene: String, + listelems: Vec, +} +impl Ziffernliste { + pub(crate) fn parse(n: Node) -> Self { + assert!(n.tag_name().name() == "ziffernliste"); + + let ebene = n.attribute("ebene").unwrap().into(); + let mut c = n.children().peekable(); + + let mut listelems = Vec::new(); + loop { + match c.peek() { + Some(child) => { + if Listelem::test(child) { + listelems.push(Listelem::parse(c.next().unwrap())); + continue; + } + } + None => break, + } + break; + } + + assert_eq!(c.next(), None); + + Self { ebene, listelems } + } +} + +#[derive(Debug, PartialEq)] +pub(crate) struct Liste { + ziffernliste: Ziffernliste, +} +impl Liste { + pub(crate) fn test(n: &Node) -> bool { + n.tag_name().name() == "liste" + && n.children().count() == 1 + && n.children().next().unwrap().tag_name().name() == "ziffernliste" + } + + pub(crate) fn parse(n: Node) -> Self { + assert!(Self::test(&n)); + + let mut c = n.children(); + + let ziffernliste = Ziffernliste::parse(c.next().unwrap()); + + assert_eq!(c.next(), None); + + Self { ziffernliste } + } +} + #[derive(Debug, PartialEq)] pub(crate) struct AbsatzAbs { gldsym: Option,