This commit is contained in:
philipp 2023-11-04 18:41:17 +01:00
parent ed109e1784
commit a78ba95775
5 changed files with 180 additions and 28 deletions

View File

@ -76,11 +76,12 @@ impl LawBuilder {
last_header_index: None, last_header_index: None,
}; };
overview::parse(law_id.unwrap(), &mut builder); overview::parse(law_id.unwrap(), &mut builder).unwrap();
} }
/// Sets a new header. /// Sets a new header.
pub(crate) fn new_header(&mut self, name: &str) { pub(crate) fn new_header(&mut self, name: &str) {
println!("new_header={name}");
let classifier_index = self let classifier_index = self
.classifiers .classifiers
.iter() .iter()
@ -97,6 +98,7 @@ impl LawBuilder {
/// Sets a new description for the last classifier. /// Sets a new description for the last classifier.
pub(crate) fn new_desc(&mut self, desc: &str) { pub(crate) fn new_desc(&mut self, desc: &str) {
println!("new_desc={desc}");
if let Some(index) = self.last_header_index { if let Some(index) = self.last_header_index {
self.classifiers[index].set_desc(desc); self.classifiers[index].set_desc(desc);
} else { } else {
@ -105,9 +107,11 @@ impl LawBuilder {
} }
/// Adds a new paragraph. /// Adds a new paragraph.
pub(crate) fn new_par(&mut self, par: Content) { pub(crate) fn new_par(&mut self, par: String, content: Content) {
println!("new_par=par:{par};content:{content:#?}");
if let Some(class) = self.classifiers.last_mut() { if let Some(class) = self.classifiers.last_mut() {
class.add_par(par); let section = Section { symb: par, content };
class.add_section(section);
} else { } else {
panic!("Expected at least one classifier"); panic!("Expected at least one classifier");
} }
@ -115,14 +119,16 @@ impl LawBuilder {
/// Next paragraph has a header, store its name. /// Next paragraph has a header, store its name.
pub(crate) fn new_next_para_header(&mut self, header: &str) { pub(crate) fn new_next_para_header(&mut self, header: &str) {
println!("new_next_para_header={header}");
self.next_para_header = Some(header.into()); self.next_para_header = Some(header.into());
} }
} }
#[derive(Debug, PartialEq, Clone)]
pub(crate) struct Section { pub(crate) struct Section {
symb: String, // §"1", §"2", ... symb: String, // §"1", §"2", ...
content: Content, content: Content,
header: Option<Header>, //header: Option<Header>,
} }
#[derive(Clone)] #[derive(Clone)]
@ -150,7 +156,7 @@ impl Header {
pub(crate) struct ClassifierInstance { pub(crate) struct ClassifierInstance {
name: String, name: String,
desc: Option<String>, desc: Option<String>,
content: Vec<Content>, sections: Vec<Section>,
} }
impl ClassifierInstance { impl ClassifierInstance {
@ -158,7 +164,7 @@ impl ClassifierInstance {
Self { Self {
name: name.into(), name: name.into(),
desc: None, desc: None,
content: Vec::new(), sections: Vec::new(),
} }
} }
@ -166,8 +172,8 @@ impl ClassifierInstance {
self.desc = Some(desc.into()); self.desc = Some(desc.into());
} }
fn add_par(&mut self, content: Content) { fn add_section(&mut self, section: Section) {
self.content.push(content); self.sections.push(section);
} }
} }
@ -199,9 +205,6 @@ impl Classifier {
self.instances.push(name); self.instances.push(name);
} }
fn add_par(&mut self, content: Content) {
self.instances.last_mut().unwrap().add_par(content);
}
fn set_desc(&mut self, desc: &str) { fn set_desc(&mut self, desc: &str) {
self.instances.last_mut().unwrap().set_desc(desc); self.instances.last_mut().unwrap().set_desc(desc);
} }
@ -209,11 +212,16 @@ impl Classifier {
fn used_for(&self, name: &str) -> bool { fn used_for(&self, name: &str) -> bool {
name.contains(&self.name) name.contains(&self.name)
} }
/// Hands `section` to the last entry of `self.instances`.
///
/// # Panics
///
/// Panics if `self.instances` is empty, because `last_mut()` then yields `None`.
fn add_section(&mut self, section: Section) {
    let current = self.instances.last_mut().unwrap();
    current.add_section(section);
}
} }
#[derive(Clone, Debug, PartialEq)] #[derive(Clone, Debug, PartialEq)]
pub(crate) enum Content { pub(crate) enum Content {
Text(String), //This is my direct law text Text(String), //This is my direct law text
TextWithList(String, Vec<Box<Content>>),
Item(Vec<Box<Content>>), //(1) This is general law. (2) This is more specific law Item(Vec<Box<Content>>), //(1) This is general law. (2) This is more specific law
List(Vec<Box<Content>>), //1. my first item List(Vec<Box<Content>>), //1. my first item
} }

View File

@ -41,6 +41,5 @@ impl From<roxmltree::Error> for Error {
} }
fn main() { fn main() {
let mut law = LawBuilder::new("UrhG"); LawBuilder::new("UrhG");
//overview::parse(10001899).unwrap(); //TEG
} }

View File

@ -43,8 +43,10 @@ pub(crate) fn parse(overview_id: usize, builder: &mut LawBuilder) -> Result<(),
let wrapper: Wrapper = serde_json::from_str(&json)?; let wrapper: Wrapper = serde_json::from_str(&json)?;
for par in wrapper.ogd_search_result.get_par().into_iter().skip(1) { for par in wrapper.ogd_search_result.get_par().into_iter().skip(1) {
// skip bc. first one is
// always not relevant for
// me :-)
crate::par::parse(&par, builder).unwrap(); crate::par::parse(&par, builder).unwrap();
break;
} }
Ok(()) Ok(())

View File

@ -9,6 +9,7 @@ fn fetch_page(url: &str) -> Result<String, Error> {
pub(crate) fn parse(url: &str, builder: &mut LawBuilder) -> Result<(), Error> { pub(crate) fn parse(url: &str, builder: &mut LawBuilder) -> Result<(), Error> {
println!("{url}"); println!("{url}");
let xml = fetch_page(url)?; let xml = fetch_page(url)?;
let xml = xml.replace("<gdash />", "-"); // used e.g. in §11 Abs. 3 UrhG
let risdok = Risdok::from_str(&xml, builder)?; let risdok = Risdok::from_str(&xml, builder)?;
println!("{builder:#?}"); println!("{builder:#?}");

View File

@ -144,12 +144,40 @@ impl Abschnitt {
} }
} }
// TODO: Continue here: We want to create a `Section`.
//
// We have 2 tasks
// 1) Get paragraph id
// 2) Get content
let mut absatze = Vec::new(); let mut absatze = Vec::new();
let absatz = AbsatzAbs::parse(c.next().expect("We need at least one 'Absatz'"));
let par_id = absatz
.gldsym
.clone()
.expect("First 'Absatz' needs to have § id");
absatze.push(absatz);
// If there's a "liste" after an "absatz", the "liste" should be part of the "absatz"
if let Some(child) = c.peek() {
if Liste::test(child) {
let liste = Liste::parse(c.next().unwrap());
//TODO do something with list
}
}
//There can be as many 'Absätze' as our lovely lawsetter wants
loop { loop {
match c.peek() { match c.peek() {
Some(child) => { Some(child) => {
if AbsatzAbs::test(child) { if AbsatzAbs::test(child) {
absatze.push(AbsatzAbs::parse(c.next().unwrap())); absatze.push(AbsatzAbs::parse(c.next().unwrap()));
// If there's a "liste" after an "absatz", the "liste" should be part of the "absatz"
if let Some(child) = c.peek() {
if Liste::test(child) {
let liste = Liste::parse(c.next().unwrap());
//TODO do something with list
}
}
continue; continue;
} }
break; break;
@ -159,24 +187,38 @@ impl Abschnitt {
} }
if absatze.len() == 1 { if absatze.len() == 1 {
builder.new_par(Content::Text(format!( builder.new_par(par_id, Content::Text(absatze[0].content.clone()));
"{} {}",
absatze[0].gldsym.clone().unwrap(),
absatze[0].content
)));
} else { } else {
let mut content = Vec::new(); let mut contents = Vec::new();
for a in &absatze { for a in &absatze {
let mut txt = String::new(); contents.push(Box::new(Content::Text(a.content.clone())));
if let Some(sym) = &a.gldsym {
txt.push_str(&format!("{sym} "));
} }
txt.push_str(&a.content); builder.new_par(par_id, Content::Item(contents));
content.push(Box::new(Content::Text(txt)));
}
builder.new_par(Content::Item(content));
} }
//if absatze.len() == 1 {
// builder.new_par(Content::Text(format!(
// "{} {}",
// absatze[0].gldsym.clone().unwrap(),
// absatze[0].content
// )));
//} else {
// let mut content = Vec::new();
// for a in &absatze {
// let mut txt = String::new();
// if let Some(sym) = &a.gldsym {
// if symb.is_some() {
// panic!("Two (or more) § symbols in single paragraph ?!?");
// } else {
// symb = Some(sym);
// }
// }
// txt.push_str(&a.content);
// content.push(Box::new(Content::Text(txt)));
// }
// builder.new_par(Content::Item(content));
//}
// Skip all UeberschriftTitle and Absatz // Skip all UeberschriftTitle and Absatz
loop { loop {
match c.peek() { match c.peek() {
@ -195,12 +237,112 @@ impl Abschnitt {
} }
} }
println!("====");
println!("{c:#?}");
assert_eq!(c.next(), None); assert_eq!(c.next(), None);
Self { absatze } Self { absatze }
} }
} }
/// Attribute and text captured from a `symbol` XML element.
#[derive(Debug, PartialEq)]
pub(crate) struct Symbol {
    /// Raw value of the element's `stellen` attribute.
    stellen: String,
    /// Text reported by `Node::text()` for the element.
    content: String,
}

impl Symbol {
    /// Builds a `Symbol` from a `symbol` node.
    ///
    /// # Panics
    ///
    /// Panics if the tag name is not `symbol`, if the node does not have exactly
    /// one child, if the `stellen` attribute is missing, or if the node has no text.
    pub(crate) fn parse(n: Node) -> Self {
        assert!(n.tag_name().name() == "symbol");
        assert_eq!(n.children().count(), 1);

        // Field initialisers run top to bottom, so the attribute is still read
        // before the text.
        Self {
            stellen: n.attribute("stellen").unwrap().into(),
            content: n.text().unwrap().into(),
        }
    }
}
/// One `listelem` entry: its leading `symbol` child plus the text of the child
/// that follows it.
#[derive(Debug, PartialEq)]
pub(crate) struct Listelem {
    /// Parsed from the first child of the element.
    symbol: Symbol,
    /// Text of the second child of the element.
    text: String,
}

impl Listelem {
    /// Reports whether `n` is a `listelem` element, judged by tag name only.
    pub(crate) fn test(n: &Node) -> bool {
        matches!(n.tag_name().name(), "listelem")
    }

    /// Builds a `Listelem` from a `listelem` node.
    ///
    /// # Panics
    ///
    /// Panics if the tag name is not `listelem`, if the node does not have exactly
    /// two children, if the first child is not a valid `symbol` (see
    /// `Symbol::parse`), or if the second child has no text.
    pub(crate) fn parse(n: Node) -> Self {
        assert!(n.tag_name().name() == "listelem");

        let mut children = n.children();

        let symbol_node = children.next().unwrap();
        let symbol = Symbol::parse(symbol_node);

        let text_node = children.next().unwrap();
        let text: String = text_node.text().unwrap().into();

        // Nothing may follow the two expected children.
        assert_eq!(children.next(), None);

        Self { symbol, text }
    }
}
/// A `ziffernliste` element: its `ebene` attribute and its `listelem` children.
#[derive(Debug, PartialEq)]
pub(crate) struct Ziffernliste {
    /// Raw value of the element's `ebene` attribute.
    ebene: String,
    /// Entries in document order.
    listelems: Vec<Listelem>,
}

impl Ziffernliste {
    /// Builds a `Ziffernliste` from a `ziffernliste` node.
    ///
    /// # Panics
    ///
    /// Panics if the tag name is not `ziffernliste`, if the `ebene` attribute is
    /// missing, if any child fails `Listelem::parse`, or if a child that is not a
    /// `listelem` is encountered.
    pub(crate) fn parse(n: Node) -> Self {
        assert!(n.tag_name().name() == "ziffernliste");
        let ebene = n.attribute("ebene").unwrap().into();

        let mut children = n.children().peekable();
        let mut listelems = Vec::new();

        // Consume the leading run of `listelem` children; the first child that is
        // not one stays in the iterator and trips the assertion below.
        while let Some(node) = children.next_if(|child| Listelem::test(child)) {
            listelems.push(Listelem::parse(node));
        }

        assert_eq!(children.next(), None);

        Self { ebene, listelems }
    }
}
/// A `liste` element wrapping exactly one `ziffernliste`.
#[derive(Debug, PartialEq)]
pub(crate) struct Liste {
    /// The single child of the element.
    ziffernliste: Ziffernliste,
}

impl Liste {
    /// Reports whether `n` is a `liste` element whose only child is a
    /// `ziffernliste` element.
    pub(crate) fn test(n: &Node) -> bool {
        if n.tag_name().name() != "liste" {
            return false;
        }

        let mut children = n.children();
        match children.next() {
            // Exactly one child: a first one exists and nothing follows it.
            Some(only) if children.next().is_none() => {
                only.tag_name().name() == "ziffernliste"
            }
            _ => false,
        }
    }

    /// Builds a `Liste` from a node accepted by [`Liste::test`].
    ///
    /// # Panics
    ///
    /// Panics if `Self::test(&n)` is false or if the inner `ziffernliste` fails
    /// `Ziffernliste::parse`.
    pub(crate) fn parse(n: Node) -> Self {
        assert!(Self::test(&n));

        let mut children = n.children();
        let inner = children.next().unwrap();
        let ziffernliste = Ziffernliste::parse(inner);
        assert_eq!(children.next(), None);

        Self { ziffernliste }
    }
}
#[derive(Debug, PartialEq)] #[derive(Debug, PartialEq)]
pub(crate) struct AbsatzAbs { pub(crate) struct AbsatzAbs {
gldsym: Option<String>, gldsym: Option<String>,