push
This commit is contained in:
parent
ed109e1784
commit
a78ba95775
32
src/law.rs
32
src/law.rs
@ -76,11 +76,12 @@ impl LawBuilder {
|
|||||||
last_header_index: None,
|
last_header_index: None,
|
||||||
};
|
};
|
||||||
|
|
||||||
overview::parse(law_id.unwrap(), &mut builder);
|
overview::parse(law_id.unwrap(), &mut builder).unwrap();
|
||||||
}
|
}
|
||||||
|
|
||||||
/// Sets a new header.
|
/// Sets a new header.
|
||||||
pub(crate) fn new_header(&mut self, name: &str) {
|
pub(crate) fn new_header(&mut self, name: &str) {
|
||||||
|
println!("new_header={name}");
|
||||||
let classifier_index = self
|
let classifier_index = self
|
||||||
.classifiers
|
.classifiers
|
||||||
.iter()
|
.iter()
|
||||||
@ -97,6 +98,7 @@ impl LawBuilder {
|
|||||||
|
|
||||||
/// Sets a new description for the last classifier.
|
/// Sets a new description for the last classifier.
|
||||||
pub(crate) fn new_desc(&mut self, desc: &str) {
|
pub(crate) fn new_desc(&mut self, desc: &str) {
|
||||||
|
println!("new_desc={desc}");
|
||||||
if let Some(index) = self.last_header_index {
|
if let Some(index) = self.last_header_index {
|
||||||
self.classifiers[index].set_desc(desc);
|
self.classifiers[index].set_desc(desc);
|
||||||
} else {
|
} else {
|
||||||
@ -105,9 +107,11 @@ impl LawBuilder {
|
|||||||
}
|
}
|
||||||
|
|
||||||
/// Adds a new paragraph.
|
/// Adds a new paragraph.
|
||||||
pub(crate) fn new_par(&mut self, par: Content) {
|
pub(crate) fn new_par(&mut self, par: String, content: Content) {
|
||||||
|
println!("new_par=par:{par};content:{content:#?}");
|
||||||
if let Some(class) = self.classifiers.last_mut() {
|
if let Some(class) = self.classifiers.last_mut() {
|
||||||
class.add_par(par);
|
let section = Section { symb: par, content };
|
||||||
|
class.add_section(section);
|
||||||
} else {
|
} else {
|
||||||
panic!("Expected at least one classifier");
|
panic!("Expected at least one classifier");
|
||||||
}
|
}
|
||||||
@ -115,14 +119,16 @@ impl LawBuilder {
|
|||||||
|
|
||||||
/// Next paragraph has a header, store its name.
|
/// Next paragraph has a header, store its name.
|
||||||
pub(crate) fn new_next_para_header(&mut self, header: &str) {
|
pub(crate) fn new_next_para_header(&mut self, header: &str) {
|
||||||
|
println!("new_next_para_header={header}");
|
||||||
self.next_para_header = Some(header.into());
|
self.next_para_header = Some(header.into());
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
#[derive(Debug, PartialEq, Clone)]
|
||||||
pub(crate) struct Section {
|
pub(crate) struct Section {
|
||||||
symb: String, // §"1", §"2", ...
|
symb: String, // §"1", §"2", ...
|
||||||
content: Content,
|
content: Content,
|
||||||
header: Option<Header>,
|
//header: Option<Header>,
|
||||||
}
|
}
|
||||||
|
|
||||||
#[derive(Clone)]
|
#[derive(Clone)]
|
||||||
@ -150,7 +156,7 @@ impl Header {
|
|||||||
pub(crate) struct ClassifierInstance {
|
pub(crate) struct ClassifierInstance {
|
||||||
name: String,
|
name: String,
|
||||||
desc: Option<String>,
|
desc: Option<String>,
|
||||||
content: Vec<Content>,
|
sections: Vec<Section>,
|
||||||
}
|
}
|
||||||
|
|
||||||
impl ClassifierInstance {
|
impl ClassifierInstance {
|
||||||
@ -158,7 +164,7 @@ impl ClassifierInstance {
|
|||||||
Self {
|
Self {
|
||||||
name: name.into(),
|
name: name.into(),
|
||||||
desc: None,
|
desc: None,
|
||||||
content: Vec::new(),
|
sections: Vec::new(),
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -166,8 +172,8 @@ impl ClassifierInstance {
|
|||||||
self.desc = Some(desc.into());
|
self.desc = Some(desc.into());
|
||||||
}
|
}
|
||||||
|
|
||||||
fn add_par(&mut self, content: Content) {
|
fn add_section(&mut self, section: Section) {
|
||||||
self.content.push(content);
|
self.sections.push(section);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -199,9 +205,6 @@ impl Classifier {
|
|||||||
self.instances.push(name);
|
self.instances.push(name);
|
||||||
}
|
}
|
||||||
|
|
||||||
fn add_par(&mut self, content: Content) {
|
|
||||||
self.instances.last_mut().unwrap().add_par(content);
|
|
||||||
}
|
|
||||||
fn set_desc(&mut self, desc: &str) {
|
fn set_desc(&mut self, desc: &str) {
|
||||||
self.instances.last_mut().unwrap().set_desc(desc);
|
self.instances.last_mut().unwrap().set_desc(desc);
|
||||||
}
|
}
|
||||||
@ -209,11 +212,16 @@ impl Classifier {
|
|||||||
fn used_for(&self, name: &str) -> bool {
|
fn used_for(&self, name: &str) -> bool {
|
||||||
name.contains(&self.name)
|
name.contains(&self.name)
|
||||||
}
|
}
|
||||||
|
|
||||||
|
fn add_section(&mut self, section: Section) {
|
||||||
|
self.instances.last_mut().unwrap().add_section(section);
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
#[derive(Clone, Debug, PartialEq)]
|
#[derive(Clone, Debug, PartialEq)]
|
||||||
pub(crate) enum Content {
|
pub(crate) enum Content {
|
||||||
Text(String), //This is my direct law text
|
Text(String), //This is my direct law text
|
||||||
|
TextWithList(String, Vec<Box<Content>>),
|
||||||
Item(Vec<Box<Content>>), //(1) This is general law. (2) This is more specific law
|
Item(Vec<Box<Content>>), //(1) This is general law. (2) This is more specific law
|
||||||
List(Vec<Box<Content>>), //1. my first item
|
List(Vec<Box<Content>>), //1. my first item
|
||||||
}
|
}
|
||||||
|
@ -41,6 +41,5 @@ impl From<roxmltree::Error> for Error {
|
|||||||
}
|
}
|
||||||
|
|
||||||
fn main() {
|
fn main() {
|
||||||
let mut law = LawBuilder::new("UrhG");
|
LawBuilder::new("UrhG");
|
||||||
//overview::parse(10001899).unwrap(); //TEG
|
|
||||||
}
|
}
|
||||||
|
@ -43,8 +43,10 @@ pub(crate) fn parse(overview_id: usize, builder: &mut LawBuilder) -> Result<(),
|
|||||||
let wrapper: Wrapper = serde_json::from_str(&json)?;
|
let wrapper: Wrapper = serde_json::from_str(&json)?;
|
||||||
|
|
||||||
for par in wrapper.ogd_search_result.get_par().into_iter().skip(1) {
|
for par in wrapper.ogd_search_result.get_par().into_iter().skip(1) {
|
||||||
|
// skip bc. first one is
|
||||||
|
// always not relevant for
|
||||||
|
// me :-)
|
||||||
crate::par::parse(&par, builder).unwrap();
|
crate::par::parse(&par, builder).unwrap();
|
||||||
break;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
Ok(())
|
Ok(())
|
||||||
|
@ -9,6 +9,7 @@ fn fetch_page(url: &str) -> Result<String, Error> {
|
|||||||
pub(crate) fn parse(url: &str, builder: &mut LawBuilder) -> Result<(), Error> {
|
pub(crate) fn parse(url: &str, builder: &mut LawBuilder) -> Result<(), Error> {
|
||||||
println!("{url}");
|
println!("{url}");
|
||||||
let xml = fetch_page(url)?;
|
let xml = fetch_page(url)?;
|
||||||
|
let xml = xml.replace("<gdash />", "-"); // used e.g. in §11 Abs. 3 UrhG
|
||||||
let risdok = Risdok::from_str(&xml, builder)?;
|
let risdok = Risdok::from_str(&xml, builder)?;
|
||||||
|
|
||||||
println!("{builder:#?}");
|
println!("{builder:#?}");
|
||||||
|
@ -144,12 +144,40 @@ impl Abschnitt {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// TODO: Continue here: We want to create a `Section`.
|
||||||
|
//
|
||||||
|
// We have 2 tasks
|
||||||
|
// 1) Get paragraph id
|
||||||
|
// 2) Get content
|
||||||
|
|
||||||
let mut absatze = Vec::new();
|
let mut absatze = Vec::new();
|
||||||
|
let absatz = AbsatzAbs::parse(c.next().expect("We need at least one 'Absatz'"));
|
||||||
|
let par_id = absatz
|
||||||
|
.gldsym
|
||||||
|
.clone()
|
||||||
|
.expect("First 'Absatz' needs to have § id");
|
||||||
|
absatze.push(absatz);
|
||||||
|
// If there's a "liste" after an "absatz", the "liste" should be part of the "absatz"
|
||||||
|
if let Some(child) = c.peek() {
|
||||||
|
if Liste::test(child) {
|
||||||
|
let liste = Liste::parse(c.next().unwrap());
|
||||||
|
//TODO do something with list
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
//There can be as many 'Absätze' as our lovely lawsetter wants
|
||||||
loop {
|
loop {
|
||||||
match c.peek() {
|
match c.peek() {
|
||||||
Some(child) => {
|
Some(child) => {
|
||||||
if AbsatzAbs::test(child) {
|
if AbsatzAbs::test(child) {
|
||||||
absatze.push(AbsatzAbs::parse(c.next().unwrap()));
|
absatze.push(AbsatzAbs::parse(c.next().unwrap()));
|
||||||
|
// If there's a "liste" after an "absatz", the "liste" should be part of the "absatz"
|
||||||
|
if let Some(child) = c.peek() {
|
||||||
|
if Liste::test(child) {
|
||||||
|
let liste = Liste::parse(c.next().unwrap());
|
||||||
|
//TODO do something with list
|
||||||
|
}
|
||||||
|
}
|
||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
break;
|
break;
|
||||||
@ -159,24 +187,38 @@ impl Abschnitt {
|
|||||||
}
|
}
|
||||||
|
|
||||||
if absatze.len() == 1 {
|
if absatze.len() == 1 {
|
||||||
builder.new_par(Content::Text(format!(
|
builder.new_par(par_id, Content::Text(absatze[0].content.clone()));
|
||||||
"{} {}",
|
|
||||||
absatze[0].gldsym.clone().unwrap(),
|
|
||||||
absatze[0].content
|
|
||||||
)));
|
|
||||||
} else {
|
} else {
|
||||||
let mut content = Vec::new();
|
let mut contents = Vec::new();
|
||||||
for a in &absatze {
|
for a in &absatze {
|
||||||
let mut txt = String::new();
|
contents.push(Box::new(Content::Text(a.content.clone())));
|
||||||
if let Some(sym) = &a.gldsym {
|
|
||||||
txt.push_str(&format!("{sym} "));
|
|
||||||
}
|
|
||||||
txt.push_str(&a.content);
|
|
||||||
content.push(Box::new(Content::Text(txt)));
|
|
||||||
}
|
}
|
||||||
builder.new_par(Content::Item(content));
|
builder.new_par(par_id, Content::Item(contents));
|
||||||
}
|
}
|
||||||
|
|
||||||
|
//if absatze.len() == 1 {
|
||||||
|
// builder.new_par(Content::Text(format!(
|
||||||
|
// "{} {}",
|
||||||
|
// absatze[0].gldsym.clone().unwrap(),
|
||||||
|
// absatze[0].content
|
||||||
|
// )));
|
||||||
|
//} else {
|
||||||
|
// let mut content = Vec::new();
|
||||||
|
// for a in &absatze {
|
||||||
|
// let mut txt = String::new();
|
||||||
|
// if let Some(sym) = &a.gldsym {
|
||||||
|
// if symb.is_some() {
|
||||||
|
// panic!("Two (or more) § symbols in single paragraph ?!?");
|
||||||
|
// } else {
|
||||||
|
// symb = Some(sym);
|
||||||
|
// }
|
||||||
|
// }
|
||||||
|
// txt.push_str(&a.content);
|
||||||
|
// content.push(Box::new(Content::Text(txt)));
|
||||||
|
// }
|
||||||
|
// builder.new_par(Content::Item(content));
|
||||||
|
//}
|
||||||
|
|
||||||
// Skip all UeberschriftTitle and Absatz
|
// Skip all UeberschriftTitle and Absatz
|
||||||
loop {
|
loop {
|
||||||
match c.peek() {
|
match c.peek() {
|
||||||
@ -195,12 +237,112 @@ impl Abschnitt {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
println!("====");
|
||||||
|
println!("{c:#?}");
|
||||||
assert_eq!(c.next(), None);
|
assert_eq!(c.next(), None);
|
||||||
|
|
||||||
Self { absatze }
|
Self { absatze }
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
#[derive(Debug, PartialEq)]
|
||||||
|
pub(crate) struct Symbol {
|
||||||
|
stellen: String,
|
||||||
|
content: String,
|
||||||
|
}
|
||||||
|
impl Symbol {
|
||||||
|
pub(crate) fn parse(n: Node) -> Self {
|
||||||
|
assert!(n.tag_name().name() == "symbol");
|
||||||
|
assert_eq!(n.children().count(), 1);
|
||||||
|
|
||||||
|
let stellen = n.attribute("stellen").unwrap().into();
|
||||||
|
let content = n.text().unwrap().into();
|
||||||
|
|
||||||
|
Self { stellen, content }
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
#[derive(Debug, PartialEq)]
|
||||||
|
pub(crate) struct Listelem {
|
||||||
|
symbol: Symbol,
|
||||||
|
text: String,
|
||||||
|
}
|
||||||
|
impl Listelem {
|
||||||
|
pub(crate) fn test(n: &Node) -> bool {
|
||||||
|
n.tag_name().name() == "listelem"
|
||||||
|
}
|
||||||
|
|
||||||
|
pub(crate) fn parse(n: Node) -> Self {
|
||||||
|
assert!(n.tag_name().name() == "listelem");
|
||||||
|
|
||||||
|
let mut c = n.children();
|
||||||
|
|
||||||
|
let symbol = Symbol::parse(c.next().unwrap());
|
||||||
|
|
||||||
|
let text = c.next().unwrap().text().unwrap().into();
|
||||||
|
|
||||||
|
assert_eq!(c.next(), None);
|
||||||
|
|
||||||
|
Self { symbol, text }
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
#[derive(Debug, PartialEq)]
|
||||||
|
pub(crate) struct Ziffernliste {
|
||||||
|
ebene: String,
|
||||||
|
listelems: Vec<Listelem>,
|
||||||
|
}
|
||||||
|
impl Ziffernliste {
|
||||||
|
pub(crate) fn parse(n: Node) -> Self {
|
||||||
|
assert!(n.tag_name().name() == "ziffernliste");
|
||||||
|
|
||||||
|
let ebene = n.attribute("ebene").unwrap().into();
|
||||||
|
let mut c = n.children().peekable();
|
||||||
|
|
||||||
|
let mut listelems = Vec::new();
|
||||||
|
loop {
|
||||||
|
match c.peek() {
|
||||||
|
Some(child) => {
|
||||||
|
if Listelem::test(child) {
|
||||||
|
listelems.push(Listelem::parse(c.next().unwrap()));
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
None => break,
|
||||||
|
}
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
|
||||||
|
assert_eq!(c.next(), None);
|
||||||
|
|
||||||
|
Self { ebene, listelems }
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
#[derive(Debug, PartialEq)]
|
||||||
|
pub(crate) struct Liste {
|
||||||
|
ziffernliste: Ziffernliste,
|
||||||
|
}
|
||||||
|
impl Liste {
|
||||||
|
pub(crate) fn test(n: &Node) -> bool {
|
||||||
|
n.tag_name().name() == "liste"
|
||||||
|
&& n.children().count() == 1
|
||||||
|
&& n.children().next().unwrap().tag_name().name() == "ziffernliste"
|
||||||
|
}
|
||||||
|
|
||||||
|
pub(crate) fn parse(n: Node) -> Self {
|
||||||
|
assert!(Self::test(&n));
|
||||||
|
|
||||||
|
let mut c = n.children();
|
||||||
|
|
||||||
|
let ziffernliste = Ziffernliste::parse(c.next().unwrap());
|
||||||
|
|
||||||
|
assert_eq!(c.next(), None);
|
||||||
|
|
||||||
|
Self { ziffernliste }
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
#[derive(Debug, PartialEq)]
|
#[derive(Debug, PartialEq)]
|
||||||
pub(crate) struct AbsatzAbs {
|
pub(crate) struct AbsatzAbs {
|
||||||
gldsym: Option<String>,
|
gldsym: Option<String>,
|
||||||
|
Loading…
Reference in New Issue
Block a user