This commit is contained in:
philipp 2023-11-04 13:49:59 +01:00
parent 023d8cf073
commit 58dd701c95
5 changed files with 68 additions and 40 deletions

View File

@ -1,10 +1,12 @@
struct Law {
use crate::overview;
pub(crate) struct Law {
name: String, //ABGB, UrhG
section: Vec<Section>, // § 1, § 2, ...
}
impl Law {
fn new(name: &str) -> Self {
pub(crate) fn new(name: &str) -> Self {
Self {
name: name.into(),
section: Vec::new(),
@ -13,17 +15,20 @@ impl Law {
}
#[derive(Debug, PartialEq)]
struct LawBuilder {
pub(crate) struct LawBuilder {
name: String, //ABGB, UrhG
classifiers: Vec<Classifier>,
cur_classifier_index: Option<usize>,
}
impl LawBuilder {
fn new(name: &str) -> Self {
pub(crate) fn new(name: &str) {
//TODO: return Law (not LawBuilder)
let mut classifiers = Vec::new();
let mut law_id = None;
if name == "UrhG" {
law_id = Some(10001848);
let hauptstueck = Classifier::new("Hauptstück");
classifiers.push(hauptstueck.clone());
@ -32,14 +37,16 @@ impl LawBuilder {
classifiers.push(abschnitt);
}
Self {
let mut builder = Self {
name: name.into(),
classifiers,
cur_classifier_index: None,
}
};
overview::parse(law_id.unwrap(), &mut builder);
}
fn new_header(&mut self, name: &str) {
pub(crate) fn new_header(&mut self, name: &str) {
let classifier_index = self
.classifiers
.iter()
@ -53,7 +60,13 @@ impl LawBuilder {
}
}
fn new_par(&mut self, par: Content) {
/// Sets the description on the classifier at `cur_classifier_index`.
///
/// Does nothing when `cur_classifier_index` is `None`.
pub(crate) fn new_desc(&mut self, desc: &str) {
    let Some(current) = self.cur_classifier_index else {
        return;
    };
    self.classifiers[current].set_desc(desc);
}
pub(crate) fn new_par(&mut self, par: Content) {
if let Some(index) = self.cur_classifier_index {
self.classifiers[index].add_par(par);
} else {
@ -62,14 +75,14 @@ impl LawBuilder {
}
}
struct Section {
pub(crate) struct Section {
symb: String, // §"1", §"2", ...
content: Content,
header: Option<Header>,
}
#[derive(Clone)]
struct Header {
pub(crate) struct Header {
classifier: Classifier, // Hauptstück, Theil, Abschnitt, ol
name: String, // 1. Hauptstück, 3. Theil, 7. Abschnitt, li
parent: Option<Box<Header>>,
@ -90,8 +103,9 @@ impl Header {
}
#[derive(Clone, Debug, PartialEq)]
struct ClassifierInstance {
pub(crate) struct ClassifierInstance {
name: String,
desc: Option<String>,
content: Vec<Content>,
}
@ -99,16 +113,22 @@ impl ClassifierInstance {
/// Creates an instance called `name` with no description and no content.
fn new(name: &str) -> Self {
    let name = String::from(name);
    Self {
        name,
        desc: None,
        content: Vec::new(),
    }
}
/// Stores an owned copy of `desc` as this instance's description,
/// overwriting whatever was stored before.
fn set_desc(&mut self, desc: &str) {
    let description = String::from(desc);
    self.desc = Some(description);
}
/// Appends `content` to the end of this instance's `content` list.
fn add_par(&mut self, content: Content) {
self.content.push(content);
}
}
#[derive(Clone, Debug, PartialEq)]
struct Classifier {
pub(crate) struct Classifier {
name: String, // Hauptstück, Theil, Abschnitt, ol
parent: Option<Box<Classifier>>,
instances: Vec<ClassifierInstance>,
@ -138,10 +158,13 @@ impl Classifier {
/// Forwards `content` to the last entry of `instances`.
///
/// # Panics
///
/// Panics if `instances` is empty.
fn add_par(&mut self, content: Content) {
    let latest = self.instances.last_mut().unwrap();
    latest.add_par(content);
}
/// Sets the description on the last entry of `instances`.
///
/// # Panics
///
/// Panics if `instances` is empty.
fn set_desc(&mut self, desc: &str) {
    let latest = self.instances.last_mut().unwrap();
    latest.set_desc(desc);
}
}
#[derive(Clone, Debug, PartialEq)]
enum Content {
pub(crate) enum Content {
Text(String), //This is my direct law text
Item((String, Box<Content>)), //(1) This is general law. (2) This is more specific law
List(Vec<Box<Content>>), //1. my first item
@ -155,8 +178,8 @@ mod tests {
fn test() {
let mut builder = LawBuilder::new("UrhG");
builder.new_header("1. Hauptstück");
builder.new_header("2. Abschnitt");
builder.new_header("1. Hauptstück", None);
builder.new_header("2. Abschnitt", None);
builder.new_par(Content::Text("Mein erster Paragraph".into()));
@ -168,6 +191,7 @@ mod tests {
parent: None,
instances: vec![ClassifierInstance {
name: "1. Hauptstück".into(),
desc: None,
content: vec![],
}],
},
@ -180,6 +204,7 @@ mod tests {
})),
instances: vec![ClassifierInstance {
name: "2. Abschnitt".into(),
desc: None,
content: vec![Content::Text("Mein erster Paragraph".into())],
}],
},

View File

@ -1,5 +1,7 @@
use std::io;
use law::LawBuilder;
mod law;
mod overview;
mod par;
@ -39,7 +41,6 @@ impl From<roxmltree::Error> for Error {
}
fn main() {
let mut law = LawBuilder::new("UrhG");
//overview::parse(10001899).unwrap(); //TEG
overview::parse(10001848).unwrap(); //UrhG
//par::parse("https://www.ris.bka.gv.at/Dokumente/Bundesnormen/NOR12025172/NOR12025172.xml");
}

View File

@ -4,7 +4,7 @@ mod parser;
use serde::Deserialize;
use time::{format_description, OffsetDateTime};
use crate::{overview::parser::OgdSearchResult, Error};
use crate::{law::LawBuilder, overview::parser::OgdSearchResult, Error};
/// Returns the current date in YYYY-MM-DD format. Needed for RIS API query to get current version of the overview.
fn current_date() -> String {
@ -37,13 +37,14 @@ pub(crate) struct Wrapper {
ogd_search_result: OgdSearchResult,
}
pub(crate) fn parse(overview_id: usize) -> Result<(), Error> {
pub(crate) fn parse(overview_id: usize, builder: &mut LawBuilder) -> Result<(), Error> {
let json = fetch_page(overview_id)?;
let wrapper: Wrapper = serde_json::from_str(&json)?;
for par in wrapper.ogd_search_result.get_par() {
crate::par::parse(&par).unwrap();
for par in wrapper.ogd_search_result.get_par().into_iter().skip(1) {
crate::par::parse(&par, builder).unwrap();
break;
}
Ok(())

View File

@ -1,17 +1,18 @@
mod parser;
use crate::{par::parser::Risdok, Error};
use crate::{law::LawBuilder, par::parser::Risdok, Error};
/// Requests `url` with `ureq` and returns the response body as a string.
///
/// # Errors
///
/// Returns an [`Error`] if the request fails or the body cannot be read
/// into a string.
fn fetch_page(url: &str) -> Result<String, Error> {
    let response = ureq::get(url).call()?;
    let body = response.into_string()?;
    Ok(body)
}
pub(crate) fn parse(url: &str) -> Result<(), Error> {
pub(crate) fn parse(url: &str, builder: &mut LawBuilder) -> Result<(), Error> {
println!("{url}");
let xml = fetch_page(url)?;
let risdok = Risdok::from_str(&xml)?;
let risdok = Risdok::from_str(&xml, builder)?;
println!("{risdok}");
println!("{builder:#?}");
//println!("{risdok}");
Ok(())
}

View File

@ -2,7 +2,7 @@ use std::fmt::Display;
use roxmltree::Node;
use crate::Error;
use crate::{law::LawBuilder, Error};
#[derive(Debug, PartialEq)]
pub(crate) struct Risdok {
@ -12,14 +12,14 @@ pub(crate) struct Risdok {
}
impl Risdok {
pub(crate) fn parse(n: Node) -> Self {
pub(crate) fn parse(n: Node, builder: &mut LawBuilder) -> Self {
assert!(n.tag_name().name() == "risdok");
let mut c = n.children();
let ret = Self {
metadaten: Metadaten::parse(c.next().unwrap()),
nutzdaten: Nutzdaten::parse(c.next().unwrap()),
nutzdaten: Nutzdaten::parse(c.next().unwrap(), builder),
layoutdaten: Layoutdaten::parse(c.next().unwrap()),
};
@ -28,11 +28,11 @@ impl Risdok {
ret
}
pub(crate) fn from_str(xml: &str) -> Result<Self, Error> {
pub(crate) fn from_str(xml: &str, builder: &mut LawBuilder) -> Result<Self, Error> {
let doc = roxmltree::Document::parse(&xml)?;
let root = doc.root();
assert_eq!(root.children().into_iter().count(), 1);
Ok(Self::parse(root.children().next().unwrap()))
Ok(Self::parse(root.children().next().unwrap(), builder))
}
}
@ -70,12 +70,12 @@ pub(crate) struct Nutzdaten {
abschnitt: Abschnitt,
}
impl Nutzdaten {
pub(crate) fn parse(n: Node) -> Self {
pub(crate) fn parse(n: Node, builder: &mut LawBuilder) -> Self {
assert!(n.tag_name().name() == "nutzdaten");
let mut c = n.children();
let ret = Self {
abschnitt: Abschnitt::parse(c.next().unwrap()),
abschnitt: Abschnitt::parse(c.next().unwrap(), builder),
};
assert_eq!(c.next(), None);
@ -86,12 +86,11 @@ impl Nutzdaten {
#[derive(Debug, PartialEq)]
pub(crate) struct Abschnitt {
ueberschrifts: Vec<Ueberschrift>,
ueberschriftPara: Option<Ueberschrift>,
absatze: Vec<AbsatzAbs>,
}
impl Abschnitt {
pub(crate) fn parse(n: Node) -> Self {
pub(crate) fn parse(n: Node, builder: &mut LawBuilder) -> Self {
assert!(n.tag_name().name() == "abschnitt");
let mut c = n.children().peekable();
@ -119,16 +118,18 @@ impl Abschnitt {
}
}
let mut ueberschrifts = Vec::new();
loop {
match &c.peek() {
match c.peek() {
Some(child) => {
if Ueberschrift::test(&child, "g1") {
ueberschrifts.push(Ueberschrift::parse(c.next().unwrap(), "g1"));
let ueberschrift = Ueberschrift::parse(c.next().unwrap(), "g1");
builder.new_header(&ueberschrift.content);
} else if Ueberschrift::test(&child, "g2") {
ueberschrifts.push(Ueberschrift::parse(c.next().unwrap(), "g2"));
let ueberschrift = Ueberschrift::parse(c.next().unwrap(), "g2");
builder.new_desc(&ueberschrift.content);
} else if Ueberschrift::test(&child, "g1min") {
ueberschrifts.push(Ueberschrift::parse(c.next().unwrap(), "g1min"));
let ueberschrift = Ueberschrift::parse(c.next().unwrap(), "g1min");
builder.new_header(&ueberschrift.content);
} else {
break;
}
@ -181,7 +182,6 @@ impl Abschnitt {
Self {
ueberschriftPara,
absatze,
ueberschrifts,
}
}
}