Start parsing UrhG

This commit is contained in:
philipp 2023-11-04 11:25:17 +01:00
parent 816b234112
commit f462936790
3 changed files with 38 additions and 29 deletions

View File

@ -38,6 +38,7 @@ impl From<roxmltree::Error> for Error {
}
// Entry point: calls `law::parse` with the hard-coded ids below and panics (via `unwrap`)
// if a call does not succeed.
// NOTE(review): this looks like a diff rendering without +/- markers — the first two body
// lines are presumably the pre-change version and the following three the post-change
// version; confirm which calls are meant to be active.
fn main() {
law::parse(10001899).unwrap();
//par::parse("https://www.ris.bka.gv.at/Dokumente/Bundesnormen/NOR12025172/NOR12025172.xml");
//law::parse(10001899).unwrap(); //TEG
law::parse(10001848).unwrap(); //UrhG
//par::parse("https://www.ris.bka.gv.at/Dokumente/Bundesnormen/NOR12025172/NOR12025172.xml");
}

View File

@ -7,6 +7,7 @@ fn fetch_page(url: &str) -> Result<String, Error> {
}
pub(crate) fn parse(url: &str) -> Result<(), Error> {
println!("{url}");
let xml = fetch_page(url)?;
let risdok = Risdok::from_str(&xml)?;

View File

@ -86,7 +86,8 @@ impl Nutzdaten {
// Parsed section, built from a sequence of child nodes by `impl Abschnitt`.
// NOTE(review): `ueberschriftPara` is listed twice below, which looks like the removed and
// the added line of a diff shown without +/- markers — confirm against the repository.
#[derive(Debug, PartialEq)]
pub(crate) struct Abschnitt {
// Presumably the pre-change field type (dedicated `UeberschriftPara` struct).
ueberschriftPara: Option<UeberschriftPara>,
// Headings of typ "g1", "g2" or "g1min", in the order they are read.
ueberschrifts: Vec<Ueberschrift>,
// Heading of typ "para", when the next child after the headings above is one.
ueberschriftPara: Option<Ueberschrift>,
// Parsed `AbsatzAbs` entries of this section.
absatze: Vec<AbsatzAbs>,
}
impl Abschnitt {
@ -104,7 +105,7 @@ impl Abschnitt {
loop {
match c.peek() {
Some(child) => {
if UeberschriftTitle::test(child) {
if Ueberschrift::test(child, "titel") {
c.next();
continue;
}
@ -118,10 +119,28 @@ impl Abschnitt {
}
}
let mut ueberschrifts = Vec::new();
loop {
match &c.peek() {
Some(child) => {
if Ueberschrift::test(&child, "g1") {
ueberschrifts.push(Ueberschrift::parse(c.next().unwrap(), "g1"));
} else if Ueberschrift::test(&child, "g2") {
ueberschrifts.push(Ueberschrift::parse(c.next().unwrap(), "g2"));
} else if Ueberschrift::test(&child, "g1min") {
ueberschrifts.push(Ueberschrift::parse(c.next().unwrap(), "g1min"));
} else {
break;
}
}
None => break,
}
}
let mut ueberschriftPara = None;
if let Some(child) = c.peek() {
if UeberschriftPara::test(child) {
ueberschriftPara = Some(UeberschriftPara::parse(c.next().unwrap()))
if Ueberschrift::test(child, "para") {
ueberschriftPara = Some(Ueberschrift::parse(c.next().unwrap(), "para"))
}
}
@ -143,7 +162,7 @@ impl Abschnitt {
loop {
match c.peek() {
Some(child) => {
if UeberschriftTitle::test(child) {
if Ueberschrift::test(child, "titel") {
c.next();
continue;
}
@ -162,6 +181,7 @@ impl Abschnitt {
Self {
ueberschriftPara,
absatze,
ueberschrifts,
}
}
}
@ -239,40 +259,27 @@ impl Absatz {
}
// Heading parsed from an `ueberschrift` node.
// NOTE(review): this span appears to interleave the removed `UeberschriftPara` lines with
// the added `Ueberschrift` lines of a diff (the +/- markers are missing), so it does not
// compile as shown — confirm against the repository before editing.
#[derive(Debug, PartialEq)]
// Presumably the old header: heading restricted to typ "para".
pub(crate) struct UeberschriftPara {
// Presumably the new header: one struct shared by every heading typ.
pub(crate) struct Ueberschrift {
// The `typ` value this heading was parsed with.
typ: String,
// Result of `n.text()` on the heading node.
content: String,
}
impl UeberschriftPara {
pub(crate) fn test(n: &Node) -> bool {
n.tag_name().name() == "ueberschrift" && n.attribute("typ").unwrap() == "para"
impl Ueberschrift {
// Returns true when `n` is an `ueberschrift` element whose `typ` attribute equals `typ`.
// Panics if an `ueberschrift` element has no `typ` attribute; other tag names
// short-circuit to false before the attribute is read.
fn test(n: &Node, typ: &str) -> bool {
n.tag_name().name() == "ueberschrift" && n.attribute("typ").unwrap() == typ
}
// Builds a heading from `n`, storing `typ` and the node text.
// Panics if `n` is not an `ueberschrift` element, if its `typ` attribute is missing or
// differs from `typ`, or if `n.text()` returns `None`.
pub(crate) fn parse(n: Node) -> Self {
pub(crate) fn parse(n: Node, typ: &str) -> Self {
assert!(n.tag_name().name() == "ueberschrift");
assert_eq!(n.attribute("typ").unwrap(), "para");
assert_eq!(n.attribute("typ").unwrap(), typ);
Self {
content: n.text().unwrap().into(),
typ: typ.into(),
}
}
}
/// Marker for an `ueberschrift` element whose `typ` attribute is `"titel"`.
///
/// The type carries no data; the heading content is not read.
#[derive(Debug, PartialEq)]
pub(crate) struct UeberschriftTitle;

impl UeberschriftTitle {
    /// Reports whether `n` is an `ueberschrift` element with `typ="titel"`.
    ///
    /// # Panics
    ///
    /// Panics if `n` is an `ueberschrift` element without a `typ` attribute.
    fn test(n: &Node) -> bool {
        // Other elements are rejected before the attribute is looked at.
        if n.tag_name().name() != "ueberschrift" {
            return false;
        }
        let typ = n.attribute("typ").unwrap();
        typ == "titel"
    }

    /// Validates that `n` is a title heading and returns the marker value.
    ///
    /// # Panics
    ///
    /// Panics if `n` is not an `ueberschrift` element, or if its `typ` attribute is
    /// missing or not `"titel"`.
    pub(crate) fn parse(n: Node) -> Self {
        assert!(n.tag_name().name() == "ueberschrift");
        assert_eq!(n.attribute("typ").unwrap(), "titel");
        // TODO: parse the heading content if it is ever needed.
        Self
    }
}
#[derive(Debug, PartialEq)]
pub(crate) struct Kzinhalt;
impl Kzinhalt {