start parsing urhg
This commit is contained in:
parent
816b234112
commit
f462936790
@ -38,6 +38,7 @@ impl From<roxmltree::Error> for Error {
|
||||
}
|
||||
|
||||
// Program entry point: calls `law::parse` with a hard-coded numeric id and
// unwraps the result, so any error returned by the parser panics.
// NOTE(review): this is a diff rendering (hunk `-38,6 +38,7`), so lines from
// before and after the commit are interleaved. The first `law::parse` call and
// the first commented-out `par::parse` line appear to be the removed lines; the
// remaining three statements appear to be their replacements.
fn main() {
|
||||
law::parse(10001899).unwrap();
|
||||
//par::parse("https://www.ris.bka.gv.at/Dokumente/Bundesnormen/NOR12025172/NOR12025172.xml");
|
||||
// Previous target (labelled TEG), kept commented out.
//law::parse(10001899).unwrap(); //TEG
|
||||
// Current target (labelled UrhG), matching the commit title.
law::parse(10001848).unwrap(); //UrhG
|
||||
//par::parse("https://www.ris.bka.gv.at/Dokumente/Bundesnormen/NOR12025172/NOR12025172.xml");
|
||||
}
|
||||
|
@ -7,6 +7,7 @@ fn fetch_page(url: &str) -> Result<String, Error> {
|
||||
}
|
||||
|
||||
pub(crate) fn parse(url: &str) -> Result<(), Error> {
|
||||
println!("{url}");
|
||||
let xml = fetch_page(url)?;
|
||||
let risdok = Risdok::from_str(&xml)?;
|
||||
|
||||
|
@ -86,7 +86,8 @@ impl Nutzdaten {
|
||||
|
||||
/// One parsed section: headings followed by a list of `AbsatzAbs` entries.
///
/// NOTE(review): diff rendering (hunk `-86,7 +86,8`). `ueberschriftPara`
/// appears twice because the old field (`Option<UeberschriftPara>`) and its
/// replacement (`Option<Ueberschrift>`) are both shown; only one exists in
/// either version of the file.
#[derive(Debug, PartialEq)]
|
||||
pub(crate) struct Abschnitt {
|
||||
// Old field type (pre-commit).
ueberschriftPara: Option<UeberschriftPara>,
|
||||
// Headings collected by the parser for `typ` values "g1", "g2" and "g1min".
ueberschrifts: Vec<Ueberschrift>,
|
||||
// Optional heading with `typ` "para", now stored as the generic `Ueberschrift`.
ueberschriftPara: Option<Ueberschrift>,
|
||||
absatze: Vec<AbsatzAbs>,
|
||||
}
|
||||
impl Abschnitt {
|
||||
@ -104,7 +105,7 @@ impl Abschnitt {
|
||||
loop {
|
||||
match c.peek() {
|
||||
Some(child) => {
|
||||
if UeberschriftTitle::test(child) {
|
||||
if Ueberschrift::test(child, "titel") {
|
||||
c.next();
|
||||
continue;
|
||||
}
|
||||
@ -118,10 +119,28 @@ impl Abschnitt {
|
||||
}
|
||||
}
|
||||
|
||||
let mut ueberschrifts = Vec::new();
|
||||
loop {
|
||||
match &c.peek() {
|
||||
Some(child) => {
|
||||
if Ueberschrift::test(&child, "g1") {
|
||||
ueberschrifts.push(Ueberschrift::parse(c.next().unwrap(), "g1"));
|
||||
} else if Ueberschrift::test(&child, "g2") {
|
||||
ueberschrifts.push(Ueberschrift::parse(c.next().unwrap(), "g2"));
|
||||
} else if Ueberschrift::test(&child, "g1min") {
|
||||
ueberschrifts.push(Ueberschrift::parse(c.next().unwrap(), "g1min"));
|
||||
} else {
|
||||
break;
|
||||
}
|
||||
}
|
||||
None => break,
|
||||
}
|
||||
}
|
||||
|
||||
let mut ueberschriftPara = None;
|
||||
if let Some(child) = c.peek() {
|
||||
if UeberschriftPara::test(child) {
|
||||
ueberschriftPara = Some(UeberschriftPara::parse(c.next().unwrap()))
|
||||
if Ueberschrift::test(child, "para") {
|
||||
ueberschriftPara = Some(Ueberschrift::parse(c.next().unwrap(), "para"))
|
||||
}
|
||||
}
|
||||
|
||||
@ -143,7 +162,7 @@ impl Abschnitt {
|
||||
loop {
|
||||
match c.peek() {
|
||||
Some(child) => {
|
||||
if UeberschriftTitle::test(child) {
|
||||
if Ueberschrift::test(child, "titel") {
|
||||
c.next();
|
||||
continue;
|
||||
}
|
||||
@ -162,6 +181,7 @@ impl Abschnitt {
|
||||
Self {
|
||||
ueberschriftPara,
|
||||
absatze,
|
||||
ueberschrifts,
|
||||
}
|
||||
}
|
||||
}
|
||||
@ -239,40 +259,27 @@ impl Absatz {
|
||||
}
|
||||
|
||||
/// A heading taken from an `ueberschrift` element.
///
/// NOTE(review): diff rendering. The `UeberschriftPara` header is the old name
/// and `Ueberschrift` is the new one; `typ` is the field added alongside the
/// rename.
#[derive(Debug, PartialEq)]
|
||||
pub(crate) struct UeberschriftPara {
|
||||
pub(crate) struct Ueberschrift {
|
||||
// Value of the element's `typ` attribute (e.g. "para", "g1", "g2", "g1min").
typ: String,
|
||||
// Text content of the element.
content: String,
|
||||
}
|
||||
// NOTE(review): diff rendering. Each old line (hard-coded "para") is followed by
// its replacement, which takes the expected `typ` as a parameter. The old `test`
// body is shown without its closing brace because that brace is shared context.
impl UeberschriftPara {
|
||||
pub(crate) fn test(n: &Node) -> bool {
|
||||
n.tag_name().name() == "ueberschrift" && n.attribute("typ").unwrap() == "para"
|
||||
impl Ueberschrift {
|
||||
/// Returns true when `n` is an `ueberschrift` element whose `typ` attribute
/// equals `typ`.
///
/// # Panics
/// Panics if `n` is an `ueberschrift` element without a `typ` attribute
/// (the `unwrap` is only reached when the tag name matches).
fn test(n: &Node, typ: &str) -> bool {
|
||||
n.tag_name().name() == "ueberschrift" && n.attribute("typ").unwrap() == typ
|
||||
}
|
||||
|
||||
pub(crate) fn parse(n: Node) -> Self {
|
||||
/// Builds an `Ueberschrift` from `n`, storing `typ` and the node's text.
///
/// # Panics
/// Panics if `n` is not an `ueberschrift` element, if its `typ` attribute is
/// missing or differs from `typ`, or if `n.text()` yields no text.
pub(crate) fn parse(n: Node, typ: &str) -> Self {
|
||||
assert!(n.tag_name().name() == "ueberschrift");
|
||||
|
||||
assert_eq!(n.attribute("typ").unwrap(), "para");
|
||||
assert_eq!(n.attribute("typ").unwrap(), typ);
|
||||
|
||||
Self {
|
||||
content: n.text().unwrap().into(),
|
||||
typ: typ.into(),
|
||||
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// Marker type for an `ueberschrift` element with `typ` "titel"; it carries no
/// data.
///
/// NOTE(review): the hunk's line counts (`-239,40 +259,27`) indicate this whole
/// block is removed by the commit; callers now use
/// `Ueberschrift::test(child, "titel")` instead.
#[derive(Debug, PartialEq)]
|
||||
pub(crate) struct UeberschriftTitle;
|
||||
impl UeberschriftTitle {
|
||||
/// Returns true when `n` is an `ueberschrift` element whose `typ` attribute is
/// "titel". Panics if such an element has no `typ` attribute.
fn test(n: &Node) -> bool {
|
||||
n.tag_name().name() == "ueberschrift" && n.attribute("typ").unwrap() == "titel"
|
||||
}
|
||||
|
||||
/// Validates that `n` is a "titel" heading and returns the empty marker; the
/// element's content is not read. Panics if either assertion fails.
pub(crate) fn parse(n: Node) -> Self {
|
||||
assert!(n.tag_name().name() == "ueberschrift");
|
||||
|
||||
assert_eq!(n.attribute("typ").unwrap(), "titel");
|
||||
//TODO parse if necessary
|
||||
Self {}
|
||||
}
|
||||
}
|
||||
|
||||
#[derive(Debug, PartialEq)]
|
||||
pub(crate) struct Kzinhalt;
|
||||
impl Kzinhalt {
|
||||
|
Loading…
Reference in New Issue
Block a user