This commit is contained in:
@ -701,6 +701,7 @@ impl std::fmt::Debug for Classifier {
|
||||
|
||||
#[derive(Clone, Debug, PartialEq, Serialize, Deserialize)]
|
||||
pub enum Content {
|
||||
Image(String),
|
||||
Text(String), //This is my direct law text
|
||||
List(Vec<Content>), //(1) This is general law. (2) This is more specific law
|
||||
Multi(Vec<Content>),
|
||||
@ -709,7 +710,7 @@ pub enum Content {
|
||||
impl Display for Content {
|
||||
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
|
||||
match self {
|
||||
Self::Text(a) => f.write_str(&format!("{a}\n")),
|
||||
Self::Text(a) | Self::Image(a) => f.write_str(&format!("{a}\n")),
|
||||
Self::List(a) | Self::Multi(a) => {
|
||||
let mut ret = String::new();
|
||||
for aa in a {
|
||||
|
@ -96,6 +96,7 @@ struct Overview {
|
||||
|
||||
fn parse_from_str(content: &str, skip_first: bool) -> Result<(bool, Vec<String>), Error> {
|
||||
let mut ret = Vec::new();
|
||||
|
||||
let wrapper: Overview = serde_json::from_str(content)?;
|
||||
|
||||
let iter = wrapper.ogd_search_result.get_par().into_iter();
|
||||
|
@ -16,7 +16,7 @@
|
||||
|
||||
use std::collections::HashMap;
|
||||
|
||||
use serde::Deserialize;
|
||||
use serde::{Deserialize, Deserializer};
|
||||
|
||||
fn deserialize_string_to_usize<'de, D>(deserializer: D) -> Result<usize, D::Error>
|
||||
where
|
||||
@ -48,15 +48,11 @@ impl OgdSearchResult {
|
||||
pub(crate) fn get_par(&self) -> Vec<String> {
|
||||
let mut ret = Vec::new();
|
||||
for doc_ref in &self.ogd_document_results.ogd_document_reference {
|
||||
for urls in &doc_ref
|
||||
.data
|
||||
.document_list
|
||||
.content_reference
|
||||
.urls
|
||||
.content_url
|
||||
{
|
||||
if urls.data_type == "Xml" {
|
||||
ret.push(urls.url.clone());
|
||||
for con_refs in &doc_ref.data.document_list.content_reference {
|
||||
for urls in &con_refs.urls.content_url {
|
||||
if urls.data_type == "Xml" {
|
||||
ret.push(urls.url.clone());
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
@ -214,7 +210,26 @@ pub(crate) struct BrKons {
|
||||
#[allow(dead_code)]
|
||||
#[serde(rename_all = "PascalCase")]
|
||||
pub(crate) struct DocumentList {
|
||||
content_reference: ContentReference,
|
||||
#[serde(deserialize_with = "deserialize_content_reference")]
|
||||
content_reference: Vec<ContentReference>,
|
||||
}
|
||||
|
||||
fn deserialize_content_reference<'de, D>(deserializer: D) -> Result<Vec<ContentReference>, D::Error>
|
||||
where
|
||||
D: Deserializer<'de>,
|
||||
{
|
||||
#[derive(Deserialize)]
|
||||
#[serde(untagged)]
|
||||
enum ContentReferenceField {
|
||||
Single(ContentReference),
|
||||
Multiple(Vec<ContentReference>),
|
||||
}
|
||||
|
||||
let field = ContentReferenceField::deserialize(deserializer)?;
|
||||
Ok(match field {
|
||||
ContentReferenceField::Single(item) => vec![item],
|
||||
ContentReferenceField::Multiple(items) => items,
|
||||
})
|
||||
}
|
||||
|
||||
#[derive(Deserialize)]
|
||||
@ -230,9 +245,28 @@ pub(crate) struct ContentReference {
|
||||
#[allow(dead_code)]
|
||||
#[serde(rename_all = "PascalCase")]
|
||||
pub(crate) struct ContentUrl {
|
||||
#[serde(deserialize_with = "deserialize_content_url")]
|
||||
content_url: Vec<ContentUrlItem>,
|
||||
}
|
||||
|
||||
fn deserialize_content_url<'de, D>(deserializer: D) -> Result<Vec<ContentUrlItem>, D::Error>
|
||||
where
|
||||
D: Deserializer<'de>,
|
||||
{
|
||||
#[derive(Deserialize)]
|
||||
#[serde(untagged)]
|
||||
enum ContentUrlField {
|
||||
Single(ContentUrlItem),
|
||||
Multiple(Vec<ContentUrlItem>),
|
||||
}
|
||||
|
||||
let field = ContentUrlField::deserialize(deserializer)?;
|
||||
Ok(match field {
|
||||
ContentUrlField::Single(item) => vec![item],
|
||||
ContentUrlField::Multiple(items) => items,
|
||||
})
|
||||
}
|
||||
|
||||
#[derive(Deserialize)]
|
||||
#[allow(dead_code)]
|
||||
#[serde(rename_all = "PascalCase")]
|
||||
|
@ -60,6 +60,10 @@ impl Absatz {
|
||||
// After an 'absatz' there can be a '<absatz typ="[satz|erltext]"' which should be part of the first absatz
|
||||
// (e.g. 1209 ABGB)
|
||||
content.push(Content::Text(Absatz::parse(c.next().unwrap()).content));
|
||||
} else if child.tag_name().name() == "absatz"
|
||||
&& child.attribute("typ") == Some("abbobj")
|
||||
{
|
||||
content.push(Self::parse_abbobj(c.next().unwrap()));
|
||||
} else {
|
||||
break;
|
||||
}
|
||||
@ -71,6 +75,39 @@ impl Absatz {
|
||||
}
|
||||
}
|
||||
|
||||
pub(crate) fn parse_abbobj(n: Node) -> Content {
|
||||
Expect::from(&n).tag("absatz").typ("abbobj");
|
||||
let mut ret = Vec::new();
|
||||
|
||||
let mut c = n.children().peekable();
|
||||
|
||||
// skip tab(s)
|
||||
while let Some(child) = c.peek() {
|
||||
if child.tag_name().name() == "tab" {
|
||||
c.next();
|
||||
continue;
|
||||
}
|
||||
|
||||
let binary = c.next().unwrap();
|
||||
|
||||
// TODO: this if should not be necessary...
|
||||
if binary.tag_name().name() != "binary" {
|
||||
continue;
|
||||
}
|
||||
|
||||
let mut c = binary.children();
|
||||
let src = c.next().unwrap();
|
||||
|
||||
ret.push(Content::Image(src.text().unwrap().into()))
|
||||
}
|
||||
|
||||
if ret.len() == 1 {
|
||||
ret[0].clone()
|
||||
} else {
|
||||
Content::Multi(ret)
|
||||
}
|
||||
}
|
||||
|
||||
pub(crate) fn parse(n: Node) -> Self {
|
||||
Expect::from(&n).tag("absatz");
|
||||
|
||||
|
@ -58,6 +58,8 @@ impl Abschnitt {
|
||||
if AbsatzAbs::test(child) {
|
||||
let (_, absatz) = Absatz::parse_full(&mut c);
|
||||
absatze.push(absatz);
|
||||
} else if Ueberschrift::test(child, "erll") {
|
||||
absatze.push(Ueberschrift::parse_full_erll(&mut c));
|
||||
} else {
|
||||
break;
|
||||
}
|
||||
|
@ -1,5 +1,6 @@
|
||||
// Copyright (C) 2024 Philipp Hofer
|
||||
//
|
||||
//
|
||||
// Licensed under the EUPL, Version 1.2 or - as soon they will be approved by
|
||||
// the European Commission - subsequent versions of the EUPL (the "Licence").
|
||||
// You may not use this work except in compliance with the Licence.
|
||||
@ -22,6 +23,7 @@ mod table;
|
||||
use std::{fmt::Display, iter::Peekable};
|
||||
|
||||
use abschnitt::Abschnitt;
|
||||
use liste::Liste;
|
||||
use roxmltree::{Children, Node};
|
||||
|
||||
use crate::{
|
||||
@ -418,6 +420,36 @@ impl Ueberschrift {
|
||||
typ: typ.into(),
|
||||
}
|
||||
}
|
||||
|
||||
fn parse_full_erll(n: &mut Peekable<Children>) -> Content {
|
||||
let mut ret = Vec::new();
|
||||
let mut curr = Vec::new();
|
||||
|
||||
// We need at least 1 erll
|
||||
curr.push(Content::Text(
|
||||
Self::parse(n.next().unwrap(), "erll").content,
|
||||
));
|
||||
|
||||
while let Some(child) = &mut n.peek() {
|
||||
if Absatz::test_with_typ(child, "abbobj") {
|
||||
curr.push(Absatz::parse_abbobj(n.next().unwrap()));
|
||||
} else if Liste::test(child) {
|
||||
curr.push(Content::List(Liste::parse_full(n).content));
|
||||
} else if Absatz::test_with_typ(child, "abs") {
|
||||
let (_, absatz) = Absatz::parse_full(n);
|
||||
curr.push(absatz);
|
||||
} else if Ueberschrift::test(child, "erll") {
|
||||
ret.push(Content::Multi(curr));
|
||||
curr = vec![Content::Text(
|
||||
Self::parse(n.next().unwrap(), "erll").content,
|
||||
)];
|
||||
} else {
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
Content::Multi(ret)
|
||||
}
|
||||
}
|
||||
|
||||
#[derive(Debug, PartialEq)]
|
||||
|
Reference in New Issue
Block a user