risp/src/overview/parser.rs
2023-11-04 11:43:35 +01:00

243 lines
5.9 KiB
Rust

use std::collections::HashMap;
use serde::Deserialize;
fn deserialize_string_to_usize<'de, D>(deserializer: D) -> Result<usize, D::Error>
where
D: serde::Deserializer<'de>,
{
let s = String::deserialize(deserializer)?;
s.parse().map_err(serde::de::Error::custom)
}
/// Search result object as deserialized by serde.
///
/// With the PascalCase renaming, the single field is read from the `OgdDocumentResults` key.
#[derive(Deserialize)]
#[serde(rename_all = "PascalCase")]
pub(crate) struct OgdSearchResult {
/// Paging information and the document references contained in this result.
ogd_document_results: OgdDocumentResults,
}
impl OgdSearchResult {
    /// Reports whether further result pages exist beyond the one held in `self`.
    ///
    /// Compares `page_number * page_size` with the hit count stored in `Hits::text`.
    ///
    /// NOTE(review): assumes the `#text` value of `Hits` is the total number of hits and that
    /// `@pageNumber` is 1-based — confirm against the API that produces this JSON.
    fn has_next_page(&self) -> bool {
        let hits = &self.ogd_document_results.hits;
        // Saturating so absurdly large paging values cannot overflow and panic/wrap.
        hits.page_number.saturating_mul(hits.page_size) < hits.text
    }

    /// Collects the URLs of every content entry whose data type is exactly `"Xml"`.
    ///
    /// Walks all document references in result order and, within each, all content URL
    /// entries in order. Returns an empty vector when nothing matches.
    pub(crate) fn get_par(&self) -> Vec<String> {
        self.ogd_document_results
            .ogd_document_reference
            .iter()
            .flat_map(|doc_ref| {
                doc_ref
                    .data
                    .document_list
                    .content_reference
                    .urls
                    .content_url
                    .iter()
            })
            .filter(|item| item.data_type == "Xml")
            .map(|item| item.url.clone())
            .collect()
    }
}
/// Body of a search result: paging information plus the list of document references.
///
/// Fields are read from the PascalCase keys `Hits` and `OgdDocumentReference`.
#[derive(Deserialize)]
#[serde(rename_all = "PascalCase")]
pub(crate) struct OgdDocumentResults {
/// Paging information for this result.
hits: Hits,
/// The document references contained in this result.
ogd_document_reference: Vec<OgdDocumentReference>,
}
/// Paging information of a search result.
///
/// All three values arrive as strings and are converted to `usize` by
/// `deserialize_string_to_usize`.
#[derive(Deserialize)]
pub(crate) struct Hits {
/// Value of the `@pageNumber` key.
#[serde(
rename = "@pageNumber",
deserialize_with = "deserialize_string_to_usize"
)]
page_number: usize,
/// Value of the `@pageSize` key.
#[serde(rename = "@pageSize", deserialize_with = "deserialize_string_to_usize")]
page_size: usize,
/// Value of the `#text` key.
/// NOTE(review): presumably the total number of hits — confirm against the API.
#[serde(rename = "#text", deserialize_with = "deserialize_string_to_usize")]
text: usize,
}
/// A single entry of the document reference list; wraps the `Data` key.
#[derive(Deserialize)]
#[serde(rename_all = "PascalCase")]
pub(crate) struct OgdDocumentReference {
/// Metadata and document list of the referenced document.
data: Data,
}
/// Payload of a document reference, mapping the German keys to English field names.
#[derive(Deserialize)]
pub(crate) struct Data {
/// Read from the `Metadaten` key.
#[serde(rename = "Metadaten")]
metadata: Metadata,
/// Read from the `Dokumentliste` key.
#[serde(rename = "Dokumentliste")]
document_list: DocumentList,
}
/// Metadata of a document, split into three sections.
#[derive(Deserialize)]
pub(crate) struct Metadata {
/// Read from the `Technisch` ("technical") key.
#[serde(rename = "Technisch")]
technical: TechnicalMetadata,
/// Read from the `Allgemein` ("general") key.
#[serde(rename = "Allgemein")]
general: GeneralMetadata,
/// Read from the `Bundesrecht` ("federal law") key.
#[serde(rename = "Bundesrecht")]
fed: FedMetadata,
}
/// Technical metadata section of a document.
///
/// Fields without an explicit rename are read from their PascalCase key
/// (`Organ`, `ImportTimestamp`).
#[derive(Deserialize)]
#[serde(rename_all = "PascalCase")]
pub(crate) struct TechnicalMetadata {
/// Read from the `ID` key.
#[serde(rename = "ID")]
id: String,
/// Read from the `Applikation` key.
#[serde(rename = "Applikation")]
application: String,
/// Read from the `Organ` key.
organ: String,
/// Read from the `ImportTimestamp` key.
import_timestamp: ImportTimestamp,
}
/// Import timestamp object; only its two attribute-style keys are modelled, both as strings.
#[derive(Deserialize)]
pub(crate) struct ImportTimestamp {
/// Read from the `@xsi:nil` key; kept as the raw string for now.
#[serde(rename = "@xsi:nil")]
xsi_nil: String, //TODO: switch to bool
/// Read from the `@xmlns:xsi` key.
#[serde(rename = "@xmlns:xsi")]
xmlns_xsi: String,
}
/// General metadata section of a document.
#[derive(Deserialize)]
pub(crate) struct GeneralMetadata {
/// Read from the `Geaendert` ("changed") key; kept as the raw string for now.
#[serde(rename = "Geaendert")]
changed: String, //TODO: switch to YYYY-MM-DD string
/// Read from the `DokumentUrl` key.
#[serde(rename = "DokumentUrl")]
document_url: String,
}
/// Federal-law metadata section of a document.
///
/// Fields without an explicit rename are read from their PascalCase key (`Eli`, `BrKons`).
#[derive(Deserialize)]
#[serde(rename_all = "PascalCase")]
pub(crate) struct FedMetadata {
/// Read from the `Kurztitel` ("short title") key.
#[serde(rename = "Kurztitel")]
short_title: String,
/// Read from the `Titel` ("title") key; optional.
#[serde(rename = "Titel")]
title: Option<String>,
/// Read from the `Eli` key.
eli: String,
/// Read from the `BrKons` key.
br_kons: BrKons,
}
/// Contents of the `BrKons` object inside the federal-law metadata.
///
/// Most fields keep their German names and are read from the PascalCase form of the field
/// name (e.g. `artikel_paragraph_anlage` from `ArtikelParagraphAnlage`); the remaining ones
/// carry an explicit rename. `Option` fields may be absent in the input.
#[derive(Deserialize)]
#[serde(rename_all = "PascalCase")]
pub(crate) struct BrKons {
/// Read from the `Kundmachungsorgan` key.
kundmachungsorgan: String,
/// Read from the `Typ` key.
typ: String,
/// Read from the `Dokumenttyp` ("document type") key.
#[serde(rename = "Dokumenttyp")]
documenttype: String,
/// Read from the `ArtikelParagraphAnlage` key.
artikel_paragraph_anlage: String,
/// Read from the `Paragraphnummer` key; optional.
paragraphnummer: Option<String>,
/// Read from the `StammnormPublikationsorgan` key.
stammnorm_publikationsorgan: String,
/// Read from the `StammnormBgblnummer` key.
stammnorm_bgblnummer: String,
/// Read from the `Inkrafttretensdatum` key; kept as the raw string for now.
inkrafttretensdatum: String, //TODO: switch to date
/// Read from the `Indizes` key as a string-to-string map.
#[serde(rename = "Indizes")]
indices: HashMap<String, String>, //e.g. "item": "22/04 Sonstiges Zivilprozess, Außerstreitiges Verfahren"
/// Read from the `Aenderung` ("change") key.
#[serde(rename = "Aenderung")]
change: String,
/// Read from the `Anmerkung` key; optional.
anmerkung: Option<String>,
/// Read from the `Schlagworte` key; optional.
schlagworte: Option<String>,
/// Read from the `Gesetzesnummer` key.
gesetzesnummer: String,
/// Read from the `AlteDokumentnummer` key; optional.
alte_dokumentnummer: Option<String>,
/// Read from the `GesamteRechtsvorschriftUrl` key.
#[serde(rename = "GesamteRechtsvorschriftUrl")]
full_url: String,
}
/// Document list of a document reference; wraps the `ContentReference` key.
#[derive(Deserialize)]
#[serde(rename_all = "PascalCase")]
pub(crate) struct DocumentList {
/// The single content reference of this document list.
content_reference: ContentReference,
}
/// Content reference of a document, read from the PascalCase keys `ContentType`, `Name`
/// and `Urls`.
#[derive(Deserialize)]
#[serde(rename_all = "PascalCase")]
pub(crate) struct ContentReference {
/// Read from the `ContentType` key.
content_type: String,
/// Read from the `Name` key.
name: String,
/// Read from the `Urls` key; holds the list of content URL entries.
urls: ContentUrl,
}
/// Container for the content URL entries; the list is read from the `ContentUrl` key.
#[derive(Deserialize)]
#[serde(rename_all = "PascalCase")]
pub(crate) struct ContentUrl {
/// One entry per available content URL.
content_url: Vec<ContentUrlItem>,
}
/// A single content URL together with its data type.
#[derive(Deserialize)]
#[serde(rename_all = "PascalCase")]
pub(crate) struct ContentUrlItem {
/// Read from the `DataType` key; `OgdSearchResult::get_par` selects entries equal to `"Xml"`.
data_type: String,
/// Read from the `Url` key.
url: String,
}
#[cfg(test)]
mod tests {
    use std::fs;

    use super::*;

    /// Mirrors the outermost JSON object, whose `OgdSearchResult` key holds the payload.
    #[derive(Deserialize)]
    #[serde(rename_all = "PascalCase")]
    pub(crate) struct Wrapper {
        ogd_search_result: OgdSearchResult,
    }

    /// Reads the JSON fixture at `path` and asserts that it deserializes into a [`Wrapper`].
    ///
    /// # Panics
    ///
    /// Panics (failing the calling test) if the file cannot be read or if deserialization
    /// fails; the message names the fixture and includes the underlying error.
    fn assert_deserializes(path: &str) {
        let json = fs::read_to_string(path)
            .unwrap_or_else(|err| panic!("failed to read fixture {path}: {err}"));
        if let Err(err) = serde_json::from_str::<Wrapper>(&json) {
            panic!("failed to deserialize {path}: {err:#?}");
        }
    }

    #[test]
    fn deserialize_teg_success() {
        assert_deserializes("data/teg.json");
    }

    #[test]
    fn deserialize_abgb_success() {
        assert_deserializes("data/abgb.json");
    }

    #[test]
    fn deserialize_urhg_success() {
        assert_deserializes("data/urhg.json");
    }
}