finish ris law overview parser

This commit is contained in:
philipp 2023-11-04 00:05:38 +01:00
parent 2f9dbcc43c
commit 2348bddc42
5 changed files with 2359 additions and 88 deletions

2158
data/teg.json Normal file

File diff suppressed because it is too large Load Diff

View File

@ -1,59 +0,0 @@
use serde::Deserialize;
/// Pagination metadata of an RIS search response.
///
/// The upstream API encodes numeric values as JSON strings (e.g.
/// `"@pageNumber": "1"`), so every field is funneled through
/// `deserialize_string_to_usize`.
#[derive(Deserialize)]
pub(crate) struct Hits {
    // Page index reported by the service ("@pageNumber" key).
    #[serde(
        rename = "@pageNumber",
        deserialize_with = "deserialize_string_to_usize"
    )]
    page_number: usize,
    // Number of documents per page ("@pageSize" key).
    #[serde(rename = "@pageSize", deserialize_with = "deserialize_string_to_usize")]
    page_size: usize,
    // Mapped from the XML-style "#text" node; presumably the total hit
    // count — TODO confirm against the API.
    #[serde(rename = "#text", deserialize_with = "deserialize_string_to_usize")]
    text: usize,
}
fn deserialize_string_to_usize<'de, D>(deserializer: D) -> Result<usize, D::Error>
where
D: serde::Deserializer<'de>,
{
let s = String::deserialize(deserializer)?;
s.parse().map_err(serde::de::Error::custom)
}
#[cfg(test)]
mod tests {
    use super::*;

    /// Minimal envelope mirroring the `"hits"` key of the real payload.
    #[derive(Deserialize)]
    struct Wrapper {
        hits: Hits,
    }

    #[test]
    fn deserialize_hits_success() {
        // Numbers arrive as JSON strings, exactly as the API sends them.
        let json = "{\"hits\": {
\"@pageNumber\": \"1\",
\"@pageSize\": \"100\",
\"#text\": \"32\"}}";
        // `expect` replaces the old assert!(is_ok()) + unwrap() pair and
        // surfaces the serde error message directly when the fixture breaks.
        let wrapper: Wrapper =
            serde_json::from_str(json).expect("hits fixture should deserialize");
        assert_eq!(wrapper.hits.page_number, 1);
        assert_eq!(wrapper.hits.page_size, 100);
        assert_eq!(wrapper.hits.text, 32);
    }

    #[test]
    fn deserialize_hits_failure() {
        // Non-numeric strings must be rejected by deserialize_string_to_usize.
        let json = "{\"hits\": {
\"@pageNumber\": \"one\",
\"@pageSize\": \"one hundred\",
\"#text\": \"thirty-two\"}}";
        let wrapper: serde_json::Result<Wrapper> = serde_json::from_str(json);
        assert!(wrapper.is_err());
    }
}

View File

@ -1,11 +1,8 @@
mod hits; mod parser;
use serde::Deserialize;
use time::{format_description, OffsetDateTime}; use time::{format_description, OffsetDateTime};
use crate::Error; use crate::{law::parser::OgdSearchResult, Error};
use self::hits::Hits;
fn current_date() -> String { fn current_date() -> String {
let local_date = OffsetDateTime::now_utc(); let local_date = OffsetDateTime::now_utc();
@ -27,30 +24,10 @@ fn fetch_page(law_id: usize) -> Result<String, Error> {
) )
} }
/// Top-level wrapper of the response; the whole payload sits under the
/// JSON key "OgdSearchResult".
#[derive(Deserialize)]
struct OgdSearchResult {
    #[serde(rename = "OgdSearchResult")]
    ogd_document_results: OgdDocumentResults,
}

/// Result container: pagination info plus the matched document references.
#[derive(Deserialize)]
struct OgdDocumentResults {
    // NOTE(review): relies on the JSON key being exactly lowercase `hits` —
    // confirm against a real payload.
    hits: Hits,
    #[serde(rename = "OgdDocumentReference")]
    ogd_document_reference: OgdDocumentReference,
}

/// A document reference holding the per-document `Data` entries.
#[derive(Deserialize)]
struct OgdDocumentReference {
    #[serde(rename = "Data")]
    data: Vec<Data>,
}

/// Placeholder — the document payload itself is not modeled yet.
#[derive(Deserialize)]
struct Data {}
pub(crate) fn parse(law_id: usize) -> Result<(), Error> { pub(crate) fn parse(law_id: usize) -> Result<(), Error> {
let result = fetch_page(law_id)?; let json = fetch_page(law_id)?;
println!("{result:#?}");
let ogd_search_result: OgdSearchResult = serde_json::from_str(&json)?;
Ok(()) Ok(())
} }

188
src/law/parser.rs Normal file
View File

@ -0,0 +1,188 @@
use std::collections::HashMap;
use serde::Deserialize;
/// Pagination metadata of an RIS search response.
///
/// The upstream API encodes numeric values as JSON strings (e.g.
/// `"@pageNumber": "1"`), so every field is funneled through
/// `deserialize_string_to_usize`.
#[derive(Deserialize)]
pub(crate) struct Hits {
    // Page index reported by the service ("@pageNumber" key).
    #[serde(
        rename = "@pageNumber",
        deserialize_with = "deserialize_string_to_usize"
    )]
    page_number: usize,
    // Number of documents per page ("@pageSize" key).
    #[serde(rename = "@pageSize", deserialize_with = "deserialize_string_to_usize")]
    page_size: usize,
    // Mapped from the XML-style "#text" node; presumably the total hit
    // count — TODO confirm against the API.
    #[serde(rename = "#text", deserialize_with = "deserialize_string_to_usize")]
    text: usize,
}
fn deserialize_string_to_usize<'de, D>(deserializer: D) -> Result<usize, D::Error>
where
D: serde::Deserializer<'de>,
{
let s = String::deserialize(deserializer)?;
s.parse().map_err(serde::de::Error::custom)
}
/// Root object of the RIS search response.
///
/// `rename_all = "PascalCase"` maps each snake_case field onto the
/// PascalCase JSON key (e.g. `ogd_document_results` <- "OgdDocumentResults").
#[derive(Deserialize)]
#[serde(rename_all = "PascalCase")]
pub(crate) struct OgdSearchResult {
    ogd_document_results: OgdDocumentResults,
}

/// Result container: pagination info plus the matched document references.
#[derive(Deserialize)]
#[serde(rename_all = "PascalCase")]
pub(crate) struct OgdDocumentResults {
    // PascalCase rename maps this from the JSON key "Hits".
    hits: Hits,
    // One entry per matched document.
    ogd_document_reference: Vec<OgdDocumentReference>,
}

/// A single matched document; wraps its metadata/content `Data` payload.
#[derive(Deserialize)]
#[serde(rename_all = "PascalCase")]
pub(crate) struct OgdDocumentReference {
    data: Data,
}
/// Payload of one document reference: metadata plus its content links.
///
/// The JSON keys are German; explicit renames translate them to English
/// field names.
#[derive(Deserialize)]
pub(crate) struct Data {
    // "Metadaten" = metadata.
    #[serde(rename = "Metadaten")]
    metadata: Metadata,
    // "Dokumentliste" = document list (links to the content renditions).
    #[serde(rename = "Dokumentliste")]
    document_list: DocumentList,
}

/// Metadata sections of a document, as delivered under "Metadaten".
#[derive(Deserialize)]
pub(crate) struct Metadata {
    // "Technisch" = technical metadata (id, application, timestamps).
    #[serde(rename = "Technisch")]
    technical: TechnicalMetadata,
    // "Allgemein" = general metadata (change date, document URL).
    #[serde(rename = "Allgemein")]
    general: GeneralMetadata,
    // "Bundesrecht" = federal-law-specific metadata.
    #[serde(rename = "Bundesrecht")]
    fed: FedMetadata,
}
/// Technical metadata section ("Technisch").
#[derive(Deserialize)]
#[serde(rename_all = "PascalCase")]
pub(crate) struct TechnicalMetadata {
    // Uppercase acronym key needs an explicit rename; PascalCase would
    // only produce "Id".
    #[serde(rename = "ID")]
    id: String,
    // "Applikation" = source application within RIS.
    #[serde(rename = "Applikation")]
    application: String,
    // PascalCase rename maps this from the JSON key "Organ".
    organ: String,
    // PascalCase rename maps this from "ImportTimestamp".
    import_timestamp: ImportTimestamp,
}

/// Import timestamp element carrying XML-Schema nil attributes — the
/// payload is evidently JSON converted from XML.
#[derive(Deserialize)]
pub(crate) struct ImportTimestamp {
    // "true"/"false" as a string when the timestamp is nil.
    #[serde(rename = "@xsi:nil")]
    xsi_nil: String, //TODO: switch to bool
    // The xsi namespace declaration carried over from the XML source.
    #[serde(rename = "@xmlns:xsi")]
    xmlns_xsi: String,
}
/// General metadata section ("Allgemein").
#[derive(Deserialize)]
pub(crate) struct GeneralMetadata {
    // "Geaendert" = last-changed date.
    #[serde(rename = "Geaendert")]
    changed: String, //TODO: switch to YYYY-MM-DD string
    // URL of the individual document.
    #[serde(rename = "DokumentUrl")]
    document_url: String,
}

/// Federal-law metadata section ("Bundesrecht").
#[derive(Deserialize)]
#[serde(rename_all = "PascalCase")]
pub(crate) struct FedMetadata {
    // "Kurztitel" = short title.
    #[serde(rename = "Kurztitel")]
    short_title: String,
    // "Titel" = full title; absent for some documents.
    #[serde(rename = "Titel")]
    title: Option<String>,
    // PascalCase rename maps this from the JSON key "Eli"; presumably the
    // European Legislation Identifier — confirm against a real payload.
    eli: String,
    // "BrKons" = consolidated federal law block.
    br_kons: BrKons,
}
/// Consolidated federal-law metadata ("BrKons").
///
/// Field names mirror the German JSON keys; `rename_all = "PascalCase"`
/// covers the regular keys, explicit renames handle the exceptions.
#[derive(Deserialize)]
#[serde(rename_all = "PascalCase")]
pub(crate) struct BrKons {
    // "Kundmachungsorgan" = promulgation organ (publication gazette).
    kundmachungsorgan: String,
    // "Typ" = document type code.
    typ: String,
    #[serde(rename = "Dokumenttyp")]
    documenttype: String,
    // "ArtikelParagraphAnlage" = article / paragraph / annex designation.
    artikel_paragraph_anlage: String,
    // "Paragraphnummer" = paragraph number, if any.
    paragraphnummer: Option<String>,
    // "StammnormPublikationsorgan" = publication organ of the base norm.
    stammnorm_publikationsorgan: String,
    // "StammnormBgblnummer" = Federal Law Gazette number of the base norm.
    stammnorm_bgblnummer: String,
    // "Inkrafttretensdatum" = date of entry into force.
    inkrafttretensdatum: String, //TODO: switch to date
    #[serde(rename = "Indizes")]
    indices: HashMap<String, String>, //e.g. "item": "22/04 Sonstiges Zivilprozess, Außerstreitiges Verfahren"
    // "Aenderung" = amendment reference.
    #[serde(rename = "Aenderung")]
    change: String,
    // "Anmerkung" = remark, optional.
    anmerkung: Option<String>,
    // "Schlagworte" = keywords, optional.
    schlagworte: Option<String>,
    // "Gesetzesnummer" = law number.
    gesetzesnummer: String,
    // "AlteDokumentnummer" = old document number, optional.
    alte_dokumentnummer: Option<String>,
    // Full consolidated-law URL ("GesamteRechtsvorschriftUrl").
    #[serde(rename = "GesamteRechtsvorschriftUrl")]
    full_url: String,
}
/// Document list section ("Dokumentliste"): links to content renditions.
#[derive(Deserialize)]
#[serde(rename_all = "PascalCase")]
pub(crate) struct DocumentList {
    content_reference: ContentReference,
}

/// A content reference: type/name plus the download URLs.
#[derive(Deserialize)]
#[serde(rename_all = "PascalCase")]
pub(crate) struct ContentReference {
    content_type: String,
    name: String,
    urls: ContentUrl,
}

/// Wrapper around the list of URL items ("ContentUrl").
#[derive(Deserialize)]
#[serde(rename_all = "PascalCase")]
pub(crate) struct ContentUrl {
    content_url: Vec<ContentUrlItem>,
}

/// One download URL and the format it delivers ("DataType" / "Url").
#[derive(Deserialize)]
#[serde(rename_all = "PascalCase")]
pub(crate) struct ContentUrlItem {
    data_type: String,
    url: String,
}
#[cfg(test)]
mod tests {
    use std::fs;

    use super::*;

    /// Envelope matching the top-level "OgdSearchResult" key of the fixture.
    #[derive(Deserialize)]
    #[serde(rename_all = "PascalCase")]
    pub(crate) struct Wrapper {
        ogd_search_result: OgdSearchResult,
    }

    /// Deserializes the full captured payload in data/teg.json end-to-end.
    #[test]
    fn deserialize_teg_success() {
        // fs::read_to_string replaces the manual File::open + read_to_string
        // dance (and sizes the buffer from the file's metadata).
        let json = fs::read_to_string("data/teg.json")
            .expect("fixture data/teg.json should be readable");
        // expect() folds the old is_err()/println!/assert!(is_ok()) sequence
        // into one step: on failure the serde error lands in the panic message.
        serde_json::from_str::<Wrapper>(&json)
            .expect("data/teg.json should deserialize into Wrapper");
    }
}

View File

@ -20,6 +20,13 @@ impl From<io::Error> for Error {
} }
} }
} }
/// Converts JSON (de)serialization failures into the crate-wide `Error`,
/// keeping only the serde_json error's display message. Enables `?` on
/// `serde_json` results in functions returning `Result<_, Error>`.
impl From<serde_json::Error> for Error {
    fn from(value: serde_json::Error) -> Self {
        Self {
            // NOTE(review): `Error` is defined outside this view; it appears
            // to be a plain message-carrying struct.
            msg: value.to_string(),
        }
    }
}
fn main() { fn main() {
law::parse(10001905); law::parse(10001905);