Start paring ris law overview

This commit is contained in:
philipp 2023-11-03 22:40:19 +01:00
parent 1aa664b536
commit 23e2de9d42
6 changed files with 584 additions and 2 deletions

430
Cargo.lock generated
View File

@ -2,6 +2,436 @@
# It is not intended for manual editing.
version = 3
[[package]]
name = "adler"
version = "1.0.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "f26201604c87b1e01bd3d98f8d5d9a8fcbb815e8cedb41ffccbeb4bf593a35fe"
[[package]]
name = "base64"
version = "0.21.5"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "35636a1494ede3b646cc98f74f8e62c773a38a659ebc777a2cf26b9b74171df9"
[[package]]
name = "cc"
version = "1.0.83"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "f1174fb0b6ec23863f8b971027804a42614e347eafb0a95bf0b12cdae21fc4d0"
dependencies = [
"libc",
]
[[package]]
name = "cfg-if"
version = "1.0.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "baf1de4339761588bc0619e3cbc0120ee582ebb74b53b4efbf79117bd2da40fd"
[[package]]
name = "crc32fast"
version = "1.3.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "b540bd8bc810d3885c6ea91e2018302f68baba2129ab3e88f32389ee9370880d"
dependencies = [
"cfg-if",
]
[[package]]
name = "deranged"
version = "0.3.9"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "0f32d04922c60427da6f9fef14d042d9edddef64cb9d4ce0d64d0685fbeb1fd3"
dependencies = [
"powerfmt",
]
[[package]]
name = "flate2"
version = "1.0.28"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "46303f565772937ffe1d394a4fac6f411c6013172fadde9dcdb1e147a086940e"
dependencies = [
"crc32fast",
"miniz_oxide",
]
[[package]]
name = "form_urlencoded"
version = "1.2.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "a62bc1cf6f830c2ec14a513a9fb124d0a213a629668a4186f329db21fe045652"
dependencies = [
"percent-encoding",
]
[[package]]
name = "getrandom"
version = "0.2.10"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "be4136b2a15dd319360be1c07d9933517ccf0be8f16bf62a3bee4f0d618df427"
dependencies = [
"cfg-if",
"libc",
"wasi",
]
[[package]]
name = "idna"
version = "0.4.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "7d20d6b07bfbc108882d88ed8e37d39636dcc260e15e30c45e6ba089610b917c"
dependencies = [
"unicode-bidi",
"unicode-normalization",
]
[[package]]
name = "itoa"
version = "1.0.9"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "af150ab688ff2122fcef229be89cb50dd66af9e01a4ff320cc137eecc9bacc38"
[[package]]
name = "libc"
version = "0.2.149"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "a08173bc88b7955d1b3145aa561539096c421ac8debde8cbc3612ec635fee29b"
[[package]]
name = "log"
version = "0.4.20"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "b5e6163cb8c49088c2c36f57875e58ccd8c87c7427f7fbd50ea6710b2f3f2e8f"
[[package]]
name = "miniz_oxide"
version = "0.7.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "e7810e0be55b428ada41041c41f32c9f1a42817901b4ccf45fa3d4b6561e74c7"
dependencies = [
"adler",
]
[[package]]
name = "once_cell"
version = "1.18.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "dd8b5dd2ae5ed71462c540258bedcb51965123ad7e7ccf4b9a8cafaa4a63576d"
[[package]]
name = "percent-encoding"
version = "2.3.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "9b2a4787296e9989611394c33f193f676704af1686e70b8f8033ab5ba9a35a94"
[[package]]
name = "powerfmt"
version = "0.2.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "439ee305def115ba05938db6eb1644ff94165c5ab5e9420d1c1bcedbba909391"
[[package]]
name = "proc-macro2"
version = "1.0.69"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "134c189feb4956b20f6f547d2cf727d4c0fe06722b20a0eec87ed445a97f92da"
dependencies = [
"unicode-ident",
]
[[package]]
name = "quote"
version = "1.0.33"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "5267fca4496028628a95160fc423a33e8b2e6af8a5302579e322e4b520293cae"
dependencies = [
"proc-macro2",
]
[[package]]
name = "ring"
version = "0.17.5"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "fb0205304757e5d899b9c2e448b867ffd03ae7f988002e47cd24954391394d0b"
dependencies = [
"cc",
"getrandom",
"libc",
"spin",
"untrusted",
"windows-sys",
]
[[package]]
name = "risp"
version = "0.1.0"
dependencies = [
"serde",
"serde_json",
"time",
"ureq",
]
[[package]]
name = "rustls"
version = "0.21.8"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "446e14c5cda4f3f30fe71863c34ec70f5ac79d6087097ad0bb433e1be5edf04c"
dependencies = [
"log",
"ring",
"rustls-webpki",
"sct",
]
[[package]]
name = "rustls-webpki"
version = "0.101.7"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "8b6275d1ee7a1cd780b64aca7726599a1dbc893b1e64144529e55c3c2f745765"
dependencies = [
"ring",
"untrusted",
]
[[package]]
name = "ryu"
version = "1.0.15"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "1ad4cc8da4ef723ed60bced201181d83791ad433213d8c24efffda1eec85d741"
[[package]]
name = "sct"
version = "0.7.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "da046153aa2352493d6cb7da4b6e5c0c057d8a1d0a9aa8560baffdd945acd414"
dependencies = [
"ring",
"untrusted",
]
[[package]]
name = "serde"
version = "1.0.190"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "91d3c334ca1ee894a2c6f6ad698fe8c435b76d504b13d436f0685d648d6d96f7"
dependencies = [
"serde_derive",
]
[[package]]
name = "serde_derive"
version = "1.0.190"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "67c5609f394e5c2bd7fc51efda478004ea80ef42fee983d5c67a65e34f32c0e3"
dependencies = [
"proc-macro2",
"quote",
"syn",
]
[[package]]
name = "serde_json"
version = "1.0.108"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "3d1c7e3eac408d115102c4c24ad393e0821bb3a5df4d506a80f85f7a742a526b"
dependencies = [
"itoa",
"ryu",
"serde",
]
[[package]]
name = "spin"
version = "0.9.8"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "6980e8d7511241f8acf4aebddbb1ff938df5eebe98691418c4468d0b72a96a67"
[[package]]
name = "syn"
version = "2.0.38"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "e96b79aaa137db8f61e26363a0c9b47d8b4ec75da28b7d1d614c2303e232408b"
dependencies = [
"proc-macro2",
"quote",
"unicode-ident",
]
[[package]]
name = "time"
version = "0.3.30"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "c4a34ab300f2dee6e562c10a046fc05e358b29f9bf92277f30c3c8d82275f6f5"
dependencies = [
"deranged",
"itoa",
"powerfmt",
"serde",
"time-core",
"time-macros",
]
[[package]]
name = "time-core"
version = "0.1.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "ef927ca75afb808a4d64dd374f00a2adf8d0fcff8e7b184af886c3c87ec4a3f3"
[[package]]
name = "time-macros"
version = "0.2.15"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "4ad70d68dba9e1f8aceda7aa6711965dfec1cac869f311a51bd08b3a2ccbce20"
dependencies = [
"time-core",
]
[[package]]
name = "tinyvec"
version = "1.6.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "87cc5ceb3875bb20c2890005a4e226a4651264a5c75edb2421b52861a0a0cb50"
dependencies = [
"tinyvec_macros",
]
[[package]]
name = "tinyvec_macros"
version = "0.1.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "1f3ccbac311fea05f86f61904b462b55fb3df8837a366dfc601a0161d0532f20"
[[package]]
name = "unicode-bidi"
version = "0.3.13"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "92888ba5573ff080736b3648696b70cafad7d250551175acbaa4e0385b3e1460"
[[package]]
name = "unicode-ident"
version = "1.0.12"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "3354b9ac3fae1ff6755cb6db53683adb661634f67557942dea4facebec0fee4b"
[[package]]
name = "unicode-normalization"
version = "0.1.22"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "5c5713f0fc4b5db668a2ac63cdb7bb4469d8c9fed047b1d0292cc7b0ce2ba921"
dependencies = [
"tinyvec",
]
[[package]]
name = "untrusted"
version = "0.9.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "8ecb6da28b8a351d773b68d5825ac39017e680750f980f3a1a85cd8dd28a47c1"
[[package]]
name = "ureq"
version = "2.8.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "f5ccd538d4a604753ebc2f17cd9946e89b77bf87f6a8e2309667c6f2e87855e3"
dependencies = [
"base64",
"flate2",
"log",
"once_cell",
"rustls",
"rustls-webpki",
"url",
"webpki-roots",
]
[[package]]
name = "url"
version = "2.4.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "143b538f18257fac9cad154828a57c6bf5157e1aa604d4816b5995bf6de87ae5"
dependencies = [
"form_urlencoded",
"idna",
"percent-encoding",
]
[[package]]
name = "wasi"
version = "0.11.0+wasi-snapshot-preview1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "9c8d87e72b64a3b4db28d11ce29237c246188f4f51057d65a7eab63b7987e423"
[[package]]
name = "webpki-roots"
version = "0.25.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "14247bb57be4f377dfb94c72830b8ce8fc6beac03cf4bf7b9732eadd414123fc"
[[package]]
name = "windows-sys"
version = "0.48.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "677d2418bec65e3338edb076e806bc1ec15693c5d0104683f2efe857f61056a9"
dependencies = [
"windows-targets",
]
[[package]]
name = "windows-targets"
version = "0.48.5"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "9a2fa6e2155d7247be68c096456083145c183cbbbc2764150dda45a87197940c"
dependencies = [
"windows_aarch64_gnullvm",
"windows_aarch64_msvc",
"windows_i686_gnu",
"windows_i686_msvc",
"windows_x86_64_gnu",
"windows_x86_64_gnullvm",
"windows_x86_64_msvc",
]
[[package]]
name = "windows_aarch64_gnullvm"
version = "0.48.5"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "2b38e32f0abccf9987a4e3079dfb67dcd799fb61361e53e2882c3cbaf0d905d8"
[[package]]
name = "windows_aarch64_msvc"
version = "0.48.5"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "dc35310971f3b2dbbf3f0690a219f40e2d9afcf64f9ab7cc1be722937c26b4bc"
[[package]]
name = "windows_i686_gnu"
version = "0.48.5"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "a75915e7def60c94dcef72200b9a8e58e5091744960da64ec734a6c6e9b3743e"
[[package]]
name = "windows_i686_msvc"
version = "0.48.5"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "8f55c233f70c4b27f66c523580f78f1004e8b5a8b659e05a4eb49d4166cca406"
[[package]]
name = "windows_x86_64_gnu"
version = "0.48.5"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "53d40abd2583d23e4718fddf1ebec84dbff8381c07cae67ff7768bbf19c6718e"
[[package]]
name = "windows_x86_64_gnullvm"
version = "0.48.5"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "0b7b52767868a23d5bab768e390dc5f5c55825b6d30b86c844ff2dc7414044cc"
[[package]]
name = "windows_x86_64_msvc"
version = "0.48.5"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "ed94fce61571a4006852b7389a063ab983c02eb1bb37b47f8272ce92d06d9538"

View File

@ -6,3 +6,7 @@ edition = "2021"
# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html
[dependencies]
ureq = "2.8"
time = { version = "0.3", features = [ "formatting" ] }
serde = { version = "1.0", features = [ "derive" ] }
serde_json = "1.0"

View File

@ -1,3 +1,5 @@
RISolve
# History
- [I've created my first parser using RIS API, daily updated. Failed because I tried to do too much automatically (e.g. recognizing headers](https://gitlab.com/PhilippHofer/law)
- [Using print-website, I've extracted stuff w/ regex.](https://gitlab.com/PhilippHofer/ris/)
@ -24,3 +26,11 @@
# Next step
- [ ] Create struct + parse with deserialize
# Naming
- Law ("Gesetz"): e.g. UHG, TEG, ABGB
- Section ("Paragraph")
- Subsection ("Absatz")
- Item ("Ziffer")
- Heading-{1,2,3,...}

59
src/law/hits.rs Normal file
View File

@ -0,0 +1,59 @@
use serde::Deserialize;
#[derive(Deserialize)]
pub(crate) struct Hits {
#[serde(
rename = "@pageNumber",
deserialize_with = "deserialize_string_to_usize"
)]
page_number: usize,
#[serde(rename = "@pageSize", deserialize_with = "deserialize_string_to_usize")]
page_size: usize,
#[serde(rename = "#text", deserialize_with = "deserialize_string_to_usize")]
text: usize,
}
fn deserialize_string_to_usize<'de, D>(deserializer: D) -> Result<usize, D::Error>
where
D: serde::Deserializer<'de>,
{
let s = String::deserialize(deserializer)?;
s.parse().map_err(serde::de::Error::custom)
}
#[cfg(test)]
mod tests {
use super::*;
#[derive(Deserialize)]
struct Wrapper {
hits: Hits,
}
#[test]
fn deserialize_hits_success() {
let json = "{\"hits\": {
\"@pageNumber\": \"1\",
\"@pageSize\": \"100\",
\"#text\": \"32\"}}";
let wrapper: serde_json::Result<Wrapper> = serde_json::from_str(json);
assert!(wrapper.is_ok());
let wrapper = wrapper.unwrap();
assert_eq!(wrapper.hits.page_number, 1);
assert_eq!(wrapper.hits.page_size, 100);
assert_eq!(wrapper.hits.text, 32);
}
#[test]
fn deserialize_hits_failure() {
let json = "{\"hits\": {
\"@pageNumber\": \"one\",
\"@pageSize\": \"one hundred\",
\"#text\": \"thirty-two\"}}";
let wrapper: serde_json::Result<Wrapper> = serde_json::from_str(json);
assert!(wrapper.is_err());
}
}

56
src/law/mod.rs Normal file
View File

@ -0,0 +1,56 @@
mod hits;
use serde::Deserialize;
use time::{format_description, OffsetDateTime};
use crate::Error;
use self::hits::Hits;
fn current_date() -> String {
let local_date = OffsetDateTime::now_utc();
let format = format_description::parse("[year]-[month]-[day]").unwrap(); //OK
local_date.format(&format).expect("Failed to format date")
}
fn fetch_page(law_id: usize) -> Result<String, Error> {
Ok(
ureq::post("https://data.bka.gv.at/ris/api/v2.6/Bundesrecht")
.send_form(&[
("Applikation", "BrKons"),
("Gesetzesnummer", &format!("{}", law_id)),
("DokumenteProSeite", "OneHundred"),
("Seitennummer", &format!("{}", 1)),
("Fassung.FassungVom", &current_date()),
])?
.into_string()?,
)
}
#[derive(Deserialize)]
struct OgdSearchResult {
#[serde(rename = "OgdSearchResult")]
ogd_document_results: OgdDocumentResults,
}
#[derive(Deserialize)]
struct OgdDocumentResults {
hits: Hits,
#[serde(rename = "OgdDocumentReference")]
ogd_document_reference: OgdDocumentReference,
}
#[derive(Deserialize)]
struct OgdDocumentReference {
#[serde(rename = "Data")]
data: Vec<Data>,
}
#[derive(Deserialize)]
struct Data {}
pub(crate) fn parse(law_id: usize) -> Result<(), Error> {
let result = fetch_page(law_id)?;
println!("{result:#?}");
Ok(())
}

View File

@ -1,3 +1,26 @@
fn main() {
println!("Hello, world!");
use std::io;
mod law;
pub struct Error {
msg: String,
}
impl From<ureq::Error> for Error {
fn from(value: ureq::Error) -> Self {
Self {
msg: value.to_string(),
}
}
}
impl From<io::Error> for Error {
fn from(value: io::Error) -> Self {
Self {
msg: value.to_string(),
}
}
}
fn main() {
law::parse(10001905);
}