From 23e2de9d4204389a631752c9ecb347c0cc938f12 Mon Sep 17 00:00:00 2001 From: philipp Date: Fri, 3 Nov 2023 22:40:19 +0100 Subject: [PATCH] Start paring ris law overview --- Cargo.lock | 430 ++++++++++++++++++++++++++++++++++++++++++++++++ Cargo.toml | 4 + README.md | 10 ++ src/law/hits.rs | 59 +++++++ src/law/mod.rs | 56 +++++++ src/main.rs | 27 ++- 6 files changed, 584 insertions(+), 2 deletions(-) create mode 100644 src/law/hits.rs create mode 100644 src/law/mod.rs diff --git a/Cargo.lock b/Cargo.lock index 4a09f45..70c90b3 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -2,6 +2,436 @@ # It is not intended for manual editing. version = 3 +[[package]] +name = "adler" +version = "1.0.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f26201604c87b1e01bd3d98f8d5d9a8fcbb815e8cedb41ffccbeb4bf593a35fe" + +[[package]] +name = "base64" +version = "0.21.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "35636a1494ede3b646cc98f74f8e62c773a38a659ebc777a2cf26b9b74171df9" + +[[package]] +name = "cc" +version = "1.0.83" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f1174fb0b6ec23863f8b971027804a42614e347eafb0a95bf0b12cdae21fc4d0" +dependencies = [ + "libc", +] + +[[package]] +name = "cfg-if" +version = "1.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "baf1de4339761588bc0619e3cbc0120ee582ebb74b53b4efbf79117bd2da40fd" + +[[package]] +name = "crc32fast" +version = "1.3.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b540bd8bc810d3885c6ea91e2018302f68baba2129ab3e88f32389ee9370880d" +dependencies = [ + "cfg-if", +] + +[[package]] +name = "deranged" +version = "0.3.9" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0f32d04922c60427da6f9fef14d042d9edddef64cb9d4ce0d64d0685fbeb1fd3" +dependencies = [ + "powerfmt", +] + +[[package]] +name = "flate2" +version = "1.0.28" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "46303f565772937ffe1d394a4fac6f411c6013172fadde9dcdb1e147a086940e" +dependencies = [ + "crc32fast", + "miniz_oxide", +] + +[[package]] +name = "form_urlencoded" +version = "1.2.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a62bc1cf6f830c2ec14a513a9fb124d0a213a629668a4186f329db21fe045652" +dependencies = [ + "percent-encoding", +] + +[[package]] +name = "getrandom" +version = "0.2.10" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "be4136b2a15dd319360be1c07d9933517ccf0be8f16bf62a3bee4f0d618df427" +dependencies = [ + "cfg-if", + "libc", + "wasi", +] + +[[package]] +name = "idna" +version = "0.4.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7d20d6b07bfbc108882d88ed8e37d39636dcc260e15e30c45e6ba089610b917c" +dependencies = [ + "unicode-bidi", + "unicode-normalization", +] + +[[package]] +name = "itoa" +version = "1.0.9" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "af150ab688ff2122fcef229be89cb50dd66af9e01a4ff320cc137eecc9bacc38" + +[[package]] +name = "libc" +version = "0.2.149" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a08173bc88b7955d1b3145aa561539096c421ac8debde8cbc3612ec635fee29b" + +[[package]] +name = "log" +version = "0.4.20" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b5e6163cb8c49088c2c36f57875e58ccd8c87c7427f7fbd50ea6710b2f3f2e8f" + +[[package]] +name = "miniz_oxide" +version = "0.7.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e7810e0be55b428ada41041c41f32c9f1a42817901b4ccf45fa3d4b6561e74c7" +dependencies = [ + "adler", +] + +[[package]] +name = "once_cell" +version = "1.18.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "dd8b5dd2ae5ed71462c540258bedcb51965123ad7e7ccf4b9a8cafaa4a63576d" + +[[package]] +name = "percent-encoding" +version = "2.3.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9b2a4787296e9989611394c33f193f676704af1686e70b8f8033ab5ba9a35a94" + +[[package]] +name = "powerfmt" +version = "0.2.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "439ee305def115ba05938db6eb1644ff94165c5ab5e9420d1c1bcedbba909391" + +[[package]] +name = "proc-macro2" +version = "1.0.69" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "134c189feb4956b20f6f547d2cf727d4c0fe06722b20a0eec87ed445a97f92da" +dependencies = [ + "unicode-ident", +] + +[[package]] +name = "quote" +version = "1.0.33" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5267fca4496028628a95160fc423a33e8b2e6af8a5302579e322e4b520293cae" +dependencies = [ + "proc-macro2", +] + +[[package]] +name = "ring" +version = "0.17.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "fb0205304757e5d899b9c2e448b867ffd03ae7f988002e47cd24954391394d0b" +dependencies = [ + "cc", + "getrandom", + "libc", + "spin", + "untrusted", + "windows-sys", +] + [[package]] name = "risp" version = "0.1.0" +dependencies = [ + "serde", + "serde_json", + "time", + "ureq", +] + +[[package]] +name = "rustls" +version = "0.21.8" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "446e14c5cda4f3f30fe71863c34ec70f5ac79d6087097ad0bb433e1be5edf04c" +dependencies = [ + "log", + "ring", + "rustls-webpki", + "sct", +] + +[[package]] +name = "rustls-webpki" +version = "0.101.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8b6275d1ee7a1cd780b64aca7726599a1dbc893b1e64144529e55c3c2f745765" +dependencies = [ + "ring", + "untrusted", +] + +[[package]] +name = "ryu" +version = "1.0.15" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1ad4cc8da4ef723ed60bced201181d83791ad433213d8c24efffda1eec85d741" + +[[package]] +name = "sct" +version = "0.7.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "da046153aa2352493d6cb7da4b6e5c0c057d8a1d0a9aa8560baffdd945acd414" +dependencies = [ + "ring", + "untrusted", +] + +[[package]] +name = "serde" +version = "1.0.190" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "91d3c334ca1ee894a2c6f6ad698fe8c435b76d504b13d436f0685d648d6d96f7" +dependencies = [ + "serde_derive", +] + +[[package]] +name = "serde_derive" +version = "1.0.190" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "67c5609f394e5c2bd7fc51efda478004ea80ef42fee983d5c67a65e34f32c0e3" +dependencies = [ + "proc-macro2", + "quote", + "syn", +] + +[[package]] +name = "serde_json" +version = "1.0.108" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3d1c7e3eac408d115102c4c24ad393e0821bb3a5df4d506a80f85f7a742a526b" +dependencies = [ + "itoa", + "ryu", + "serde", +] + +[[package]] +name = "spin" +version = "0.9.8" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6980e8d7511241f8acf4aebddbb1ff938df5eebe98691418c4468d0b72a96a67" + +[[package]] +name = "syn" +version = "2.0.38" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e96b79aaa137db8f61e26363a0c9b47d8b4ec75da28b7d1d614c2303e232408b" +dependencies = [ + "proc-macro2", + "quote", + "unicode-ident", +] + +[[package]] +name = "time" +version = "0.3.30" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c4a34ab300f2dee6e562c10a046fc05e358b29f9bf92277f30c3c8d82275f6f5" +dependencies = [ + "deranged", + "itoa", + "powerfmt", + "serde", + "time-core", + "time-macros", +] + +[[package]] +name = "time-core" +version = "0.1.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ef927ca75afb808a4d64dd374f00a2adf8d0fcff8e7b184af886c3c87ec4a3f3" + +[[package]] +name = "time-macros" +version = "0.2.15" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "4ad70d68dba9e1f8aceda7aa6711965dfec1cac869f311a51bd08b3a2ccbce20" +dependencies = [ + "time-core", +] + +[[package]] +name = "tinyvec" +version = "1.6.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "87cc5ceb3875bb20c2890005a4e226a4651264a5c75edb2421b52861a0a0cb50" +dependencies = [ + "tinyvec_macros", +] + +[[package]] +name = "tinyvec_macros" +version = "0.1.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1f3ccbac311fea05f86f61904b462b55fb3df8837a366dfc601a0161d0532f20" + +[[package]] +name = "unicode-bidi" +version = "0.3.13" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "92888ba5573ff080736b3648696b70cafad7d250551175acbaa4e0385b3e1460" + +[[package]] +name = "unicode-ident" +version = "1.0.12" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3354b9ac3fae1ff6755cb6db53683adb661634f67557942dea4facebec0fee4b" + +[[package]] +name = "unicode-normalization" +version = "0.1.22" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5c5713f0fc4b5db668a2ac63cdb7bb4469d8c9fed047b1d0292cc7b0ce2ba921" +dependencies = [ + "tinyvec", +] + +[[package]] +name = "untrusted" +version = "0.9.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8ecb6da28b8a351d773b68d5825ac39017e680750f980f3a1a85cd8dd28a47c1" + +[[package]] +name = "ureq" +version = "2.8.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f5ccd538d4a604753ebc2f17cd9946e89b77bf87f6a8e2309667c6f2e87855e3" +dependencies = [ + "base64", + "flate2", + "log", + "once_cell", + "rustls", + "rustls-webpki", + "url", + "webpki-roots", +] + +[[package]] +name = "url" +version = "2.4.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "143b538f18257fac9cad154828a57c6bf5157e1aa604d4816b5995bf6de87ae5" +dependencies = [ + "form_urlencoded", + "idna", + "percent-encoding", +] + +[[package]] +name = "wasi" +version = "0.11.0+wasi-snapshot-preview1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9c8d87e72b64a3b4db28d11ce29237c246188f4f51057d65a7eab63b7987e423" + +[[package]] +name = "webpki-roots" +version = "0.25.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "14247bb57be4f377dfb94c72830b8ce8fc6beac03cf4bf7b9732eadd414123fc" + +[[package]] +name = "windows-sys" +version = "0.48.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "677d2418bec65e3338edb076e806bc1ec15693c5d0104683f2efe857f61056a9" +dependencies = [ + "windows-targets", +] + +[[package]] +name = "windows-targets" +version = "0.48.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9a2fa6e2155d7247be68c096456083145c183cbbbc2764150dda45a87197940c" +dependencies = [ + "windows_aarch64_gnullvm", + "windows_aarch64_msvc", + "windows_i686_gnu", + "windows_i686_msvc", + "windows_x86_64_gnu", + "windows_x86_64_gnullvm", + "windows_x86_64_msvc", +] + +[[package]] +name = "windows_aarch64_gnullvm" +version = "0.48.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2b38e32f0abccf9987a4e3079dfb67dcd799fb61361e53e2882c3cbaf0d905d8" + +[[package]] +name = "windows_aarch64_msvc" +version = "0.48.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "dc35310971f3b2dbbf3f0690a219f40e2d9afcf64f9ab7cc1be722937c26b4bc" + +[[package]] +name = "windows_i686_gnu" +version = "0.48.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a75915e7def60c94dcef72200b9a8e58e5091744960da64ec734a6c6e9b3743e" + +[[package]] +name = "windows_i686_msvc" +version = "0.48.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8f55c233f70c4b27f66c523580f78f1004e8b5a8b659e05a4eb49d4166cca406" + +[[package]] +name = "windows_x86_64_gnu" +version = "0.48.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "53d40abd2583d23e4718fddf1ebec84dbff8381c07cae67ff7768bbf19c6718e" + +[[package]] +name = "windows_x86_64_gnullvm" +version = "0.48.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0b7b52767868a23d5bab768e390dc5f5c55825b6d30b86c844ff2dc7414044cc" + +[[package]] +name = "windows_x86_64_msvc" +version = "0.48.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ed94fce61571a4006852b7389a063ab983c02eb1bb37b47f8272ce92d06d9538" diff --git a/Cargo.toml b/Cargo.toml index 5457001..76ea5a1 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -6,3 +6,7 @@ edition = "2021" # See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html [dependencies] +ureq = "2.8" +time = { version = "0.3", features = [ "formatting" ] } +serde = { version = "1.0", features = [ "derive" ] } +serde_json = "1.0" diff --git a/README.md b/README.md index 64ea237..719d54e 100644 --- a/README.md +++ b/README.md @@ -1,3 +1,5 @@ +RISolve + # History - [I've created my first parser using RIS API, daily updated. Failed because I tried to do too much automatically (e.g. recognizing headers](https://gitlab.com/PhilippHofer/law) - [Using print-website, I've extracted stuff w/ regex.](https://gitlab.com/PhilippHofer/ris/) @@ -24,3 +26,11 @@ # Next step - [ ] Create struct + parse with deserialize + +# Naming + +- Law ("Gesetz"): e.g. UHG, TEG, ABGB +- Section ("Paragraph") +- Subsection ("Absatz") +- Item ("Ziffer") +- Heading-{1,2,3,...} diff --git a/src/law/hits.rs b/src/law/hits.rs new file mode 100644 index 0000000..de32979 --- /dev/null +++ b/src/law/hits.rs @@ -0,0 +1,59 @@ +use serde::Deserialize; + +#[derive(Deserialize)] +pub(crate) struct Hits { + #[serde( + rename = "@pageNumber", + deserialize_with = "deserialize_string_to_usize" + )] + page_number: usize, + #[serde(rename = "@pageSize", deserialize_with = "deserialize_string_to_usize")] + page_size: usize, + #[serde(rename = "#text", deserialize_with = "deserialize_string_to_usize")] + text: usize, +} + +fn deserialize_string_to_usize<'de, D>(deserializer: D) -> Result +where + D: serde::Deserializer<'de>, +{ + let s = String::deserialize(deserializer)?; + s.parse().map_err(serde::de::Error::custom) +} + +#[cfg(test)] +mod tests { + use super::*; + + #[derive(Deserialize)] + struct Wrapper { + hits: Hits, + } + + #[test] + fn deserialize_hits_success() { + let json = "{\"hits\": { + \"@pageNumber\": \"1\", + \"@pageSize\": \"100\", + \"#text\": \"32\"}}"; + + let wrapper: serde_json::Result = serde_json::from_str(json); + assert!(wrapper.is_ok()); + + let wrapper = wrapper.unwrap(); + assert_eq!(wrapper.hits.page_number, 1); + assert_eq!(wrapper.hits.page_size, 100); + assert_eq!(wrapper.hits.text, 32); + } + + #[test] + fn deserialize_hits_failure() { + let json = "{\"hits\": { + \"@pageNumber\": \"one\", + \"@pageSize\": \"one hundred\", + \"#text\": \"thirty-two\"}}"; + + let wrapper: serde_json::Result = serde_json::from_str(json); + assert!(wrapper.is_err()); + } +} diff --git a/src/law/mod.rs b/src/law/mod.rs new file mode 100644 index 0000000..f41b49d --- /dev/null +++ b/src/law/mod.rs @@ -0,0 +1,56 @@ +mod hits; + +use serde::Deserialize; +use time::{format_description, OffsetDateTime}; + +use crate::Error; + +use self::hits::Hits; + +fn current_date() -> String { + let local_date = OffsetDateTime::now_utc(); + let format = format_description::parse("[year]-[month]-[day]").unwrap(); //OK + local_date.format(&format).expect("Failed to format date") +} + +fn fetch_page(law_id: usize) -> Result { + Ok( + ureq::post("https://data.bka.gv.at/ris/api/v2.6/Bundesrecht") + .send_form(&[ + ("Applikation", "BrKons"), + ("Gesetzesnummer", &format!("{}", law_id)), + ("DokumenteProSeite", "OneHundred"), + ("Seitennummer", &format!("{}", 1)), + ("Fassung.FassungVom", ¤t_date()), + ])? + .into_string()?, + ) +} + +#[derive(Deserialize)] +struct OgdSearchResult { + #[serde(rename = "OgdSearchResult")] + ogd_document_results: OgdDocumentResults, +} + +#[derive(Deserialize)] +struct OgdDocumentResults { + hits: Hits, + #[serde(rename = "OgdDocumentReference")] + ogd_document_reference: OgdDocumentReference, +} + +#[derive(Deserialize)] +struct OgdDocumentReference { + #[serde(rename = "Data")] + data: Vec, +} + +#[derive(Deserialize)] +struct Data {} + +pub(crate) fn parse(law_id: usize) -> Result<(), Error> { + let result = fetch_page(law_id)?; + println!("{result:#?}"); + Ok(()) +} diff --git a/src/main.rs b/src/main.rs index e7a11a9..71f2690 100644 --- a/src/main.rs +++ b/src/main.rs @@ -1,3 +1,26 @@ -fn main() { - println!("Hello, world!"); +use std::io; + +mod law; + +pub struct Error { + msg: String, +} + +impl From for Error { + fn from(value: ureq::Error) -> Self { + Self { + msg: value.to_string(), + } + } +} +impl From for Error { + fn from(value: io::Error) -> Self { + Self { + msg: value.to_string(), + } + } +} + +fn main() { + law::parse(10001905); }