use cache by default, add clear-cache argument
Some checks failed
CI/CD Pipeline / test (push) Failing after 1m30s

This commit is contained in:
philipp 2024-02-15 09:24:30 +01:00
parent c511e5a4d8
commit ddb2ebe5b7
6 changed files with 134 additions and 41 deletions

60
Cargo.lock generated
View File

@ -89,6 +89,12 @@ version = "1.3.2"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "bef38d45163c2f1dde094a7dfd33ccf595c92905c8f8f4fdc18d06fb1037718a" checksum = "bef38d45163c2f1dde094a7dfd33ccf595c92905c8f8f4fdc18d06fb1037718a"
[[package]]
name = "bitflags"
version = "2.4.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "ed570934406eb16438a4e976b1b4500774099c13b8cb96eec99f620f05090ddf"
[[package]] [[package]]
name = "cc" name = "cc"
version = "1.0.83" version = "1.0.83"
@ -165,7 +171,7 @@ version = "0.25.0"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "e64e6c0fbe2c17357405f7c758c1ef960fce08bdfb2c03d88d2a18d7e09c4b67" checksum = "e64e6c0fbe2c17357405f7c758c1ef960fce08bdfb2c03d88d2a18d7e09c4b67"
dependencies = [ dependencies = [
"bitflags", "bitflags 1.3.2",
"crossterm_winapi", "crossterm_winapi",
"libc", "libc",
"mio", "mio",
@ -199,6 +205,27 @@ version = "0.1.13"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "56254986775e3233ffa9c4d7d3faaf6d36a2c09d30b20687e9f88bc8bafc16c8" checksum = "56254986775e3233ffa9c4d7d3faaf6d36a2c09d30b20687e9f88bc8bafc16c8"
[[package]]
name = "directories"
version = "5.0.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "9a49173b84e034382284f27f1af4dcbbd231ffa358c0fe316541a7337f376a35"
dependencies = [
"dirs-sys",
]
[[package]]
name = "dirs-sys"
version = "0.4.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "520f05a5cbd335fae5a99ff7a6ab8627577660ee5cfd6a94a6a929b52ff0321c"
dependencies = [
"libc",
"option-ext",
"redox_users",
"windows-sys 0.48.0",
]
[[package]] [[package]]
name = "env_filter" name = "env_filter"
version = "0.1.0" version = "0.1.0"
@ -349,6 +376,17 @@ version = "0.2.153"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "9c198f91728a82281a64e1f4f9eeb25d82cb32a5de251c6bd1b5154d63a8e7bd" checksum = "9c198f91728a82281a64e1f4f9eeb25d82cb32a5de251c6bd1b5154d63a8e7bd"
[[package]]
name = "libredox"
version = "0.0.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "85c833ca1e66078851dba29046874e38f08b2c883700aa29a03ddd3b23814ee8"
dependencies = [
"bitflags 2.4.2",
"libc",
"redox_syscall",
]
[[package]] [[package]]
name = "lock_api" name = "lock_api"
version = "0.4.11" version = "0.4.11"
@ -404,6 +442,12 @@ version = "1.19.0"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "3fdb12b2476b595f9358c5161aa467c2438859caa136dec86c26fdd2efe17b92" checksum = "3fdb12b2476b595f9358c5161aa467c2438859caa136dec86c26fdd2efe17b92"
[[package]]
name = "option-ext"
version = "0.2.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "04744f49eae99ab78e0d5c0b603ab218f515ea8cfe5a456d7629ad883a3b6e7d"
[[package]] [[package]]
name = "parking_lot" name = "parking_lot"
version = "0.12.1" version = "0.12.1"
@ -473,7 +517,18 @@ version = "0.4.1"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "4722d768eff46b75989dd134e5c353f0d6296e5aaa3132e776cbdb56be7731aa" checksum = "4722d768eff46b75989dd134e5c353f0d6296e5aaa3132e776cbdb56be7731aa"
dependencies = [ dependencies = [
"bitflags", "bitflags 1.3.2",
]
[[package]]
name = "redox_users"
version = "0.4.4"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "a18479200779601e498ada4e8c1e1f50e3ee19deb0259c25825a98b5603b2cb4"
dependencies = [
"getrandom",
"libredox",
"thiserror",
] ]
[[package]] [[package]]
@ -524,6 +579,7 @@ name = "risp"
version = "0.1.0" version = "0.1.0"
dependencies = [ dependencies = [
"clap", "clap",
"directories",
"env_logger", "env_logger",
"log", "log",
"pretty_assertions", "pretty_assertions",

View File

@ -16,6 +16,7 @@ log = "0.4"
tqdm = "0.6" tqdm = "0.6"
toml = "0.8" toml = "0.8"
clap = { version = "4.5.0", features = ["derive"] } clap = { version = "4.5.0", features = ["derive"] }
directories = "5.0"
[dev-dependencies] [dev-dependencies]
pretty_assertions = "1.4" pretty_assertions = "1.4"

View File

@ -1,5 +1,5 @@
use clap::{command, Parser}; use clap::{command, Parser};
use risp::law::Law; use risp::{law::Law, misc::clear_cache};
#[derive(Parser, Debug)] #[derive(Parser, Debug)]
#[command(version, about, long_about = None)] #[command(version, about, long_about = None)]
@ -7,12 +7,22 @@ struct Args {
/// Path to the config of a law text /// Path to the config of a law text
#[arg(short, long)] #[arg(short, long)]
config: String, config: String,
/// Clears the cache (downloaded laws + paragraphs)
clear_cache: bool,
} }
fn main() { fn main() {
env_logger::init(); env_logger::init();
let args = Args::parse(); let args = Args::parse();
if args.clear_cache {
if let Err(e) = clear_cache() {
println!("Failed to clear cache: {e:?}");
}
}
let law = Law::from_config(&args.config).unwrap(); let law = Law::from_config(&args.config).unwrap();
law.to_md(); law.to_md();

View File

@ -1,11 +1,11 @@
use std::io; use std::{fs, io, path::Path};
use time::{format_description, OffsetDateTime}; use time::{format_description, OffsetDateTime};
#[derive(Debug)] #[derive(Debug)]
#[allow(dead_code)] #[allow(dead_code)]
pub struct Error { pub struct Error {
msg: String, pub(crate) msg: String,
} }
impl Error { impl Error {
@ -57,3 +57,40 @@ pub(crate) fn current_date() -> String {
let format = format_description::parse("[year]-[month]-[day]").unwrap(); //unwrap okay, supplied format is fine let format = format_description::parse("[year]-[month]-[day]").unwrap(); //unwrap okay, supplied format is fine
local_date.format(&format).expect("Failed to format date") local_date.format(&format).expect("Failed to format date")
} }
#[cfg(not(test))]
pub(crate) fn get_cache_dir() -> Result<String, Error> {
let cache_dir = directories::BaseDirs::new().ok_or(Error {
msg: "directories crate could not find basedirs.".into(),
})?;
let cache_dir = cache_dir.cache_dir();
Ok(format!("{}/risp/", cache_dir.to_str().unwrap()))
}
/// Test-build variant of the cache directory lookup.
///
/// Always yields the fixed path `./data/cache/` (relative to the working directory), so no
/// lookup of the user's base directories takes place. This variant never returns an error.
#[cfg(test)]
pub(crate) fn get_cache_dir() -> Result<String, Error> {
    let test_cache_dir = String::from("./data/cache/");
    Ok(test_cache_dir)
}
/// Deletes everything inside the cache directory (downloaded laws + paragraphs).
///
/// The cache directory itself is kept. A cache directory that does not exist yet is treated as
/// already cleared, so calling this before anything has been cached is not an error.
///
/// # Errors
/// Fails if the cache directory can't be determined, can't be inspected, or if one of its
/// entries can't be deleted.
pub fn clear_cache() -> Result<(), Error> {
    let cache_dir = get_cache_dir()?;
    let cache_dir = Path::new(&cache_dir);
    // `try_exists` (unlike `exists`) reports I/O problems such as missing permissions instead of
    // silently answering `false`.
    if !cache_dir.try_exists()? {
        return Ok(());
    }
    Ok(delete_all_in_dir(cache_dir)?)
}
/// Recursively deletes every entry inside `dir_path`, leaving `dir_path` itself in place.
///
/// Symbolic links are removed as links; their targets are never traversed or deleted.
///
/// # Errors
/// Returns the first I/O error hit while listing the directory or deleting an entry
/// (this includes `dir_path` not existing).
fn delete_all_in_dir<P: AsRef<Path>>(dir_path: P) -> std::io::Result<()> {
    for entry in fs::read_dir(dir_path)? {
        let entry = entry?;
        let path = entry.path();
        // `DirEntry::file_type` does not follow symlinks (unlike `Path::is_dir`), so a link that
        // points at a directory is never mistaken for a real subdirectory.
        let file_type = entry.file_type()?;
        if file_type.is_dir() {
            // Removes the subdirectory together with its contents, without following links.
            fs::remove_dir_all(&path)?;
        } else if file_type.is_symlink() {
            // Unlink the link itself. NOTE(review): on Windows a directory symlink presumably
            // has to be removed with `remove_dir`, hence the fallback — confirm on that platform.
            fs::remove_file(&path).or_else(|_| fs::remove_dir(&path))?;
        } else {
            fs::remove_file(&path)?;
        }
    }
    Ok(())
}

View File

@ -2,10 +2,12 @@
mod ris_structure; mod ris_structure;
use std::path::Path;
use log::info; use log::info;
use serde::Deserialize; use serde::Deserialize;
use crate::misc::{current_date, Error}; use crate::misc::{current_date, get_cache_dir, Error};
use ris_structure::OgdSearchResult; use ris_structure::OgdSearchResult;
@ -82,30 +84,10 @@ fn parse_from_str(content: &str, skip_first: bool) -> Result<(bool, Vec<String>)
Ok((true, ret)) Ok((true, ret))
} }
#[cfg(not(test))]
/// Requests one result page of the overview `overview_id` from the RIS API and returns the
/// response body as a string (the API is expected to answer with json).
///
/// The request asks for the `BrKons` application, one hundred documents per page, page number
/// `page`, and the version valid on the current date.
///
/// # Errors
/// Fails if `ureq` can't complete the request (probably no internet connection, or RIS is not
/// online) or if the response body can't be read into a string.
fn fetch_page(overview_id: usize, page: usize) -> Result<String, Error> {
    // Owned strings are bound first so the form entries can borrow them.
    let law_number = overview_id.to_string();
    let page_number = page.to_string();
    let effective_date = current_date();
    let form = [
        ("Applikation", "BrKons"),
        ("Gesetzesnummer", law_number.as_str()),
        ("DokumenteProSeite", "OneHundred"),
        ("Seitennummer", page_number.as_str()),
        ("Fassung.FassungVom", effective_date.as_str()),
    ];
    let response =
        ureq::post("https://data.bka.gv.at/ris/api/v2.6/Bundesrecht").send_form(&form)?;
    let body = response.into_string()?;
    Ok(body)
}
#[cfg(test)]
fn fetch_page(overview_id: usize, page: usize) -> Result<String, Error> { fn fetch_page(overview_id: usize, page: usize) -> Result<String, Error> {
use std::fs; use std::fs;
let expected_filename = format!("./data/cache/law-{overview_id}-{page}"); let expected_filename = format!("{}law-{overview_id}-{page}", get_cache_dir()?);
match fs::read_to_string(&expected_filename) { match fs::read_to_string(&expected_filename) {
Ok(data) => Ok(data), Ok(data) => Ok(data),
@ -120,6 +102,11 @@ fn fetch_page(overview_id: usize, page: usize) -> Result<String, Error> {
("Fassung.FassungVom", &current_date()), ("Fassung.FassungVom", &current_date()),
])? ])?
.into_string()?; .into_string()?;
let path = Path::new(&expected_filename);
if let Some(parent) = path.parent() {
// Try to create the directory (and any necessary parent directories)
fs::create_dir_all(parent).expect("Unable to create directory");
}
fs::write(expected_filename, &data).expect("Unable to write file"); fs::write(expected_filename, &data).expect("Unable to write file");
Ok(data) Ok(data)
} }

View File

@ -1,9 +1,18 @@
//! Deals with getting all paragraphs for a given law text //! Deals with getting all paragraphs for a given law text
mod parser; mod parser;
use std::{
fs,
hash::{DefaultHasher, Hash, Hasher},
path::Path,
};
use log::info; use log::info;
use crate::{law::LawBuilder, misc::Error}; use crate::{
law::LawBuilder,
misc::{get_cache_dir, Error},
};
use self::parser::Risdok; use self::parser::Risdok;
@ -57,30 +66,23 @@ impl Parser {
} }
} }
#[cfg(not(test))]
fn fetch(url: &str) -> Result<String, Error> { fn fetch(url: &str) -> Result<String, Error> {
Ok(ureq::get(url).call()?.into_string()?)
}
#[cfg(test)]
fn fetch(url: &str) -> Result<String, Error> {
use std::{
collections::hash_map::DefaultHasher,
fs,
hash::{Hash, Hasher},
};
let mut hasher = DefaultHasher::new(); let mut hasher = DefaultHasher::new();
url.hash(&mut hasher); url.hash(&mut hasher);
let hash = format!("{:x}", hasher.finish()); let hash = format!("{:x}", hasher.finish());
let expected_filename = format!("./data/cache/par-{hash}"); let expected_filename = format!("{}par-{hash}", get_cache_dir()?);
match fs::read_to_string(&expected_filename) { match fs::read_to_string(&expected_filename) {
Ok(data) => Ok(data), Ok(data) => Ok(data),
Err(_) => { Err(_) => {
info!("Not finding url {url} in the cache, downloading..."); info!("Not finding url {url} in the cache, downloading...");
let data = ureq::get(url).call()?.into_string()?; let data = ureq::get(url).call()?.into_string()?;
let path = Path::new(&expected_filename);
if let Some(parent) = path.parent() {
// Try to create the directory (and any necessary parent directories)
fs::create_dir_all(parent).expect("Unable to create directory");
}
fs::write(expected_filename, &data).expect("Unable to write file"); fs::write(expected_filename, &data).expect("Unable to write file");
Ok(data) Ok(data)
} }