diff --git a/Cargo.lock b/Cargo.lock index dd85b23..71f9ce6 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -336,6 +336,15 @@ version = "0.4.20" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "b5e6163cb8c49088c2c36f57875e58ccd8c87c7427f7fbd50ea6710b2f3f2e8f" +[[package]] +name = "matchers" +version = "0.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8263075bb86c5a1b1427b5ae862e8889656f126e9f77c484496e8b47cf5c5558" +dependencies = [ + "regex-automata 0.1.10", +] + [[package]] name = "memchr" version = "2.7.1" @@ -363,6 +372,16 @@ dependencies = [ "windows-sys 0.48.0", ] +[[package]] +name = "nu-ansi-term" +version = "0.46.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "77a8165726e8236064dbb45459242600304b42a5ea24ee2948e18e023bf7ba84" +dependencies = [ + "overload", + "winapi", +] + [[package]] name = "num-conv" version = "0.1.0" @@ -381,6 +400,12 @@ version = "0.2.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "04744f49eae99ab78e0d5c0b603ab218f515ea8cfe5a456d7629ad883a3b6e7d" +[[package]] +name = "overload" +version = "0.1.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b15813163c1d831bf4a13c3610c05c0d03b39feb07f7e09fa234dac9b15aaf39" + [[package]] name = "parking_lot" version = "0.12.1" @@ -478,8 +503,17 @@ checksum = "b62dbe01f0b06f9d8dc7d49e05a0785f153b00b2c227856282f671e0318c9b15" dependencies = [ "aho-corasick", "memchr", - "regex-automata", - "regex-syntax", + "regex-automata 0.4.5", + "regex-syntax 0.8.2", +] + +[[package]] +name = "regex-automata" +version = "0.1.10" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6c230d73fb8d8c1b9c0b3135c5142a8acee3a0558fb8db5cf1cb65f8d7862132" +dependencies = [ + "regex-syntax 0.6.29", ] [[package]] @@ -490,9 +524,15 @@ checksum = "5bb987efffd3c6d0d8f5f89510bb458559eab11e4f869acb20bf845e016259cd" dependencies = [ "aho-corasick", "memchr", - "regex-syntax", + "regex-syntax 0.8.2", ] +[[package]] +name = "regex-syntax" +version = "0.6.29" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f162c6dd7b008981e4d40210aca20b4bd0f9b60ca9271061b07f78537722f2e1" + [[package]] name = "regex-syntax" version = "0.8.2" @@ -529,6 +569,7 @@ dependencies = [ "toml", "tqdm", "tracing", + "tracing-subscriber", "ureq", ] @@ -621,6 +662,15 @@ dependencies = [ "serde", ] +[[package]] +name = "sharded-slab" +version = "0.1.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f40ca3c46823713e0d4209592e8d6e826aa57e928f09752619fc696c499637f6" +dependencies = [ + "lazy_static", +] + [[package]] name = "signal-hook" version = "0.3.17" @@ -706,6 +756,16 @@ dependencies = [ "syn", ] +[[package]] +name = "thread_local" +version = "1.1.8" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8b9ef9bad013ada3808854ceac7b46812a6465ba368859a37e2100283d2d719c" +dependencies = [ + "cfg-if", + "once_cell", +] + [[package]] name = "time" version = "0.3.34" @@ -826,6 +886,36 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "c06d3da6113f116aaee68e4d601191614c9053067f9ab7f6edbcb161237daa54" dependencies = [ "once_cell", + "valuable", +] + +[[package]] +name = "tracing-log" +version = "0.2.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ee855f1f400bd0e5c02d150ae5de3840039a3f54b025156404e34c23c03f47c3" +dependencies = [ + "log", + "once_cell", + "tracing-core", +] + +[[package]] +name = "tracing-subscriber" +version = "0.3.18" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ad0f048c97dbd9faa9b7df56362b8ebcaa52adb06b498c050d2f4e32f90a7a8b" +dependencies = [ + "matchers", + "nu-ansi-term", + "once_cell", + "regex", + "sharded-slab", + "smallvec", + "thread_local", + "tracing", + "tracing-core", + "tracing-log", ] [[package]] @@ -889,6 +979,12 @@ version = "0.2.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "711b9620af191e0cdc7468a8d14e709c3dcdb115b36f838e601583af800a370a" +[[package]] +name = "valuable" +version = "0.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "830b7e5d4d90034032940e4ace0d9a9a057e7a45cd94e6c007832e39edb82f6d" + [[package]] name = "wasi" version = "0.11.0+wasi-snapshot-preview1" diff --git a/Cargo.toml b/Cargo.toml index 7f58dd5..1a8852a 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -17,6 +17,7 @@ clap = { version = "4.5.0", features = ["derive"] } directories = "5.0" regex = "1.10" tracing = "0.1" +tracing-subscriber = { version = "0.3", features = ["env-filter"] } [dev-dependencies] pretty_assertions = "1.4" diff --git a/src/config.rs b/src/config.rs index 352811f..7826958 100644 --- a/src/config.rs +++ b/src/config.rs @@ -24,6 +24,7 @@ use serde::Deserialize; use std::fs; use std::path::Path; use std::sync::Arc; +use tracing::{event, instrument, Level}; // TODO: more generic fn create_classifier(match_function: &str) -> Result { @@ -55,6 +56,7 @@ pub struct Config { } impl Config { + #[instrument(level = "trace", skip(path))] /// Loads a configuration from a specified path and constructs a `LawBuilder` and `Parser` based on it. /// /// This function reads a configuration file from the given path, expecting it to be in TOML format. It then @@ -92,12 +94,14 @@ impl Config { /// /// assert_eq!(law_id, 10001622); /// ``` - pub fn load>(path: P) -> Result<(usize, law::Builder, Parser), Error> { + pub fn load + std::fmt::Debug>( + path: P, + ) -> Result<(usize, law::Builder, Parser), Error> { let config_str = fs::read_to_string(path)?; let config: Config = toml::from_str(&config_str)?; let mut builder = law::Builder::new(config.law.name); - for classifier in config.law.classifiers { + for classifier in &config.law.classifiers { let to_add = law::Classifier::new( &classifier.name, create_classifier(&classifier.match_function)?, @@ -108,18 +112,37 @@ impl Config { builder.add_classifier(to_add); } } + event!( + Level::INFO, + "Added {} classifiers from config", + &config.law.classifiers.len() + ); let mut parser = Parser::new(); - for to_remove in config.parser.remove_strings { - parser.add_string_to_remove(&to_remove); + for to_remove in &config.parser.remove_strings { + parser.add_string_to_remove(to_remove); } + event!( + Level::INFO, + "Added {} strings to remove", + &config.parser.remove_strings.len() + ); - for to_replace in config.parser.replace_rules { + for to_replace in &config.parser.replace_rules { parser.add_string_to_replace(&to_replace.find, &to_replace.replace_with); } + event!( + Level::INFO, + "Added {} strings to replace", + &config.parser.replace_rules.len() + ); if config.parser.move_para_headers_into_content { + event!( + Level::WARN, + "Move para headers into content. Make sure you know what you do!" + ); parser.move_para_headers_into_content(); } Ok((config.law.id, builder, parser)) diff --git a/src/law/mod.rs b/src/law/mod.rs index b0da977..a90035d 100644 --- a/src/law/mod.rs +++ b/src/law/mod.rs @@ -24,6 +24,7 @@ use std::{ rc::Rc, sync::Arc, }; +use tracing::instrument; use crate::{config::Config, misc::Error, overview::parse}; @@ -37,6 +38,7 @@ pub struct Law { } impl Law { + #[instrument] /// Creates a `Law` instance from a configuration file. /// /// This function initializes the law processing pipeline by loading configurations from the @@ -71,7 +73,9 @@ impl Law { /// /// let law = Law::from_config("./data/configs/abgb.toml").unwrap(); /// ``` - pub fn from_config>(path: P) -> Result { + pub fn from_config + tracing::Value + std::fmt::Debug>( + path: P, + ) -> Result { let (law_id, mut builder, parser) = Config::load(path)?; let pars = parse(law_id)?; diff --git a/src/main.rs b/src/main.rs index 8e8f556..6f8815d 100644 --- a/src/main.rs +++ b/src/main.rs @@ -22,6 +22,8 @@ use risp::{ law::{responsible::always_true, Classifier, Law}, Config, }; +use tracing_subscriber::filter::EnvFilter; +use tracing_subscriber::fmt; #[derive(Parser, Debug)] #[command(version, about, long_about = None)] @@ -39,8 +41,12 @@ struct Args { #[arg(long)] clear_cache: bool, } - +use tracing_subscriber::prelude::*; fn main() { + tracing_subscriber::registry() + .with(fmt::layer()) + .with(EnvFilter::from_default_env()) + .init(); let args = Args::parse(); if args.clear_cache { diff --git a/src/overview/mod.rs b/src/overview/mod.rs index eec9f66..98259fb 100644 --- a/src/overview/mod.rs +++ b/src/overview/mod.rs @@ -21,11 +21,13 @@ mod ris_structure; use std::path::Path; use serde::Deserialize; +use tracing::{event, instrument, Level}; use crate::misc::{current_date, get_cache_dir, Error}; use ris_structure::OgdSearchResult; +#[instrument(level = "trace")] /// Parses a law text from the Austrian RIS (Rechtsinformationssystem) based on the given `law_id`. /// /// This function iterates over all pages of the law text, with each page containing a maximum of 100 @@ -67,6 +69,7 @@ pub fn parse(law_id: usize) -> Result, Error> { let mut ret = Vec::new(); loop { //info!("=== Fetching overview page #{page} ==="); + event!(Level::INFO, "Fetching over page #{page}"); let json = fetch_page(law_id, page)?; let (cont, nodes) = parse_from_str(&json, skip)?; for n in nodes { @@ -113,9 +116,16 @@ fn fetch_page(overview_id: usize, page: usize) -> Result { let expected_filename = format!("{}law-{overview_id}-{page}", get_cache_dir()?); if let Ok(data) = fs::read_to_string(&expected_filename) { + event!( + Level::DEBUG, + "Using cached version of law_id {overview_id} (page {page})" + ); Ok(data) } else { - //info!("Not finding law_id {overview_id} (page {page}) in the cache, downloading..."); + event!( + Level::INFO, + "Not finding law_id {overview_id} (page {page}) in the cache, downloading..." + ); let data = ureq::post("https://data.bka.gv.at/ris/api/v2.6/Bundesrecht") .send_form(&[ ("Applikation", "BrKons"), diff --git a/src/paragraph/parser/mod.rs b/src/paragraph/parser/mod.rs index aa6326e..45e98c0 100644 --- a/src/paragraph/parser/mod.rs +++ b/src/paragraph/parser/mod.rs @@ -76,7 +76,7 @@ impl<'a> Expect<'a> { fn empty(next: Option>) { if let Some(n) = next { let expect = Expect::from(&n); - assert!(false, "Expected no more elements, got {expect}"); + panic!("Expected no more elements, got {expect}"); } } }