start including tracing
All checks were successful
CI/CD Pipeline / test (push) Successful in 2m11s

This commit is contained in:
philipp 2024-02-27 16:03:24 +01:00
parent 2f077a447c
commit cb55a074d7
7 changed files with 152 additions and 12 deletions

102
Cargo.lock generated
View File

@ -336,6 +336,15 @@ version = "0.4.20"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "b5e6163cb8c49088c2c36f57875e58ccd8c87c7427f7fbd50ea6710b2f3f2e8f"
[[package]]
name = "matchers"
version = "0.1.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "8263075bb86c5a1b1427b5ae862e8889656f126e9f77c484496e8b47cf5c5558"
dependencies = [
"regex-automata 0.1.10",
]
[[package]]
name = "memchr"
version = "2.7.1"
@ -363,6 +372,16 @@ dependencies = [
"windows-sys 0.48.0",
]
[[package]]
name = "nu-ansi-term"
version = "0.46.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "77a8165726e8236064dbb45459242600304b42a5ea24ee2948e18e023bf7ba84"
dependencies = [
"overload",
"winapi",
]
[[package]]
name = "num-conv"
version = "0.1.0"
@ -381,6 +400,12 @@ version = "0.2.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "04744f49eae99ab78e0d5c0b603ab218f515ea8cfe5a456d7629ad883a3b6e7d"
[[package]]
name = "overload"
version = "0.1.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "b15813163c1d831bf4a13c3610c05c0d03b39feb07f7e09fa234dac9b15aaf39"
[[package]]
name = "parking_lot"
version = "0.12.1"
@ -478,8 +503,17 @@ checksum = "b62dbe01f0b06f9d8dc7d49e05a0785f153b00b2c227856282f671e0318c9b15"
dependencies = [
"aho-corasick",
"memchr",
"regex-automata",
"regex-syntax",
"regex-automata 0.4.5",
"regex-syntax 0.8.2",
]
[[package]]
name = "regex-automata"
version = "0.1.10"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "6c230d73fb8d8c1b9c0b3135c5142a8acee3a0558fb8db5cf1cb65f8d7862132"
dependencies = [
"regex-syntax 0.6.29",
]
[[package]]
@ -490,9 +524,15 @@ checksum = "5bb987efffd3c6d0d8f5f89510bb458559eab11e4f869acb20bf845e016259cd"
dependencies = [
"aho-corasick",
"memchr",
"regex-syntax",
"regex-syntax 0.8.2",
]
[[package]]
name = "regex-syntax"
version = "0.6.29"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "f162c6dd7b008981e4d40210aca20b4bd0f9b60ca9271061b07f78537722f2e1"
[[package]]
name = "regex-syntax"
version = "0.8.2"
@ -529,6 +569,7 @@ dependencies = [
"toml",
"tqdm",
"tracing",
"tracing-subscriber",
"ureq",
]
@ -621,6 +662,15 @@ dependencies = [
"serde",
]
[[package]]
name = "sharded-slab"
version = "0.1.7"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "f40ca3c46823713e0d4209592e8d6e826aa57e928f09752619fc696c499637f6"
dependencies = [
"lazy_static",
]
[[package]]
name = "signal-hook"
version = "0.3.17"
@ -706,6 +756,16 @@ dependencies = [
"syn",
]
[[package]]
name = "thread_local"
version = "1.1.8"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "8b9ef9bad013ada3808854ceac7b46812a6465ba368859a37e2100283d2d719c"
dependencies = [
"cfg-if",
"once_cell",
]
[[package]]
name = "time"
version = "0.3.34"
@ -826,6 +886,36 @@ source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "c06d3da6113f116aaee68e4d601191614c9053067f9ab7f6edbcb161237daa54"
dependencies = [
"once_cell",
"valuable",
]
[[package]]
name = "tracing-log"
version = "0.2.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "ee855f1f400bd0e5c02d150ae5de3840039a3f54b025156404e34c23c03f47c3"
dependencies = [
"log",
"once_cell",
"tracing-core",
]
[[package]]
name = "tracing-subscriber"
version = "0.3.18"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "ad0f048c97dbd9faa9b7df56362b8ebcaa52adb06b498c050d2f4e32f90a7a8b"
dependencies = [
"matchers",
"nu-ansi-term",
"once_cell",
"regex",
"sharded-slab",
"smallvec",
"thread_local",
"tracing",
"tracing-core",
"tracing-log",
]
[[package]]
@ -889,6 +979,12 @@ version = "0.2.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "711b9620af191e0cdc7468a8d14e709c3dcdb115b36f838e601583af800a370a"
[[package]]
name = "valuable"
version = "0.1.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "830b7e5d4d90034032940e4ace0d9a9a057e7a45cd94e6c007832e39edb82f6d"
[[package]]
name = "wasi"
version = "0.11.0+wasi-snapshot-preview1"

View File

@ -17,6 +17,7 @@ clap = { version = "4.5.0", features = ["derive"] }
directories = "5.0"
regex = "1.10"
tracing = "0.1"
tracing-subscriber = { version = "0.3", features = ["env-filter"] }
[dev-dependencies]
pretty_assertions = "1.4"

View File

@ -24,6 +24,7 @@ use serde::Deserialize;
use std::fs;
use std::path::Path;
use std::sync::Arc;
use tracing::{event, instrument, Level};
// TODO: more generic
fn create_classifier(match_function: &str) -> Result<ClassifierApplicable, Error> {
@ -55,6 +56,7 @@ pub struct Config {
}
impl Config {
#[instrument(level = "trace", skip(path))]
/// Loads a configuration from a specified path and constructs a `law::Builder` and `Parser` based on it.
///
/// This function reads a configuration file from the given path, expecting it to be in TOML format. It then
@ -92,12 +94,14 @@ impl Config {
///
/// assert_eq!(law_id, 10001622);
/// ```
pub fn load<P: AsRef<Path>>(path: P) -> Result<(usize, law::Builder, Parser), Error> {
pub fn load<P: AsRef<Path> + std::fmt::Debug>(
path: P,
) -> Result<(usize, law::Builder, Parser), Error> {
let config_str = fs::read_to_string(path)?;
let config: Config = toml::from_str(&config_str)?;
let mut builder = law::Builder::new(config.law.name);
for classifier in config.law.classifiers {
for classifier in &config.law.classifiers {
let to_add = law::Classifier::new(
&classifier.name,
create_classifier(&classifier.match_function)?,
@ -108,18 +112,37 @@ impl Config {
builder.add_classifier(to_add);
}
}
event!(
Level::INFO,
"Added {} classifiers from config",
&config.law.classifiers.len()
);
let mut parser = Parser::new();
for to_remove in config.parser.remove_strings {
parser.add_string_to_remove(&to_remove);
for to_remove in &config.parser.remove_strings {
parser.add_string_to_remove(to_remove);
}
event!(
Level::INFO,
"Added {} strings to remove",
&config.parser.remove_strings.len()
);
for to_replace in config.parser.replace_rules {
for to_replace in &config.parser.replace_rules {
parser.add_string_to_replace(&to_replace.find, &to_replace.replace_with);
}
event!(
Level::INFO,
"Added {} strings to replace",
&config.parser.replace_rules.len()
);
if config.parser.move_para_headers_into_content {
event!(
Level::WARN,
"Move para headers into content. Make sure you know what you do!"
);
parser.move_para_headers_into_content();
}
Ok((config.law.id, builder, parser))

View File

@ -24,6 +24,7 @@ use std::{
rc::Rc,
sync::Arc,
};
use tracing::instrument;
use crate::{config::Config, misc::Error, overview::parse};
@ -37,6 +38,7 @@ pub struct Law {
}
impl Law {
#[instrument]
/// Creates a `Law` instance from a configuration file.
///
/// This function initializes the law processing pipeline by loading configurations from the
@ -71,7 +73,9 @@ impl Law {
///
/// let law = Law::from_config("./data/configs/abgb.toml").unwrap();
/// ```
pub fn from_config<P: AsRef<Path>>(path: P) -> Result<Law, Error> {
pub fn from_config<P: AsRef<Path> + tracing::Value + std::fmt::Debug>(
path: P,
) -> Result<Law, Error> {
let (law_id, mut builder, parser) = Config::load(path)?;
let pars = parse(law_id)?;

View File

@ -22,6 +22,8 @@ use risp::{
law::{responsible::always_true, Classifier, Law},
Config,
};
use tracing_subscriber::filter::EnvFilter;
use tracing_subscriber::fmt;
#[derive(Parser, Debug)]
#[command(version, about, long_about = None)]
@ -39,8 +41,12 @@ struct Args {
#[arg(long)]
clear_cache: bool,
}
use tracing_subscriber::prelude::*;
fn main() {
tracing_subscriber::registry()
.with(fmt::layer())
.with(EnvFilter::from_default_env())
.init();
let args = Args::parse();
if args.clear_cache {

View File

@ -21,11 +21,13 @@ mod ris_structure;
use std::path::Path;
use serde::Deserialize;
use tracing::{event, instrument, Level};
use crate::misc::{current_date, get_cache_dir, Error};
use ris_structure::OgdSearchResult;
#[instrument(level = "trace")]
/// Parses a law text from the Austrian RIS (Rechtsinformationssystem) based on the given `law_id`.
///
/// This function iterates over all pages of the law text, with each page containing a maximum of 100
@ -67,6 +69,7 @@ pub fn parse(law_id: usize) -> Result<Vec<String>, Error> {
let mut ret = Vec::new();
loop {
//info!("=== Fetching overview page #{page} ===");
event!(Level::INFO, "Fetching over page #{page}");
let json = fetch_page(law_id, page)?;
let (cont, nodes) = parse_from_str(&json, skip)?;
for n in nodes {
@ -113,9 +116,16 @@ fn fetch_page(overview_id: usize, page: usize) -> Result<String, Error> {
let expected_filename = format!("{}law-{overview_id}-{page}", get_cache_dir()?);
if let Ok(data) = fs::read_to_string(&expected_filename) {
event!(
Level::DEBUG,
"Using cached version of law_id {overview_id} (page {page})"
);
Ok(data)
} else {
//info!("Not finding law_id {overview_id} (page {page}) in the cache, downloading...");
event!(
Level::INFO,
"Not finding law_id {overview_id} (page {page}) in the cache, downloading..."
);
let data = ureq::post("https://data.bka.gv.at/ris/api/v2.6/Bundesrecht")
.send_form(&[
("Applikation", "BrKons"),

View File

@ -76,7 +76,7 @@ impl<'a> Expect<'a> {
/// Checks that no further node follows.
///
/// Does nothing when `next` is `None`. When a node is present, it is converted
/// into an `Expect` so that the offending element can be named in the message.
///
/// # Panics
///
/// Panics with "Expected no more elements, got ..." if `next` is `Some`.
fn empty(next: Option<Node<'_, '_>>) {
    if let Some(n) = next {
        let expect = Expect::from(&n);
        // Unconditional failure: `panic!` states the intent directly, whereas
        // `assert!(false, ..)` only obscures it and left this line unreachable.
        panic!("Expected no more elements, got {expect}");
    }
}
}