start including tracing
All checks were successful
CI/CD Pipeline / test (push) Successful in 2m11s

This commit is contained in:
2024-02-27 16:03:24 +01:00
parent 2f077a447c
commit cb55a074d7
7 changed files with 152 additions and 12 deletions

View File

@ -24,6 +24,7 @@ use serde::Deserialize;
use std::fs;
use std::path::Path;
use std::sync::Arc;
use tracing::{event, instrument, Level};
// TODO: more generic
fn create_classifier(match_function: &str) -> Result<ClassifierApplicable, Error> {
@ -55,6 +56,7 @@ pub struct Config {
}
impl Config {
#[instrument(level = "trace", skip(path))]
/// Loads a configuration from a specified path and constructs a `law::Builder` and `Parser` based on it.
///
/// This function reads a configuration file from the given path, expecting it to be in TOML format. It then
@ -92,12 +94,14 @@ impl Config {
///
/// assert_eq!(law_id, 10001622);
/// ```
pub fn load<P: AsRef<Path>>(path: P) -> Result<(usize, law::Builder, Parser), Error> {
pub fn load<P: AsRef<Path> + std::fmt::Debug>(
path: P,
) -> Result<(usize, law::Builder, Parser), Error> {
let config_str = fs::read_to_string(path)?;
let config: Config = toml::from_str(&config_str)?;
let mut builder = law::Builder::new(config.law.name);
for classifier in config.law.classifiers {
for classifier in &config.law.classifiers {
let to_add = law::Classifier::new(
&classifier.name,
create_classifier(&classifier.match_function)?,
@ -108,18 +112,37 @@ impl Config {
builder.add_classifier(to_add);
}
}
event!(
Level::INFO,
"Added {} classifiers from config",
&config.law.classifiers.len()
);
let mut parser = Parser::new();
for to_remove in config.parser.remove_strings {
parser.add_string_to_remove(&to_remove);
for to_remove in &config.parser.remove_strings {
parser.add_string_to_remove(to_remove);
}
event!(
Level::INFO,
"Added {} strings to remove",
&config.parser.remove_strings.len()
);
for to_replace in config.parser.replace_rules {
for to_replace in &config.parser.replace_rules {
parser.add_string_to_replace(&to_replace.find, &to_replace.replace_with);
}
event!(
Level::INFO,
"Added {} strings to replace",
&config.parser.replace_rules.len()
);
if config.parser.move_para_headers_into_content {
event!(
Level::WARN,
"Move para headers into content. Make sure you know what you do!"
);
parser.move_para_headers_into_content();
}
Ok((config.law.id, builder, parser))

View File

@ -24,6 +24,7 @@ use std::{
rc::Rc,
sync::Arc,
};
use tracing::instrument;
use crate::{config::Config, misc::Error, overview::parse};
@ -37,6 +38,7 @@ pub struct Law {
}
impl Law {
#[instrument]
/// Creates a `Law` instance from a configuration file.
///
/// This function initializes the law processing pipeline by loading configurations from the
@ -71,7 +73,9 @@ impl Law {
///
/// let law = Law::from_config("./data/configs/abgb.toml").unwrap();
/// ```
pub fn from_config<P: AsRef<Path>>(path: P) -> Result<Law, Error> {
pub fn from_config<P: AsRef<Path> + tracing::Value + std::fmt::Debug>(
path: P,
) -> Result<Law, Error> {
let (law_id, mut builder, parser) = Config::load(path)?;
let pars = parse(law_id)?;

View File

@ -22,6 +22,8 @@ use risp::{
law::{responsible::always_true, Classifier, Law},
Config,
};
use tracing_subscriber::filter::EnvFilter;
use tracing_subscriber::fmt;
#[derive(Parser, Debug)]
#[command(version, about, long_about = None)]
@ -39,8 +41,12 @@ struct Args {
#[arg(long)]
clear_cache: bool,
}
use tracing_subscriber::prelude::*;
fn main() {
tracing_subscriber::registry()
.with(fmt::layer())
.with(EnvFilter::from_default_env())
.init();
let args = Args::parse();
if args.clear_cache {

View File

@ -21,11 +21,13 @@ mod ris_structure;
use std::path::Path;
use serde::Deserialize;
use tracing::{event, instrument, Level};
use crate::misc::{current_date, get_cache_dir, Error};
use ris_structure::OgdSearchResult;
#[instrument(level = "trace")]
/// Parses a law text from the Austrian RIS (Rechtsinformationssystem) based on the given `law_id`.
///
/// This function iterates over all pages of the law text, with each page containing a maximum of 100
@ -67,6 +69,7 @@ pub fn parse(law_id: usize) -> Result<Vec<String>, Error> {
let mut ret = Vec::new();
loop {
//info!("=== Fetching overview page #{page} ===");
event!(Level::INFO, "Fetching over page #{page}");
let json = fetch_page(law_id, page)?;
let (cont, nodes) = parse_from_str(&json, skip)?;
for n in nodes {
@ -113,9 +116,16 @@ fn fetch_page(overview_id: usize, page: usize) -> Result<String, Error> {
let expected_filename = format!("{}law-{overview_id}-{page}", get_cache_dir()?);
if let Ok(data) = fs::read_to_string(&expected_filename) {
event!(
Level::DEBUG,
"Using cached version of law_id {overview_id} (page {page})"
);
Ok(data)
} else {
//info!("Not finding law_id {overview_id} (page {page}) in the cache, downloading...");
event!(
Level::INFO,
"Not finding law_id {overview_id} (page {page}) in the cache, downloading..."
);
let data = ureq::post("https://data.bka.gv.at/ris/api/v2.6/Bundesrecht")
.send_form(&[
("Applikation", "BrKons"),

View File

@ -76,7 +76,7 @@ impl<'a> Expect<'a> {
/// Checks that there is no further element.
///
/// Does nothing when `next` is `None`.
///
/// # Panics
///
/// Panics with `Expected no more elements, got {expect}` when `next` is
/// `Some`, where `expect` is the `Expect` built from the unexpected node.
fn empty(next: Option<Node<'_, '_>>) {
    if let Some(n) = next {
        let expect = Expect::from(&n);
        // An unconditional failure is expressed with `panic!` directly rather
        // than by asserting on the constant `false`.
        panic!("Expected no more elements, got {expect}");
    }
}
}