This commit is contained in:
@ -24,6 +24,7 @@ use serde::Deserialize;
|
||||
use std::fs;
|
||||
use std::path::Path;
|
||||
use std::sync::Arc;
|
||||
use tracing::{event, instrument, Level};
|
||||
|
||||
// TODO: more generic
|
||||
fn create_classifier(match_function: &str) -> Result<ClassifierApplicable, Error> {
|
||||
@ -55,6 +56,7 @@ pub struct Config {
|
||||
}
|
||||
|
||||
impl Config {
|
||||
#[instrument(level = "trace", skip(path))]
|
||||
/// Loads a configuration from a specified path and constructs a `LawBuilder` and `Parser` based on it.
|
||||
///
|
||||
/// This function reads a configuration file from the given path, expecting it to be in TOML format. It then
|
||||
@ -92,12 +94,14 @@ impl Config {
|
||||
///
|
||||
/// assert_eq!(law_id, 10001622);
|
||||
/// ```
|
||||
pub fn load<P: AsRef<Path>>(path: P) -> Result<(usize, law::Builder, Parser), Error> {
|
||||
pub fn load<P: AsRef<Path> + std::fmt::Debug>(
|
||||
path: P,
|
||||
) -> Result<(usize, law::Builder, Parser), Error> {
|
||||
let config_str = fs::read_to_string(path)?;
|
||||
let config: Config = toml::from_str(&config_str)?;
|
||||
|
||||
let mut builder = law::Builder::new(config.law.name);
|
||||
for classifier in config.law.classifiers {
|
||||
for classifier in &config.law.classifiers {
|
||||
let to_add = law::Classifier::new(
|
||||
&classifier.name,
|
||||
create_classifier(&classifier.match_function)?,
|
||||
@ -108,18 +112,37 @@ impl Config {
|
||||
builder.add_classifier(to_add);
|
||||
}
|
||||
}
|
||||
event!(
|
||||
Level::INFO,
|
||||
"Added {} classifiers from config",
|
||||
&config.law.classifiers.len()
|
||||
);
|
||||
|
||||
let mut parser = Parser::new();
|
||||
|
||||
for to_remove in config.parser.remove_strings {
|
||||
parser.add_string_to_remove(&to_remove);
|
||||
for to_remove in &config.parser.remove_strings {
|
||||
parser.add_string_to_remove(to_remove);
|
||||
}
|
||||
event!(
|
||||
Level::INFO,
|
||||
"Added {} strings to remove",
|
||||
&config.parser.remove_strings.len()
|
||||
);
|
||||
|
||||
for to_replace in config.parser.replace_rules {
|
||||
for to_replace in &config.parser.replace_rules {
|
||||
parser.add_string_to_replace(&to_replace.find, &to_replace.replace_with);
|
||||
}
|
||||
event!(
|
||||
Level::INFO,
|
||||
"Added {} strings to replace",
|
||||
&config.parser.replace_rules.len()
|
||||
);
|
||||
|
||||
if config.parser.move_para_headers_into_content {
|
||||
event!(
|
||||
Level::WARN,
|
||||
"Move para headers into content. Make sure you know what you do!"
|
||||
);
|
||||
parser.move_para_headers_into_content();
|
||||
}
|
||||
Ok((config.law.id, builder, parser))
|
||||
|
@ -24,6 +24,7 @@ use std::{
|
||||
rc::Rc,
|
||||
sync::Arc,
|
||||
};
|
||||
use tracing::instrument;
|
||||
|
||||
use crate::{config::Config, misc::Error, overview::parse};
|
||||
|
||||
@ -37,6 +38,7 @@ pub struct Law {
|
||||
}
|
||||
|
||||
impl Law {
|
||||
#[instrument]
|
||||
/// Creates a `Law` instance from a configuration file.
|
||||
///
|
||||
/// This function initializes the law processing pipeline by loading configurations from the
|
||||
@ -71,7 +73,9 @@ impl Law {
|
||||
///
|
||||
/// let law = Law::from_config("./data/configs/abgb.toml").unwrap();
|
||||
/// ```
|
||||
pub fn from_config<P: AsRef<Path>>(path: P) -> Result<Law, Error> {
|
||||
pub fn from_config<P: AsRef<Path> + tracing::Value + std::fmt::Debug>(
|
||||
path: P,
|
||||
) -> Result<Law, Error> {
|
||||
let (law_id, mut builder, parser) = Config::load(path)?;
|
||||
let pars = parse(law_id)?;
|
||||
|
||||
|
@ -22,6 +22,8 @@ use risp::{
|
||||
law::{responsible::always_true, Classifier, Law},
|
||||
Config,
|
||||
};
|
||||
use tracing_subscriber::filter::EnvFilter;
|
||||
use tracing_subscriber::fmt;
|
||||
|
||||
#[derive(Parser, Debug)]
|
||||
#[command(version, about, long_about = None)]
|
||||
@ -39,8 +41,12 @@ struct Args {
|
||||
#[arg(long)]
|
||||
clear_cache: bool,
|
||||
}
|
||||
|
||||
use tracing_subscriber::prelude::*;
|
||||
fn main() {
|
||||
tracing_subscriber::registry()
|
||||
.with(fmt::layer())
|
||||
.with(EnvFilter::from_default_env())
|
||||
.init();
|
||||
let args = Args::parse();
|
||||
|
||||
if args.clear_cache {
|
||||
|
@ -21,11 +21,13 @@ mod ris_structure;
|
||||
use std::path::Path;
|
||||
|
||||
use serde::Deserialize;
|
||||
use tracing::{event, instrument, Level};
|
||||
|
||||
use crate::misc::{current_date, get_cache_dir, Error};
|
||||
|
||||
use ris_structure::OgdSearchResult;
|
||||
|
||||
#[instrument(level = "trace")]
|
||||
/// Parses a law text from the Austrian RIS (Rechtsinformationssystem) based on the given `law_id`.
|
||||
///
|
||||
/// This function iterates over all pages of the law text, with each page containing a maximum of 100
|
||||
@ -67,6 +69,7 @@ pub fn parse(law_id: usize) -> Result<Vec<String>, Error> {
|
||||
let mut ret = Vec::new();
|
||||
loop {
|
||||
//info!("=== Fetching overview page #{page} ===");
|
||||
event!(Level::INFO, "Fetching over page #{page}");
|
||||
let json = fetch_page(law_id, page)?;
|
||||
let (cont, nodes) = parse_from_str(&json, skip)?;
|
||||
for n in nodes {
|
||||
@ -113,9 +116,16 @@ fn fetch_page(overview_id: usize, page: usize) -> Result<String, Error> {
|
||||
|
||||
let expected_filename = format!("{}law-{overview_id}-{page}", get_cache_dir()?);
|
||||
if let Ok(data) = fs::read_to_string(&expected_filename) {
|
||||
event!(
|
||||
Level::DEBUG,
|
||||
"Using cached version of law_id {overview_id} (page {page})"
|
||||
);
|
||||
Ok(data)
|
||||
} else {
|
||||
//info!("Not finding law_id {overview_id} (page {page}) in the cache, downloading...");
|
||||
event!(
|
||||
Level::INFO,
|
||||
"Not finding law_id {overview_id} (page {page}) in the cache, downloading..."
|
||||
);
|
||||
let data = ureq::post("https://data.bka.gv.at/ris/api/v2.6/Bundesrecht")
|
||||
.send_form(&[
|
||||
("Applikation", "BrKons"),
|
||||
|
@ -76,7 +76,7 @@ impl<'a> Expect<'a> {
|
||||
fn empty(next: Option<Node<'_, '_>>) {
|
||||
if let Some(n) = next {
|
||||
let expect = Expect::from(&n);
|
||||
assert!(false, "Expected no more elements, got {expect}");
|
||||
panic!("Expected no more elements, got {expect}");
|
||||
}
|
||||
}
|
||||
}
|
||||
|
Reference in New Issue
Block a user