remove log crate (in favor of tracing)
All checks were successful
CI/CD Pipeline / test (push) Successful in 2m9s
All checks were successful
CI/CD Pipeline / test (push) Successful in 2m9s
This commit is contained in:
parent
b4d464506c
commit
2f077a447c
69
Cargo.lock
generated
69
Cargo.lock
generated
@ -223,29 +223,6 @@ dependencies = [
|
|||||||
"windows-sys 0.48.0",
|
"windows-sys 0.48.0",
|
||||||
]
|
]
|
||||||
|
|
||||||
[[package]]
|
|
||||||
name = "env_filter"
|
|
||||||
version = "0.1.0"
|
|
||||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
|
||||||
checksum = "a009aa4810eb158359dda09d0c87378e4bbb89b5a801f016885a4707ba24f7ea"
|
|
||||||
dependencies = [
|
|
||||||
"log",
|
|
||||||
"regex",
|
|
||||||
]
|
|
||||||
|
|
||||||
[[package]]
|
|
||||||
name = "env_logger"
|
|
||||||
version = "0.11.2"
|
|
||||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
|
||||||
checksum = "6c012a26a7f605efc424dd53697843a72be7dc86ad2d01f7814337794a12231d"
|
|
||||||
dependencies = [
|
|
||||||
"anstream",
|
|
||||||
"anstyle",
|
|
||||||
"env_filter",
|
|
||||||
"humantime",
|
|
||||||
"log",
|
|
||||||
]
|
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "equivalent"
|
name = "equivalent"
|
||||||
version = "1.0.1"
|
version = "1.0.1"
|
||||||
@ -294,12 +271,6 @@ version = "0.4.1"
|
|||||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
checksum = "95505c38b4572b2d910cecb0281560f54b440a19336cbbcb27bf6ce6adc6f5a8"
|
checksum = "95505c38b4572b2d910cecb0281560f54b440a19336cbbcb27bf6ce6adc6f5a8"
|
||||||
|
|
||||||
[[package]]
|
|
||||||
name = "humantime"
|
|
||||||
version = "2.1.0"
|
|
||||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
|
||||||
checksum = "9a3a5bfb195931eeb336b2a7b4d761daec841b97f947d34394601737a7bba5e4"
|
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "idna"
|
name = "idna"
|
||||||
version = "0.5.0"
|
version = "0.5.0"
|
||||||
@ -439,6 +410,12 @@ version = "2.3.1"
|
|||||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
checksum = "e3148f5046208a5d56bcfc03053e3ca6334e51da8dfb19b6cdc8b306fae3283e"
|
checksum = "e3148f5046208a5d56bcfc03053e3ca6334e51da8dfb19b6cdc8b306fae3283e"
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "pin-project-lite"
|
||||||
|
version = "0.2.13"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "8afb450f006bf6385ca15ef45d71d2288452bc3683ce2e2cacc0d18e4be60b58"
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "powerfmt"
|
name = "powerfmt"
|
||||||
version = "0.2.0"
|
version = "0.2.0"
|
||||||
@ -543,8 +520,6 @@ version = "0.1.0"
|
|||||||
dependencies = [
|
dependencies = [
|
||||||
"clap",
|
"clap",
|
||||||
"directories",
|
"directories",
|
||||||
"env_logger",
|
|
||||||
"log",
|
|
||||||
"pretty_assertions",
|
"pretty_assertions",
|
||||||
"regex",
|
"regex",
|
||||||
"roxmltree",
|
"roxmltree",
|
||||||
@ -553,6 +528,7 @@ dependencies = [
|
|||||||
"time",
|
"time",
|
||||||
"toml",
|
"toml",
|
||||||
"tqdm",
|
"tqdm",
|
||||||
|
"tracing",
|
||||||
"ureq",
|
"ureq",
|
||||||
]
|
]
|
||||||
|
|
||||||
@ -821,6 +797,37 @@ dependencies = [
|
|||||||
"lazy_static",
|
"lazy_static",
|
||||||
]
|
]
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "tracing"
|
||||||
|
version = "0.1.40"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "c3523ab5a71916ccf420eebdf5521fcef02141234bbc0b8a49f2fdc4544364ef"
|
||||||
|
dependencies = [
|
||||||
|
"pin-project-lite",
|
||||||
|
"tracing-attributes",
|
||||||
|
"tracing-core",
|
||||||
|
]
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "tracing-attributes"
|
||||||
|
version = "0.1.27"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "34704c8d6ebcbc939824180af020566b01a7c01f80641264eba0999f6c2b6be7"
|
||||||
|
dependencies = [
|
||||||
|
"proc-macro2",
|
||||||
|
"quote",
|
||||||
|
"syn",
|
||||||
|
]
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "tracing-core"
|
||||||
|
version = "0.1.32"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "c06d3da6113f116aaee68e4d601191614c9053067f9ab7f6edbcb161237daa54"
|
||||||
|
dependencies = [
|
||||||
|
"once_cell",
|
||||||
|
]
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "unicode-bidi"
|
name = "unicode-bidi"
|
||||||
version = "0.3.15"
|
version = "0.3.15"
|
||||||
|
@ -11,13 +11,12 @@ time = { version = "0.3", features = [ "formatting" ] }
|
|||||||
serde = { version = "1.0", features = [ "derive" ] }
|
serde = { version = "1.0", features = [ "derive" ] }
|
||||||
serde_json = "1.0"
|
serde_json = "1.0"
|
||||||
roxmltree = "0.19"
|
roxmltree = "0.19"
|
||||||
env_logger = "0.11"
|
|
||||||
log = "0.4"
|
|
||||||
tqdm = "0.6"
|
tqdm = "0.6"
|
||||||
toml = "0.8"
|
toml = "0.8"
|
||||||
clap = { version = "4.5.0", features = ["derive"] }
|
clap = { version = "4.5.0", features = ["derive"] }
|
||||||
directories = "5.0"
|
directories = "5.0"
|
||||||
regex = "1.10"
|
regex = "1.10"
|
||||||
|
tracing = "0.1"
|
||||||
|
|
||||||
[dev-dependencies]
|
[dev-dependencies]
|
||||||
pretty_assertions = "1.4"
|
pretty_assertions = "1.4"
|
||||||
|
@ -16,7 +16,6 @@
|
|||||||
|
|
||||||
//! Represents a parsed law text.
|
//! Represents a parsed law text.
|
||||||
|
|
||||||
use log::{debug, info};
|
|
||||||
use serde::{Deserialize, Serialize};
|
use serde::{Deserialize, Serialize};
|
||||||
use std::{
|
use std::{
|
||||||
cell::RefCell,
|
cell::RefCell,
|
||||||
@ -313,14 +312,10 @@ impl Builder {
|
|||||||
#[cfg(test)]
|
#[cfg(test)]
|
||||||
self.history.push(format!("New_header: {name}"));
|
self.history.push(format!("New_header: {name}"));
|
||||||
|
|
||||||
info!("new_header={name}");
|
|
||||||
|
|
||||||
let responsible_class = self
|
let responsible_class = self
|
||||||
.responsible_classifier(name)
|
.responsible_classifier(name)
|
||||||
.unwrap_or_else(|| panic!("No classifier for '{name}'"));
|
.unwrap_or_else(|| panic!("No classifier for '{name}'"));
|
||||||
|
|
||||||
debug!("Responsible_class = {responsible_class:#?}");
|
|
||||||
|
|
||||||
let mut heading: ClassifierInstance = name.into();
|
let mut heading: ClassifierInstance = name.into();
|
||||||
|
|
||||||
if let Some(last_instance) = &self.last_instance {
|
if let Some(last_instance) = &self.last_instance {
|
||||||
@ -407,7 +402,6 @@ impl Builder {
|
|||||||
#[cfg(test)]
|
#[cfg(test)]
|
||||||
self.history.push(format!("New desc: {desc}"));
|
self.history.push(format!("New desc: {desc}"));
|
||||||
|
|
||||||
debug!("new_desc={desc}");
|
|
||||||
self.last_instance
|
self.last_instance
|
||||||
.clone()
|
.clone()
|
||||||
.expect("We can only set a description, if we already have received a classifier.")
|
.expect("We can only set a description, if we already have received a classifier.")
|
||||||
@ -476,8 +470,6 @@ impl Builder {
|
|||||||
serde_json::to_string(&content).unwrap()
|
serde_json::to_string(&content).unwrap()
|
||||||
));
|
));
|
||||||
|
|
||||||
debug!("new_par=par:{par};content:{content:#?}");
|
|
||||||
|
|
||||||
let par_header = self.next_para_header.clone();
|
let par_header = self.next_para_header.clone();
|
||||||
self.next_para_header = None;
|
self.next_para_header = None;
|
||||||
|
|
||||||
@ -505,7 +497,6 @@ impl Builder {
|
|||||||
self.new_header(&next_para_header.clone()); // promote to bigger header :-)
|
self.new_header(&next_para_header.clone()); // promote to bigger header :-)
|
||||||
}
|
}
|
||||||
|
|
||||||
debug!("new_next_para_header={header}");
|
|
||||||
self.next_para_header = Some(header.trim().into());
|
self.next_para_header = Some(header.trim().into());
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -41,8 +41,6 @@ struct Args {
|
|||||||
}
|
}
|
||||||
|
|
||||||
fn main() {
|
fn main() {
|
||||||
env_logger::init();
|
|
||||||
|
|
||||||
let args = Args::parse();
|
let args = Args::parse();
|
||||||
|
|
||||||
if args.clear_cache {
|
if args.clear_cache {
|
||||||
|
@ -20,7 +20,6 @@ mod ris_structure;
|
|||||||
|
|
||||||
use std::path::Path;
|
use std::path::Path;
|
||||||
|
|
||||||
use log::info;
|
|
||||||
use serde::Deserialize;
|
use serde::Deserialize;
|
||||||
|
|
||||||
use crate::misc::{current_date, get_cache_dir, Error};
|
use crate::misc::{current_date, get_cache_dir, Error};
|
||||||
@ -67,7 +66,7 @@ pub fn parse(law_id: usize) -> Result<Vec<String>, Error> {
|
|||||||
let mut skip = true;
|
let mut skip = true;
|
||||||
let mut ret = Vec::new();
|
let mut ret = Vec::new();
|
||||||
loop {
|
loop {
|
||||||
info!("=== Fetching overview page #{page} ===");
|
//info!("=== Fetching overview page #{page} ===");
|
||||||
let json = fetch_page(law_id, page)?;
|
let json = fetch_page(law_id, page)?;
|
||||||
let (cont, nodes) = parse_from_str(&json, skip)?;
|
let (cont, nodes) = parse_from_str(&json, skip)?;
|
||||||
for n in nodes {
|
for n in nodes {
|
||||||
@ -116,7 +115,7 @@ fn fetch_page(overview_id: usize, page: usize) -> Result<String, Error> {
|
|||||||
if let Ok(data) = fs::read_to_string(&expected_filename) {
|
if let Ok(data) = fs::read_to_string(&expected_filename) {
|
||||||
Ok(data)
|
Ok(data)
|
||||||
} else {
|
} else {
|
||||||
info!("Not finding law_id {overview_id} (page {page}) in the cache, downloading...");
|
//info!("Not finding law_id {overview_id} (page {page}) in the cache, downloading...");
|
||||||
let data = ureq::post("https://data.bka.gv.at/ris/api/v2.6/Bundesrecht")
|
let data = ureq::post("https://data.bka.gv.at/ris/api/v2.6/Bundesrecht")
|
||||||
.send_form(&[
|
.send_form(&[
|
||||||
("Applikation", "BrKons"),
|
("Applikation", "BrKons"),
|
||||||
|
@ -25,8 +25,6 @@ use std::{
|
|||||||
path::Path,
|
path::Path,
|
||||||
};
|
};
|
||||||
|
|
||||||
use log::info;
|
|
||||||
|
|
||||||
use crate::{
|
use crate::{
|
||||||
law,
|
law,
|
||||||
misc::{fetch_with_retries, get_cache_dir, Error},
|
misc::{fetch_with_retries, get_cache_dir, Error},
|
||||||
@ -126,7 +124,6 @@ impl Parser {
|
|||||||
/// );
|
/// );
|
||||||
/// ```
|
/// ```
|
||||||
pub fn parse(&self, url: &str, builder: &mut law::Builder) -> Result<bool, Error> {
|
pub fn parse(&self, url: &str, builder: &mut law::Builder) -> Result<bool, Error> {
|
||||||
info!("Parsing {url}");
|
|
||||||
let xml = fetch(url)?;
|
let xml = fetch(url)?;
|
||||||
|
|
||||||
let xml = xml.replace('\u{a0}', " ");
|
let xml = xml.replace('\u{a0}', " ");
|
||||||
@ -193,7 +190,6 @@ fn fetch(url: &str) -> Result<String, Error> {
|
|||||||
if let Ok(data) = fs::read_to_string(&expected_filename) {
|
if let Ok(data) = fs::read_to_string(&expected_filename) {
|
||||||
Ok(data)
|
Ok(data)
|
||||||
} else {
|
} else {
|
||||||
info!("Not finding url {url} in the cache, downloading...");
|
|
||||||
let data = fetch_with_retries(url)?;
|
let data = fetch_with_retries(url)?;
|
||||||
let path = Path::new(&expected_filename);
|
let path = Path::new(&expected_filename);
|
||||||
if let Some(parent) = path.parent() {
|
if let Some(parent) = path.parent() {
|
||||||
|
@ -16,7 +16,6 @@
|
|||||||
|
|
||||||
use std::iter::Peekable;
|
use std::iter::Peekable;
|
||||||
|
|
||||||
use log::trace;
|
|
||||||
use roxmltree::{Children, Node};
|
use roxmltree::{Children, Node};
|
||||||
|
|
||||||
use crate::law::Content;
|
use crate::law::Content;
|
||||||
@ -40,7 +39,6 @@ impl Absatz {
|
|||||||
// - String: (optional) paragraph id
|
// - String: (optional) paragraph id
|
||||||
// - Content: content of the paragraph
|
// - Content: content of the paragraph
|
||||||
pub(crate) fn parse_full(c: &mut Peekable<Children>) -> (Option<String>, Content) {
|
pub(crate) fn parse_full(c: &mut Peekable<Children>) -> (Option<String>, Content) {
|
||||||
trace!("Parsing absatz...");
|
|
||||||
let absatz = AbsatzAbs::parse(c.next().unwrap());
|
let absatz = AbsatzAbs::parse(c.next().unwrap());
|
||||||
let par_id = absatz.gldsym;
|
let par_id = absatz.gldsym;
|
||||||
|
|
||||||
@ -50,7 +48,6 @@ impl Absatz {
|
|||||||
// If there's a "liste" after an "absatz", the "liste" should be part of the "absatz"
|
// If there's a "liste" after an "absatz", the "liste" should be part of the "absatz"
|
||||||
while let Some(child) = c.peek() {
|
while let Some(child) = c.peek() {
|
||||||
if Liste::test(child) {
|
if Liste::test(child) {
|
||||||
trace!("Found liste inside absatz, parsing...");
|
|
||||||
let liste = Liste::parse_full(c).content;
|
let liste = Liste::parse_full(c).content;
|
||||||
content.extend(liste);
|
content.extend(liste);
|
||||||
} else if Table::test(child) {
|
} else if Table::test(child) {
|
||||||
|
@ -17,7 +17,6 @@
|
|||||||
use std::collections::HashMap;
|
use std::collections::HashMap;
|
||||||
use std::iter::Peekable;
|
use std::iter::Peekable;
|
||||||
|
|
||||||
use log::{debug, trace};
|
|
||||||
use roxmltree::{Children, Node};
|
use roxmltree::{Children, Node};
|
||||||
|
|
||||||
use crate::law;
|
use crate::law;
|
||||||
@ -72,7 +71,6 @@ impl Abschnitt {
|
|||||||
builder.new_par(par_id, Content::List(contents));
|
builder.new_par(par_id, Content::List(contents));
|
||||||
}
|
}
|
||||||
|
|
||||||
debug!("Handling post metadata");
|
|
||||||
ret.handle_metadata(&mut c, builder);
|
ret.handle_metadata(&mut c, builder);
|
||||||
|
|
||||||
// Skip all UeberschriftTitle and Absatz
|
// Skip all UeberschriftTitle and Absatz
|
||||||
@ -103,7 +101,6 @@ impl Abschnitt {
|
|||||||
|
|
||||||
// We are done with meta-data parsing
|
// We are done with meta-data parsing
|
||||||
if key == "Text" {
|
if key == "Text" {
|
||||||
trace!("Done parsing metadata, got 'Text'");
|
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -128,7 +125,6 @@ impl Abschnitt {
|
|||||||
builder.add_next_para_note(value.clone());
|
builder.add_next_para_note(value.clone());
|
||||||
}
|
}
|
||||||
|
|
||||||
trace!("Parsed metadata: key='{key}', value='{value}'");
|
|
||||||
self.metadata.insert(key, value);
|
self.metadata.insert(key, value);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -16,7 +16,6 @@
|
|||||||
|
|
||||||
use std::iter::Peekable;
|
use std::iter::Peekable;
|
||||||
|
|
||||||
use log::trace;
|
|
||||||
use roxmltree::{Children, Node};
|
use roxmltree::{Children, Node};
|
||||||
|
|
||||||
use crate::{
|
use crate::{
|
||||||
@ -34,7 +33,6 @@ impl Liste {
|
|||||||
}
|
}
|
||||||
|
|
||||||
pub(crate) fn parse_full(n: &mut Peekable<Children>) -> Self {
|
pub(crate) fn parse_full(n: &mut Peekable<Children>) -> Self {
|
||||||
trace!("Parsing liste...");
|
|
||||||
Expect::from(n.peek().unwrap()).tag("liste");
|
Expect::from(n.peek().unwrap()).tag("liste");
|
||||||
|
|
||||||
let mut content = Vec::new();
|
let mut content = Vec::new();
|
||||||
@ -44,17 +42,12 @@ impl Liste {
|
|||||||
// Parse stuff inside <liste>
|
// Parse stuff inside <liste>
|
||||||
while let Some(child) = c.peek() {
|
while let Some(child) = c.peek() {
|
||||||
if Ziffernliste::test(child) {
|
if Ziffernliste::test(child) {
|
||||||
trace!("Found Ziffernliste in liste, parsing...");
|
|
||||||
let liste = Ziffernliste::parse(&mut c);
|
let liste = Ziffernliste::parse(&mut c);
|
||||||
content.push(liste.get_content());
|
content.push(liste.get_content());
|
||||||
} else if Schlussteil::test(child) {
|
} else if Schlussteil::test(child) {
|
||||||
// 162 Schifffahrtsgesetz show use that a 'schlussteil' can be at the start of a list
|
// 162 Schifffahrtsgesetz show use that a 'schlussteil' can be at the start of a list
|
||||||
content.push(Content::Text(Schlussteil::parse(c.next().unwrap()).content));
|
content.push(Content::Text(Schlussteil::parse(c.next().unwrap()).content));
|
||||||
} else {
|
} else {
|
||||||
trace!(
|
|
||||||
"No more acceptable element in the list found: '{}'",
|
|
||||||
child.tag_name().name()
|
|
||||||
);
|
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -22,7 +22,6 @@ mod table;
|
|||||||
use std::{fmt::Display, iter::Peekable};
|
use std::{fmt::Display, iter::Peekable};
|
||||||
|
|
||||||
use abschnitt::Abschnitt;
|
use abschnitt::Abschnitt;
|
||||||
use log::trace;
|
|
||||||
use roxmltree::{Children, Node};
|
use roxmltree::{Children, Node};
|
||||||
|
|
||||||
use crate::{
|
use crate::{
|
||||||
@ -117,7 +116,6 @@ impl Risdok {
|
|||||||
|
|
||||||
pub(crate) fn from_str(xml: &str, builder: &mut law::Builder) -> Result<bool, Error> {
|
pub(crate) fn from_str(xml: &str, builder: &mut law::Builder) -> Result<bool, Error> {
|
||||||
let doc = roxmltree::Document::parse(xml)?;
|
let doc = roxmltree::Document::parse(xml)?;
|
||||||
trace!("{doc:?}");
|
|
||||||
let root = doc.root();
|
let root = doc.root();
|
||||||
assert_eq!(root.children().count(), 1);
|
assert_eq!(root.children().count(), 1);
|
||||||
Ok(Self::parse(root.children().next().unwrap(), builder))
|
Ok(Self::parse(root.children().next().unwrap(), builder))
|
||||||
@ -183,7 +181,6 @@ impl Listelem {
|
|||||||
|
|
||||||
let text = c.next().unwrap().text().unwrap().into();
|
let text = c.next().unwrap().text().unwrap().into();
|
||||||
|
|
||||||
trace!("Parsed Listelem with text='{text}'");
|
|
||||||
Expect::empty(c.next());
|
Expect::empty(c.next());
|
||||||
|
|
||||||
Self { symbol, text }
|
Self { symbol, text }
|
||||||
@ -211,7 +208,6 @@ impl Ziffernliste {
|
|||||||
}
|
}
|
||||||
|
|
||||||
pub(crate) fn parse(c: &mut Peekable<Children>) -> Self {
|
pub(crate) fn parse(c: &mut Peekable<Children>) -> Self {
|
||||||
trace!("Parsing Ziffernliste...");
|
|
||||||
let n = c.next().unwrap();
|
let n = c.next().unwrap();
|
||||||
|
|
||||||
assert!(Self::test(&n));
|
assert!(Self::test(&n));
|
||||||
|
Loading…
Reference in New Issue
Block a user