This commit is contained in:
parent
287d7bdb8b
commit
cc6bedfdf1
@ -165,7 +165,7 @@ impl PartialEq for LawBuilder {
|
|||||||
|
|
||||||
impl Default for LawBuilder {
|
impl Default for LawBuilder {
|
||||||
fn default() -> Self {
|
fn default() -> Self {
|
||||||
Self::new("".into())
|
Self::new(String::new())
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -347,7 +347,7 @@ impl fmt::Display for Section {
|
|||||||
format!("{}\n{}", self.symb, self.content)
|
format!("{}\n{}", self.symb, self.content)
|
||||||
};
|
};
|
||||||
if let Some(note) = &self.par_note {
|
if let Some(note) = &self.par_note {
|
||||||
to_write.push_str(&format!("\nBeachte: {}\n", note));
|
to_write.push_str(&format!("\nBeachte: {note}\n"));
|
||||||
}
|
}
|
||||||
|
|
||||||
f.write_str(&to_write)
|
f.write_str(&to_write)
|
||||||
|
@ -104,28 +104,26 @@ fn fetch_page(overview_id: usize, page: usize) -> Result<String, Error> {
|
|||||||
use std::fs;
|
use std::fs;
|
||||||
|
|
||||||
let expected_filename = format!("{}law-{overview_id}-{page}", get_cache_dir()?);
|
let expected_filename = format!("{}law-{overview_id}-{page}", get_cache_dir()?);
|
||||||
|
if let Ok(data) = fs::read_to_string(&expected_filename) {
|
||||||
match fs::read_to_string(&expected_filename) {
|
Ok(data)
|
||||||
Ok(data) => Ok(data),
|
} else {
|
||||||
Err(_) => {
|
info!("Not finding law_id {overview_id} (page {page}) in the cache, downloading...");
|
||||||
info!("Not finding law_id {overview_id} (page {page}) in the cache, downloading...");
|
let data = ureq::post("https://data.bka.gv.at/ris/api/v2.6/Bundesrecht")
|
||||||
let data = ureq::post("https://data.bka.gv.at/ris/api/v2.6/Bundesrecht")
|
.send_form(&[
|
||||||
.send_form(&[
|
("Applikation", "BrKons"),
|
||||||
("Applikation", "BrKons"),
|
("Gesetzesnummer", &format!("{overview_id}")),
|
||||||
("Gesetzesnummer", &format!("{overview_id}")),
|
("DokumenteProSeite", "OneHundred"),
|
||||||
("DokumenteProSeite", "OneHundred"),
|
("Seitennummer", &format!("{page}")),
|
||||||
("Seitennummer", &format!("{page}")),
|
("Fassung.FassungVom", &current_date()),
|
||||||
("Fassung.FassungVom", &current_date()),
|
])?
|
||||||
])?
|
.into_string()?;
|
||||||
.into_string()?;
|
let path = Path::new(&expected_filename);
|
||||||
let path = Path::new(&expected_filename);
|
if let Some(parent) = path.parent() {
|
||||||
if let Some(parent) = path.parent() {
|
// Try to create the directory (and any necessary parent directories)
|
||||||
// Try to create the directory (and any necessary parent directories)
|
fs::create_dir_all(parent).expect("Unable to create directory");
|
||||||
fs::create_dir_all(parent).expect("Unable to create directory");
|
|
||||||
}
|
|
||||||
fs::write(expected_filename, &data).expect("Unable to write file");
|
|
||||||
Ok(data)
|
|
||||||
}
|
}
|
||||||
|
fs::write(expected_filename, &data).expect("Unable to write file");
|
||||||
|
Ok(data)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -25,7 +25,7 @@ use std::{
|
|||||||
path::Path,
|
path::Path,
|
||||||
};
|
};
|
||||||
|
|
||||||
use log::{info};
|
use log::info;
|
||||||
|
|
||||||
use crate::{
|
use crate::{
|
||||||
law::LawBuilder,
|
law::LawBuilder,
|
||||||
@ -87,7 +87,7 @@ impl Parser {
|
|||||||
}
|
}
|
||||||
|
|
||||||
let xml = if self.move_para_headers_into_content {
|
let xml = if self.move_para_headers_into_content {
|
||||||
Self::do_move_para_headers_into_content(xml)
|
Self::do_move_para_headers_into_content(&xml)
|
||||||
} else {
|
} else {
|
||||||
xml
|
xml
|
||||||
};
|
};
|
||||||
@ -95,8 +95,8 @@ impl Parser {
|
|||||||
Risdok::from_str(&xml, builder)
|
Risdok::from_str(&xml, builder)
|
||||||
}
|
}
|
||||||
|
|
||||||
fn do_move_para_headers_into_content(xml: String) -> String {
|
fn do_move_para_headers_into_content(xml: &str) -> String {
|
||||||
let mut result = String::from(&xml);
|
let mut result = String::from(xml);
|
||||||
let ueberschrift_regex = Regex::new(
|
let ueberschrift_regex = Regex::new(
|
||||||
"<ueberschrift typ=\"[^\"]*\" ct=\"[^\"]*\" halign=\"[^\"]*\">(§.*?)</ueberschrift>",
|
"<ueberschrift typ=\"[^\"]*\" ct=\"[^\"]*\" halign=\"[^\"]*\">(§.*?)</ueberschrift>",
|
||||||
)
|
)
|
||||||
@ -116,7 +116,7 @@ impl Parser {
|
|||||||
// Insert the <gldsym> tag with the ueberschrift content into the result string
|
// Insert the <gldsym> tag with the ueberschrift content into the result string
|
||||||
result.insert_str(
|
result.insert_str(
|
||||||
insert_point,
|
insert_point,
|
||||||
&format!("<gldsym>{}</gldsym>", ueberschrift_content),
|
&format!("<gldsym>{ueberschrift_content}</gldsym>"),
|
||||||
);
|
);
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -133,20 +133,18 @@ fn fetch(url: &str) -> Result<String, Error> {
|
|||||||
let hash = format!("{:x}", hasher.finish());
|
let hash = format!("{:x}", hasher.finish());
|
||||||
|
|
||||||
let expected_filename = format!("{}par-{hash}", get_cache_dir()?);
|
let expected_filename = format!("{}par-{hash}", get_cache_dir()?);
|
||||||
|
if let Ok(data) = fs::read_to_string(&expected_filename) {
|
||||||
match fs::read_to_string(&expected_filename) {
|
Ok(data)
|
||||||
Ok(data) => Ok(data),
|
} else {
|
||||||
Err(_) => {
|
info!("Not finding url {url} in the cache, downloading...");
|
||||||
info!("Not finding url {url} in the cache, downloading...");
|
let data = fetch_with_retries(url)?;
|
||||||
let data = fetch_with_retries(url)?;
|
let path = Path::new(&expected_filename);
|
||||||
let path = Path::new(&expected_filename);
|
if let Some(parent) = path.parent() {
|
||||||
if let Some(parent) = path.parent() {
|
// Try to create the directory (and any necessary parent directories)
|
||||||
// Try to create the directory (and any necessary parent directories)
|
fs::create_dir_all(parent).expect("Unable to create directory");
|
||||||
fs::create_dir_all(parent).expect("Unable to create directory");
|
|
||||||
}
|
|
||||||
fs::write(expected_filename, &data).expect("Unable to write file");
|
|
||||||
Ok(data)
|
|
||||||
}
|
}
|
||||||
|
fs::write(expected_filename, &data).expect("Unable to write file");
|
||||||
|
Ok(data)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -62,7 +62,7 @@ impl Absatz {
|
|||||||
{
|
{
|
||||||
// After a 'absatz' there can be a '<absatz typ="[satz|erltext]"' which should be part of the first absatz
|
// After a 'absatz' there can be a '<absatz typ="[satz|erltext]"' which should be part of the first absatz
|
||||||
// (e.g. 1209 ABGB)
|
// (e.g. 1209 ABGB)
|
||||||
content.push(Content::Text(Absatz::parse(c.next().unwrap()).content))
|
content.push(Content::Text(Absatz::parse(c.next().unwrap()).content));
|
||||||
} else {
|
} else {
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
|
@ -41,7 +41,7 @@ impl Abschnitt {
|
|||||||
|
|
||||||
ret.handle_metadata(&mut c, builder);
|
ret.handle_metadata(&mut c, builder);
|
||||||
|
|
||||||
if !ret.handle_headers(&mut c, builder) {
|
if !Self::handle_headers(&mut c, builder) {
|
||||||
return ret;
|
return ret;
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -49,9 +49,8 @@ impl Abschnitt {
|
|||||||
|
|
||||||
// Special handling of first paragraph (needs id)...
|
// Special handling of first paragraph (needs id)...
|
||||||
let (par_id, first_abs) = Absatz::parse_full(&mut c);
|
let (par_id, first_abs) = Absatz::parse_full(&mut c);
|
||||||
let par_id = match par_id {
|
let Some(par_id) = par_id else {
|
||||||
Some(par_id) => par_id,
|
panic!("First paragraph needs to have an id, not found")
|
||||||
None => panic!("First paragraph needs to have an id, not found"),
|
|
||||||
};
|
};
|
||||||
absatze.push(first_abs);
|
absatze.push(first_abs);
|
||||||
|
|
||||||
@ -118,9 +117,10 @@ impl Abschnitt {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
value = value.trim().into();
|
value = value.trim().into();
|
||||||
if value.is_empty() {
|
assert!(
|
||||||
panic!("Expected at least on erltext-absatz after title meta-data");
|
!value.is_empty(),
|
||||||
}
|
"Expected at least on erltext-absatz after title meta-data"
|
||||||
|
);
|
||||||
|
|
||||||
// We want to use this information in our markdown output.
|
||||||
// We want to use this information in our markdown output.
|
||||||
// TODO: Use all metadata, instead of this specific call
|
// TODO: Use all metadata, instead of this specific call
|
||||||
@ -146,7 +146,7 @@ impl Abschnitt {
|
|||||||
// We optionally have headers, such as "Einleitung", "Von den bürgerlichen Gesetzen überhaupt,"
|
// We optionally have headers, such as "Einleitung", "Von den bürgerlichen Gesetzen überhaupt,"
|
||||||
// etc. If we have headers which indicate that we are done and we want to stop parsing
|
// etc. If we have headers which indicate that we are done and we want to stop parsing
|
||||||
// ("anlage" + "Artikel"), we indicate this wish by returning false.
|
// ("anlage" + "Artikel"), we indicate this wish by returning false.
|
||||||
fn handle_headers(&self, c: &mut Peekable<Children>, builder: &mut LawBuilder) -> bool {
|
fn handle_headers(c: &mut Peekable<Children>, builder: &mut LawBuilder) -> bool {
|
||||||
while let Some(child) = c.peek() {
|
while let Some(child) = c.peek() {
|
||||||
// Schiffahrtsgesetz: stop @ anlagen (for now)
|
// Schiffahrtsgesetz: stop @ anlagen (for now)
|
||||||
if Ueberschrift::test(child, "anlage") {
|
if Ueberschrift::test(child, "anlage") {
|
||||||
|
@ -43,14 +43,13 @@ impl<'a> From<&'a Node<'a, 'a>> for Expect<'a> {
|
|||||||
|
|
||||||
impl<'a> Expect<'a> {
|
impl<'a> Expect<'a> {
|
||||||
fn tag(&self, value: &str) {
|
fn tag(&self, value: &str) {
|
||||||
if self.node.tag_name().name() != value {
|
assert!(
|
||||||
panic!(
|
!(self.node.tag_name().name() != value),
|
||||||
"Expected tag '{value}', got {} (tag: {}, content: {:?})",
|
"Expected tag '{value}', got {} (tag: {}, content: {:?})",
|
||||||
self.node.tag_name().name(),
|
self.node.tag_name().name(),
|
||||||
self.node.tag_name().name(),
|
self.node.tag_name().name(),
|
||||||
self.node.text(),
|
self.node.text(),
|
||||||
);
|
);
|
||||||
}
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
Loading…
Reference in New Issue
Block a user