This commit is contained in:
		| @@ -165,7 +165,7 @@ impl PartialEq for LawBuilder { | |||||||
|  |  | ||||||
| impl Default for LawBuilder { | impl Default for LawBuilder { | ||||||
|     fn default() -> Self { |     fn default() -> Self { | ||||||
|         Self::new("".into()) |         Self::new(String::new()) | ||||||
|     } |     } | ||||||
| } | } | ||||||
|  |  | ||||||
| @@ -347,7 +347,7 @@ impl fmt::Display for Section { | |||||||
|             format!("{}\n{}", self.symb, self.content) |             format!("{}\n{}", self.symb, self.content) | ||||||
|         }; |         }; | ||||||
|         if let Some(note) = &self.par_note { |         if let Some(note) = &self.par_note { | ||||||
|             to_write.push_str(&format!("\nBeachte: {}\n", note)); |             to_write.push_str(&format!("\nBeachte: {note}\n")); | ||||||
|         } |         } | ||||||
|  |  | ||||||
|         f.write_str(&to_write) |         f.write_str(&to_write) | ||||||
|   | |||||||
| @@ -104,28 +104,26 @@ fn fetch_page(overview_id: usize, page: usize) -> Result<String, Error> { | |||||||
|     use std::fs; |     use std::fs; | ||||||
|  |  | ||||||
|     let expected_filename = format!("{}law-{overview_id}-{page}", get_cache_dir()?); |     let expected_filename = format!("{}law-{overview_id}-{page}", get_cache_dir()?); | ||||||
|  |     if let Ok(data) = fs::read_to_string(&expected_filename) { | ||||||
|     match fs::read_to_string(&expected_filename) { |         Ok(data) | ||||||
|         Ok(data) => Ok(data), |     } else { | ||||||
|         Err(_) => { |         info!("Not finding law_id {overview_id} (page {page}) in the cache, downloading..."); | ||||||
|             info!("Not finding law_id {overview_id} (page {page}) in the cache, downloading..."); |         let data = ureq::post("https://data.bka.gv.at/ris/api/v2.6/Bundesrecht") | ||||||
|             let data = ureq::post("https://data.bka.gv.at/ris/api/v2.6/Bundesrecht") |             .send_form(&[ | ||||||
|                 .send_form(&[ |                 ("Applikation", "BrKons"), | ||||||
|                     ("Applikation", "BrKons"), |                 ("Gesetzesnummer", &format!("{overview_id}")), | ||||||
|                     ("Gesetzesnummer", &format!("{overview_id}")), |                 ("DokumenteProSeite", "OneHundred"), | ||||||
|                     ("DokumenteProSeite", "OneHundred"), |                 ("Seitennummer", &format!("{page}")), | ||||||
|                     ("Seitennummer", &format!("{page}")), |                 ("Fassung.FassungVom", &current_date()), | ||||||
|                     ("Fassung.FassungVom", &current_date()), |             ])? | ||||||
|                 ])? |             .into_string()?; | ||||||
|                 .into_string()?; |         let path = Path::new(&expected_filename); | ||||||
|             let path = Path::new(&expected_filename); |         if let Some(parent) = path.parent() { | ||||||
|             if let Some(parent) = path.parent() { |             // Try to create the directory (and any necessary parent directories) | ||||||
|                 // Try to create the directory (and any necessary parent directories) |             fs::create_dir_all(parent).expect("Unable to create directory"); | ||||||
|                 fs::create_dir_all(parent).expect("Unable to create directory"); |  | ||||||
|             } |  | ||||||
|             fs::write(expected_filename, &data).expect("Unable to write file"); |  | ||||||
|             Ok(data) |  | ||||||
|         } |         } | ||||||
|  |         fs::write(expected_filename, &data).expect("Unable to write file"); | ||||||
|  |         Ok(data) | ||||||
|     } |     } | ||||||
| } | } | ||||||
|  |  | ||||||
|   | |||||||
| @@ -25,7 +25,7 @@ use std::{ | |||||||
|     path::Path, |     path::Path, | ||||||
| }; | }; | ||||||
|  |  | ||||||
| use log::{info}; | use log::info; | ||||||
|  |  | ||||||
| use crate::{ | use crate::{ | ||||||
|     law::LawBuilder, |     law::LawBuilder, | ||||||
| @@ -87,7 +87,7 @@ impl Parser { | |||||||
|         } |         } | ||||||
|  |  | ||||||
|         let xml = if self.move_para_headers_into_content { |         let xml = if self.move_para_headers_into_content { | ||||||
|             Self::do_move_para_headers_into_content(xml) |             Self::do_move_para_headers_into_content(&xml) | ||||||
|         } else { |         } else { | ||||||
|             xml |             xml | ||||||
|         }; |         }; | ||||||
| @@ -95,8 +95,8 @@ impl Parser { | |||||||
|         Risdok::from_str(&xml, builder) |         Risdok::from_str(&xml, builder) | ||||||
|     } |     } | ||||||
|  |  | ||||||
|     fn do_move_para_headers_into_content(xml: String) -> String { |     fn do_move_para_headers_into_content(xml: &str) -> String { | ||||||
|         let mut result = String::from(&xml); |         let mut result = String::from(xml); | ||||||
|         let ueberschrift_regex = Regex::new( |         let ueberschrift_regex = Regex::new( | ||||||
|             "<ueberschrift typ=\"[^\"]*\" ct=\"[^\"]*\" halign=\"[^\"]*\">(§.*?)</ueberschrift>", |             "<ueberschrift typ=\"[^\"]*\" ct=\"[^\"]*\" halign=\"[^\"]*\">(§.*?)</ueberschrift>", | ||||||
|         ) |         ) | ||||||
| @@ -116,7 +116,7 @@ impl Parser { | |||||||
|                 // Insert the <gldsym> tag with the ueberschrift content into the result string |                 // Insert the <gldsym> tag with the ueberschrift content into the result string | ||||||
|                 result.insert_str( |                 result.insert_str( | ||||||
|                     insert_point, |                     insert_point, | ||||||
|                     &format!("<gldsym>{}</gldsym>", ueberschrift_content), |                     &format!("<gldsym>{ueberschrift_content}</gldsym>"), | ||||||
|                 ); |                 ); | ||||||
|             } |             } | ||||||
|  |  | ||||||
| @@ -133,20 +133,18 @@ fn fetch(url: &str) -> Result<String, Error> { | |||||||
|     let hash = format!("{:x}", hasher.finish()); |     let hash = format!("{:x}", hasher.finish()); | ||||||
|  |  | ||||||
|     let expected_filename = format!("{}par-{hash}", get_cache_dir()?); |     let expected_filename = format!("{}par-{hash}", get_cache_dir()?); | ||||||
|  |     if let Ok(data) = fs::read_to_string(&expected_filename) { | ||||||
|     match fs::read_to_string(&expected_filename) { |         Ok(data) | ||||||
|         Ok(data) => Ok(data), |     } else { | ||||||
|         Err(_) => { |         info!("Not finding url {url} in the cache, downloading..."); | ||||||
|             info!("Not finding url {url} in the cache, downloading..."); |         let data = fetch_with_retries(url)?; | ||||||
|             let data = fetch_with_retries(url)?; |         let path = Path::new(&expected_filename); | ||||||
|             let path = Path::new(&expected_filename); |         if let Some(parent) = path.parent() { | ||||||
|             if let Some(parent) = path.parent() { |             // Try to create the directory (and any necessary parent directories) | ||||||
|                 // Try to create the directory (and any necessary parent directories) |             fs::create_dir_all(parent).expect("Unable to create directory"); | ||||||
|                 fs::create_dir_all(parent).expect("Unable to create directory"); |  | ||||||
|             } |  | ||||||
|             fs::write(expected_filename, &data).expect("Unable to write file"); |  | ||||||
|             Ok(data) |  | ||||||
|         } |         } | ||||||
|  |         fs::write(expected_filename, &data).expect("Unable to write file"); | ||||||
|  |         Ok(data) | ||||||
|     } |     } | ||||||
| } | } | ||||||
|  |  | ||||||
|   | |||||||
| @@ -62,7 +62,7 @@ impl Absatz { | |||||||
|             { |             { | ||||||
|                 // After a 'absatz' there can be a '<absatz typ="[satz|erltext]"' which should be part of the first absatz |                 // After a 'absatz' there can be a '<absatz typ="[satz|erltext]"' which should be part of the first absatz | ||||||
|                 // (e.g. 1209 ABGB) |                 // (e.g. 1209 ABGB) | ||||||
|                 content.push(Content::Text(Absatz::parse(c.next().unwrap()).content)) |                 content.push(Content::Text(Absatz::parse(c.next().unwrap()).content)); | ||||||
|             } else { |             } else { | ||||||
|                 break; |                 break; | ||||||
|             } |             } | ||||||
|   | |||||||
| @@ -41,7 +41,7 @@ impl Abschnitt { | |||||||
|  |  | ||||||
|         ret.handle_metadata(&mut c, builder); |         ret.handle_metadata(&mut c, builder); | ||||||
|  |  | ||||||
|         if !ret.handle_headers(&mut c, builder) { |         if !Self::handle_headers(&mut c, builder) { | ||||||
|             return ret; |             return ret; | ||||||
|         } |         } | ||||||
|  |  | ||||||
| @@ -49,9 +49,8 @@ impl Abschnitt { | |||||||
|  |  | ||||||
|         // Special handling of first paragraph (needs id)... |         // Special handling of first paragraph (needs id)... | ||||||
|         let (par_id, first_abs) = Absatz::parse_full(&mut c); |         let (par_id, first_abs) = Absatz::parse_full(&mut c); | ||||||
|         let par_id = match par_id { |         let Some(par_id) = par_id else { | ||||||
|             Some(par_id) => par_id, |             panic!("First paragraph needs to have an id, not found") | ||||||
|             None => panic!("First paragraph needs to have an id, not found"), |  | ||||||
|         }; |         }; | ||||||
|         absatze.push(first_abs); |         absatze.push(first_abs); | ||||||
|  |  | ||||||
| @@ -118,9 +117,10 @@ impl Abschnitt { | |||||||
|                 } |                 } | ||||||
|             } |             } | ||||||
|             value = value.trim().into(); |             value = value.trim().into(); | ||||||
|             if value.is_empty() { |             assert!( | ||||||
|                 panic!("Expected at least on erltext-absatz after title meta-data"); |                 !value.is_empty(), | ||||||
|             } |                 "Expected at least on erltext-absatz after title meta-data" | ||||||
|  |             ); | ||||||
|  |  | ||||||
|             // We want to use this information in our markdown output. |             // We want to use this information in our markdown output. | ||||||
|             // TODO: Use all metadata, instead of this specific call |             // TODO: Use all metadata, instead of this specific call | ||||||
| @@ -146,7 +146,7 @@ impl Abschnitt { | |||||||
|     // we optionally have headers, such as "Einleitung", "Von den bürgerlichen Gesetzen überhaupt," |     // we optionally have headers, such as "Einleitung", "Von den bürgerlichen Gesetzen überhaupt," | ||||||
|     // etc. If we have headers which indicate that we are done and we want to stop parsing |     // etc. If we have headers which indicate that we are done and we want to stop parsing | ||||||
|     // ("anlage" + "Artikel"), we indicate this wish by returning false. |     // ("anlage" + "Artikel"), we indicate this wish by returning false. | ||||||
|     fn handle_headers(&self, c: &mut Peekable<Children>, builder: &mut LawBuilder) -> bool { |     fn handle_headers(c: &mut Peekable<Children>, builder: &mut LawBuilder) -> bool { | ||||||
|         while let Some(child) = c.peek() { |         while let Some(child) = c.peek() { | ||||||
|             // Schiffahrtsgesetz: stop @ anlagen (for now) |             // Schiffahrtsgesetz: stop @ anlagen (for now) | ||||||
|             if Ueberschrift::test(child, "anlage") { |             if Ueberschrift::test(child, "anlage") { | ||||||
|   | |||||||
| @@ -43,14 +43,13 @@ impl<'a> From<&'a Node<'a, 'a>> for Expect<'a> { | |||||||
|  |  | ||||||
| impl<'a> Expect<'a> { | impl<'a> Expect<'a> { | ||||||
|     fn tag(&self, value: &str) { |     fn tag(&self, value: &str) { | ||||||
|         if self.node.tag_name().name() != value { |         assert!( | ||||||
|             panic!( |             !(self.node.tag_name().name() != value), | ||||||
|                 "Expected tag '{value}', got {} (tag: {}, content: {:?})", |             "Expected tag '{value}', got {} (tag: {}, content: {:?})", | ||||||
|                 self.node.tag_name().name(), |             self.node.tag_name().name(), | ||||||
|                 self.node.tag_name().name(), |             self.node.tag_name().name(), | ||||||
|                 self.node.text(), |             self.node.text(), | ||||||
|             ); |         ); | ||||||
|         } |  | ||||||
|     } |     } | ||||||
| } | } | ||||||
|  |  | ||||||
|   | |||||||
		Reference in New Issue
	
	Block a user