be even more liberal with naming...

This commit is contained in:
2025-08-09 17:53:01 +02:00
parent cf1866af69
commit 8a6a35269c
5 changed files with 7524 additions and 3337 deletions

View File

@@ -82,7 +82,7 @@ pub(crate) enum NameUpdateError {
}
static BAD_WORDS: LazyLock<HashSet<String>> = LazyLock::new(|| {
const BAD_WORDS_FILE: &str = include_str!("../bad/merged_output.txt");
const BAD_WORDS_FILE: &str = include_str!("../bad/bad-list.txt");
BAD_WORDS_FILE
.lines()
@@ -94,9 +94,42 @@ static BAD_WORDS: LazyLock<HashSet<String>> = LazyLock::new(|| {
fn contains_bad_word(text: &str) -> bool {
let cleaned_text: String = text.to_lowercase();
for (idx, a) in BAD_WORDS.iter().enumerate() {
if cleaned_text.trim() == a {
println!("Text contains {a} on line {idx}");
}
}
BAD_WORDS
.iter()
.any(|bad_word| cleaned_text.contains(bad_word))
.any(|bad_word| cleaned_text.trim() == bad_word)
}
#[cfg(test)]
mod tests {
    use super::contains_bad_word;
    use std::fs;

    /// Checks that no non-empty line of `bad/test-common-names.txt` is
    /// reported by `contains_bad_word`.
    ///
    /// The fixture path is relative, so it is resolved against the working
    /// directory of the test process.
    #[test]
    fn test_whitelist_words_are_not_flagged() {
        let names = fs::read_to_string("bad/test-common-names.txt").expect("Failed to read file");

        // Pair each trimmed entry with its 1-based line number and drop blank lines.
        let candidates = names
            .lines()
            .enumerate()
            .map(|(index, raw)| (index + 1, raw.trim()))
            .filter(|(_, name)| !name.is_empty());

        for (position, name) in candidates {
            let flagged = contains_bad_word(name);
            assert!(
                !flagged,
                "Word '{}' on line {} should not be flagged as bad but was detected",
                name,
                position
            );
        }
    }
}
impl Backend {