mirror of
https://framagit.org/veretcle/oolatoocs.git
synced 2025-07-21 04:51:17 +02:00
feat: remove dissolve + add simpler html tag stripper + html entities
This commit is contained in:
67
src/utils.rs
67
src/utils.rs
@@ -1,15 +1,15 @@
|
||||
use dissolve::strip_html_tags;
|
||||
use html_escape::decode_html_entities;
|
||||
use megalodon::entities::status::Tag;
|
||||
use regex::Regex;
|
||||
use std::error::Error;
|
||||
|
||||
pub fn strip_everything(content: &str, tags: &Vec<Tag>) -> Result<String, Box<dyn Error>> {
|
||||
let mut res =
|
||||
strip_html_tags(&content.replace("</p><p>", "\n\n").replace("<br />", "\n")).join("");
|
||||
let mut res = strip_html_tags(&content.replace("</p><p>", "\n\n").replace("<br />", "\n"));
|
||||
|
||||
strip_mastodon_tags(&mut res, tags).unwrap();
|
||||
|
||||
res = res.trim_end_matches('\n').trim_end_matches(' ').to_string();
|
||||
res = decode_html_entities(&res).to_string();
|
||||
|
||||
Ok(res)
|
||||
}
|
||||
@@ -22,3 +22,64 @@ fn strip_mastodon_tags(content: &mut String, tags: &Vec<Tag>) -> Result<(), Box<
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
||||
/// Removes HTML tags from `input`, keeping only the text found outside them.
///
/// A tag starts at a `<` and runs up to and including the next `>`; everything
/// in between is dropped. This is a plain character scan, not an HTML parser:
/// entities such as `&amp;` are left untouched, and an unterminated `<`
/// discards the rest of the input. A `>` that does not close a tag is
/// ordinary text and is kept.
fn strip_html_tags(input: &str) -> String {
    // The output can never be longer than the input.
    let mut data = String::with_capacity(input.len());
    let mut inside = false;

    for c in input.chars() {
        match c {
            '<' => inside = true,
            // Only a `>` that closes an open tag is markup; a stray one is text.
            '>' if inside => inside = false,
            _ if !inside => data.push(c),
            _ => {}
        }
    }

    data
}
|
||||
|
||||
#[cfg(test)]
mod tests {
    use super::*;

    /// Hashtags matching the supplied tags are removed, whatever their case.
    #[test]
    fn test_strip_mastodon_tags() {
        let tags: Vec<Tag> = [
            ("putaclic", "https://m.nintendojo.fr/tags/putaclic"),
            ("tamerelol", "https://m.nintendojo.fr/tags/tamerelol"),
            (
                "JeFaisNawakEnCamelCase",
                "https://m.nintendojo.fr/tags/jefaisnawakencamelcase",
            ),
        ]
        .iter()
        .map(|&(name, url)| Tag {
            name: name.to_string(),
            url: url.to_string(),
        })
        .collect();

        let expected = "C’est super ça ! ";
        let mut text =
            String::from("C’est super ça ! #putaclic #TAMERELOL #JeFaisNawakEnCamelCase");

        strip_mastodon_tags(&mut text, &tags).unwrap();

        assert_eq!(text, expected);
    }

    /// A full status body: line breaks become newlines and link markup
    /// collapses to the bare URL text.
    #[test]
    fn test_strip_everything() {
        let html = "<p>Ce soir à 21h, c'est le Dojobar ! Au programme ce soir, une rétrospective sur la série Mario & Luigi.<br />Comme d'hab, le Twitch sera ici : <a href=\"https://twitch.tv/nintendojofr\" target=\"_blank\" rel=\"nofollow noopener noreferrer\" translate=\"no\"><span class=\"invisible\">https://</span><span class=\"\">twitch.tv/nintendojofr</span><span class=\"invisible\"></span></a><br />Ou juste l'audio là : <a href=\"https://nintendojo.fr/dojobar\" target=\"_blank\" rel=\"nofollow noopener noreferrer\" translate=\"no\"><span class=\"invisible\">https://</span><span class=\"\">nintendojo.fr/dojobar</span><span class=\"invisible\"></span></a><br />A toute !</p>";
        let want = "Ce soir à 21h, c'est le Dojobar ! Au programme ce soir, une rétrospective sur la série Mario & Luigi.\nComme d'hab, le Twitch sera ici : https://twitch.tv/nintendojofr\nOu juste l'audio là : https://nintendojo.fr/dojobar\nA toute !";

        let no_tags: Vec<Tag> = Vec::new();
        let got = strip_everything(html, &no_tags).unwrap();

        assert_eq!(got, want);
    }
}
|
||||
|
Reference in New Issue
Block a user