Files
oolatoocs/src/utils.rs

86 lines
3.1 KiB
Rust
Raw Blame History

This file contains invisible Unicode characters

This file contains invisible Unicode characters that are indistinguishable to humans but may be processed differently by a computer. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

use html_escape::decode_html_entities;
use megalodon::entities::status::Tag;
use regex::Regex;
use std::error::Error;
pub fn strip_everything(content: &str, tags: &Vec<Tag>) -> Result<String, Box<dyn Error>> {
let mut res = strip_html_tags(&content.replace("</p><p>", "\n\n").replace("<br />", "\n"));
strip_mastodon_tags(&mut res, tags).unwrap();
res = res.trim_end_matches('\n').trim_end_matches(' ').to_string();
res = decode_html_entities(&res).to_string();
Ok(res)
}
fn strip_mastodon_tags(content: &mut String, tags: &Vec<Tag>) -> Result<(), Box<dyn Error>> {
for tag in tags {
let re = Regex::new(&format!("(?i)(#{} ?)", &tag.name))?;
*content = re.replace(content, "").to_string();
}
Ok(())
}
fn strip_html_tags(input: &str) -> String {
let mut data = String::new();
let mut inside = false;
for c in input.chars() {
if c == '<' {
inside = true;
continue;
}
if c == '>' {
inside = false;
continue;
}
if !inside {
data.push(c);
}
}
data
}
#[cfg(test)]
mod tests {
use super::*;
#[test]
fn test_strip_mastodon_tags() {
let tags = vec![
Tag {
name: "putaclic".to_string(),
url: "https://m.nintendojo.fr/tags/putaclic".to_string(),
},
Tag {
name: "tamerelol".to_string(),
url: "https://m.nintendojo.fr/tags/tamerelol".to_string(),
},
Tag {
name: "JeFaisNawakEnCamelCase".to_string(),
url: "https://m.nintendojo.fr/tags/jefaisnawakencamelcase".to_string(),
},
];
let mut content =
"Cest super ça! #putaclic #TAMERELOL #JeFaisNawakEnCamelCase".to_string();
let sample = "Cest super ça! ".to_string();
strip_mastodon_tags(&mut content, &tags).unwrap();
assert_eq!(content, sample);
}
#[test]
fn test_strip_everything() {
let content = "<p>Ce soir à 21h, c&#39;est le Dojobar ! Au programme ce soir, une rétrospective sur la série Mario &amp; Luigi.<br />Comme d&#39;hab, le Twitch sera ici : <a href=\"https://twitch.tv/nintendojofr\" target=\"_blank\" rel=\"nofollow noopener noreferrer\" translate=\"no\"><span class=\"invisible\">https://</span><span class=\"\">twitch.tv/nintendojofr</span><span class=\"invisible\"></span></a><br />Ou juste l&#39;audio là : <a href=\"https://nintendojo.fr/dojobar\" target=\"_blank\" rel=\"nofollow noopener noreferrer\" translate=\"no\"><span class=\"invisible\">https://</span><span class=\"\">nintendojo.fr/dojobar</span><span class=\"invisible\"></span></a><br />A toute !</p>";
let expected_result = "Ce soir à 21h, c'est le Dojobar ! Au programme ce soir, une rétrospective sur la série Mario & Luigi.\nComme d'hab, le Twitch sera ici : https://twitch.tv/nintendojofr\nOu juste l'audio là : https://nintendojo.fr/dojobar\nA toute !".to_string();
let result = strip_everything(content, &vec![]).unwrap();
assert_eq!(result, expected_result);
}
}