Files
oolatoocs/src/utils.rs
2025-12-01 16:39:37 +01:00

332 lines
14 KiB
Rust
Raw Blame History

This file contains invisible Unicode characters
This file contains invisible Unicode characters that are indistinguishable to humans but may be processed differently by a computer. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.
This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.
use atrium_api::{app::bsky::embed::defs::AspectRatioData, types::Object};
use html_escape::decode_html_entities;
use megalodon::entities::{attachment::MetaSub, status::Tag};
use regex::Regex;
use std::{error::Error, num::NonZeroU64};
/// Generate 2 contents out of 1 if that content is > 300 chars, None else
pub fn generate_multi_tweets(content: &str) -> Option<(String, String)> {
// Twitter webforms are utf-8 encoded, so we cannot count on len(), we dont need
// encode_utf16().count()
if twitter_count(content) <= 300 {
return None;
}
let split_content = content.split(' ');
let split_count = split_content.clone().count();
let first_half: String = split_content
.clone()
.take(split_count / 2)
.collect::<Vec<_>>()
.join(" ");
let second_half: String = split_content
.clone()
.skip(split_count / 2)
.collect::<Vec<_>>()
.join(" ");
Some((first_half, second_half))
}
/// Twitter doesnt count words the same we do, so youll have to improvise
fn twitter_count(content: &str) -> usize {
let mut count = 0;
let split_content = content.split(&[' ', '\n']);
count += split_content.clone().count() - 1; // count the spaces
for word in split_content {
if word.starts_with("http://") || word.starts_with("https://") {
// Its not that simple. Bsky adapts itself to the URL.
// https://github.com -> 10 chars
// https://github.com/ -> 10 chars
// https://github.com/NVNTLabs -> 19 chars
// https://github.com/NVNTLabs/ -> 20 chars
// so taking the maximum here to simplify things
count += 26;
} else {
count += word.chars().count();
}
}
count
}
pub fn strip_everything(
content: &str,
tags: &Vec<Tag>,
mastodon_base: &str,
) -> Result<String, Box<dyn Error>> {
let mut res = strip_html_tags(&content.replace("</p><p>", "\n\n").replace("<br />", "\n"));
strip_quote_header(&mut res, mastodon_base)?;
strip_mastodon_tags(&mut res, tags)?;
res = res.trim_end_matches('\n').trim_end_matches(' ').to_string();
res = decode_html_entities(&res).to_string();
Ok(res)
}
fn strip_quote_header(content: &mut String, mastodon_base: &str) -> Result<(), Box<dyn Error>> {
let re = Regex::new(&format!(
r"^RE: {}\S+\n\n",
mastodon_base.replace(".", r"\.")
))?;
*content = re.replace(content, "").to_string();
Ok(())
}
fn strip_mastodon_tags(content: &mut String, tags: &Vec<Tag>) -> Result<(), Box<dyn Error>> {
for tag in tags {
let re = Regex::new(&format!("(?i)(#{} ?)", &tag.name))?;
*content = re.replace(content, "").to_string();
}
Ok(())
}
fn strip_html_tags(input: &str) -> String {
let mut data = String::new();
let mut inside = false;
for c in input.chars() {
if c == '<' {
inside = true;
continue;
}
if c == '>' {
inside = false;
continue;
}
if !inside {
data.push(c);
}
}
data
}
pub fn convert_aspect_ratio(m: &Option<MetaSub>) -> Option<Object<AspectRatioData>> {
match m {
Some(ms) => {
if ms.height.is_some_and(|x| x > 0) && ms.width.is_some_and(|x| x > 0) {
Some(
AspectRatioData {
// unwrap is safe here
height: NonZeroU64::new(ms.height.unwrap().into()).unwrap(),
width: NonZeroU64::new(ms.width.unwrap().into()).unwrap(),
}
.into(),
)
} else {
None
}
}
None => None,
}
}
#[cfg(test)]
mod tests {
use super::*;
#[test]
fn test_convert_aspect_ratio() {
// test None orig aspect ratio
let metasub: Option<MetaSub> = None;
let result = convert_aspect_ratio(&metasub);
assert_eq!(result, None);
// test complet with image
let metasub = Some(MetaSub {
width: Some(1920),
height: Some(1080),
size: Some(String::from("1920x1080")),
aspect: Some(1.7777777777777777),
frame_rate: None,
duration: None,
bitrate: None,
});
let expected_result = Some(
AspectRatioData {
height: NonZeroU64::new(1080).unwrap(),
width: NonZeroU64::new(1920).unwrap(),
}
.into(),
);
let result = convert_aspect_ratio(&metasub);
assert_eq!(result, expected_result);
// test complete with video
let metasub = Some(MetaSub {
width: Some(500),
height: Some(278),
size: None,
aspect: None,
frame_rate: Some(String::from("10/1")),
duration: Some(0.9),
bitrate: Some(973191),
});
let expected_result = Some(
AspectRatioData {
height: NonZeroU64::new(278).unwrap(),
width: NonZeroU64::new(500).unwrap(),
}
.into(),
);
let result = convert_aspect_ratio(&metasub);
assert_eq!(result, expected_result);
/* test broken shit
* that should never happened but you never know
*/
// zero width
let metasub = Some(MetaSub {
width: Some(0),
height: Some(278),
size: None,
aspect: None,
frame_rate: Some(String::from("10/1")),
duration: Some(0.9),
bitrate: Some(973191),
});
let result = convert_aspect_ratio(&metasub);
assert_eq!(result, None);
// None height
let metasub = Some(MetaSub {
width: Some(500),
height: None,
size: None,
aspect: None,
frame_rate: Some(String::from("10/1")),
duration: Some(0.9),
bitrate: Some(973191),
});
let result = convert_aspect_ratio(&metasub);
assert_eq!(result, None);
}
#[test]
fn test_twitter_count() {
let content = "tamerelol?! 🐵";
assert_eq!(twitter_count(content), content.chars().count());
let content = "Shoot out to https://y.ml/ !";
assert_eq!(twitter_count(content), 41);
let content = "this is the link https://www.google.com/tamerelol/youpi/tonperemdr/tarace.html if you like! What if I shit a final";
assert_eq!(twitter_count(content), 79);
let content = "multi ple space";
assert_eq!(twitter_count(content), content.chars().count());
let content = "This link is LEEEEET\n\nhttps://www.factornews.com/actualites/ca-sent-le-sapin-pour-free-radical-design-49985.html";
assert_eq!(twitter_count(content), 48);
}
#[test]
fn test_generate_multi_tweets_to_none() {
// test «standard» text
let tweet_content =
"LOLOLOL, je suis bien trop petit pour être coupé en deux voyons :troll:".to_string();
let youpi = generate_multi_tweets(&tweet_content);
assert_eq!(None, youpi);
// test with «complex» emoji (2 utf-8 chars)
let tweet_content = "🇫🇷🇫🇷🇫🇷🇫🇷🇫🇷🇫🇷🇫🇷🇫🇷🇫🇷🇫🇷🇫🇷🇫🇷🇫🇷🇫🇷🇫🇷🇫🇷🇫🇷🇫🇷🇫🇷🇫🇷🇫🇷🇫🇷🇫🇷🇫🇷🇫🇷🇫🇷🇫🇷🇫🇷🇫🇷🇫🇷🇫🇷🇫🇷🇫🇷🇫🇷🇫🇷🇫🇷🇫🇷🇫🇷🇫🇷🇫🇷🇫🇷🇫🇷🇫🇷🇫🇷🇫🇷🇫🇷🇫🇷🇫🇷🇫🇷🇫🇷🇫🇷🇫🇷🇫🇷🇫🇷🇫🇷🇫🇷🇫🇷🇫🇷🇫🇷🇫🇷🇫🇷🇫🇷🇫🇷🇫🇷🇫🇷🇫🇷🇫🇷🇫🇷🇫🇷🇫🇷🇫🇷🇫🇷🇫🇷🇫🇷🇫🇷🇫🇷🇫🇷🇫🇷🇫🇷🇫🇷🇫🇷🇫🇷🇫🇷🇫🇷🇫🇷🇫🇷🇫🇷🇫🇷🇫🇷🇫🇷🇫🇷🇫🇷🇫🇷🇫🇷🇫🇷🇫🇷🇫🇷🇫🇷🇫🇷🇫🇷🇫🇷🇫🇷🇫🇷🇫🇷🇫🇷🇫🇷🇫🇷🇫🇷🇫🇷🇫🇷🇫🇷🇫🇷🇫🇷🇫🇷🇫🇷🇫🇷🇫🇷🇫🇷🇫🇷🇫🇷🇫🇷🇫🇷🇫🇷🇫🇷🇫🇷🇫🇷🇫🇷🇫🇷🇫🇷🇫🇷🇫🇷🇫🇷🇫🇷🇫🇷🇫🇷🇫🇷🇫🇷🇫🇷🇫🇷🇫🇷".to_string();
let youpi = generate_multi_tweets(&tweet_content);
assert_eq!(None, youpi);
// test with 299 chars
let tweet_content = "Lorem ipsum dolor sit amet, consectetur adipiscing elit, sed do eiusmod tempor incididunt ut labore et dolore magna aliqua. Ut enim ad minim veniam, quis nostrud exercitation ullamco laboris nisi ut aliquip ex ea commodo consequat. Duis aute irure dolor in reprehenderit in voluptate vulver amico tio".to_string();
let youpi = generate_multi_tweets(&tweet_content);
assert_eq!(None, youpi);
}
#[test]
fn test_generate_multi_tweets_to_some() {
let tweet_content = "Lorem ipsum dolor sit amet, consectetur adipiscing elit, sed do eiusmod tempor incididunt ut labore et dolore magna aliqua. Ipsum dolor sit amet consectetur adipiscing elit pellentesque. Pharetra pharetra massa massa ultricies mi quis hendrerit dolor. Mauris nunc congue nisi vitae. Scelerisque varius morbi enim nunc faucibus a pellentesque sit amet. Morbi leo urna molestie at elementum. Tristique et egestas quis ipsum suspendisse ultrices gravida dictum fusce. Amet porttitor eget dolor morbi.".to_string();
let youpi = generate_multi_tweets(&tweet_content);
let first_half = "Lorem ipsum dolor sit amet, consectetur adipiscing elit, sed do eiusmod tempor incididunt ut labore et dolore magna aliqua. Ipsum dolor sit amet consectetur adipiscing elit pellentesque. Pharetra pharetra massa massa ultricies mi quis hendrerit dolor.".to_string();
let second_half = "Mauris nunc congue nisi vitae. Scelerisque varius morbi enim nunc faucibus a pellentesque sit amet. Morbi leo urna molestie at elementum. Tristique et egestas quis ipsum suspendisse ultrices gravida dictum fusce. Amet porttitor eget dolor morbi.".to_string();
assert_eq!(youpi, Some((first_half, second_half)));
}
#[test]
fn test_strip_mastodon_tags() {
let tags = vec![
Tag {
name: "putaclic".to_string(),
url: "https://m.nintendojo.fr/tags/putaclic".to_string(),
},
Tag {
name: "tamerelol".to_string(),
url: "https://m.nintendojo.fr/tags/tamerelol".to_string(),
},
Tag {
name: "JeFaisNawakEnCamelCase".to_string(),
url: "https://m.nintendojo.fr/tags/jefaisnawakencamelcase".to_string(),
},
];
let mut content =
"Cest super ça! #putaclic #TAMERELOL #JeFaisNawakEnCamelCase".to_string();
let sample = "Cest super ça! ".to_string();
strip_mastodon_tags(&mut content, &tags).unwrap();
assert_eq!(content, sample);
}
#[test]
fn test_strip_everything() {
// a classic toot
let content = "<p>Ce soir à 21h, c&#39;est le Dojobar ! Au programme ce soir, une rétrospective sur la série Mario &amp; Luigi.<br />Comme d&#39;hab, le Twitch sera ici : <a href=\"https://twitch.tv/nintendojofr\" target=\"_blank\" rel=\"nofollow noopener noreferrer\" translate=\"no\"><span class=\"invisible\">https://</span><span class=\"\">twitch.tv/nintendojofr</span><span class=\"invisible\"></span></a><br />Ou juste l&#39;audio là : <a href=\"https://nintendojo.fr/dojobar\" target=\"_blank\" rel=\"nofollow noopener noreferrer\" translate=\"no\"><span class=\"invisible\">https://</span><span class=\"\">nintendojo.fr/dojobar</span><span class=\"invisible\"></span></a><br />A toute !</p>";
let expected_result = "Ce soir à 21h, c'est le Dojobar ! Au programme ce soir, une rétrospective sur la série Mario & Luigi.\nComme d'hab, le Twitch sera ici : https://twitch.tv/nintendojofr\nOu juste l'audio là : https://nintendojo.fr/dojobar\nA toute !".to_string();
let result = strip_everything(content, &vec![], "https://m.nintendojo.fr").unwrap();
assert_eq!(result, expected_result);
// a quoted toot
let content = "<p class=\"quote-inline\">RE: <a href=\"https://m.nintendojo.fr/@nintendojofr/115446347351491651\" target=\"_blank\" rel=\"nofollow noopener\" translate=\"no\"><span class=\"invisible\">https://</span><span class=\"ellipsis\">m.nintendojo.fr/@nintendojofr/</span><span class=\"invisible\">115446347351491651</span></a></p><p>Assassins Creed Shadows pèsera environ 62,8 Go sur Switch 2 (et un peu plus de 100 Go sur les autres supports), soit tout juste pour rentrer sur une cartouche de 64 Go.</p><p>Ou pas, pour rappel…</p><p><a href=\"https://m.nintendojo.fr/tags/AssassinsCreedShadows\" class=\"mention hashtag\" rel=\"tag\">#<span>AssassinsCreedShadows</span></a> <a href=\"https://m.nintendojo.fr/tags/Ubisoft\" class=\"mention hashtag\" rel=\"tag\">#<span>Ubisoft</span></a> <a href=\"https://m.nintendojo.fr/tags/NintendoSwitch2\" class=\"mention hashtag\" rel=\"tag\">#<span>NintendoSwitch2</span></a></p>";
let expected_result = "Assassins Creed Shadows pèsera environ 62,8 Go sur Switch 2 (et un peu plus de 100 Go sur les autres supports), soit tout juste pour rentrer sur une cartouche de 64 Go.\n\nOu pas, pour rappel…\n\n#AssassinsCreedShadows #Ubisoft #NintendoSwitch2";
let result = strip_everything(content, &vec![], "https://m.nintendojo.fr").unwrap();
assert_eq!(result, expected_result);
}
}