use html_escape::decode_html_entities;
use megalodon::entities::status::Tag;
use regex::Regex;
use std::error::Error;
/// Generate 2 contents out of 1 if that content is > 280 chars, None else
pub fn generate_multi_tweets(content: &str) -> Option<(String, String)> {
// Twitter webforms are utf-8 encoded, so we cannot count on len(), we donβt need
// encode_utf16().count()
if twitter_count(content) <= 280 {
return None;
}
let split_content = content.split(' ');
let split_count = split_content.clone().count();
let first_half: String = split_content
.clone()
.take(split_count / 2)
.collect::>()
.join(" ");
let second_half: String = split_content
.clone()
.skip(split_count / 2)
.collect::>()
.join(" ");
Some((first_half, second_half))
}
/// Estimates the length Twitter will attribute to `content`.
///
/// Twitter does not count the way a plain character count would, so this is an
/// approximation: the text is cut on spaces and line feeds, every separator
/// weighs one character, every piece starting with `http://` or `https://`
/// weighs a flat 23, and any other piece weighs its number of `char`s.
fn twitter_count(content: &str) -> usize {
    let (pieces, weight) = content
        .split(|c: char| matches!(c, ' ' | '\n'))
        .map(|piece| {
            if piece.starts_with("http://") || piece.starts_with("https://") {
                23
            } else {
                piece.chars().count()
            }
        })
        .fold((0usize, 0usize), |(seen, total), w| (seen + 1, total + w));

    // `split` always yields at least one piece, hence `pieces - 1` separators.
    (pieces - 1) + weight
}
pub fn strip_everything(content: &str, tags: &Vec) -> Result> {
let mut res = strip_html_tags(&content.replace("
", "\n\n").replace("
", "\n"));
strip_mastodon_tags(&mut res, tags).unwrap();
res = res.trim_end_matches('\n').trim_end_matches(' ').to_string();
res = decode_html_entities(&res).to_string();
Ok(res)
}
fn strip_mastodon_tags(content: &mut String, tags: &Vec) -> Result<(), Box> {
for tag in tags {
let re = Regex::new(&format!("(?i)(#{} ?)", &tag.name))?;
*content = re.replace(content, "").to_string();
}
Ok(())
}
/// Returns `input` without its markup.
///
/// Everything from a `<` up to the next `>` is discarded, delimiters included.
/// A `>` met outside of a tag is discarded as well, and an unclosed `<` swallows
/// the rest of the input.
fn strip_html_tags(input: &str) -> String {
    let mut in_tag = false;
    input
        .chars()
        .filter(|&ch| match ch {
            '<' => {
                in_tag = true;
                false
            }
            '>' => {
                in_tag = false;
                false
            }
            _ => !in_tag,
        })
        .collect()
}
#[cfg(test)]
mod tests {
    use super::*;

    /// Plain text weighs one per `char`, separators one each, links a flat 23.
    #[test]
    fn test_twitter_count() {
        let content = "tamerelol?! 😵";
        assert_eq!(twitter_count(content), content.chars().count());

        let content = "Shoot out to https://y.ml/ !";
        assert_eq!(twitter_count(content), 38);

        let content = "this is the link https://www.google.com/tamerelol/youpi/tonperemdr/tarace.html if you like! What if I shit a final";
        assert_eq!(twitter_count(content), 76);

        // Every separator of a run counts on its own.
        let content = "multi  ple   space";
        assert_eq!(twitter_count(content), content.chars().count());

        let content = "This link is LEEEEET\n\nhttps://www.factornews.com/actualites/ca-sent-le-sapin-pour-free-radical-design-49985.html";
        assert_eq!(twitter_count(content), 45);
    }

    /// Content that fits in one tweet must not be split.
    #[test]
    fn test_generate_multi_tweets_to_none() {
        // "Standard" text.
        let tweet_content =
            "LOLOLOL, je suis bien trop petit pour être coupé en deux voyons :troll:".to_string();
        let youpi = generate_multi_tweets(&tweet_content);
        assert_eq!(None, youpi);

        // "Complex" emoji: each flag is made of two `char`s, so 140 flags weigh
        // exactly 280 and still fit.
        let tweet_content = "🇫🇷".repeat(140);
        let youpi = generate_multi_tweets(&tweet_content);
        assert_eq!(None, youpi);
    }

    /// Content above the limit is cut in the middle of its word list.
    #[test]
    fn test_generate_multi_tweets_to_some() {
        let tweet_content = "Lorem ipsum dolor sit amet, consectetur adipiscing elit, sed do eiusmod tempor incididunt ut labore et dolore magna aliqua. Ipsum dolor sit amet consectetur adipiscing elit pellentesque. Pharetra pharetra massa massa ultricies mi quis hendrerit dolor. Mauris nunc congue nisi vitae. Scelerisque varius morbi enim nunc faucibus a pellentesque sit amet. Morbi leo urna molestie at elementum. Tristique et egestas quis ipsum suspendisse ultrices gravida dictum fusce. Amet porttitor eget dolor morbi.".to_string();
        let youpi = generate_multi_tweets(&tweet_content);

        let first_half = "Lorem ipsum dolor sit amet, consectetur adipiscing elit, sed do eiusmod tempor incididunt ut labore et dolore magna aliqua. Ipsum dolor sit amet consectetur adipiscing elit pellentesque. Pharetra pharetra massa massa ultricies mi quis hendrerit dolor.".to_string();
        let second_half = "Mauris nunc congue nisi vitae. Scelerisque varius morbi enim nunc faucibus a pellentesque sit amet. Morbi leo urna molestie at elementum. Tristique et egestas quis ipsum suspendisse ultrices gravida dictum fusce. Amet porttitor eget dolor morbi.".to_string();
        assert_eq!(youpi, Some((first_half, second_half)));
    }

    /// Hashtags are removed whatever their case, along with the space after them.
    #[test]
    fn test_strip_mastodon_tags() {
        let tags = vec![
            Tag {
                name: "putaclic".to_string(),
                url: "https://m.nintendojo.fr/tags/putaclic".to_string(),
            },
            Tag {
                name: "tamerelol".to_string(),
                url: "https://m.nintendojo.fr/tags/tamerelol".to_string(),
            },
            Tag {
                name: "JeFaisNawakEnCamelCase".to_string(),
                url: "https://m.nintendojo.fr/tags/jefaisnawakencamelcase".to_string(),
            },
        ];

        // `\u{202f}` is the narrow no-break space French typography puts before `!`.
        let mut content =
            "C’est super ça\u{202f}! #putaclic #TAMERELOL #JeFaisNawakEnCamelCase".to_string();
        let sample = "C’est super ça\u{202f}! ".to_string();

        strip_mastodon_tags(&mut content, &tags).unwrap();
        assert_eq!(content, sample);
    }

    /// Full pipeline: line breaks kept, markup dropped, entities decoded.
    #[test]
    fn test_strip_everything() {
        let content = concat!(
            r#"<p>Ce soir à 21h, c'est le Dojobar ! Au programme ce soir, une rétrospective sur la série Mario &amp; Luigi.<br />"#,
            r#"Comme d'hab, le Twitch sera ici : <a href="https://twitch.tv/nintendojofr" rel="nofollow noopener noreferrer" target="_blank"><span class="invisible">https://</span><span class="">twitch.tv/nintendojofr</span><span class="invisible"></span></a><br />"#,
            r#"Ou juste l'audio là : <a href="https://nintendojo.fr/dojobar" rel="nofollow noopener noreferrer" target="_blank"><span class="invisible">https://</span><span class="">nintendojo.fr/dojobar</span><span class="invisible"></span></a><br />"#,
            r#"A toute !</p>"#,
        );
        let expected_result = "Ce soir à 21h, c'est le Dojobar ! Au programme ce soir, une rétrospective sur la série Mario & Luigi.\nComme d'hab, le Twitch sera ici : https://twitch.tv/nintendojofr\nOu juste l'audio là : https://nintendojo.fr/dojobar\nA toute !".to_string();

        let result = strip_everything(content, &vec![]).unwrap();
        assert_eq!(result, expected_result);
    }
}