diff --git a/Cargo.lock b/Cargo.lock index 26e45ee..4eff00a 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -988,7 +988,7 @@ checksum = "dd8b5dd2ae5ed71462c540258bedcb51965123ad7e7ccf4b9a8cafaa4a63576d" [[package]] name = "oolatoocs" -version = "1.2.0" +version = "1.3.0" dependencies = [ "clap", "env_logger", diff --git a/Cargo.toml b/Cargo.toml index 9e11d6c..671ca4f 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -1,6 +1,6 @@ [package] name = "oolatoocs" -version = "1.2.0" +version = "1.3.0" edition = "2021" # See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html diff --git a/src/lib.rs b/src/lib.rs index 735f776..bf0a28d 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -14,7 +14,7 @@ use mastodon::get_mastodon_timeline_since; pub use mastodon::register; mod utils; -use utils::strip_everything; +use utils::{generate_multi_tweets, strip_everything}; mod twitter; #[allow(unused_imports)] @@ -40,13 +40,29 @@ pub async fn run(config: &Config) { .unwrap_or_else(|e| panic!("Cannot get instance: {}", e)); for toot in timeline { - let Ok(tweet_content) = strip_everything(&toot.content, &toot.tags) else { + let Ok(mut tweet_content) = strip_everything(&toot.content, &toot.tags) else { continue; // skip in case we can’t strip something }; - let mut medias: Vec = vec![]; + // threads if necessary + let mut reply_to = toot.in_reply_to_id.and_then(|t| { + read_state(&conn, Some(t.parse::().unwrap())) + .ok() + .flatten() + .map(|s| s.tweet_id) + }); - // if we wanted to cut toot in half, now would be the right time to do so + // if the toot is too long, we cut it in half here + if let Some((first_half, second_half)) = generate_multi_tweets(&tweet_content) { + tweet_content = second_half; + let reply_id = post_tweet(&config.twitter, &first_half, &[], &reply_to) + .await + .unwrap_or_else(|e| panic!("Cannot post the first half of {}: {}", &toot.id, e)); + reply_to = Some(reply_id); + }; + + // treats medias + let mut medias: Vec = vec![]; let media_attachments = toot.media_attachments.clone(); let mut stream = stream::iter(media_attachments) @@ -84,14 +100,6 @@ pub async fn run(config: &Config) { } } - // threads if necessary - let reply_to = toot.in_reply_to_id.and_then(|t| { - read_state(&conn, Some(t.parse::().unwrap())) - .ok() - .flatten() - .map(|s| s.tweet_id) - }); - // posts corresponding tweet let tweet_id = post_tweet(&config.twitter, &tweet_content, &medias, &reply_to) .await diff --git a/src/utils.rs b/src/utils.rs index 1a8ca0d..a91cc38 100644 --- a/src/utils.rs +++ b/src/utils.rs @@ -3,6 +3,50 @@ use megalodon::entities::status::Tag; use regex::Regex; use std::error::Error; +/// Generate 2 contents out of 1 if that content is > 280 chars, None else +pub fn generate_multi_tweets(content: &str) -> Option<(String, String)> { + // Twitter webforms are utf-8 encoded, so we cannot count on len(), we don’t need + // encode_utf16().count() + if twitter_count(content) <= 280 { + return None; + } + + let split_content = content.split(' '); + + let split_count = split_content.clone().count(); + + let first_half: String = split_content + .clone() + .take(split_count / 2) + .collect::>() + .join(" "); + let second_half: String = split_content + .clone() + .skip(split_count / 2) + .collect::>() + .join(" "); + + Some((first_half, second_half)) +} + +/// Twitter doesn’t count words the same we do, so you’ll have to improvise +fn twitter_count(content: &str) -> usize { + let mut count = 0; + + let split_content = content.split(' '); + count += split_content.clone().count() - 1; // count the spaces + + for word in split_content { + if word.starts_with("http://") || word.starts_with("https://") { + count += 23; + } else { + count += word.chars().count(); + } + } + + count +} + pub fn strip_everything(content: &str, tags: &Vec) -> Result> { let mut res = strip_html_tags(&content.replace("

", "\n\n").replace("
", "\n")); @@ -48,6 +92,51 @@ fn strip_html_tags(input: &str) -> String { mod tests { use super::*; + #[test] + fn test_twitter_count() { + let content = "tamerelol?! 🐡"; + + assert_eq!(twitter_count(content), content.chars().count()); + + let content = "Shoot out to https://y.ml/ !"; + + assert_eq!(twitter_count(content), 38); + + let content = "this is the link https://www.google.com/tamerelol/youpi/tonperemdr/tarace.html if you like! What if I shit a final"; + + assert_eq!(twitter_count(content), 76); + } + + #[test] + fn test_generate_multi_tweets_to_none() { + // test Β«β€―standardβ€―Β» text + let tweet_content = + "LOLOLOL, je suis bien trop petit pour Γͺtre coupΓ© en deux voyons :troll:".to_string(); + + let youpi = generate_multi_tweets(&tweet_content); + + assert_eq!(None, youpi); + + // test with Β«β€―complexβ€―Β» emoji (2 utf-8 chars) + let tweet_content = "πŸ‡«πŸ‡·πŸ‡«πŸ‡·πŸ‡«πŸ‡·πŸ‡«πŸ‡·πŸ‡«πŸ‡·πŸ‡«πŸ‡·πŸ‡«πŸ‡·πŸ‡«πŸ‡·πŸ‡«πŸ‡·πŸ‡«πŸ‡·πŸ‡«πŸ‡·πŸ‡«πŸ‡·πŸ‡«πŸ‡·πŸ‡«πŸ‡·πŸ‡«πŸ‡·πŸ‡«πŸ‡·πŸ‡«πŸ‡·πŸ‡«πŸ‡·πŸ‡«πŸ‡·πŸ‡«πŸ‡·πŸ‡«πŸ‡·πŸ‡«πŸ‡·πŸ‡«πŸ‡·πŸ‡«πŸ‡·πŸ‡«πŸ‡·πŸ‡«πŸ‡·πŸ‡«πŸ‡·πŸ‡«πŸ‡·πŸ‡«πŸ‡·πŸ‡«πŸ‡·πŸ‡«πŸ‡·πŸ‡«πŸ‡·πŸ‡«πŸ‡·πŸ‡«πŸ‡·πŸ‡«πŸ‡·πŸ‡«πŸ‡·πŸ‡«πŸ‡·πŸ‡«πŸ‡·πŸ‡«πŸ‡·πŸ‡«πŸ‡·πŸ‡«πŸ‡·πŸ‡«πŸ‡·πŸ‡«πŸ‡·πŸ‡«πŸ‡·πŸ‡«πŸ‡·πŸ‡«πŸ‡·πŸ‡«πŸ‡·πŸ‡«πŸ‡·πŸ‡«πŸ‡·πŸ‡«πŸ‡·πŸ‡«πŸ‡·πŸ‡«πŸ‡·πŸ‡«πŸ‡·πŸ‡«πŸ‡·πŸ‡«πŸ‡·πŸ‡«πŸ‡·πŸ‡«πŸ‡·πŸ‡«πŸ‡·πŸ‡«πŸ‡·πŸ‡«πŸ‡·πŸ‡«πŸ‡·πŸ‡«πŸ‡·πŸ‡«πŸ‡·πŸ‡«πŸ‡·πŸ‡«πŸ‡·πŸ‡«πŸ‡·πŸ‡«πŸ‡·πŸ‡«πŸ‡·πŸ‡«πŸ‡·πŸ‡«πŸ‡·πŸ‡«πŸ‡·πŸ‡«πŸ‡·πŸ‡«πŸ‡·πŸ‡«πŸ‡·πŸ‡«πŸ‡·πŸ‡«πŸ‡·πŸ‡«πŸ‡·πŸ‡«πŸ‡·πŸ‡«πŸ‡·πŸ‡«πŸ‡·πŸ‡«πŸ‡·πŸ‡«πŸ‡·πŸ‡«πŸ‡·πŸ‡«πŸ‡·πŸ‡«πŸ‡·πŸ‡«πŸ‡·πŸ‡«πŸ‡·πŸ‡«πŸ‡·πŸ‡«πŸ‡·πŸ‡«πŸ‡·πŸ‡«πŸ‡·πŸ‡«πŸ‡·πŸ‡«πŸ‡·πŸ‡«πŸ‡·πŸ‡«πŸ‡·πŸ‡«πŸ‡·πŸ‡«πŸ‡·πŸ‡«πŸ‡·πŸ‡«πŸ‡·πŸ‡«πŸ‡·πŸ‡«πŸ‡·πŸ‡«πŸ‡·πŸ‡«πŸ‡·πŸ‡«πŸ‡·πŸ‡«πŸ‡·πŸ‡«πŸ‡·πŸ‡«πŸ‡·πŸ‡«πŸ‡·πŸ‡«πŸ‡·πŸ‡«πŸ‡·πŸ‡«πŸ‡·πŸ‡«πŸ‡·πŸ‡«πŸ‡·πŸ‡«πŸ‡·πŸ‡«πŸ‡·πŸ‡«πŸ‡·πŸ‡«πŸ‡·πŸ‡«πŸ‡·πŸ‡«πŸ‡·πŸ‡«πŸ‡·πŸ‡«πŸ‡·πŸ‡«πŸ‡·πŸ‡«πŸ‡·πŸ‡«πŸ‡·πŸ‡«πŸ‡·πŸ‡«πŸ‡·πŸ‡«πŸ‡·πŸ‡«πŸ‡·πŸ‡«πŸ‡·πŸ‡«πŸ‡·πŸ‡«πŸ‡·πŸ‡«πŸ‡·πŸ‡«πŸ‡·πŸ‡«πŸ‡·πŸ‡«πŸ‡·πŸ‡«πŸ‡·πŸ‡«πŸ‡·πŸ‡«πŸ‡·πŸ‡«πŸ‡·πŸ‡«πŸ‡·".to_string(); + + let youpi = generate_multi_tweets(&tweet_content); + + assert_eq!(None, youpi); + } + + #[test] + fn test_generate_multi_tweets_to_some() { + let tweet_content = "Lorem ipsum dolor sit amet, consectetur adipiscing elit, sed do eiusmod tempor incididunt ut labore et dolore magna aliqua. Ipsum dolor sit amet consectetur adipiscing elit pellentesque. Pharetra pharetra massa massa ultricies mi quis hendrerit dolor. Mauris nunc congue nisi vitae. Scelerisque varius morbi enim nunc faucibus a pellentesque sit amet. Morbi leo urna molestie at elementum. Tristique et egestas quis ipsum suspendisse ultrices gravida dictum fusce. Amet porttitor eget dolor morbi.".to_string(); + + let youpi = generate_multi_tweets(&tweet_content); + + let first_half = "Lorem ipsum dolor sit amet, consectetur adipiscing elit, sed do eiusmod tempor incididunt ut labore et dolore magna aliqua. Ipsum dolor sit amet consectetur adipiscing elit pellentesque. Pharetra pharetra massa massa ultricies mi quis hendrerit dolor.".to_string(); + let second_half = "Mauris nunc congue nisi vitae. Scelerisque varius morbi enim nunc faucibus a pellentesque sit amet. Morbi leo urna molestie at elementum. Tristique et egestas quis ipsum suspendisse ultrices gravida dictum fusce. Amet porttitor eget dolor morbi.".to_string(); + + assert_eq!(youpi, Some((first_half, second_half))); + } + #[test] fn test_strip_mastodon_tags() { let tags = vec![