diff --git a/Cargo.lock b/Cargo.lock index c8f6575..d290880 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -2103,9 +2103,8 @@ dependencies = [ [[package]] name = "scootaloo" -version = "0.11.0" +version = "0.11.1" dependencies = [ - "chrono", "clap", "egg-mode", "elefren", diff --git a/Cargo.toml b/Cargo.toml index 6ed1811..270fd73 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -1,13 +1,12 @@ [package] name = "scootaloo" -version = "0.11.0" +version = "0.11.1" authors = ["VC "] edition = "2021" # See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html [dependencies] -chrono = "^0.4" regex = "^1" serde = { version = "1.0", features = ["derive"] } toml = "^0.5" diff --git a/src/config.rs b/src/config.rs index ba62d91..a14fd5a 100644 --- a/src/config.rs +++ b/src/config.rs @@ -19,7 +19,7 @@ pub struct TwitterConfig { pub page_size: Option, } -#[derive(Debug, Deserialize)] +#[derive(Debug, Deserialize, Clone)] pub struct MastodonConfig { pub twitter_screen_name: String, pub mastodon_screen_name: Option, diff --git a/src/lib.rs b/src/lib.rs index 4b244a7..eec647a 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -7,7 +7,7 @@ use config::Config; mod mastodon; pub use mastodon::register; -use mastodon::{build_basic_status, get_mastodon_token}; +use mastodon::*; mod twitter; use twitter::*; @@ -21,10 +21,11 @@ use state::{read_state, write_state, TweetToToot}; use elefren::{prelude::*, status_builder::StatusBuilder, Language}; use futures::StreamExt; +use html_escape::decode_html_entities; use log::info; use regex::Regex; use rusqlite::Connection; -use std::{collections::HashMap, sync::Arc}; +use std::sync::Arc; use tokio::{spawn, sync::Mutex}; const DEFAULT_RATE_LIMIT: usize = 4; @@ -43,21 +44,7 @@ pub async fn run(config: Config) { }), )); - let scootaloo_mentions: HashMap = config - .mastodon - .values() - .filter(|s| s.mastodon_screen_name.is_some()) - .map(|s| { - ( - format!("@{}", s.twitter_screen_name), - format!( - "@{}@{}", - s.mastodon_screen_name.as_ref().unwrap(), - s.base.split('/').last().unwrap() - ), - ) - }) - .collect(); + let global_mastodon_config = Arc::new(Mutex::new(config.mastodon.clone())); let display_url_re = config .scootaloo @@ -76,11 +63,11 @@ pub async fn run(config: Config) { // create temporary value for each task let scootaloo_cache_path = config.scootaloo.cache_path.clone(); - let scootaloo_mentions = scootaloo_mentions.clone(); let scootaloo_alt_services = config.scootaloo.alternative_services_for.clone(); let display_url_re = display_url_re.clone(); let token = get_oauth2_token(&config.twitter); let task_conn = conn.clone(); + let global_mastodon_config = global_mastodon_config.clone(); spawn(async move { info!("Starting treating {}", &mastodon_config.twitter_screen_name); @@ -105,26 +92,32 @@ pub async fn run(config: Config) { for tweet in &feed { info!("Treating Tweet {} inside feed", tweet.id); - let lconn = task_conn.lock().await; - // initiate the toot_reply_id var and retrieve the corresponding toot_id - let toot_reply_id: Option = tweet.in_reply_to_user_id.and_then(|_| { - read_state( - &lconn, - &mastodon_config.twitter_screen_name, - tweet.in_reply_to_status_id, - ) - .unwrap_or(None) - .map(|s| s.toot_id) - }); - drop(lconn); + // basic toot text + let mut status_text = tweet.text.clone(); - // build basic status by just yielding text and dereferencing contained urls - let mut status_text = build_basic_status( - tweet, - &scootaloo_mentions, - &display_url_re, - &scootaloo_alt_services, - ); + // add mentions and smart mentions + if !&tweet.entities.user_mentions.is_empty() { + info!("Tweet contains mentions, add them!"); + let global_mastodon_config = global_mastodon_config.lock().await; + twitter_mentions( + &mut status_text, + &tweet.entities.user_mentions, + &global_mastodon_config, + ); + drop(global_mastodon_config); + } + + if !&tweet.entities.urls.is_empty() { + info!("Tweet contains links, add them!"); + let mut associated_urls = + associate_urls(&tweet.entities.urls, &display_url_re); + + if let Some(a) = &scootaloo_alt_services { + replace_alt_services(&mut associated_urls, a); + } + + decode_urls(&mut status_text, &associated_urls); + } // building associative media list let (media_url, status_medias) = @@ -132,15 +125,27 @@ pub async fn run(config: Config) { status_text = status_text.replace(&media_url, ""); + // now that the text won’t be altered anymore, we can safely remove HTML + // entities + status_text = decode_html_entities(&status_text).to_string(); + info!("Building corresponding Mastodon status"); let mut status_builder = StatusBuilder::new(); - status_builder.status(&status_text).media_ids(status_medias); + status_builder.status(status_text).media_ids(status_medias); - // theard if necessary - if let Some(i) = toot_reply_id { - status_builder.in_reply_to(&i); + // thread if necessary + if tweet.in_reply_to_user_id.is_some() { + let lconn = task_conn.lock().await; + if let Ok(Some(r)) = read_state( + &lconn, + &mastodon_config.twitter_screen_name, + tweet.in_reply_to_status_id, + ) { + status_builder.in_reply_to(&r.toot_id); + } + drop(lconn); } // language if any diff --git a/src/mastodon.rs b/src/mastodon.rs index c7086ec..74e8840 100644 --- a/src/mastodon.rs +++ b/src/mastodon.rs @@ -1,58 +1,84 @@ use crate::config::MastodonConfig; -use egg_mode::{ - entities::{MentionEntity, UrlEntity}, - tweet::Tweet, -}; +use egg_mode::entities::{MentionEntity, UrlEntity}; use elefren::{apps::App, prelude::*, scopes::Read, scopes::Scopes, scopes::Write}; -use html_escape::decode_html_entities; use regex::Regex; use std::{borrow::Cow, collections::HashMap, io::stdin}; /// Decodes the Twitter mention to something that will make sense once Twitter has joined the -/// Fediverse -fn twitter_mentions(ums: &[MentionEntity]) -> HashMap { - ums.iter() +/// Fediverse. Users in the global user list of Scootaloo are rewritten, as they are Mastodon users +/// as well +pub fn twitter_mentions( + toot: &mut String, + ums: &[MentionEntity], + masto: &HashMap, +) { + let tm: HashMap = ums + .iter() .map(|s| { ( format!("@{}", s.screen_name), format!("@{}@twitter.com", s.screen_name), ) }) - .collect() + .chain( + masto + .values() + .filter(|s| s.mastodon_screen_name.is_some()) + .map(|s| { + ( + format!("@{}", s.twitter_screen_name), + format!( + "@{}@{}", + s.mastodon_screen_name.as_ref().unwrap(), + s.base.split('/').last().unwrap() + ), + ) + }) + .collect::>(), + ) + .collect(); + + for (k, v) in tm { + *toot = toot.replace(&k, &v); + } } -/// Decodes urls from UrlEntities -fn decode_urls( - urls: &[UrlEntity], - re: &Option, - alt_urls: &Option>, -) -> HashMap { +/// Decodes urls in toot +pub fn decode_urls(toot: &mut String, urls: &HashMap) { + for (k, v) in urls { + *toot = toot.replace(k, v); + } +} + +/// Reassociates source url with destination url for rewritting +/// this takes a Teet UrlEntity and an optional Regex +pub fn associate_urls(urls: &[UrlEntity], re: &Option) -> HashMap { urls.iter() .filter(|s| s.expanded_url.is_some()) .map(|s| { (s.url.to_owned(), { let mut def = s.expanded_url.as_deref().unwrap().to_owned(); - if let Some(a) = &alt_urls { - for (url_source, url_destination) in a { - def = def.replace( - &format!("https://{}", url_source), - &format!("https://{}", url_destination), - ); - } - } - - if let Some(r) = &re { + if let Some(r) = re { if r.is_match(s.expanded_url.as_deref().unwrap()) { - def = s.display_url.clone(); + def = s.display_url.to_owned(); } } def }) }) - .collect() + .collect::>() +} + +/// Replaces the commonly used services by mirrors, if asked to +pub fn replace_alt_services(urls: &mut HashMap, alts: &HashMap) { + for val in urls.values_mut() { + for (k, v) in alts { + *val = val.replace(&format!("/{}/", k), &format!("/{}/", v)); + } + } } /// Gets Mastodon Data @@ -68,30 +94,6 @@ pub fn get_mastodon_token(masto: &MastodonConfig) -> Mastodon { Mastodon::from(data) } -/// Builds toot text from tweet -pub fn build_basic_status( - tweet: &Tweet, - mentions: &HashMap, - url_regex_filter: &Option, - url_alt_services: &Option>, -) -> String { - let mut toot = tweet.text.to_owned(); - - for decoded_url in decode_urls(&tweet.entities.urls, url_regex_filter, url_alt_services) { - toot = toot.replace(&decoded_url.0, &decoded_url.1); - } - - for decoded_mention in twitter_mentions(&tweet.entities.user_mentions) - .into_iter() - .chain(mentions.to_owned()) - .collect::>() - { - toot = toot.replace(&decoded_mention.0, &decoded_mention.1); - } - - decode_html_entities(&toot).to_string() -} - /// Generic register function /// As this function is supposed to be run only once, it will panic for every error it encounters /// Most of this function is a direct copy/paste of the official `elefren` crate @@ -155,74 +157,90 @@ mastodon_screen_name = \"{}\" mod tests { use super::*; - use chrono::prelude::*; - use egg_mode::tweet::TweetEntities; - #[test] fn test_twitter_mentions() { - let mention_entity = MentionEntity { - id: 12345, - range: (1, 3), - name: "Ta Mere l0l".to_string(), - screen_name: "tamerelol".to_string(), - }; + let mention_entities = vec![ + MentionEntity { + id: 12345, + range: (1, 3), + name: "Ta Mere l0l".to_string(), + screen_name: "tamerelol".to_string(), + }, + MentionEntity { + id: 6789, + range: (1, 3), + name: "TONPERE".to_string(), + screen_name: "tonpere".to_string(), + }, + ]; - let twitter_ums = vec![mention_entity]; + let mut toot = ":kikoo: @tamerelol @tonpere !".to_string(); - let mut expected_mentions = HashMap::new(); - expected_mentions.insert( - "@tamerelol".to_string(), - "@tamerelol@twitter.com".to_string(), - ); + let scootaloo_config = HashMap::from([( + "test".to_string(), + (MastodonConfig { + twitter_screen_name: "tonpere".to_string(), + mastodon_screen_name: Some("lalali".to_string()), + twitter_page_size: None, + base: "https://mstdn.net".to_string(), + client_id: "".to_string(), + client_secret: "".to_string(), + redirect: "".to_string(), + token: "".to_string(), + }), + )]); - let decoded_mentions = twitter_mentions(&twitter_ums); + twitter_mentions(&mut toot, &mention_entities, &scootaloo_config); - assert_eq!(expected_mentions, decoded_mentions); + assert_eq!(&toot, ":kikoo: @tamerelol@twitter.com @lalali@mstdn.net !"); } #[test] fn test_decode_urls() { - let url_entity1 = UrlEntity { - display_url: "tamerelol".to_string(), - expanded_url: Some("https://www.nintendojo.fr/dojobar".to_string()), - range: (1, 3), - url: "https://t.me/tamerelol".to_string(), - }; - - let url_entity2 = UrlEntity { - display_url: "tamerelol".to_string(), - expanded_url: None, - range: (1, 3), - url: "https://t.me/tamerelol".to_string(), - }; - - let wrong_url_entity = UrlEntity { - display_url: "invité.es".to_string(), - expanded_url: Some("http://xn--invit-fsa.es".to_string()), - range: (85, 108), - url: "https://t.co/WAUgnpHLmo".to_string(), - }; - - let rewritten_url_entity = UrlEntity { - display_url: "youtu.be/w5TrSaoYmZ8".to_string(), - expanded_url: Some("https://youtu.be/w5TrSaoYmZ8".to_string()), - range: (0, 23), - url: "https://t.co/fUVYXuF7tg".to_string(), - }; - - let re = Regex::new("(.+)\\.es$").ok(); - - let alt: HashMap = HashMap::from([ - ("youtube.com".to_string(), "invidio.us".to_string()), - ("youtu.be".to_string(), "invidio.us".to_string()), - ("www.youtube.com".to_string(), "invidio.us".to_string()), + let urls = HashMap::from([ + ( + "https://t.co/thisisatest".to_string(), + "https://www.nintendojo.fr/dojobar".to_string(), + ), + ( + "https://t.co/nopenotinclusive".to_string(), + "invité.es".to_string(), + ), ]); - let twitter_urls = vec![ - url_entity1, - url_entity2, - wrong_url_entity, - rewritten_url_entity, + let mut toot = + "Rendez-vous sur https://t.co/thisisatest avec nos https://t.co/nopenotinclusive !" + .to_string(); + + decode_urls(&mut toot, &urls); + + assert_eq!( + &toot, + "Rendez-vous sur https://www.nintendojo.fr/dojobar avec nos invité.es !" + ); + } + + #[test] + fn test_associate_urls() { + let urls = vec![ + UrlEntity { + display_url: "tamerelol".to_string(), + expanded_url: Some("https://www.nintendojo.fr/dojobar".to_string()), + range: (1, 3), + url: "https://t.me/tamerelol".to_string(), + }, + UrlEntity { + display_url: "sadcat".to_string(), + expanded_url: None, + range: (1, 3), + url: "https://t.me/sadcat".to_string(), + }, + UrlEntity { + display_url: "invité.es".to_string(), + expanded_url: Some("http://xn--invit-fsa.es".to_string()), + range: (85, 108), + url: "https://t.co/WAUgnpHLmo".to_string(), + }, ]; let expected_urls = HashMap::from([ @@ -234,83 +252,72 @@ mod tests { "https://t.co/WAUgnpHLmo".to_string(), "invité.es".to_string(), ), - ( - "https://t.co/fUVYXuF7tg".to_string(), - "https://invidio.us/w5TrSaoYmZ8".to_string(), - ), ]); - let decoded_urls = decode_urls(&twitter_urls, &re, &Some(alt)); + let re = Regex::new("(.+)\\.es$").ok(); - assert_eq!(expected_urls, decoded_urls); + let associated_urls = associate_urls(&urls, &re); + + assert_eq!(associated_urls, expected_urls); } #[test] - fn test_build_basic_status() { - let t = Tweet { - coordinates: None, - created_at: Utc::now(), - current_user_retweet: None, - display_text_range: None, - entities: TweetEntities { - hashtags: vec![], - symbols: vec![], - urls: vec![ - UrlEntity { - display_url: "youtube.com/watch?v=w5TrSa…".to_string(), - expanded_url: Some("https://www.youtube.com/watch?v=w5TrSaoYmZ8".to_string()), - range: (93, 116), - url: "https://t.co/zXw0FfX2Nt".to_string(), - } - ], - user_mentions: vec![ - MentionEntity { - id: 491500016, - range: (80, 95), - name: "Nintendo France".to_string(), - screen_name: "NintendoFrance".to_string(), - }, - MentionEntity { - id: 999999999, - range: (80, 95), - name: "Willy Wonka".to_string(), - screen_name: "WillyWonka".to_string(), - }, - ], - media: None, - }, - extended_entities: None, - favorite_count: 0, - favorited: None, - filter_level: None, - id: 1491541246984306693, - in_reply_to_user_id: None, - in_reply_to_screen_name: None, - in_reply_to_status_id: None, - lang: None, - place: None, - possibly_sensitive: None, - quoted_status: None, - quoted_status_id: None, - retweet_count: 0, - retweeted: None, - retweeted_status: None, - source: None, - text: "Mother 1 & 2 sur le NES/SNES online !\nDispo maintenant. cc @NintendoFrance @WillyWonka https://t.co/zXw0FfX2Nt".to_string(), - truncated: false, - user: None, - withheld_copyright: false, - withheld_in_countries: None, - withheld_scope: None, - }; + fn test_replace_alt_services() { + let mut associated_urls = HashMap::from([ + ( + "https://t.co/youplaboom".to_string(), + "https://www.youtube.com/watch?v=dQw4w9WgXcQ".to_string(), + ), + ( + "https://t.co/thisisfine".to_string(), + "https://twitter.com/Nintendo/status/1594590628771688448".to_string(), + ), + ( + "https://t.co/nopenope".to_string(), + "https://www.nintendojo.fr/dojobar".to_string(), + ), + ( + "https://t.co/broken".to_string(), + "http://youtu.be".to_string(), + ), + ( + "https://t.co/alsobroken".to_string(), + "https://youtube.com".to_string(), + ), + ]); - let s: HashMap = HashMap::from([( - "@WillyWonka".to_string(), - "@WillyWonka@chocolatefactory.org".to_string(), - )]); + let alt_services = HashMap::from([ + ("twitter.com".to_string(), "nitter.net".to_string()), + ("youtu.be".to_string(), "invidio.us".to_string()), + ("www.youtube.com".to_string(), "invidio.us".to_string()), + ("youtube.com".to_string(), "invidio.us".to_string()), + ]); - let t_out = build_basic_status(&t, &s, &None, &None); + let expected_urls = HashMap::from([ + ( + "https://t.co/youplaboom".to_string(), + "https://invidio.us/watch?v=dQw4w9WgXcQ".to_string(), + ), + ( + "https://t.co/thisisfine".to_string(), + "https://nitter.net/Nintendo/status/1594590628771688448".to_string(), + ), + ( + "https://t.co/nopenope".to_string(), + "https://www.nintendojo.fr/dojobar".to_string(), + ), + ( + "https://t.co/broken".to_string(), + "http://youtu.be".to_string(), + ), + ( + "https://t.co/alsobroken".to_string(), + "https://youtube.com".to_string(), + ), + ]); - assert_eq!(&t_out, "Mother 1 & 2 sur le NES/SNES online !\nDispo maintenant. cc @NintendoFrance@twitter.com @WillyWonka@chocolatefactory.org https://www.youtube.com/watch?v=w5TrSaoYmZ8"); + replace_alt_services(&mut associated_urls, &alt_services); + + assert_eq!(associated_urls, expected_urls); } }