diff --git a/Cargo.lock b/Cargo.lock index e32290f..c8f6575 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -2103,7 +2103,7 @@ dependencies = [ [[package]] name = "scootaloo" -version = "0.10.1" +version = "0.11.0" dependencies = [ "chrono", "clap", @@ -2113,6 +2113,7 @@ dependencies = [ "html-escape", "log", "mime", + "regex", "reqwest 0.11.12", "rusqlite", "serde", diff --git a/Cargo.toml b/Cargo.toml index 35a5030..6ed1811 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -1,6 +1,6 @@ [package] name = "scootaloo" -version = "0.10.1" +version = "0.11.0" authors = ["VC "] edition = "2021" @@ -8,6 +8,7 @@ edition = "2021" [dependencies] chrono = "^0.4" +regex = "^1" serde = { version = "1.0", features = ["derive"] } toml = "^0.5" clap = "^4" diff --git a/README.md b/README.md index 0f5e608..bea2e30 100644 --- a/README.md +++ b/README.md @@ -19,6 +19,19 @@ First up, create a configuration file (default path is `/usr/local/etc/scootaloo db_path = "/var/lib/scootaloo/scootaloo.sqlite" ## file containing the SQLite Tweet corresponding Toot DB, must be writeable cache_path = "/tmp/scootaloo" ## a dir where the temporary files will be download, must be writeable rate_limiting = 4 ## optional, default 4, number of accounts handled simultaneously +## optional, this should be omitted the majority of the time +## sometimes, Twitter users try to use French inclusive writing, but instead of using `·` (median point), they use `.` +## this makes Twitter interpret it as a URL, which is wrong +## this parameter allows you to catch such URLs and apply the `display_url` (e.g. `tout.es`) instead of the `expanded_url` (e.g. `http://tout.es`) +## in those particular cases +## (!) 
use with caution, it might have some undesired effects +show_url_as_display_url_for = "^https?://(.+)\\.es$" +## optional, this allows you to replace the host for popular services such as YouTube or Twitter, or any other +## with their more freely accessible equivalent +[scootaloo.alternative_services_for] +"tamere.lol" = "tonpere.mdr" ## quotes are necessary for both parameters +"you.pi" = "you.pla" +"www.you.pi" = "you.pla" ## this is an exact match, so you’ll need to lay out all the possibilities [twitter] ## Consumer/Access key for Twitter (can be generated at https://developer.twitter.com/en/apps) diff --git a/src/config.rs b/src/config.rs index b8aaaa0..ba62d91 100644 --- a/src/config.rs +++ b/src/config.rs @@ -36,6 +36,8 @@ pub struct ScootalooConfig { pub db_path: String, pub cache_path: String, pub rate_limit: Option, + pub show_url_as_display_url_for: Option, + pub alternative_services_for: Option>, } /// Parses the TOML file into a Config Struct diff --git a/src/lib.rs b/src/lib.rs index 7e13565..4b244a7 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -22,6 +22,7 @@ use state::{read_state, write_state, TweetToToot}; use elefren::{prelude::*, status_builder::StatusBuilder, Language}; use futures::StreamExt; use log::info; +use regex::Regex; use rusqlite::Connection; use std::{collections::HashMap, sync::Arc}; use tokio::{spawn, sync::Mutex}; @@ -58,6 +59,14 @@ pub async fn run(config: Config) { }) .collect(); + let display_url_re = config + .scootaloo + .show_url_as_display_url_for + .as_ref() + .map(|r| + // we want to panic in case the RE is not valid + Regex::new(r).unwrap()); + let mut stream = futures::stream::iter(config.mastodon.into_values()) .map(|mastodon_config| { // calculate Twitter page size @@ -68,6 +77,8 @@ pub async fn run(config: Config) { // create temporary value for each task let scootaloo_cache_path = config.scootaloo.cache_path.clone(); let scootaloo_mentions = scootaloo_mentions.clone(); + let scootaloo_alt_services = 
config.scootaloo.alternative_services_for.clone(); + let display_url_re = display_url_re.clone(); let token = get_oauth2_token(&config.twitter); let task_conn = conn.clone(); @@ -108,7 +119,12 @@ pub async fn run(config: Config) { drop(lconn); // build basic status by just yielding text and dereferencing contained urls - let mut status_text = build_basic_status(tweet, &scootaloo_mentions); + let mut status_text = build_basic_status( + tweet, + &scootaloo_mentions, + &display_url_re, + &scootaloo_alt_services, + ); // building associative media list let (media_url, status_medias) = diff --git a/src/mastodon.rs b/src/mastodon.rs index 84d4dea..c7086ec 100644 --- a/src/mastodon.rs +++ b/src/mastodon.rs @@ -6,6 +6,7 @@ use egg_mode::{ }; use elefren::{apps::App, prelude::*, scopes::Read, scopes::Scopes, scopes::Write}; use html_escape::decode_html_entities; +use regex::Regex; use std::{borrow::Cow, collections::HashMap, io::stdin}; /// Decodes the Twitter mention to something that will make sense once Twitter has joined the @@ -22,14 +23,34 @@ fn twitter_mentions(ums: &[MentionEntity]) -> HashMap { } /// Decodes urls from UrlEntities -fn decode_urls(urls: &[UrlEntity]) -> HashMap { +fn decode_urls( + urls: &[UrlEntity], + re: &Option, + alt_urls: &Option>, +) -> HashMap { urls.iter() .filter(|s| s.expanded_url.is_some()) .map(|s| { - ( - s.url.to_owned(), - s.expanded_url.as_deref().unwrap().to_owned(), - ) + (s.url.to_owned(), { + let mut def = s.expanded_url.as_deref().unwrap().to_owned(); + + if let Some(a) = &alt_urls { + for (url_source, url_destination) in a { + def = def.replace( + &format!("https://{}", url_source), + &format!("https://{}", url_destination), + ); + } + } + + if let Some(r) = &re { + if r.is_match(s.expanded_url.as_deref().unwrap()) { + def = s.display_url.clone(); + } + } + + def + }) }) .collect() } @@ -48,10 +69,15 @@ pub fn get_mastodon_token(masto: &MastodonConfig) -> Mastodon { } /// Builds toot text from tweet -pub fn 
build_basic_status(tweet: &Tweet, mentions: &HashMap) -> String { +pub fn build_basic_status( + tweet: &Tweet, + mentions: &HashMap, + url_regex_filter: &Option, + url_alt_services: &Option>, +) -> String { let mut toot = tweet.text.to_owned(); - for decoded_url in decode_urls(&tweet.entities.urls) { + for decoded_url in decode_urls(&tweet.entities.urls, url_regex_filter, url_alt_services) { toot = toot.replace(&decoded_url.0, &decoded_url.1); } @@ -170,15 +196,51 @@ mod tests { url: "https://t.me/tamerelol".to_string(), }; - let twitter_urls = vec![url_entity1, url_entity2]; + let wrong_url_entity = UrlEntity { + display_url: "invité.es".to_string(), + expanded_url: Some("http://xn--invit-fsa.es".to_string()), + range: (85, 108), + url: "https://t.co/WAUgnpHLmo".to_string(), + }; - let mut expected_urls = HashMap::new(); - expected_urls.insert( - "https://t.me/tamerelol".to_string(), - "https://www.nintendojo.fr/dojobar".to_string(), - ); + let rewritten_url_entity = UrlEntity { + display_url: "youtu.be/w5TrSaoYmZ8".to_string(), + expanded_url: Some("https://youtu.be/w5TrSaoYmZ8".to_string()), + range: (0, 23), + url: "https://t.co/fUVYXuF7tg".to_string(), + }; - let decoded_urls = decode_urls(&twitter_urls); + let re = Regex::new("(.+)\\.es$").ok(); + + let alt: HashMap = HashMap::from([ + ("youtube.com".to_string(), "invidio.us".to_string()), + ("youtu.be".to_string(), "invidio.us".to_string()), + ("www.youtube.com".to_string(), "invidio.us".to_string()), + ]); + + let twitter_urls = vec![ + url_entity1, + url_entity2, + wrong_url_entity, + rewritten_url_entity, + ]; + + let expected_urls = HashMap::from([ + ( + "https://t.me/tamerelol".to_string(), + "https://www.nintendojo.fr/dojobar".to_string(), + ), + ( + "https://t.co/WAUgnpHLmo".to_string(), + "invité.es".to_string(), + ), + ( + "https://t.co/fUVYXuF7tg".to_string(), + "https://invidio.us/w5TrSaoYmZ8".to_string(), + ), + ]); + + let decoded_urls = decode_urls(&twitter_urls, &re, &Some(alt)); 
assert_eq!(expected_urls, decoded_urls); } @@ -247,7 +309,7 @@ mod tests { "@WillyWonka@chocolatefactory.org".to_string(), )]); - let t_out = build_basic_status(&t, &s); + let t_out = build_basic_status(&t, &s, &None, &None); assert_eq!(&t_out, "Mother 1 & 2 sur le NES/SNES online !\nDispo maintenant. cc @NintendoFrance@twitter.com @WillyWonka@chocolatefactory.org https://www.youtube.com/watch?v=w5TrSaoYmZ8"); } diff --git a/tests/config.rs b/tests/config.rs index 81a63d9..e24323b 100644 --- a/tests/config.rs +++ b/tests/config.rs @@ -1,4 +1,33 @@ use scootaloo::parse_toml; +use std::collections::HashMap; + +#[test] +fn test_alt_services() { + let toml = parse_toml("tests/no_test_alt_services.toml"); + assert_eq!(toml.scootaloo.alternative_services_for, None); + + let toml = parse_toml("tests/test_alt_services.toml"); + assert_eq!( + toml.scootaloo.alternative_services_for, + Some(HashMap::from([ + ("tamere.lol".to_string(), "tonpere.mdr".to_string()), + ("you.pi".to_string(), "you.pla".to_string()) + ])) + ); +} + +#[test] +fn test_re_display() { + let toml = parse_toml("tests/no_show_url_as_display_url_for.toml"); + assert_eq!(toml.scootaloo.show_url_as_display_url_for, None); + + let toml = parse_toml("tests/show_url_as_display_url_for.toml"); + + assert_eq!( + toml.scootaloo.show_url_as_display_url_for, + Some("^(.+)\\.es$".to_string()) + ); +} #[test] fn test_page_size() { diff --git a/tests/no_show_url_as_display_url_for.toml b/tests/no_show_url_as_display_url_for.toml new file mode 100644 index 0000000..443dac7 --- /dev/null +++ b/tests/no_show_url_as_display_url_for.toml @@ -0,0 +1,19 @@ +[scootaloo] + +db_path="/var/random/scootaloo.sqlite" +cache_path="/tmp/scootaloo" + +[twitter] +consumer_key="rand consumer key" +consumer_secret="secret" +access_key="rand access key" +access_secret="super secret" + +[mastodon] +[mastodon.tamerelol] +twitter_screen_name="tamerelol" +base = "https://m.nintendojo.fr" +client_id = "rand client id" +client_secret = "secret" 
+redirect = "urn:ietf:wg:oauth:2.0:oob" +token = "super secret" diff --git a/tests/no_test_alt_services.toml b/tests/no_test_alt_services.toml new file mode 100644 index 0000000..443dac7 --- /dev/null +++ b/tests/no_test_alt_services.toml @@ -0,0 +1,19 @@ +[scootaloo] + +db_path="/var/random/scootaloo.sqlite" +cache_path="/tmp/scootaloo" + +[twitter] +consumer_key="rand consumer key" +consumer_secret="secret" +access_key="rand access key" +access_secret="super secret" + +[mastodon] +[mastodon.tamerelol] +twitter_screen_name="tamerelol" +base = "https://m.nintendojo.fr" +client_id = "rand client id" +client_secret = "secret" +redirect = "urn:ietf:wg:oauth:2.0:oob" +token = "super secret" diff --git a/tests/show_url_as_display_url_for.toml b/tests/show_url_as_display_url_for.toml new file mode 100644 index 0000000..32f742c --- /dev/null +++ b/tests/show_url_as_display_url_for.toml @@ -0,0 +1,20 @@ +[scootaloo] + +db_path="/var/random/scootaloo.sqlite" +cache_path="/tmp/scootaloo" +show_url_as_display_url_for = "^(.+)\\.es$" + +[twitter] +consumer_key="rand consumer key" +consumer_secret="secret" +access_key="rand access key" +access_secret="super secret" + +[mastodon] +[mastodon.tamerelol] +twitter_screen_name="tamerelol" +base = "https://m.nintendojo.fr" +client_id = "rand client id" +client_secret = "secret" +redirect = "urn:ietf:wg:oauth:2.0:oob" +token = "super secret" diff --git a/tests/test_alt_services.toml b/tests/test_alt_services.toml new file mode 100644 index 0000000..d782d89 --- /dev/null +++ b/tests/test_alt_services.toml @@ -0,0 +1,22 @@ +[scootaloo] + +db_path="/var/random/scootaloo.sqlite" +cache_path="/tmp/scootaloo" +[scootaloo.alternative_services_for] +"tamere.lol" = "tonpere.mdr" +"you.pi" = "you.pla" + +[twitter] +consumer_key="rand consumer key" +consumer_secret="secret" +access_key="rand access key" +access_secret="super secret" + +[mastodon] +[mastodon.tamerelol] +twitter_screen_name="tamerelol" +base = "https://m.nintendojo.fr" +client_id = 
"rand client id" +client_secret = "secret" +redirect = "urn:ietf:wg:oauth:2.0:oob" +token = "super secret"