refactor: build status text more progressively

This commit is contained in:
VC
2022-11-24 15:38:23 +01:00
parent 1132f41b9e
commit f42aa8cbb6
3 changed files with 222 additions and 203 deletions

View File

@@ -19,7 +19,7 @@ pub struct TwitterConfig {
pub page_size: Option<i32>,
}
#[derive(Debug, Deserialize)]
#[derive(Debug, Deserialize, Clone)]
pub struct MastodonConfig {
pub twitter_screen_name: String,
pub mastodon_screen_name: Option<String>,

View File

@@ -7,7 +7,9 @@ use config::Config;
mod mastodon;
pub use mastodon::register;
use mastodon::{build_basic_status, get_mastodon_token};
use mastodon::{
associate_urls, decode_urls, get_mastodon_token, replace_alt_services, twitter_mentions,
};
mod twitter;
use twitter::*;
@@ -21,10 +23,11 @@ use state::{read_state, write_state, TweetToToot};
use elefren::{prelude::*, status_builder::StatusBuilder, Language};
use futures::StreamExt;
use html_escape::decode_html_entities;
use log::info;
use regex::Regex;
use rusqlite::Connection;
use std::{collections::HashMap, sync::Arc};
use std::sync::Arc;
use tokio::{spawn, sync::Mutex};
const DEFAULT_RATE_LIMIT: usize = 4;
@@ -43,21 +46,7 @@ pub async fn run(config: Config) {
}),
));
let scootaloo_mentions: HashMap<String, String> = config
.mastodon
.values()
.filter(|s| s.mastodon_screen_name.is_some())
.map(|s| {
(
format!("@{}", s.twitter_screen_name),
format!(
"@{}@{}",
s.mastodon_screen_name.as_ref().unwrap(),
s.base.split('/').last().unwrap()
),
)
})
.collect();
let global_mastodon_config = Arc::new(Mutex::new(config.mastodon.clone()));
let display_url_re = config
.scootaloo
@@ -76,11 +65,11 @@ pub async fn run(config: Config) {
// create temporary value for each task
let scootaloo_cache_path = config.scootaloo.cache_path.clone();
let scootaloo_mentions = scootaloo_mentions.clone();
let scootaloo_alt_services = config.scootaloo.alternative_services_for.clone();
let display_url_re = display_url_re.clone();
let token = get_oauth2_token(&config.twitter);
let task_conn = conn.clone();
let global_mastodon_config = global_mastodon_config.clone();
spawn(async move {
info!("Starting treating {}", &mastodon_config.twitter_screen_name);
@@ -118,13 +107,32 @@ pub async fn run(config: Config) {
});
drop(lconn);
// build basic status by just yielding text and dereferencing contained urls
let mut status_text = build_basic_status(
tweet,
&scootaloo_mentions,
&display_url_re,
&scootaloo_alt_services,
);
// basic toot text
let mut status_text = tweet.text.clone();
// add mentions and smart mentions
if !&tweet.entities.user_mentions.is_empty() {
info!("Tweet contains mentions, add them!");
let global_mastodon_config = global_mastodon_config.lock().await;
twitter_mentions(
&mut status_text,
&tweet.entities.user_mentions,
&global_mastodon_config,
);
drop(global_mastodon_config);
}
if !&tweet.entities.urls.is_empty() {
info!("Tweet contains links, add them!");
let mut associated_urls =
associate_urls(&tweet.entities.urls, &display_url_re);
if let Some(a) = &scootaloo_alt_services {
replace_alt_services(&mut associated_urls, a);
}
decode_urls(&mut status_text, &associated_urls);
}
// building associative media list
let (media_url, status_medias) =
@@ -132,11 +140,15 @@ pub async fn run(config: Config) {
status_text = status_text.replace(&media_url, "");
// now that the text won't be altered anymore, we can safely remove HTML
// entities
status_text = decode_html_entities(&status_text).to_string();
info!("Building corresponding Mastodon status");
let mut status_builder = StatusBuilder::new();
status_builder.status(&status_text).media_ids(status_medias);
status_builder.status(status_text).media_ids(status_medias);
// thread if necessary
if let Some(i) = toot_reply_id {

View File

@@ -1,58 +1,84 @@
use crate::config::MastodonConfig;
use egg_mode::{
entities::{MentionEntity, UrlEntity},
tweet::Tweet,
};
use egg_mode::entities::{MentionEntity, UrlEntity};
use elefren::{apps::App, prelude::*, scopes::Read, scopes::Scopes, scopes::Write};
use html_escape::decode_html_entities;
use regex::Regex;
use std::{borrow::Cow, collections::HashMap, io::stdin};
/// Decodes the Twitter mention to something that will make sense once Twitter has joined the
/// Fediverse
fn twitter_mentions(ums: &[MentionEntity]) -> HashMap<String, String> {
ums.iter()
/// Fediverse. Users in the global user list of Scootaloo are rewritten, as they are Mastodon users
/// as well
pub fn twitter_mentions(
toot: &mut String,
ums: &[MentionEntity],
masto: &HashMap<String, MastodonConfig>,
) {
let tm: HashMap<String, String> = ums
.iter()
.map(|s| {
(
format!("@{}", s.screen_name),
format!("@{}@twitter.com", s.screen_name),
)
})
.collect()
.chain(
masto
.values()
.filter(|s| s.mastodon_screen_name.is_some())
.map(|s| {
(
format!("@{}", s.twitter_screen_name),
format!(
"@{}@{}",
s.mastodon_screen_name.as_ref().unwrap(),
s.base.split('/').last().unwrap()
),
)
})
.collect::<HashMap<String, String>>(),
)
.collect();
for (k, v) in tm {
*toot = toot.replace(&k, &v);
}
}
/// Decodes urls from UrlEntities
fn decode_urls(
urls: &[UrlEntity],
re: &Option<Regex>,
alt_urls: &Option<HashMap<String, String>>,
) -> HashMap<String, String> {
/// Decodes urls in toot
///
/// Every occurrence of a key of `urls` found in `toot` is replaced, in place, by the value
/// associated with it.
///
/// Replacements are applied longest key first (ties broken alphabetically) so that the result
/// never depends on `HashMap` iteration order: a short URL that happens to be a prefix of a
/// longer one can no longer clobber it. Empty keys are ignored, as replacing the empty string
/// would splice the value between every character of the toot.
pub fn decode_urls(toot: &mut String, urls: &HashMap<String, String>) {
    let mut pairs: Vec<(&String, &String)> =
        urls.iter().filter(|(short, _)| !short.is_empty()).collect();

    // longest first, then alphabetical: a total and deterministic order
    pairs.sort_unstable_by(|a, b| b.0.len().cmp(&a.0.len()).then_with(|| a.0.cmp(b.0)));

    for (short, expanded) in pairs {
        // only reallocate the toot when there is actually something to rewrite
        if toot.contains(short.as_str()) {
            *toot = toot.replace(short.as_str(), expanded);
        }
    }
}
/// Reassociates source url with destination url for rewriting
/// this takes a Tweet UrlEntity and an optional Regex
pub fn associate_urls(urls: &[UrlEntity], re: &Option<Regex>) -> HashMap<String, String> {
urls.iter()
.filter(|s| s.expanded_url.is_some())
.map(|s| {
(s.url.to_owned(), {
let mut def = s.expanded_url.as_deref().unwrap().to_owned();
if let Some(a) = &alt_urls {
for (url_source, url_destination) in a {
def = def.replace(
&format!("https://{}", url_source),
&format!("https://{}", url_destination),
);
}
}
if let Some(r) = &re {
if let Some(r) = re {
if r.is_match(s.expanded_url.as_deref().unwrap()) {
def = s.display_url.clone();
def = s.display_url.to_owned();
}
}
def
})
})
.collect()
.collect::<HashMap<String, String>>()
}
/// Swaps well-known services for their configured mirrors, when such mirrors are given
///
/// `urls` maps a source url to its destination url; only the destinations are rewritten, in
/// place. `alts` maps a service host to the host of its mirror. A host is only substituted when
/// it sits between two slashes (`/host/`), so a destination that ends right after the host,
/// without a trailing slash, is left as it is.
pub fn replace_alt_services(urls: &mut HashMap<String, String>, alts: &HashMap<String, String>) {
    // the slash-delimited patterns are the same for every url: build them only once
    let rewrites: Vec<(String, String)> = alts
        .iter()
        .map(|(service, mirror)| (format!("/{}/", service), format!("/{}/", mirror)))
        .collect();

    urls.values_mut().for_each(|destination| {
        for (from, to) in &rewrites {
            *destination = destination.replace(from.as_str(), to);
        }
    });
}
/// Gets Mastodon Data
@@ -68,30 +94,6 @@ pub fn get_mastodon_token(masto: &MastodonConfig) -> Mastodon {
Mastodon::from(data)
}
/// Builds toot text from tweet
pub fn build_basic_status(
tweet: &Tweet,
mentions: &HashMap<String, String>,
url_regex_filter: &Option<Regex>,
url_alt_services: &Option<HashMap<String, String>>,
) -> String {
let mut toot = tweet.text.to_owned();
for decoded_url in decode_urls(&tweet.entities.urls, url_regex_filter, url_alt_services) {
toot = toot.replace(&decoded_url.0, &decoded_url.1);
}
for decoded_mention in twitter_mentions(&tweet.entities.user_mentions)
.into_iter()
.chain(mentions.to_owned())
.collect::<HashMap<String, String>>()
{
toot = toot.replace(&decoded_mention.0, &decoded_mention.1);
}
decode_html_entities(&toot).to_string()
}
/// Generic register function
/// As this function is supposed to be run only once, it will panic for every error it encounters
/// Most of this function is a direct copy/paste of the official `elefren` crate
@@ -155,74 +157,90 @@ mastodon_screen_name = \"{}\"
mod tests {
use super::*;
use chrono::prelude::*;
use egg_mode::tweet::TweetEntities;
#[test]
fn test_twitter_mentions() {
let mention_entity = MentionEntity {
id: 12345,
range: (1, 3),
name: "Ta Mere l0l".to_string(),
screen_name: "tamerelol".to_string(),
};
let mention_entities = vec![
MentionEntity {
id: 12345,
range: (1, 3),
name: "Ta Mere l0l".to_string(),
screen_name: "tamerelol".to_string(),
},
MentionEntity {
id: 6789,
range: (1, 3),
name: "TONPERE".to_string(),
screen_name: "tonpere".to_string(),
},
];
let twitter_ums = vec![mention_entity];
let mut toot = ":kikoo: @tamerelol @tonpere !".to_string();
let mut expected_mentions = HashMap::new();
expected_mentions.insert(
"@tamerelol".to_string(),
"@tamerelol@twitter.com".to_string(),
);
let scootaloo_config = HashMap::from([(
"test".to_string(),
(MastodonConfig {
twitter_screen_name: "tonpere".to_string(),
mastodon_screen_name: Some("lalali".to_string()),
twitter_page_size: None,
base: "https://mstdn.net".to_string(),
client_id: "".to_string(),
client_secret: "".to_string(),
redirect: "".to_string(),
token: "".to_string(),
}),
)]);
let decoded_mentions = twitter_mentions(&twitter_ums);
twitter_mentions(&mut toot, &mention_entities, &scootaloo_config);
assert_eq!(expected_mentions, decoded_mentions);
assert_eq!(&toot, ":kikoo: @tamerelol@twitter.com @lalali@mstdn.net !");
}
#[test]
fn test_decode_urls() {
let url_entity1 = UrlEntity {
display_url: "tamerelol".to_string(),
expanded_url: Some("https://www.nintendojo.fr/dojobar".to_string()),
range: (1, 3),
url: "https://t.me/tamerelol".to_string(),
};
let url_entity2 = UrlEntity {
display_url: "tamerelol".to_string(),
expanded_url: None,
range: (1, 3),
url: "https://t.me/tamerelol".to_string(),
};
let wrong_url_entity = UrlEntity {
display_url: "invité.es".to_string(),
expanded_url: Some("http://xn--invit-fsa.es".to_string()),
range: (85, 108),
url: "https://t.co/WAUgnpHLmo".to_string(),
};
let rewritten_url_entity = UrlEntity {
display_url: "youtu.be/w5TrSaoYmZ8".to_string(),
expanded_url: Some("https://youtu.be/w5TrSaoYmZ8".to_string()),
range: (0, 23),
url: "https://t.co/fUVYXuF7tg".to_string(),
};
let re = Regex::new("(.+)\\.es$").ok();
let alt: HashMap<String, String> = HashMap::from([
("youtube.com".to_string(), "invidio.us".to_string()),
("youtu.be".to_string(), "invidio.us".to_string()),
("www.youtube.com".to_string(), "invidio.us".to_string()),
let urls = HashMap::from([
(
"https://t.co/thisisatest".to_string(),
"https://www.nintendojo.fr/dojobar".to_string(),
),
(
"https://t.co/nopenotinclusive".to_string(),
"invité.es".to_string(),
),
]);
let twitter_urls = vec![
url_entity1,
url_entity2,
wrong_url_entity,
rewritten_url_entity,
let mut toot =
"Rendez-vous sur https://t.co/thisisatest avec nos https://t.co/nopenotinclusive !"
.to_string();
decode_urls(&mut toot, &urls);
assert_eq!(
&toot,
"Rendez-vous sur https://www.nintendojo.fr/dojobar avec nos invité.es !"
);
}
#[test]
fn test_associate_urls() {
let urls = vec![
UrlEntity {
display_url: "tamerelol".to_string(),
expanded_url: Some("https://www.nintendojo.fr/dojobar".to_string()),
range: (1, 3),
url: "https://t.me/tamerelol".to_string(),
},
UrlEntity {
display_url: "sadcat".to_string(),
expanded_url: None,
range: (1, 3),
url: "https://t.me/sadcat".to_string(),
},
UrlEntity {
display_url: "invité.es".to_string(),
expanded_url: Some("http://xn--invit-fsa.es".to_string()),
range: (85, 108),
url: "https://t.co/WAUgnpHLmo".to_string(),
},
];
let expected_urls = HashMap::from([
@@ -234,83 +252,72 @@ mod tests {
"https://t.co/WAUgnpHLmo".to_string(),
"invité.es".to_string(),
),
(
"https://t.co/fUVYXuF7tg".to_string(),
"https://invidio.us/w5TrSaoYmZ8".to_string(),
),
]);
let decoded_urls = decode_urls(&twitter_urls, &re, &Some(alt));
let re = Regex::new("(.+)\\.es$").ok();
assert_eq!(expected_urls, decoded_urls);
let associated_urls = associate_urls(&urls, &re);
assert_eq!(associated_urls, expected_urls);
}
#[test]
fn test_build_basic_status() {
let t = Tweet {
coordinates: None,
created_at: Utc::now(),
current_user_retweet: None,
display_text_range: None,
entities: TweetEntities {
hashtags: vec![],
symbols: vec![],
urls: vec![
UrlEntity {
display_url: "youtube.com/watch?v=w5TrSa…".to_string(),
expanded_url: Some("https://www.youtube.com/watch?v=w5TrSaoYmZ8".to_string()),
range: (93, 116),
url: "https://t.co/zXw0FfX2Nt".to_string(),
}
],
user_mentions: vec![
MentionEntity {
id: 491500016,
range: (80, 95),
name: "Nintendo France".to_string(),
screen_name: "NintendoFrance".to_string(),
},
MentionEntity {
id: 999999999,
range: (80, 95),
name: "Willy Wonka".to_string(),
screen_name: "WillyWonka".to_string(),
},
],
media: None,
},
extended_entities: None,
favorite_count: 0,
favorited: None,
filter_level: None,
id: 1491541246984306693,
in_reply_to_user_id: None,
in_reply_to_screen_name: None,
in_reply_to_status_id: None,
lang: None,
place: None,
possibly_sensitive: None,
quoted_status: None,
quoted_status_id: None,
retweet_count: 0,
retweeted: None,
retweeted_status: None,
source: None,
text: "Mother 1 &amp; 2 sur le NES/SNES online !\nDispo maintenant. cc @NintendoFrance @WillyWonka https://t.co/zXw0FfX2Nt".to_string(),
truncated: false,
user: None,
withheld_copyright: false,
withheld_in_countries: None,
withheld_scope: None,
};
fn test_replace_alt_services() {
let mut associated_urls = HashMap::from([
(
"https://t.co/youplaboom".to_string(),
"https://www.youtube.com/watch?v=dQw4w9WgXcQ".to_string(),
),
(
"https://t.co/thisisfine".to_string(),
"https://twitter.com/Nintendo/status/1594590628771688448".to_string(),
),
(
"https://t.co/nopenope".to_string(),
"https://www.nintendojo.fr/dojobar".to_string(),
),
(
"https://t.co/broken".to_string(),
"http://youtu.be".to_string(),
),
(
"https://t.co/alsobroken".to_string(),
"https://youtube.com".to_string(),
),
]);
let s: HashMap<String, String> = HashMap::from([(
"@WillyWonka".to_string(),
"@WillyWonka@chocolatefactory.org".to_string(),
)]);
let alt_services = HashMap::from([
("twitter.com".to_string(), "nitter.net".to_string()),
("youtu.be".to_string(), "invidio.us".to_string()),
("www.youtube.com".to_string(), "invidio.us".to_string()),
("youtube.com".to_string(), "invidio.us".to_string()),
]);
let t_out = build_basic_status(&t, &s, &None, &None);
let expected_urls = HashMap::from([
(
"https://t.co/youplaboom".to_string(),
"https://invidio.us/watch?v=dQw4w9WgXcQ".to_string(),
),
(
"https://t.co/thisisfine".to_string(),
"https://nitter.net/Nintendo/status/1594590628771688448".to_string(),
),
(
"https://t.co/nopenope".to_string(),
"https://www.nintendojo.fr/dojobar".to_string(),
),
(
"https://t.co/broken".to_string(),
"http://youtu.be".to_string(),
),
(
"https://t.co/alsobroken".to_string(),
"https://youtube.com".to_string(),
),
]);
assert_eq!(&t_out, "Mother 1 & 2 sur le NES/SNES online !\nDispo maintenant. cc @NintendoFrance@twitter.com @WillyWonka@chocolatefactory.org https://www.youtube.com/watch?v=w5TrSaoYmZ8");
replace_alt_services(&mut associated_urls, &alt_services);
assert_eq!(associated_urls, expected_urls);
}
}