mirror of
https://framagit.org/veretcle/scootaloo.git
synced 2025-07-20 17:11:19 +02:00
459 lines
14 KiB
Rust
459 lines
14 KiB
Rust
// std
|
||
use std::{
|
||
borrow::Cow,
|
||
collections::HashMap,
|
||
io::stdin,
|
||
fmt,
|
||
fs::{read_to_string, write},
|
||
error::Error,
|
||
sync::{Arc, Mutex},
|
||
};
|
||
|
||
// toml
|
||
use serde::Deserialize;
|
||
|
||
// egg-mode
|
||
use egg_mode::{
|
||
Token,
|
||
KeyPair,
|
||
entities::{UrlEntity, MediaEntity, MentionEntity, MediaType},
|
||
user::UserID,
|
||
tweet::{
|
||
Tweet,
|
||
user_timeline,
|
||
},
|
||
};
|
||
|
||
// elefren
|
||
use elefren::{
|
||
prelude::*,
|
||
apps::App,
|
||
status_builder::StatusBuilder,
|
||
scopes::Scopes,
|
||
};
|
||
|
||
// reqwest
|
||
use reqwest::Url;
|
||
|
||
// tokio
|
||
use tokio::{
|
||
io::copy,
|
||
fs::{File, create_dir_all, remove_file},
|
||
sync::mpsc,
|
||
};
|
||
|
||
// htmlescape
|
||
use htmlescape::decode_html;
|
||
|
||
// log
|
||
use log::{info, warn, error, debug};
|
||
|
||
/**********
|
||
* Generic usage functions
|
||
***********/
|
||
/*
 * These functions are related to the Twitter side of things
 */
|
||
/// Reads last tweet id from a file
|
||
fn read_state(s: &str) -> Option<u64> {
|
||
let state = read_to_string(s);
|
||
|
||
if let Ok(s) = state {
|
||
debug!("Last Tweet ID (from file): {}", &s);
|
||
return s.parse::<u64>().ok();
|
||
}
|
||
|
||
None
|
||
}
|
||
|
||
/// Persists the ID of the last processed tweet.
///
/// `f` is the path of the state file (created or overwritten) and `s` the tweet ID, stored as
/// its plain decimal representation. Any I/O failure is handed back to the caller.
fn write_state(f: &str, s: u64) -> Result<(), std::io::Error> {
    let serialized_id = s.to_string();

    write(f, serialized_id)
}
|
||
|
||
/// Gets Twitter oauth2 token
|
||
fn get_oauth2_token(config: &Config) -> Token {
|
||
let con_token = KeyPair::new(String::from(&config.twitter.consumer_key), String::from(&config.twitter.consumer_secret));
|
||
let access_token = KeyPair::new(String::from(&config.twitter.access_key), String::from(&config.twitter.access_secret));
|
||
|
||
Token::Access {
|
||
consumer: con_token,
|
||
access: access_token,
|
||
}
|
||
}
|
||
|
||
/// Gets Twitter user timeline
|
||
async fn get_user_timeline(config: &Config, token: Token, lid: Option<u64>) -> Result<Vec<Tweet>, Box<dyn Error>> {
|
||
// fix the page size to 200 as it is the maximum Twitter authorizes
|
||
let (_, feed) = user_timeline(UserID::from(String::from(&config.twitter.username)), true, false, &token)
|
||
.with_page_size(200)
|
||
.older(lid)
|
||
.await?;
|
||
|
||
Ok(feed.to_vec())
|
||
}
|
||
|
||
/// Decodes urls from UrlEntities
|
||
fn decode_urls(urls: &Vec<UrlEntity>) -> HashMap<String, String> {
|
||
let mut decoded_urls = HashMap::new();
|
||
|
||
for url in urls {
|
||
if url.expanded_url.is_some() {
|
||
// unwrap is safe here as we just verified that there is something inside expanded_url
|
||
decoded_urls.insert(String::from(&url.url), String::from(url.expanded_url.as_deref().unwrap()));
|
||
}
|
||
}
|
||
|
||
decoded_urls
|
||
}
|
||
|
||
/// Decodes the Twitter mention to something that will make sense once Twitter has joined the
|
||
/// Fediverse
|
||
fn twitter_mentions(ums: &Vec<MentionEntity>) -> HashMap<String, String> {
|
||
let mut decoded_mentions = HashMap::new();
|
||
|
||
for um in ums {
|
||
decoded_mentions.insert(format!("@{}", um.screen_name), format!("@{}@twitter.com", um.screen_name));
|
||
}
|
||
|
||
decoded_mentions
|
||
}
|
||
|
||
/// Retrieves a single media from a tweet and store it in a temporary file
|
||
async fn get_tweet_media(m: &MediaEntity, t: &str) -> Result<String, Box<dyn Error>> {
|
||
match m.media_type {
|
||
MediaType::Photo => {
|
||
return cache_media(&m.media_url_https, t).await;
|
||
},
|
||
_ => {
|
||
match &m.video_info {
|
||
Some(v) => {
|
||
for variant in &v.variants {
|
||
if variant.content_type == "video/mp4" {
|
||
return cache_media(&variant.url, t).await;
|
||
}
|
||
}
|
||
return Err(Box::new(ScootalooError::new(format!("Media Type for {} is video but no mp4 file URL is available", &m.url).as_str())));
|
||
},
|
||
None => {
|
||
return Err(Box::new(ScootalooError::new(format!("Media Type for {} is video but does not contain any video_info", &m.url).as_str())));
|
||
},
|
||
}
|
||
},
|
||
};
|
||
}
|
||
|
||
/*
 * These functions are related to the Mastodon side of things
 */
|
||
/// Gets Mastodon Data
|
||
fn get_mastodon_token(masto: &MastodonConfig) -> Mastodon {
|
||
let data = Data {
|
||
base: Cow::from(String::from(&masto.base)),
|
||
client_id: Cow::from(String::from(&masto.client_id)),
|
||
client_secret: Cow::from(String::from(&masto.client_secret)),
|
||
redirect: Cow::from(String::from(&masto.redirect)),
|
||
token: Cow::from(String::from(&masto.token)),
|
||
};
|
||
|
||
Mastodon::from(data)
|
||
}
|
||
|
||
/// Builds toot text from tweet
|
||
fn build_basic_status(tweet: &Tweet) -> Result<String, Box<dyn Error>> {
|
||
let mut toot = String::from(&tweet.text);
|
||
|
||
let decoded_urls = decode_urls(&tweet.entities.urls);
|
||
|
||
for decoded_url in decoded_urls {
|
||
toot = toot.replace(&decoded_url.0, &decoded_url.1);
|
||
}
|
||
|
||
let decoded_mentions = twitter_mentions(&tweet.entities.user_mentions);
|
||
|
||
for decoded_mention in decoded_mentions {
|
||
toot = toot.replace(&decoded_mention.0, &decoded_mention.1);
|
||
}
|
||
|
||
if let Ok(t) = decode_html(&toot) {
|
||
toot = t;
|
||
}
|
||
|
||
Ok(toot)
|
||
}
|
||
|
||
/*
|
||
* Generic private functions
|
||
*/
|
||
|
||
/// Gets and caches Twitter Media inside the determined temp dir
|
||
async fn cache_media(u: &str, t: &str) -> Result<String, Box<dyn Error>> {
|
||
// create dir
|
||
create_dir_all(t).await?;
|
||
|
||
// get file
|
||
let mut response = reqwest::get(u).await?;
|
||
|
||
// create local file
|
||
let url = Url::parse(u)?;
|
||
let dest_filename = url.path_segments().ok_or_else(|| Box::new(ScootalooError::new(format!("Cannot determine the destination filename for {}", u).as_str())))?
|
||
.last().ok_or_else(|| Box::new(ScootalooError::new(format!("Cannot determine the destination filename for {}", u).as_str())))?;
|
||
|
||
let dest_filepath = format!("{}/{}", t, dest_filename);
|
||
|
||
let mut dest_file = File::create(&dest_filepath).await?;
|
||
|
||
while let Some(chunk) = response.chunk().await? {
|
||
copy(&mut &*chunk, &mut dest_file).await?;
|
||
}
|
||
|
||
Ok(dest_filepath)
|
||
}
|
||
|
||
/**********
 * Message sent back by a media-handling task once an attachment has been uploaded
 **********/
/// Pairs the ID of an uploaded Mastodon attachment with the string to strip from the toot.
#[derive(Debug)]
struct ScootalooSpawnResponse {
    /// ID of the attachment as returned by the Mastodon instance
    mastodon_media_id: String,
    /// string sent along with the ID; `run` removes every occurrence of it from the status text
    twitter_media_url: String,
}
|
||
|
||
/**********
|
||
* local error handler
|
||
**********/
|
||
/// Error type local to this crate: a plain human-readable message.
#[derive(Debug)]
struct ScootalooError {
    /// the message shown when the error is displayed
    details: String,
}

impl ScootalooError {
    /// Creates an error out of the message `msg` (the message is copied).
    fn new(msg: &str) -> ScootalooError {
        let details = msg.to_owned();

        Self { details }
    }
}

impl fmt::Display for ScootalooError {
    /// Displays the error as its bare message.
    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
        f.write_str(&self.details)
    }
}

impl std::error::Error for ScootalooError {
    /// Returns the message of the error.
    fn description(&self) -> &str {
        self.details.as_str()
    }
}
|
||
|
||
/**********
|
||
* Config structure
|
||
***********/
|
||
/// General configuration Struct
|
||
#[derive(Debug, Deserialize)]
|
||
pub struct Config {
|
||
twitter: TwitterConfig,
|
||
mastodon: MastodonConfig,
|
||
scootaloo: ScootalooConfig,
|
||
}
|
||
|
||
#[derive(Debug, Deserialize)]
|
||
struct TwitterConfig {
|
||
username: String,
|
||
consumer_key: String,
|
||
consumer_secret: String,
|
||
access_key: String,
|
||
access_secret: String,
|
||
}
|
||
|
||
#[derive(Debug, Deserialize)]
|
||
struct MastodonConfig {
|
||
base: String,
|
||
client_id: String,
|
||
client_secret: String,
|
||
redirect: String,
|
||
token: String,
|
||
}
|
||
|
||
#[derive(Debug, Deserialize)]
|
||
struct ScootalooConfig {
|
||
last_tweet_path: String,
|
||
cache_path: String,
|
||
}
|
||
|
||
/*********
|
||
* Main functions
|
||
*********/
|
||
/// Parses the TOML file into a Config Struct
|
||
pub fn parse_toml(toml_file: &str) -> Config {
|
||
let toml_config = read_to_string(toml_file).unwrap_or_else(|e|
|
||
panic!("Cannot open config file {}: {}", toml_file, e)
|
||
);
|
||
|
||
let config: Config = toml::from_str(&toml_config).unwrap_or_else(|e|
|
||
panic!("Cannot parse TOML file {}: {}", toml_file, e)
|
||
);
|
||
|
||
config
|
||
}
|
||
|
||
/// Generic register function
|
||
/// As this function is supposed to be run only once, it will panic for every error it encounters
|
||
/// Most of this function is a direct copy/paste of the official `elefren` crate
|
||
pub fn register(host: &str) {
|
||
let mut builder = App::builder();
|
||
builder.client_name(Cow::from(String::from(env!("CARGO_PKG_NAME"))))
|
||
.redirect_uris(Cow::from(String::from("urn:ietf:wg:oauth:2.0:oob")))
|
||
.scopes(Scopes::write_all())
|
||
.website(Cow::from(String::from("https://framagit.org/veretcle/scootaloo")));
|
||
|
||
let app = builder.build().expect("Cannot build the app");
|
||
|
||
let registration = Registration::new(host).register(app).expect("Cannot build registration object");
|
||
let url = registration.authorize_url().expect("Cannot generate registration URI!");
|
||
|
||
println!("Click this link to authorize on Mastodon: {}", url);
|
||
println!("Paste the returned authorization code: ");
|
||
|
||
let mut input = String::new();
|
||
stdin().read_line(&mut input).expect("Unable to read back registration code!");
|
||
|
||
let code = input.trim();
|
||
let mastodon = registration.complete(code).expect("Unable to create access token!");
|
||
|
||
let toml = toml::to_string(&*mastodon).unwrap();
|
||
|
||
println!("Please insert the following block at the end of your configuration file:\n[mastodon]\n{}", toml);
|
||
}
|
||
|
||
/// This is where the magic happens
|
||
#[tokio::main]
|
||
pub async fn run(config: Config) {
|
||
// retrieve the last tweet ID for the username
|
||
let last_tweet_id = read_state(&config.scootaloo.last_tweet_path);
|
||
|
||
// get OAuth2 token
|
||
let token = get_oauth2_token(&config);
|
||
|
||
// get Mastodon instance
|
||
let mastodon = Arc::new(Mutex::new(get_mastodon_token(&config.mastodon)));
|
||
|
||
// get user timeline feed (Vec<tweet>)
|
||
let mut feed = get_user_timeline(&config, token, last_tweet_id)
|
||
.await
|
||
.unwrap_or_else(|e|
|
||
panic!("Something went wrong when trying to retrieve {}’s timeline: {}", &config.twitter.username, e)
|
||
);
|
||
|
||
// empty feed -> exiting
|
||
if feed.is_empty() {
|
||
info!("Nothing to retrieve since last time, exiting…");
|
||
return;
|
||
}
|
||
|
||
// order needs to be chronological
|
||
feed.reverse();
|
||
|
||
for tweet in &feed {
|
||
debug!("Treating Tweet {} inside feed", tweet.id);
|
||
// determine if the tweet is part of a thread (response to self) or a standard response
|
||
if let Some(r) = &tweet.in_reply_to_screen_name {
|
||
if &r.to_lowercase() != &config.twitter.username.to_lowercase() {
|
||
// we are responding not threading
|
||
info!("Tweet is a direct response, skipping");
|
||
continue;
|
||
}
|
||
};
|
||
|
||
// build basic status by just yielding text and dereferencing contained urls
|
||
let mut status_text = match build_basic_status(tweet) {
|
||
Ok(t) => t,
|
||
Err(e) => {
|
||
error!("Could not create status from tweet {}: {}", tweet.id ,e);
|
||
continue;
|
||
},
|
||
};
|
||
|
||
let mut status_medias: Vec<String> = vec![];
|
||
|
||
// reupload the attachments if any
|
||
if let Some(m) = &tweet.extended_entities {
|
||
let (tx, mut rx) = mpsc::channel(4);
|
||
|
||
for media in &m.media {
|
||
// creating a new tx for this initial loop
|
||
let tx = tx.clone();
|
||
// creating a new mastodon from the original mutex
|
||
let mastodon = mastodon.clone();
|
||
// unfortunately for this to be thread safe, we need to clone a lot of structures
|
||
let media = media.clone();
|
||
let cache_path = config.scootaloo.cache_path.clone();
|
||
|
||
tokio::spawn(async move {
|
||
debug!("Spawing new async thread to treat {}", &media.id);
|
||
let local_tweet_media_path = match get_tweet_media(&media, &cache_path).await {
|
||
Ok(m) => m,
|
||
Err(e) => {
|
||
// we could have panicked here, no issue, but I’m not confortable using
|
||
// that for now
|
||
warn!("Cannot get tweet media for {}: {}", &media.url, e);
|
||
return;
|
||
}
|
||
};
|
||
|
||
// we cannot directly do all the stuff inside here because mastodon lock can
|
||
// live outside this
|
||
let mas_result = mastodon.lock().unwrap().media(Cow::from(String::from(&local_tweet_media_path)));
|
||
|
||
match mas_result {
|
||
Ok(m) => {
|
||
remove_file(&local_tweet_media_path).await.unwrap_or_else(|e|
|
||
warn!("Attachment {} has been uploaded but I’m unable to remove the existing file: {}", &local_tweet_media_path, e)
|
||
);
|
||
// we can unwrap here because we’re in a thread
|
||
tx.send(ScootalooSpawnResponse {
|
||
mastodon_media_id: m.id.clone(),
|
||
twitter_media_url: local_tweet_media_path.clone()
|
||
}).await.unwrap();
|
||
},
|
||
Err(e) => {
|
||
error!("Attachment {} cannot be uploaded to Mastodon Instance: {}", &local_tweet_media_path, e);
|
||
}
|
||
}
|
||
});
|
||
}
|
||
|
||
// dropping the last tx otherwise recv() will wait indefinitely
|
||
drop(tx);
|
||
|
||
while let Some(i) = rx.recv().await {
|
||
// pushes the media into the media vec
|
||
status_medias.push(i.mastodon_media_id);
|
||
// removes the URL from the original Tweet text
|
||
status_text = status_text.replace(&i.twitter_media_url, "");
|
||
}
|
||
}
|
||
// finished reuploading attachments, now let’s do the toot baby!
|
||
|
||
debug!("Building corresponding Mastodon status");
|
||
let status = StatusBuilder::new()
|
||
.status(&status_text)
|
||
.media_ids(status_medias)
|
||
.build()
|
||
.expect(format!("Cannot build status with text {}", &status_text).as_str());
|
||
|
||
// publish status
|
||
// again unwrap is safe here as we are in the main thread
|
||
mastodon.lock().unwrap().new_status(status).unwrap();
|
||
// this will panic if it cannot publish the status, which is a good thing, it allows the
|
||
// last_tweet gathered not to be written
|
||
|
||
// write the current state (tweet ID) to avoid copying it another time
|
||
write_state(&config.scootaloo.last_tweet_path, tweet.id).unwrap_or_else(|e|
|
||
panic!("Can’t write the last tweet retrieved: {}", e)
|
||
);
|
||
}
|
||
}
|
||
|