Files
scootaloo/src/lib.rs

459 lines
14 KiB
Rust
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

// std
use std::{
borrow::Cow,
collections::HashMap,
io::stdin,
fmt,
fs::{read_to_string, write},
error::Error,
sync::{Arc, Mutex},
};
// toml
use serde::Deserialize;
// egg-mode
use egg_mode::{
Token,
KeyPair,
entities::{UrlEntity, MediaEntity, MentionEntity, MediaType},
user::UserID,
tweet::{
Tweet,
user_timeline,
},
};
// elefren
use elefren::{
prelude::*,
apps::App,
status_builder::StatusBuilder,
scopes::Scopes,
};
// reqwest
use reqwest::Url;
// tokio
use tokio::{
io::copy,
fs::{File, create_dir_all, remove_file},
sync::mpsc,
};
// htmlescape
use htmlescape::decode_html;
// log
use log::{info, warn, error, debug};
/**********
* Generic usage functions
***********/
/*
* These functions are related to the Twitter side of things
*/
/// Reads the id of the last treated tweet from the state file.
///
/// `s` is the path of the state file. Returns `None` when the file cannot be read or when its
/// content is not a valid `u64`.
///
/// Leading and trailing whitespace (typically the final newline an editor adds when the file is
/// modified by hand) is ignored: integer parsing rejects it, and a rejected id would be treated as
/// "no previous state".
fn read_state(s: &str) -> Option<u64> {
    let state = read_to_string(s).ok()?;
    debug!("Last Tweet ID (from file): {}", &state);
    state.trim().parse::<u64>().ok()
}
/// Persists the id of the last treated tweet.
///
/// `f` is the path of the state file and `s` the tweet id, which is stored as plain decimal text.
/// Any I/O error raised while writing is handed back to the caller.
fn write_state(f: &str, s: u64) -> Result<(), std::io::Error> {
    let serialized_id = s.to_string();
    write(f, serialized_id)
}
/// Builds the Twitter `Token::Access` from the credentials stored in the configuration
///
/// The consumer pair comes from `consumer_key`/`consumer_secret` and the access pair from
/// `access_key`/`access_secret`; every value is copied so the configuration stays usable.
fn get_oauth2_token(config: &Config) -> Token {
    let twitter = &config.twitter;
    Token::Access {
        consumer: KeyPair::new(twitter.consumer_key.clone(), twitter.consumer_secret.clone()),
        access: KeyPair::new(twitter.access_key.clone(), twitter.access_secret.clone()),
    }
}
/// Fetches the timeline of the configured Twitter user
///
/// `lid` is the id of the last tweet already treated (if any); it is handed over as is to the
/// timeline's `older` call. The returned tweets are an owned copy of the response content.
/// Any error reported while loading the timeline is propagated.
async fn get_user_timeline(config: &Config, token: Token, lid: Option<u64>) -> Result<Vec<Tweet>, Box<dyn Error>> {
    let account = UserID::from(config.twitter.username.clone());
    // 200 is the biggest page size Twitter authorizes
    // NOTE(review): the two booleans are presumably the "with replies" / "with retweets" flags
    // of `user_timeline` — confirm against the egg-mode documentation
    let (_, feed) = user_timeline(account, true, false, &token)
        .with_page_size(200)
        .older(lid)
        .await?;
    Ok(feed.to_vec())
}
/// Maps every shortened URL of a tweet to its expanded form
///
/// Entities that carry no `expanded_url` are left out of the map. When the same shortened URL
/// appears several times, the last entity wins.
///
/// The parameter is a slice so that any contiguous collection of entities can be passed; callers
/// holding a `&Vec<UrlEntity>` keep working unchanged.
fn decode_urls(urls: &[UrlEntity]) -> HashMap<String, String> {
    urls.iter()
        .filter_map(|url| {
            url.expanded_url
                .as_deref()
                .map(|expanded| (url.url.clone(), expanded.to_owned()))
        })
        .collect()
}
/// Decodes the Twitter mentions to something that will make sense once Twitter has joined the
/// Fediverse
///
/// Each key is the mention as written in a tweet (`@name`) and each value its Fediverse-style
/// counterpart (`@name@twitter.com`).
///
/// The parameter is a slice so that any contiguous collection of entities can be passed; callers
/// holding a `&Vec<MentionEntity>` keep working unchanged.
fn twitter_mentions(ums: &[MentionEntity]) -> HashMap<String, String> {
    ums.iter()
        .map(|um| {
            (
                format!("@{}", um.screen_name),
                format!("@{}@twitter.com", um.screen_name),
            )
        })
        .collect()
}
/// Downloads one media of a tweet into the cache directory `t`
///
/// Photos are fetched from their https URL. Every other media type is handled as a video: the
/// first `video/mp4` variant listed in `video_info` is fetched. Returns the path of the cached
/// file, or an error when the download fails or when no mp4 variant can be found.
async fn get_tweet_media(m: &MediaEntity, t: &str) -> Result<String, Box<dyn Error>> {
    if let MediaType::Photo = m.media_type {
        return cache_media(&m.media_url_https, t).await;
    }

    // anything that is not a photo must come with video information
    let video_info = match &m.video_info {
        Some(info) => info,
        None => {
            let reason = format!("Media Type for {} is video but does not contain any video_info", &m.url);
            return Err(ScootalooError::new(&reason).into());
        },
    };

    let mp4_variant = video_info
        .variants
        .iter()
        .find(|variant| variant.content_type == "video/mp4");

    match mp4_variant {
        Some(mp4) => cache_media(&mp4.url, t).await,
        None => {
            let reason = format!("Media Type for {} is video but no mp4 file URL is available", &m.url);
            Err(ScootalooError::new(&reason).into())
        },
    }
}
/*
* These functions are related to the Mastodon side of things
*/
/// Builds the Mastodon client from the `[mastodon]` section of the configuration
///
/// Every field of `masto` is copied into an owned `Data` value, which is then converted into
/// the client.
fn get_mastodon_token(masto: &MastodonConfig) -> Mastodon {
    Mastodon::from(Data {
        base: Cow::from(masto.base.clone()),
        client_id: Cow::from(masto.client_id.clone()),
        client_secret: Cow::from(masto.client_secret.clone()),
        redirect: Cow::from(masto.redirect.clone()),
        token: Cow::from(masto.token.clone()),
    })
}
/// Builds toot text from tweet
fn build_basic_status(tweet: &Tweet) -> Result<String, Box<dyn Error>> {
let mut toot = String::from(&tweet.text);
let decoded_urls = decode_urls(&tweet.entities.urls);
for decoded_url in decoded_urls {
toot = toot.replace(&decoded_url.0, &decoded_url.1);
}
let decoded_mentions = twitter_mentions(&tweet.entities.user_mentions);
for decoded_mention in decoded_mentions {
toot = toot.replace(&decoded_mention.0, &decoded_mention.1);
}
if let Ok(t) = decode_html(&toot) {
toot = t;
}
Ok(toot)
}
/*
* Generic private functions
*/
/// Gets and caches Twitter Media inside the determined temp dir
///
/// `u` is the URL of the media and `t` the directory it is stored in (created when missing).
/// The file is named after the last path segment of `u`. Returns the path of the written file.
///
/// # Errors
///
/// Fails when `u` cannot be parsed or has no usable path, when the directory or the file cannot
/// be created, when the request fails, or when the server answers with an HTTP error status
/// (otherwise the body of an error page would be cached as if it were the media).
async fn cache_media(u: &str, t: &str) -> Result<String, Box<dyn Error>> {
    // work out the destination first: no need to touch the disk or the network for a URL we
    // cannot derive a file name from
    let dest_filepath = {
        let url = Url::parse(u)?;
        let dest_filename = url
            .path_segments()
            .and_then(|segments| segments.last())
            .ok_or_else(|| ScootalooError::new(&format!("Cannot determine the destination filename for {}", u)))?;
        format!("{}/{}", t, dest_filename)
    };

    // create dir
    create_dir_all(t).await?;

    // get file, turning 4xx/5xx answers into errors
    let mut response = reqwest::get(u).await?.error_for_status()?;

    // create local file and stream the body into it chunk by chunk
    let mut dest_file = File::create(&dest_filepath).await?;
    while let Some(chunk) = response.chunk().await? {
        copy(&mut &*chunk, &mut dest_file).await?;
    }

    Ok(dest_filepath)
}
/**********
* This struct holds, for a single attachment, both the Mastodon media ID and the Twitter media URL
**********/
/// Message sent back by each media task spawned in `run` once its attachment has been uploaded
#[derive(Debug)]
struct ScootalooSpawnResponse {
/// Id of the media as returned by the Mastodon upload; pushed into the media ids of the status
mastodon_media_id: String,
/// Text that `run` removes from the status text once the media is attached
/// NOTE(review): the name implies the media URL as written in the tweet — confirm that the
/// producer fills it with that value
twitter_media_url: String,
}
/**********
* local error handler
**********/
/// Error type used for failures that are specific to this crate
///
/// It only carries a message, which is what `Display` prints.
#[derive(Debug)]
struct ScootalooError {
    /// Human readable explanation of what went wrong
    details: String,
}

impl ScootalooError {
    /// Creates an error holding a copy of `msg`
    fn new(msg: &str) -> Self {
        Self {
            details: msg.to_owned(),
        }
    }
}

impl fmt::Display for ScootalooError {
    /// Writes the message as is, without any decoration
    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
        f.write_str(&self.details)
    }
}

impl std::error::Error for ScootalooError {
    /// Same text as the `Display` output
    fn description(&self) -> &str {
        self.details.as_str()
    }
}
/**********
* Config structure
***********/
/// General configuration Struct
///
/// Deserialized from the TOML configuration file by `parse_toml`; one field per TOML section.
#[derive(Debug, Deserialize)]
pub struct Config {
/// Twitter account to mirror and the credentials used to read it
twitter: TwitterConfig,
/// Credentials of the Mastodon application the statuses are published with
mastodon: MastodonConfig,
/// Local paths used by the program itself (state file and media cache)
scootaloo: ScootalooConfig,
}
/// Twitter section of the configuration
#[derive(Debug, Deserialize)]
struct TwitterConfig {
/// Screen name of the account whose timeline is fetched; also compared (case-insensitively)
/// with the screen name a tweet replies to, in order to tell threads from plain responses
username: String,
/// Key of the consumer `KeyPair`
consumer_key: String,
/// Secret of the consumer `KeyPair`
consumer_secret: String,
/// Key of the access `KeyPair`
access_key: String,
/// Secret of the access `KeyPair`
access_secret: String,
}
/// Mastodon section of the configuration
///
/// Every field is copied as is into the field of the same name of the Mastodon client `Data`.
/// NOTE(review): presumably this is the block printed by `register` — confirm the field names
/// match its output
#[derive(Debug, Deserialize)]
struct MastodonConfig {
base: String,
client_id: String,
client_secret: String,
redirect: String,
token: String,
}
/// Section of the configuration holding the paths used by the program itself
#[derive(Debug, Deserialize)]
struct ScootalooConfig {
/// File in which the id of the last treated tweet is read at startup and written after each
/// published status
last_tweet_path: String,
/// Directory in which the medias of a tweet are downloaded before being uploaded to Mastodon
cache_path: String,
}
/*********
* Main functions
*********/
/// Loads the configuration from the TOML file located at `toml_file`
///
/// # Panics
///
/// Panics when the file cannot be read or when its content cannot be deserialized into a
/// `Config`.
pub fn parse_toml(toml_file: &str) -> Config {
    let raw_config = match read_to_string(toml_file) {
        Ok(content) => content,
        Err(e) => panic!("Cannot open config file {}: {}", toml_file, e),
    };

    match toml::from_str(&raw_config) {
        Ok(parsed) => parsed,
        Err(e) => panic!("Cannot parse TOML file {}: {}", toml_file, e),
    }
}
/// Registers the application against the Mastodon instance `host`
///
/// This is an interactive, run-once helper: it prints the authorization URL, reads the
/// authorization code back from the standard input and finally prints the `[mastodon]` block to
/// paste into the configuration file.
/// Most of this function is a direct copy/paste of the official `elefren` crate
///
/// # Panics
///
/// As it is supposed to be run only once, it panics on every error it encounters.
pub fn register(host: &str) {
    // describe the application we want to register
    let mut app_builder = App::builder();
    app_builder.client_name(Cow::from(String::from(env!("CARGO_PKG_NAME"))));
    app_builder.redirect_uris(Cow::from(String::from("urn:ietf:wg:oauth:2.0:oob")));
    app_builder.scopes(Scopes::write_all());
    app_builder.website(Cow::from(String::from("https://framagit.org/veretcle/scootaloo")));
    let app = app_builder.build().expect("Cannot build the app");

    let registration = Registration::new(host).register(app).expect("Cannot build registration object");
    let authorize_url = registration.authorize_url().expect("Cannot generate registration URI!");

    // hand over to the user, who has to bring the authorization code back
    println!("Click this link to authorize on Mastodon: {}", authorize_url);
    println!("Paste the returned authorization code: ");
    let mut answer = String::new();
    stdin().read_line(&mut answer).expect("Unable to read back registration code!");

    let mastodon = registration.complete(answer.trim()).expect("Unable to create access token!");
    let mastodon_section = toml::to_string(&*mastodon).unwrap();
    println!("Please insert the following block at the end of your configuration file:\n[mastodon]\n{}", mastodon_section);
}
/// This is where the magic happens
#[tokio::main]
pub async fn run(config: Config) {
// retrieve the last tweet ID for the username
let last_tweet_id = read_state(&config.scootaloo.last_tweet_path);
// get OAuth2 token
let token = get_oauth2_token(&config);
// get Mastodon instance
let mastodon = Arc::new(Mutex::new(get_mastodon_token(&config.mastodon)));
// get user timeline feed (Vec<tweet>)
let mut feed = get_user_timeline(&config, token, last_tweet_id)
.await
.unwrap_or_else(|e|
panic!("Something went wrong when trying to retrieve {}s timeline: {}", &config.twitter.username, e)
);
// empty feed -> exiting
if feed.is_empty() {
info!("Nothing to retrieve since last time, exiting…");
return;
}
// order needs to be chronological
feed.reverse();
for tweet in &feed {
debug!("Treating Tweet {} inside feed", tweet.id);
// determine if the tweet is part of a thread (response to self) or a standard response
if let Some(r) = &tweet.in_reply_to_screen_name {
if &r.to_lowercase() != &config.twitter.username.to_lowercase() {
// we are responding not threading
info!("Tweet is a direct response, skipping");
continue;
}
};
// build basic status by just yielding text and dereferencing contained urls
let mut status_text = match build_basic_status(tweet) {
Ok(t) => t,
Err(e) => {
error!("Could not create status from tweet {}: {}", tweet.id ,e);
continue;
},
};
let mut status_medias: Vec<String> = vec![];
// reupload the attachments if any
if let Some(m) = &tweet.extended_entities {
let (tx, mut rx) = mpsc::channel(4);
for media in &m.media {
// creating a new tx for this initial loop
let tx = tx.clone();
// creating a new mastodon from the original mutex
let mastodon = mastodon.clone();
// unfortunately for this to be thread safe, we need to clone a lot of structures
let media = media.clone();
let cache_path = config.scootaloo.cache_path.clone();
tokio::spawn(async move {
debug!("Spawing new async thread to treat {}", &media.id);
let local_tweet_media_path = match get_tweet_media(&media, &cache_path).await {
Ok(m) => m,
Err(e) => {
// we could have panicked here, no issue, but Im not confortable using
// that for now
warn!("Cannot get tweet media for {}: {}", &media.url, e);
return;
}
};
// we cannot directly do all the stuff inside here because mastodon lock can
// live outside this
let mas_result = mastodon.lock().unwrap().media(Cow::from(String::from(&local_tweet_media_path)));
match mas_result {
Ok(m) => {
remove_file(&local_tweet_media_path).await.unwrap_or_else(|e|
warn!("Attachment {} has been uploaded but Im unable to remove the existing file: {}", &local_tweet_media_path, e)
);
// we can unwrap here because were in a thread
tx.send(ScootalooSpawnResponse {
mastodon_media_id: m.id.clone(),
twitter_media_url: local_tweet_media_path.clone()
}).await.unwrap();
},
Err(e) => {
error!("Attachment {} cannot be uploaded to Mastodon Instance: {}", &local_tweet_media_path, e);
}
}
});
}
// dropping the last tx otherwise recv() will wait indefinitely
drop(tx);
while let Some(i) = rx.recv().await {
// pushes the media into the media vec
status_medias.push(i.mastodon_media_id);
// removes the URL from the original Tweet text
status_text = status_text.replace(&i.twitter_media_url, "");
}
}
// finished reuploading attachments, now lets do the toot baby!
debug!("Building corresponding Mastodon status");
let status = StatusBuilder::new()
.status(&status_text)
.media_ids(status_medias)
.build()
.expect(format!("Cannot build status with text {}", &status_text).as_str());
// publish status
// again unwrap is safe here as we are in the main thread
mastodon.lock().unwrap().new_status(status).unwrap();
// this will panic if it cannot publish the status, which is a good thing, it allows the
// last_tweet gathered not to be written
// write the current state (tweet ID) to avoid copying it another time
write_state(&config.scootaloo.last_tweet_path, tweet.id).unwrap_or_else(|e|
panic!("Cant write the last tweet retrieved: {}", e)
);
}
}