Files
scootaloo/src/lib.rs

413 lines
12 KiB
Rust
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

// std
use std::{
borrow::Cow,
collections::HashMap,
io::stdin,
fmt,
fs::{read_to_string, write},
error::Error,
};
// toml
use serde::Deserialize;
// egg-mode
use egg_mode::{
Token,
KeyPair,
entities::{UrlEntity, MediaEntity, MentionEntity, MediaType},
user::UserID,
tweet::{
Tweet,
user_timeline,
},
};
// elefren
use elefren::{
prelude::*,
apps::App,
status_builder::StatusBuilder,
scopes::Scopes,
};
// reqwest
use reqwest::Url;
// tokio
use tokio::{
io::copy,
fs::{File, create_dir_all, remove_file},
};
// htmlescape
use htmlescape::decode_html;
// log
use log::{info, warn, error, debug};
/**********
* Generic usage functions
***********/
/*
* Those functions are related to the Twitter side of things
*/
/// Read last tweet id from a file
fn read_state(s: &str) -> Option<u64> {
let state = read_to_string(s);
if let Ok(s) = state {
debug!("Last Tweet ID (from file): {}", &s);
return s.parse::<u64>().ok();
}
None
}
/// Writes last treated tweet id to a file
fn write_state(f: &str, s: u64) -> Result<(), std::io::Error> {
write(f, format!("{}", s))
}
/// Gets twitter oauth2 token
fn get_oauth2_token(config: &Config) -> Token {
let con_token = KeyPair::new(String::from(&config.twitter.consumer_key), String::from(&config.twitter.consumer_secret));
let access_token = KeyPair::new(String::from(&config.twitter.access_key), String::from(&config.twitter.access_secret));
Token::Access {
consumer: con_token,
access: access_token,
}
}
/// Gets twitter user timeline
async fn get_user_timeline(config: &Config, token: Token, lid: Option<u64>) -> Result<Vec<Tweet>, Box<dyn Error>> {
// fix the page size to 200 as it is the maximum Twitter authorizes
let (_timeline, feed) = user_timeline(UserID::from(String::from(&config.twitter.username)), true, false, &token)
.with_page_size(200)
.older(lid).await?;
Ok(feed.to_vec())
}
/// Decodes urls from UrlEntities
fn decode_urls(urls: &Vec<UrlEntity>) -> HashMap<String, String> {
let mut decoded_urls = HashMap::new();
for url in urls {
if url.expanded_url.is_some() {
// unwrap is safe here as we just verified that there is something inside expanded_url
decoded_urls.insert(String::from(&url.url), String::from(url.expanded_url.as_deref().unwrap()));
}
}
decoded_urls
}
/// Decodes the Twitter mention to something that will make sense once Twitter has joined the
/// Fediverse
fn twitter_mentions(ums: &Vec<MentionEntity>) -> HashMap<String, String> {
let mut decoded_mentions = HashMap::new();
for um in ums {
decoded_mentions.insert(format!("@{}", um.screen_name), format!("@{}@twitter.com", um.screen_name));
}
decoded_mentions
}
/// Retrieves a single media from a tweet and store it in a temporary file
async fn get_tweet_media(m: &MediaEntity, t: &str) -> Result<String, Box<dyn Error>> {
match m.media_type {
MediaType::Photo => {
return cache_media(&m.media_url_https, t).await;
},
_ => {
match &m.video_info {
Some(v) => {
for variant in &v.variants {
if variant.content_type == "video/mp4" {
return cache_media(&variant.url, t).await;
}
}
return Err(ScootalooError::new(&format!("Media Type for {} is video but no mp4 file URL is available", &m.url)).into());
},
None => {
return Err(ScootalooError::new(&format!("Media Type for {} is video but does not contain any video_info", &m.url)).into());
},
}
},
};
}
/*
* Those functions are related to the Mastodon side of things
*/
/// Gets Mastodon Data
fn get_mastodon_token(masto: &MastodonConfig) -> Mastodon {
let data = Data {
base: Cow::from(String::from(&masto.base)),
client_id: Cow::from(String::from(&masto.client_id)),
client_secret: Cow::from(String::from(&masto.client_secret)),
redirect: Cow::from(String::from(&masto.redirect)),
token: Cow::from(String::from(&masto.token)),
};
Mastodon::from(data)
}
/// Builds toot text from tweet
fn build_basic_status(tweet: &Tweet) -> Result<String, Box<dyn Error>> {
let mut toot = String::from(&tweet.text);
let decoded_urls = decode_urls(&tweet.entities.urls);
for decoded_url in decoded_urls {
toot = toot.replace(&decoded_url.0, &decoded_url.1);
}
let decoded_mentions = twitter_mentions(&tweet.entities.user_mentions);
for decoded_mention in decoded_mentions {
toot = toot.replace(&decoded_mention.0, &decoded_mention.1);
}
if let Ok(t) = decode_html(&toot) {
toot = t;
}
Ok(toot)
}
/*
* Generic private functions
*/
/// Gets and caches Twitter Media inside the determined temp dir
async fn cache_media(u: &str, t: &str) -> Result<String, Box<dyn Error>> {
// create dir
create_dir_all(t).await?;
// get file
let mut response = reqwest::get(u).await?;
// create local file
let url = Url::parse(u)?;
let dest_filename = url.path_segments().ok_or_else(|| ScootalooError::new(&format!("Cannot determine the destination filename for {}", u)))?
.last().ok_or_else(|| ScootalooError::new(&format!("Cannot determine the destination filename for {}", u)))?;
let dest_filepath = format!("{}/{}", t, dest_filename);
let mut dest_file = File::create(&dest_filepath).await?;
while let Some(chunk) = response.chunk().await? {
copy(&mut &*chunk, &mut dest_file).await?;
}
Ok(dest_filepath)
}
/**********
* local error handler
**********/
#[derive(Debug)]
struct ScootalooError {
details: String,
}
impl ScootalooError {
fn new(msg: &str) -> ScootalooError {
ScootalooError {
details: String::from(msg),
}
}
}
impl fmt::Display for ScootalooError {
fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
write!(f, "{}", self.details)
}
}
impl std::error::Error for ScootalooError {
fn description(&self) -> &str {
&self.details
}
}
/**********
* Config structure
***********/
/// General configuration Struct
#[derive(Debug, Deserialize)]
pub struct Config {
twitter: TwitterConfig,
mastodon: MastodonConfig,
scootaloo: ScootalooConfig,
}
#[derive(Debug, Deserialize)]
struct TwitterConfig {
username: String,
consumer_key: String,
consumer_secret: String,
access_key: String,
access_secret: String,
}
#[derive(Debug, Deserialize)]
struct MastodonConfig {
base: String,
client_id: String,
client_secret: String,
redirect: String,
token: String,
}
#[derive(Debug, Deserialize)]
struct ScootalooConfig {
last_tweet_path: String,
cache_path: String,
}
/*********
* Main functions
*********/
/// Parses the TOML file into a Config Struct
pub fn parse_toml(toml_file: &str) -> Config {
let toml_config = read_to_string(toml_file).unwrap_or_else(|e|
panic!("Cannot open config file {}: {}", toml_file, e)
);
let config: Config = toml::from_str(&toml_config).unwrap_or_else(|e|
panic!("Cannot parse TOML file {}: {}", toml_file, e)
);
config
}
/// Generic register function
/// As this function is supposed to be run only once, it will panic for every error it encounters
/// Most of this function is a direct copy/paste of the official `elefren` crate
pub fn register(host: &str) {
let mut builder = App::builder();
builder.client_name(Cow::from(String::from(env!("CARGO_PKG_NAME"))))
.redirect_uris(Cow::from(String::from("urn:ietf:wg:oauth:2.0:oob")))
.scopes(Scopes::write_all())
.website(Cow::from(String::from("https://framagit.org/veretcle/scootaloo")));
let app = builder.build().expect("Cannot build the app");
let registration = Registration::new(host).register(app).expect("Cannot build registration object");
let url = registration.authorize_url().expect("Cannot generate registration URI!");
println!("Click this link to authorize on Mastodon: {}", url);
println!("Paste the returned authorization code: ");
let mut input = String::new();
stdin().read_line(&mut input).expect("Unable to read back registration code!");
let code = input.trim();
let mastodon = registration.complete(code).expect("Unable to create access token!");
let toml = toml::to_string(&*mastodon).unwrap();
println!("Please insert the following block at the end of your configuration file:\n[mastodon]\n{}", toml);
}
/// This is where the magic happens
#[tokio::main]
pub async fn run(config: Config) {
// retrieve the last tweet ID for the username
let last_tweet_id = read_state(&config.scootaloo.last_tweet_path);
// get OAuth2 token
let token = get_oauth2_token(&config);
// get Mastodon instance
let mastodon = get_mastodon_token(&config.mastodon);
// get user timeline feed (Vec<tweet>)
let mut feed = get_user_timeline(&config, token, last_tweet_id).await.unwrap_or_else(|e|
panic!("Something went wrong when trying to retrieve {}s timeline: {}", &config.twitter.username, e)
);
// empty feed -> exiting
if feed.is_empty() {
info!("Nothing to retrieve since last time, exiting…");
return;
}
// order needs to be chronological
feed.reverse();
for tweet in &feed {
// determine if the tweet is part of a thread (response to self) or a standard response
debug!("Treating Tweet {} inside feed", tweet.id);
if let Some(r) = &tweet.in_reply_to_screen_name {
if &r.to_lowercase() != &config.twitter.username.to_lowercase() {
// we are responding not threading
info!("Tweet is a direct response, skipping");
continue;
}
};
// build basic status by just yielding text and dereferencing contained urls
let mut status_text = match build_basic_status(tweet) {
Ok(t) => t,
Err(e) => {
error!("Could not create status from tweet {}: {}", tweet.id ,e);
continue;
},
};
let mut status_medias: Vec<String> = vec![];
// reupload the attachments if any
if let Some(m) = &tweet.extended_entities {
for media in &m.media {
let local_tweet_media_path = match get_tweet_media(&media, &config.scootaloo.cache_path).await {
Ok(m) => m,
Err(e) => {
warn!("Cannot get tweet media for {}: {}", &media.url, e);
continue;
},
};
let mastodon_media_ids = match mastodon.media(Cow::from(String::from(&local_tweet_media_path))) {
Ok(m) => {
remove_file(&local_tweet_media_path).await.unwrap_or_else(|e|
warn!("Attachment {} has been uploaded but Im unable to remove the existing file: {}", &local_tweet_media_path, e)
);
m.id
},
Err(e) => {
error!("Attachment {} cannot be uploaded to Mastodon Instance: {}", &local_tweet_media_path, e);
continue;
}
};
status_medias.push(mastodon_media_ids);
// last step, removing the reference to the media from with the toots text
status_text = status_text.replace(&media.url, "");
}
}
debug!("Building corresponding Mastodon status");
let status = StatusBuilder::new()
.status(&status_text)
.media_ids(status_medias)
.build()
.expect(&format!("Cannot build status with text {}", &status_text));
// publish status
mastodon.new_status(status).unwrap();
// this will panic if it cannot publish the status, which is a good thing, it allows the
// last_tweet gathered not to be written
// write the current state (tweet ID) to avoid copying it another time
write_state(&config.scootaloo.last_tweet_path, tweet.id).unwrap_or_else(|e|
panic!("Cant write the last tweet retrieved: {}", e)
);
}
}