Files
scootaloo/src/lib.rs
2020-03-01 20:59:40 +01:00

370 lines
11 KiB
Rust
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

// std
use std::{
path::Path,
borrow::Cow,
collections::HashMap,
io::{stdin, copy},
fmt,
fs::{read_to_string, write, create_dir_all, File, remove_file},
error::Error,
};
// toml
use serde::Deserialize;
// egg-mode
use egg_mode::{
Token,
KeyPair,
entities::{UrlEntity, MediaEntity, MediaType},
tweet::{
Tweet,
user_timeline,
},
};
use tokio::runtime::current_thread::block_on_all;
// mammut
use mammut::{
Mastodon,
Data,
Registration,
apps::{AppBuilder, Scopes},
status_builder::StatusBuilder,
media_builder::MediaBuilder,
};
// reqwest
use reqwest::blocking::Client;
/**********
* Generic usage functions
***********/
/*
* Those functions are related to the Twitter side of things
*/
/// Read last tweet id from a file
fn read_state(s: &str) -> Option<u64> {
let state = read_to_string(s);
if let Ok(s) = state {
return s.parse::<u64>().ok();
}
None
}
/// Write last treated tweet id to a file
fn write_state(f: &str, s: u64) -> Result<(), std::io::Error> {
write(f, format!("{}", s))
}
/// Get twitter oauth2 token
fn get_oauth2_token(config: &Config) -> Token {
let con_token = KeyPair::new(String::from(&config.twitter.consumer_key), String::from(&config.twitter.consumer_secret));
let access_token = KeyPair::new(String::from(&config.twitter.access_key), String::from(&config.twitter.access_secret));
Token::Access {
consumer: con_token,
access: access_token,
}
}
/// Get twitter user timeline
fn get_user_timeline(config: &Config, token: Token, lid: Option<u64>) -> Result<Vec<Tweet>, Box<dyn Error>> {
// fix the page size to 200 as it is the maximum Twitter authorizes
let (_timeline, feed) = block_on_all(user_timeline(&config.twitter.username, true, true, &token)
.with_page_size(200)
.older(lid))?;
Ok(feed.to_vec())
}
/// decode urls from UrlEntities
fn decode_urls(urls: &Vec<UrlEntity>) -> HashMap<String, String> {
let mut decoded_urls = HashMap::new();
for url in urls {
if url.expanded_url.is_some() {
// unwrap is safe here as we just verified that there is something inside expanded_url
decoded_urls.insert(String::from(&url.url), String::from(url.expanded_url.as_deref().unwrap()));
}
}
decoded_urls
}
/// Retrieve a single media from a tweet and store it in a temporary file
fn get_tweet_media(m: &MediaEntity, t: &str) -> Result<String, Box<dyn Error>> {
match m.media_type {
MediaType::Photo => {
return cache_media(&m.media_url_https, t);
},
_ => {
match &m.video_info {
Some(v) => {
for variant in &v.variants {
if variant.content_type == "video/mp4" {
return cache_media(&variant.url, t);
}
}
return Err(Box::new(ScootalooError::new(format!("Media Type for {} is video but no mp4 file URL is available", &m.url).as_str())));
},
None => {
return Err(Box::new(ScootalooError::new(format!("Media Type for {} is video but does not contain any video_info", &m.url).as_str())));
},
}
},
};
}
/*
* Those functions are related to the Mastodon side of things
*/
/// Get Mastodon Data
fn get_mastodon_token(masto: &MastodonConfig) -> Mastodon {
let data = Data {
base: Cow::from(String::from(&masto.base)),
client_id: Cow::from(String::from(&masto.client_id)),
client_secret: Cow::from(String::from(&masto.client_secret)),
redirect: Cow::from(String::from(&masto.redirect)),
token: Cow::from(String::from(&masto.token)),
};
Mastodon::from_data(data)
}
/// build toot from tweet
fn build_basic_status(tweet: &Tweet) -> Result<StatusBuilder, Box<dyn Error>> {
let mut toot = String::from(&tweet.text);
let decoded_urls = decode_urls(&tweet.entities.urls);
for decoded_url in decoded_urls {
toot = toot.replace(&decoded_url.0, &decoded_url.1);
}
Ok(StatusBuilder::new(toot))
}
/*
* Generic private functions
*/
fn cache_media(u: &str, t: &str) -> Result<String, Box<dyn Error>> {
// create dir
if !Path::new(t).is_dir() {
create_dir_all(t)?;
}
// get file
let client = Client::new();
let mut response = client.get(u).send()?;
// create local file
let dest_filename = match response.url()
.path_segments()
.and_then(|segments| segments.last()) {
Some(r) => r,
None => {
return Err(Box::new(ScootalooError::new(format!("Cannot determine the destination filename for {}", u).as_str())));
},
};
let dest_filepath = format!("{}/{}", t, dest_filename);
let mut dest_file = File::create(&dest_filepath)?;
copy(&mut response, &mut dest_file)?;
Ok(dest_filepath)
}
/**********
* local error handler
**********/
#[derive(Debug)]
struct ScootalooError {
details: String,
}
impl ScootalooError {
fn new(msg: &str) -> ScootalooError {
ScootalooError {
details: String::from(msg),
}
}
}
impl fmt::Display for ScootalooError {
fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
write!(f, "{}", self.details)
}
}
impl std::error::Error for ScootalooError {
fn description(&self) -> &str {
&self.details
}
}
/**********
* Config structure
***********/
/// General configuration Struct
#[derive(Debug, Deserialize)]
pub struct Config {
twitter: TwitterConfig,
mastodon: MastodonConfig,
scootaloo: ScootalooConfig,
}
#[derive(Debug, Deserialize)]
struct TwitterConfig {
username: String,
consumer_key: String,
consumer_secret: String,
access_key: String,
access_secret: String,
}
#[derive(Debug, Deserialize)]
struct MastodonConfig {
base: String,
client_id: String,
client_secret: String,
redirect: String,
token: String,
}
#[derive(Debug, Deserialize)]
struct ScootalooConfig {
last_tweet_path: String,
cache_path: String,
}
/*********
* Main functions
*********/
/// Parses the TOML file into a Config Struct
pub fn parse_toml(toml_file: &str) -> Config {
let toml_config = read_to_string(toml_file).unwrap_or_else(|e|
panic!("Cannot open config file {}: {}", toml_file, e)
);
let config: Config = toml::from_str(&toml_config).unwrap_or_else(|e|
panic!("Cannot parse TOML file {}: {}", toml_file, e)
);
config
}
/// Generic register function
/// As this function is supposed to be run only once, it will panic for every error it encounters
/// Most of this function is a direct copy/paste of the official `mammut` crate
pub fn register(host: &str) {
let app = AppBuilder {
client_name: env!("CARGO_PKG_NAME"),
redirect_uris: "urn:ietf:wg:oauth:2.0:oob",
scopes: Scopes::Write,
website: Some("https://framagit.org/veretcle/scootaloo"),
};
let mut registration = Registration::new(host);
registration.register(app).expect("Registration failed!");
let url = registration.authorise().expect("Cannot generate registration URI!");
println!("Click this link to authorize on Mastodon: {}", url);
println!("Paste the returned authorization code: ");
let mut input = String::new();
stdin().read_line(&mut input).expect("Unable to read back registration code!");
let code = input.trim();
let mastodon = registration.create_access_token(code.to_string()).expect("Unable to create access token!");
let toml = toml::to_string(&*mastodon).unwrap();
println!("Please insert the following block at the end of your configuration file:\n[mastodon]\n{}", toml);
}
/// This is where the magic happens
pub fn run(config: Config) {
// retrieve the last tweet ID for the username
let last_tweet_id = read_state(&config.scootaloo.last_tweet_path);
// get OAuth2 token
let token = get_oauth2_token(&config);
// get Mastodon instance
let mastodon = get_mastodon_token(&config.mastodon);
// get user timeline feed (Vec<tweet>)
let mut feed = get_user_timeline(&config, token, last_tweet_id).unwrap_or_else(|e|
panic!("Something went wrong when trying to retrieve {}s timeline: {}", &config.twitter.username, e)
);
// empty feed -> exiting
if feed.is_empty() {
println!("Nothing to retrieve since last time, exiting…");
return;
}
// order needs to be chronological
feed.reverse();
for tweet in &feed {
// build basic status by just yielding text and dereferencing contained urls
let mut status = match build_basic_status(tweet) {
Ok(t) => t,
Err(e) => {
println!("Could not create status from tweet {}: {}", tweet.id ,e);
continue;
},
};
// reupload the attachments if any
if let Some(m) = &tweet.extended_entities {
for media in &m.media {
let local_tweet_media_path = match get_tweet_media(&media, &config.scootaloo.cache_path) {
Ok(m) => m,
Err(e) => {
println!("Cannot get tweet media for {}: {}", &media.url, e);
continue;
},
};
let mastodon_media_ids = match mastodon.media(MediaBuilder::new(Cow::from(String::from(&local_tweet_media_path)))) {
Ok(m) => {
remove_file(&local_tweet_media_path).unwrap_or_else(|e|
println!("Attachment for {} has been upload, but Im unable to remove the existing file: {}", &local_tweet_media_path, e)
);
m.id
},
Err(e) => {
println!("Cannot attach media {} to Mastodon Instance: {}", &local_tweet_media_path, e);
continue;
}
};
// media has been successfully uploaded, adding it to the toot
match status.media_ids {
Some(ref mut i) => i.push(mastodon_media_ids),
None => status.media_ids = Some(vec![mastodon_media_ids]),
};
// last step, removing the reference to the media from with the toots text
status.status = status.status.replace(&media.url, "");
}
}
// publish status
mastodon.new_status(status).unwrap();
// write the current state (tweet ID) to avoid copying it another time
write_state(&config.scootaloo.last_tweet_path, tweet.id).unwrap_or_else(|e|
panic!("Cant write the last tweet retrieved: {}", e)
);
}
}