17 Commits

Author SHA1 Message Date
VC
01bac63fb9 Merge branch 'reply_improvements' into 'master'
refactor: improve reply/thread management

See merge request veretcle/scootaloo!25
2022-11-09 20:26:20 +00:00
VC
4f5663b450 feature: better error implementation for ScootalooError inside async block 2022-11-09 19:36:00 +01:00
VC
9a9c4b4809 chore: cargo update 2022-11-09 18:33:04 +01:00
VC
9970968b47 refactor: avoid panicking into thread, bubble up errors to main thread to be handled 2022-11-09 18:23:06 +01:00
VC
291c86677e refactor: get mastodon token after ensuring feed is not empty 2022-11-09 08:40:04 +01:00
VC
31afb1cf7d Merge branch 'async_media_upload' into 'master'
Async media upload

See merge request veretcle/scootaloo!24
2022-11-08 13:35:06 +00:00
VC
4415c4ac12 refactor: better logic flow for uploading/deleting media 2022-11-08 10:54:42 +01:00
VC
89f1372f9f bump: version v0.8.0 2022-11-08 08:54:36 +01:00
VC
06904434c8 fix: indentation error when registering 2022-11-08 08:54:36 +01:00
VC
3c64df23bc refactor: add info/debug 2022-11-08 08:54:32 +01:00
VC
c62f67c3b3 refactor: simpler mtask var 2022-11-08 08:37:26 +01:00
VC
3b0e7234af refactor: downloads/uploads every media from a tweet async way 2022-11-08 08:37:17 +01:00
VC
62011b4b81 refactor: downloads/uploads every media from a tweet async way 2022-11-07 21:47:12 +01:00
VC
5ce3bde3e7 fix: remove unecessary \n in TOML conf 2022-11-07 18:25:55 +01:00
VC
ab4184c0ed Merge branch 'async_multi_account' into 'master'
feat: attempt for async treatment of all accounts

See merge request veretcle/scootaloo!23
2022-11-05 09:36:55 +00:00
VC
de758c7bda refactor: separate function for media ids 2022-11-05 10:23:21 +01:00
VC
df75520175 feat: async treatment of all accounts 2022-11-04 15:26:27 +01:00
7 changed files with 937 additions and 645 deletions

1203
Cargo.lock generated

File diff suppressed because it is too large Load Diff

View File

@@ -1,6 +1,6 @@
[package] [package]
name = "scootaloo" name = "scootaloo"
version = "0.7.0" version = "0.8.1"
authors = ["VC <veretcle+framagit@mateu.be>"] authors = ["VC <veretcle+framagit@mateu.be>"]
edition = "2021" edition = "2021"

View File

@@ -1,8 +1,12 @@
use std::{ use std::{
boxed::Box,
convert::From,
error::Error, error::Error,
fmt::{Display, Formatter, Result}, fmt::{Display, Formatter, Result},
}; };
use elefren::Error as elefrenError;
#[derive(Debug)] #[derive(Debug)]
pub struct ScootalooError { pub struct ScootalooError {
details: String, details: String,
@@ -23,3 +27,15 @@ impl Display for ScootalooError {
write!(f, "{}", self.details) write!(f, "{}", self.details)
} }
} }
impl From<Box<dyn Error>> for ScootalooError {
fn from(error: Box<dyn Error>) -> Self {
ScootalooError::new(&format!("Error in a subset crate: {}", error))
}
}
impl From<elefrenError> for ScootalooError {
fn from(error: elefrenError) -> Self {
ScootalooError::new(&format!("Error in elefren crate: {}", error))
}
}

View File

@@ -13,124 +13,100 @@ mod twitter;
use twitter::*; use twitter::*;
mod util; mod util;
use crate::util::generate_media_ids;
mod state; mod state;
pub use state::{init_db, migrate_db}; pub use state::{init_db, migrate_db};
use state::{read_state, write_state, TweetToToot}; use state::{read_state, write_state, TweetToToot};
use elefren::{prelude::*, status_builder::StatusBuilder}; use elefren::{prelude::*, status_builder::StatusBuilder};
use log::{debug, error, info, warn}; use log::info;
use rusqlite::Connection; use rusqlite::Connection;
use std::borrow::Cow; use std::sync::Arc;
use tokio::fs::remove_file; use tokio::{spawn, sync::Mutex};
/// This is where the magic happens /// This is where the magic happens
#[tokio::main] #[tokio::main]
pub async fn run(config: Config) { pub async fn run(config: Config) {
// get OAuth2 token // create the task vector for handling multiple accounts
let token = get_oauth2_token(&config.twitter); let mut mtask = vec![];
for mastodon_config in config.mastodon.values() {
// open the SQLite connection // open the SQLite connection
let conn = Connection::open(&config.scootaloo.db_path).unwrap_or_else(|e| { let conn = Arc::new(Mutex::new(
Connection::open(&config.scootaloo.db_path).unwrap_or_else(|e| {
panic!( panic!(
"Something went wrong when opening the DB {}: {}", "Something went wrong when opening the DB {}: {}",
&config.scootaloo.db_path, e &config.scootaloo.db_path, e
) )
}); }),
// retrieve the last tweet ID for the username ));
let last_tweet_id = read_state(&conn, &mastodon_config.twitter_screen_name, None)
.unwrap_or_else(|e| panic!("Cannot retrieve last_tweet_id: {}", e))
.map(|s| s.tweet_id);
// get Mastodon instance for mastodon_config in config.mastodon.into_values() {
let mastodon = get_mastodon_token(mastodon_config); // create temporary value for each task
let scootaloo_cache_path = config.scootaloo.cache_path.clone();
let token = get_oauth2_token(&config.twitter);
let task_conn = conn.clone();
let task = spawn(async move {
info!("Starting treating {}", &mastodon_config.twitter_screen_name);
// retrieve the last tweet ID for the username
let lconn = task_conn.lock().await;
let last_tweet_id =
read_state(&lconn, &mastodon_config.twitter_screen_name, None)?.map(|r| r.tweet_id);
drop(lconn);
// get user timeline feed (Vec<tweet>) // get user timeline feed (Vec<tweet>)
let mut feed = get_user_timeline(mastodon_config, &token, last_tweet_id) let mut feed =
.await get_user_timeline(&mastodon_config.twitter_screen_name, &token, last_tweet_id)
.unwrap_or_else(|e| { .await?;
panic!(
"Something went wrong when trying to retrieve {}s timeline: {}",
&mastodon_config.twitter_screen_name, e
)
});
// empty feed -> exiting // empty feed -> exiting
if feed.is_empty() { if feed.is_empty() {
info!("Nothing to retrieve since last time, exiting…"); info!("Nothing to retrieve since last time, exiting…");
return; return Ok(());
} }
// get Mastodon instance
let mastodon = get_mastodon_token(&mastodon_config);
// order needs to be chronological // order needs to be chronological
feed.reverse(); feed.reverse();
for tweet in &feed { for tweet in &feed {
debug!("Treating Tweet {} inside feed", tweet.id); info!("Treating Tweet {} inside feed", tweet.id);
// initiate the toot_reply_id var // initiate the toot_reply_id var
let mut toot_reply_id: Option<String> = None; let mut toot_reply_id: Option<String> = None;
// determine if the tweet is part of a thread (response to self) or a standard response // determine if the tweet is part of a thread (response to self) or a standard response
if let Some(r) = &tweet.in_reply_to_screen_name { if let Some(r) = &tweet.in_reply_to_screen_name {
if r.to_lowercase() != mastodon_config.twitter_screen_name.to_lowercase() { if r.to_lowercase() != mastodon_config.twitter_screen_name.to_lowercase() {
// we are responding not threadin // we are responding not threading
info!("Tweet is a direct response, skipping"); info!("Tweet is a direct response, skipping");
continue; continue;
} }
info!("Tweet is a thread"); info!("Tweet is a thread");
// get the corresponding toot id
let lconn = task_conn.lock().await;
toot_reply_id = read_state( toot_reply_id = read_state(
&conn, &lconn,
&mastodon_config.twitter_screen_name, &mastodon_config.twitter_screen_name,
tweet.in_reply_to_status_id, tweet.in_reply_to_status_id,
) )
.unwrap_or(None) .unwrap_or(None)
.map(|s| s.toot_id); .map(|s| s.toot_id);
drop(lconn);
}; };
// build basic status by just yielding text and dereferencing contained urls // build basic status by just yielding text and dereferencing contained urls
let mut status_text = build_basic_status(tweet); let mut status_text = build_basic_status(tweet);
let mut status_medias: Vec<String> = vec![]; // building associative media list
// reupload the attachments if any let (media_url, status_medias) =
if let Some(m) = &tweet.extended_entities { generate_media_ids(tweet, &scootaloo_cache_path, &mastodon).await;
for media in &m.media {
let local_tweet_media_path =
match get_tweet_media(media, &config.scootaloo.cache_path).await {
Ok(m) => m,
Err(e) => {
error!("Cannot get tweet media for {}: {}", &media.url, e);
continue;
}
};
let mastodon_media_ids = match mastodon status_text = status_text.replace(&media_url, "");
.media(Cow::from(local_tweet_media_path.to_owned()))
{
Ok(m) => {
remove_file(&local_tweet_media_path)
.await
.unwrap_or_else(|e|
warn!("Attachment for {} has been uploaded, but Im unable to remove the existing file: {}", &local_tweet_media_path, e)
);
m.id
}
Err(e) => {
error!(
"Attachment {} cannot be uploaded to Mastodon Instance: {}",
&local_tweet_media_path, e
);
continue;
}
};
status_medias.push(mastodon_media_ids); info!("Building corresponding Mastodon status");
// last step, removing the reference to the media from with the toots text
status_text = status_text.replace(&media.url, "");
}
}
// finished reuploading attachments, now lets do the toot baby!
debug!("Building corresponding Mastodon status");
let mut status_builder = StatusBuilder::new(); let mut status_builder = StatusBuilder::new();
@@ -140,15 +116,14 @@ pub async fn run(config: Config) {
status_builder.in_reply_to(&i); status_builder.in_reply_to(&i);
} }
let status = status_builder // can be activated for test purposes
.build() // status_builder.visibility(elefren::status_builder::Visibility::Private);
.unwrap_or_else(|_| panic!("Cannot build status with text {}", &status_text));
// publish status let status = status_builder.build()?;
// again unwrap is safe here as we are in the main thread
let published_status = mastodon.new_status(status).unwrap(); let published_status = mastodon.new_status(status)?;
// this will panic if it cannot publish the status, which is a good thing, it allows the // this will return if it cannot publish the status preventing the last_tweet from
// last_tweet gathered not to be written // being written into db
let ttt_towrite = TweetToToot { let ttt_towrite = TweetToToot {
twitter_screen_name: mastodon_config.twitter_screen_name.clone(), twitter_screen_name: mastodon_config.twitter_screen_name.clone(),
@@ -157,8 +132,23 @@ pub async fn run(config: Config) {
}; };
// write the current state (tweet ID and toot ID) to avoid copying it another time // write the current state (tweet ID and toot ID) to avoid copying it another time
write_state(&conn, ttt_towrite) let lconn = task_conn.lock().await;
.unwrap_or_else(|e| panic!("Cant write the last tweet retrieved: {}", e)); write_state(&lconn, ttt_towrite)?;
drop(lconn);
}
Ok::<(), ScootalooError>(())
});
// push each task into the vec task
mtask.push(task);
}
// launch and wait for every handle
for handle in mtask {
match handle.await {
Ok(Err(e)) => eprintln!("Error within thread: {}", e),
Err(e) => eprintln!("Error with thread: {}", e),
_ => (),
} }
} }
} }

View File

@@ -101,9 +101,9 @@ pub fn register(host: &str, screen_name: &str) {
println!( println!(
"Please insert the following block at the end of your configuration file: "Please insert the following block at the end of your configuration file:
\n[mastodon.{}] [mastodon.{}]
\ntwitter_screen_name = \"{}\" twitter_screen_name = \"{}\"
\n{}", {}",
screen_name.to_lowercase(), screen_name.to_lowercase(),
screen_name, screen_name,
toml toml

View File

@@ -1,4 +1,3 @@
use crate::config::MastodonConfig;
use crate::config::TwitterConfig; use crate::config::TwitterConfig;
use crate::util::cache_media; use crate::util::cache_media;
use crate::ScootalooError; use crate::ScootalooError;
@@ -30,17 +29,12 @@ pub fn get_oauth2_token(config: &TwitterConfig) -> Token {
/// Gets Twitter user timeline /// Gets Twitter user timeline
pub async fn get_user_timeline( pub async fn get_user_timeline(
config: &MastodonConfig, screen_name: &str,
token: &Token, token: &Token,
lid: Option<u64>, lid: Option<u64>,
) -> Result<Vec<Tweet>, Box<dyn Error>> { ) -> Result<Vec<Tweet>, Box<dyn Error>> {
// fix the page size to 200 as it is the maximum Twitter authorizes // fix the page size to 200 as it is the maximum Twitter authorizes
let (_, feed) = user_timeline( let (_, feed) = user_timeline(UserID::from(screen_name.to_owned()), true, false, token)
UserID::from(config.twitter_screen_name.to_owned()),
true,
false,
token,
)
.with_page_size(200) .with_page_size(200)
.older(lid) .older(lid)
.await?; .await?;

View File

@@ -1,11 +1,100 @@
use crate::ScootalooError; use crate::{twitter::get_tweet_media, ScootalooError};
use std::{borrow::Cow, error::Error};
use egg_mode::tweet::Tweet;
use elefren::prelude::*;
use log::{error, info, warn};
use reqwest::Url; use reqwest::Url;
use std::error::Error;
use tokio::{ use tokio::{
fs::{create_dir_all, File}, fs::{create_dir_all, remove_file, File},
io::copy, io::copy,
}; };
/// Generate associative table between media ids and tweet extended entities
pub async fn generate_media_ids(
tweet: &Tweet,
cache_path: &str,
mastodon: &Mastodon,
) -> (String, Vec<String>) {
let mut media_url = "".to_string();
let mut media_ids: Vec<String> = vec![];
if let Some(m) = &tweet.extended_entities {
// create tasks list
let mut tasks = vec![];
// size of media_ids vector, should be equal to the media vector
media_ids.resize(m.media.len(), String::new());
info!("{} medias in tweet", m.media.len());
for (i, media) in m.media.iter().enumerate() {
// attribute media url
media_url = media.url.clone();
// clone everything we need
let cache_path = String::from(cache_path);
let media = media.clone();
let mastodon = mastodon.clone();
let task = tokio::task::spawn(async move {
info!("Start treating {}", media.media_url_https);
// get the tweet embedded media
let local_tweet_media_path = match get_tweet_media(&media, &cache_path).await {
Ok(l) => l,
Err(e) => {
return Err(ScootalooError::new(&format!(
"Cannot get tweet media for {}: {}",
&media.url, e
)))
}
};
// upload media to Mastodon
let mastodon_media = mastodon.media(Cow::from(local_tweet_media_path.to_owned()));
// at this point, we can safely erase the original file
// it doesnt matter if we cant remove, cache_media fn is idempotent
remove_file(&local_tweet_media_path).await.ok();
let mastodon_media = match mastodon_media {
Ok(m) => m,
Err(e) => {
return Err(ScootalooError::new(&format!(
"Attachment {} cannot be uploaded to Mastodon Instance: {}",
&local_tweet_media_path, e
)))
}
};
Ok((i, mastodon_media.id))
});
tasks.push(task);
}
for task in tasks {
match task.await {
// insert the media at the right place
Ok(Ok((i, v))) => media_ids[i] = v,
Ok(Err(e)) => warn!("{}", e),
Err(e) => error!("Something went wrong when joining the main thread: {}", e),
}
}
} else {
info!("No media in tweet");
}
// in case some media_ids slot remained empty due to errors, remove them
media_ids.retain(|x| !x.is_empty());
(media_url, media_ids)
}
/// Gets and caches Twitter Media inside the determined temp dir /// Gets and caches Twitter Media inside the determined temp dir
pub async fn cache_media(u: &str, t: &str) -> Result<String, Box<dyn Error>> { pub async fn cache_media(u: &str, t: &str) -> Result<String, Box<dyn Error>> {
// create dir // create dir