16 Commits

Author SHA1 Message Date
VC
b3e7ee9d84 Merge branch '5-migrate-from-tokio-loop-to-futures-stream' into 'master'
refactor: use futures instead of tokio for media upload

Closes #5

See merge request veretcle/scootaloo!28
2022-11-15 09:11:28 +00:00
VC
7f7219ea78 feat: turn tokio-based async logic into futures 2022-11-15 10:06:00 +01:00
VC
f371b8a297 feat: add default rate_limiting option 2022-11-15 10:06:00 +01:00
VC
ec3956eabb doc: add rate_limiting option 2022-11-15 10:06:00 +01:00
VC
ce84c05581 refactor: use futures instead of tokio for media upload 2022-11-15 10:05:57 +01:00
VC
b64621368b Merge branch '4-migrate-to-clap-v4' into 'master'
refactor: migrate from clap v2 to clap v4

Closes #4

See merge request veretcle/scootaloo!27
2022-11-14 19:57:32 +00:00
VC
89de1cf7a3 refactor: migrate from clap v2 to clap v4 2022-11-14 20:36:15 +01:00
VC
ffbe98f838 Merge branch 'generate_media_flow' into 'master'
Better media flow

See merge request veretcle/scootaloo!26
2022-11-14 13:41:07 +00:00
VC
822f4044c6 chore: bump version 2022-11-14 14:33:42 +01:00
VC
78924f6eeb refactor: simpler error bubbling inside async block 2022-11-14 14:33:39 +01:00
VC
9c14636735 refactor: avoid Box::new syntax, prefer into() 2022-11-14 14:25:08 +01:00
VC
01bac63fb9 Merge branch 'reply_improvements' into 'master'
refactor: improve reply/thread management

See merge request veretcle/scootaloo!25
2022-11-09 20:26:20 +00:00
VC
4f5663b450 feature: better error implementation for ScootalooError inside async block 2022-11-09 19:36:00 +01:00
VC
9a9c4b4809 chore: cargo update 2022-11-09 18:33:04 +01:00
VC
9970968b47 refactor: avoid panicking into thread, bubble up errors to main thread to be handled 2022-11-09 18:23:06 +01:00
VC
291c86677e refactor: get mastodon token after ensuring feed is not empty 2022-11-09 08:40:04 +01:00
9 changed files with 912 additions and 729 deletions

1236
Cargo.lock generated

File diff suppressed because it is too large Load Diff

View File

@@ -1,6 +1,6 @@
[package] [package]
name = "scootaloo" name = "scootaloo"
version = "0.8.0" version = "0.9.0"
authors = ["VC <veretcle+framagit@mateu.be>"] authors = ["VC <veretcle+framagit@mateu.be>"]
edition = "2021" edition = "2021"
@@ -10,11 +10,11 @@ edition = "2021"
chrono = "^0.4" chrono = "^0.4"
serde = { version = "1.0", features = ["derive"] } serde = { version = "1.0", features = ["derive"] }
toml = "^0.5" toml = "^0.5"
clap = "^2.34" clap = "^4"
futures = "^0.3"
egg-mode = "^0.16" egg-mode = "^0.16"
rusqlite = "^0.27" rusqlite = "^0.27"
tokio = { version = "1", features = ["full"]} tokio = { version = "^1", features = ["full"]}
futures = "^0.3"
elefren = "^0.22" elefren = "^0.22"
html-escape = "^0.2" html-escape = "^0.2"
reqwest = "^0.11" reqwest = "^0.11"

View File

@@ -18,6 +18,7 @@ First up, create a configuration file (default path is `/usr/local/etc/scootaloo
[scootaloo] [scootaloo]
db_path = "/var/lib/scootaloo/scootaloo.sqlite" ## file containing the SQLite Tweet corresponding Toot DB, must be writeable db_path = "/var/lib/scootaloo/scootaloo.sqlite" ## file containing the SQLite Tweet corresponding Toot DB, must be writeable
cache_path = "/tmp/scootaloo" ## a dir where the temporary files will be download, must be writeable cache_path = "/tmp/scootaloo" ## a dir where the temporary files will be download, must be writeable
rate_limiting = 4 ## optional, default 4, number of accounts handled simultaneously
[twitter] [twitter]
## Consumer/Access key for Twitter (can be generated at https://developer.twitter.com/en/apps) ## Consumer/Access key for Twitter (can be generated at https://developer.twitter.com/en/apps)

View File

@@ -32,6 +32,7 @@ pub struct MastodonConfig {
pub struct ScootalooConfig { pub struct ScootalooConfig {
pub db_path: String, pub db_path: String,
pub cache_path: String, pub cache_path: String,
pub rate_limit: Option<usize>,
} }
/// Parses the TOML file into a Config Struct /// Parses the TOML file into a Config Struct

View File

@@ -1,8 +1,12 @@
use std::{ use std::{
boxed::Box,
convert::From,
error::Error, error::Error,
fmt::{Display, Formatter, Result}, fmt::{Display, Formatter, Result},
}; };
use elefren::Error as elefrenError;
#[derive(Debug)] #[derive(Debug)]
pub struct ScootalooError { pub struct ScootalooError {
details: String, details: String,
@@ -23,3 +27,15 @@ impl Display for ScootalooError {
write!(f, "{}", self.details) write!(f, "{}", self.details)
} }
} }
impl From<Box<dyn Error>> for ScootalooError {
fn from(error: Box<dyn Error>) -> Self {
ScootalooError::new(&format!("Error in a subset crate: {}", error))
}
}
impl From<elefrenError> for ScootalooError {
fn from(error: elefrenError) -> Self {
ScootalooError::new(&format!("Error in elefren crate: {}", error))
}
}

View File

@@ -25,12 +25,13 @@ use rusqlite::Connection;
use std::sync::Arc; use std::sync::Arc;
use tokio::{spawn, sync::Mutex}; use tokio::{spawn, sync::Mutex};
use futures::StreamExt;
const DEFAULT_RATE_LIMIT: usize = 4;
/// This is where the magic happens /// This is where the magic happens
#[tokio::main] #[tokio::main]
pub async fn run(config: Config) { pub async fn run(config: Config) {
// create the task vector for handling multiple accounts
let mut mtask = vec![];
// open the SQLite connection // open the SQLite connection
let conn = Arc::new(Mutex::new( let conn = Arc::new(Mutex::new(
Connection::open(&config.scootaloo.db_path).unwrap_or_else(|e| { Connection::open(&config.scootaloo.db_path).unwrap_or_else(|e| {
@@ -41,121 +42,113 @@ pub async fn run(config: Config) {
}), }),
)); ));
for mastodon_config in config.mastodon.into_values() { let mut stream = futures::stream::iter(config.mastodon.into_values())
// create temporary value for each task .map(|mastodon_config| {
let scootaloo_cache_path = config.scootaloo.cache_path.clone(); // create temporary value for each task
let token = get_oauth2_token(&config.twitter); let scootaloo_cache_path = config.scootaloo.cache_path.clone();
let task_conn = conn.clone(); let token = get_oauth2_token(&config.twitter);
let task_conn = conn.clone();
let task = spawn(async move { spawn(async move {
info!("Starting treating {}", &mastodon_config.twitter_screen_name); info!("Starting treating {}", &mastodon_config.twitter_screen_name);
// retrieve the last tweet ID for the username // retrieve the last tweet ID for the username
let lconn = task_conn.lock().await; let lconn = task_conn.lock().await;
let last_tweet_id = read_state(&lconn, &mastodon_config.twitter_screen_name, None) let last_tweet_id = read_state(&lconn, &mastodon_config.twitter_screen_name, None)?
.unwrap_or_else(|e| panic!("Cannot retrieve last_tweet_id: {}", e)) .map(|r| r.tweet_id);
.map(|s| s.tweet_id); drop(lconn);
drop(lconn);
// get Mastodon instance // get user timeline feed (Vec<tweet>)
let mastodon = get_mastodon_token(&mastodon_config); let mut feed =
get_user_timeline(&mastodon_config.twitter_screen_name, &token, last_tweet_id)
.await?;
// get user timeline feed (Vec<tweet>) // empty feed -> exiting
let mut feed = if feed.is_empty() {
get_user_timeline(&mastodon_config.twitter_screen_name, &token, last_tweet_id) info!("Nothing to retrieve since last time, exiting…");
.await return Ok(());
.unwrap_or_else(|e| {
panic!(
"Something went wrong when trying to retrieve {}s timeline: {}",
&mastodon_config.twitter_screen_name, e
)
});
// empty feed -> exiting
if feed.is_empty() {
info!("Nothing to retrieve since last time, exiting…");
return;
}
// order needs to be chronological
feed.reverse();
for tweet in &feed {
info!("Treating Tweet {} inside feed", tweet.id);
// initiate the toot_reply_id var
let mut toot_reply_id: Option<String> = None;
// determine if the tweet is part of a thread (response to self) or a standard response
if let Some(r) = &tweet.in_reply_to_screen_name {
if r.to_lowercase() != mastodon_config.twitter_screen_name.to_lowercase() {
// we are responding not threading
info!("Tweet is a direct response, skipping");
continue;
}
info!("Tweet is a thread");
// get the corresponding toot id
let lconn = task_conn.lock().await;
toot_reply_id = read_state(
&lconn,
&mastodon_config.twitter_screen_name,
tweet.in_reply_to_status_id,
)
.unwrap_or(None)
.map(|s| s.toot_id);
drop(lconn);
};
// build basic status by just yielding text and dereferencing contained urls
let mut status_text = build_basic_status(tweet);
// building associative media list
let (media_url, status_medias) =
generate_media_ids(tweet, &scootaloo_cache_path, &mastodon).await;
status_text = status_text.replace(&media_url, "");
info!("Building corresponding Mastodon status");
let mut status_builder = StatusBuilder::new();
status_builder.status(&status_text).media_ids(status_medias);
if let Some(i) = toot_reply_id {
status_builder.in_reply_to(&i);
} }
// can be activated for test purposes // get Mastodon instance
// status_builder.visibility(elefren::status_builder::Visibility::Private); let mastodon = get_mastodon_token(&mastodon_config);
let status = status_builder // order needs to be chronological
.build() feed.reverse();
.unwrap_or_else(|_| panic!("Cannot build status with text {}", &status_text));
// publish status for tweet in &feed {
// again unwrap is safe here as we are in the main thread info!("Treating Tweet {} inside feed", tweet.id);
let published_status = mastodon.new_status(status).unwrap(); // initiate the toot_reply_id var
// this will panic if it cannot publish the status, which is a good thing, it allows the let mut toot_reply_id: Option<String> = None;
// last_tweet gathered not to be written // determine if the tweet is part of a thread (response to self) or a standard response
if let Some(r) = &tweet.in_reply_to_screen_name {
if r.to_lowercase() != mastodon_config.twitter_screen_name.to_lowercase() {
// we are responding not threading
info!("Tweet is a direct response, skipping");
continue;
}
info!("Tweet is a thread");
// get the corresponding toot id
let lconn = task_conn.lock().await;
toot_reply_id = read_state(
&lconn,
&mastodon_config.twitter_screen_name,
tweet.in_reply_to_status_id,
)
.unwrap_or(None)
.map(|s| s.toot_id);
drop(lconn);
};
let ttt_towrite = TweetToToot { // build basic status by just yielding text and dereferencing contained urls
twitter_screen_name: mastodon_config.twitter_screen_name.clone(), let mut status_text = build_basic_status(tweet);
tweet_id: tweet.id,
toot_id: published_status.id,
};
// write the current state (tweet ID and toot ID) to avoid copying it another time // building associative media list
let lconn = task_conn.lock().await; let (media_url, status_medias) =
write_state(&lconn, ttt_towrite) generate_media_ids(tweet, &scootaloo_cache_path, &mastodon).await;
.unwrap_or_else(|e| panic!("Cant write the last tweet retrieved: {}", e));
drop(lconn);
}
});
// push each task into the vec task status_text = status_text.replace(&media_url, "");
mtask.push(task);
} info!("Building corresponding Mastodon status");
let mut status_builder = StatusBuilder::new();
status_builder.status(&status_text).media_ids(status_medias);
if let Some(i) = toot_reply_id {
status_builder.in_reply_to(&i);
}
// can be activated for test purposes
// status_builder.visibility(elefren::status_builder::Visibility::Private);
let status = status_builder.build()?;
let published_status = mastodon.new_status(status)?;
// this will return if it cannot publish the status preventing the last_tweet from
// being written into db
let ttt_towrite = TweetToToot {
twitter_screen_name: mastodon_config.twitter_screen_name.clone(),
tweet_id: tweet.id,
toot_id: published_status.id,
};
// write the current state (tweet ID and toot ID) to avoid copying it another time
let lconn = task_conn.lock().await;
write_state(&lconn, ttt_towrite)?;
drop(lconn);
}
Ok::<(), ScootalooError>(())
})
})
.buffer_unordered(config.scootaloo.rate_limit.unwrap_or(DEFAULT_RATE_LIMIT));
// launch and wait for every handle // launch and wait for every handle
for handle in mtask { while let Some(result) = stream.next().await {
handle.await.unwrap(); match result {
Ok(Err(e)) => eprintln!("Error within thread: {}", e),
Err(e) => eprintln!("Error with thread: {}", e),
_ => (),
}
} }
} }

View File

@@ -1,5 +1,5 @@
use clap::{App, Arg, SubCommand}; use clap::{Arg, Command};
use log::{error, LevelFilter}; use log::LevelFilter;
use scootaloo::*; use scootaloo::*;
use simple_logger::SimpleLogger; use simple_logger::SimpleLogger;
use std::str::FromStr; use std::str::FromStr;
@@ -7,115 +7,121 @@ use std::str::FromStr;
const DEFAULT_CONFIG_PATH: &str = "/usr/local/etc/scootaloo.toml"; const DEFAULT_CONFIG_PATH: &str = "/usr/local/etc/scootaloo.toml";
fn main() { fn main() {
let matches = App::new(env!("CARGO_PKG_NAME")) let matches = Command::new(env!("CARGO_PKG_NAME"))
.version(env!("CARGO_PKG_VERSION")) .version(env!("CARGO_PKG_VERSION"))
.about("A Twitter to Mastodon bot") .about("A Twitter to Mastodon bot")
.arg( .arg(
Arg::with_name("config") Arg::new("config")
.short("c") .short('c')
.long("config") .long("config")
.value_name("CONFIG_FILE") .value_name("CONFIG_FILE")
.help(&format!( .help(&format!(
"TOML config file for scootaloo (default {})", "TOML config file for scootaloo (default {})",
DEFAULT_CONFIG_PATH DEFAULT_CONFIG_PATH
)) ))
.takes_value(true) .num_args(1)
.default_value(DEFAULT_CONFIG_PATH)
.display_order(1), .display_order(1),
) )
.arg( .arg(
Arg::with_name("log_level") Arg::new("log_level")
.short("l") .short('l')
.long("loglevel") .long("loglevel")
.value_name("LOGLEVEL") .value_name("LOGLEVEL")
.help("Log level.Valid values are: Off, Warn, Error, Info, Debug") .help("Log level.Valid values are: Off, Warn, Error, Info, Debug")
.takes_value(true) .num_args(1)
.value_parser(["Off", "Warn", "Error", "Info", "Debug"])
.display_order(2), .display_order(2),
) )
.subcommand( .subcommand(
SubCommand::with_name("register") Command::new("register")
.version(env!("CARGO_PKG_VERSION")) .version(env!("CARGO_PKG_VERSION"))
.about("Command to register to a Mastodon Instance") .about("Command to register to a Mastodon Instance")
.arg( .arg(
Arg::with_name("host") Arg::new("host")
.short("H") .short('H')
.long("host") .long("host")
.value_name("HOST") .value_name("HOST")
.help("Base URL of the Mastodon instance to register to (no default)") .help("Base URL of the Mastodon instance to register to (no default)")
.takes_value(true) .num_args(1)
.required(true) .required(true)
.display_order(1) .display_order(1)
) )
.arg( .arg(
Arg::with_name("name") Arg::new("name")
.short("n") .short('n')
.long("name") .long("name")
.help("Twitter Screen Name (like https://twitter.com/screen_name, no default)") .help("Twitter Screen Name (like https://twitter.com/screen_name, no default)")
.takes_value(true) .num_args(1)
.required(true) .required(true)
.display_order(2) .display_order(2)
), ),
) )
.subcommand( .subcommand(
SubCommand::with_name("init") Command::new("init")
.version(env!("CARGO_PKG_VERSION")) .version(env!("CARGO_PKG_VERSION"))
.about("Command to init Scootaloo DB") .about("Command to init Scootaloo DB")
.arg( .arg(
Arg::with_name("config") Arg::new("config")
.short("c") .short('c')
.long("config") .long("config")
.value_name("CONFIG_FILE") .value_name("CONFIG_FILE")
.help(&format!( .help(&format!(
"TOML config file for scootaloo (default {})", "TOML config file for scootaloo (default {})",
DEFAULT_CONFIG_PATH DEFAULT_CONFIG_PATH
)) ))
.takes_value(true) .default_value(DEFAULT_CONFIG_PATH)
.num_args(1)
.display_order(1), .display_order(1),
), ),
) )
.subcommand( .subcommand(
SubCommand::with_name("migrate") Command::new("migrate")
.version(env!("CARGO_PKG_VERSION")) .version(env!("CARGO_PKG_VERSION"))
.about("Command to migrate Scootaloo DB") .about("Command to migrate Scootaloo DB")
.arg( .arg(
Arg::with_name("config") Arg::new("config")
.short("c") .short('c')
.long("config") .long("config")
.value_name("CONFIG_FILE") .value_name("CONFIG_FILE")
.help(&format!("TOML config file for scootaloo (default {})", DEFAULT_CONFIG_PATH)) .help(&format!("TOML config file for scootaloo (default {})", DEFAULT_CONFIG_PATH))
.takes_value(true) .default_value(DEFAULT_CONFIG_PATH)
.num_args(1)
.display_order(1), .display_order(1),
) )
.arg( .arg(
Arg::with_name("name") Arg::new("name")
.short("n") .short('n')
.long("name") .long("name")
.help("Twitter Screen Name (like https://twitter.com/screen_name, no default)") .help("Twitter Screen Name (like https://twitter.com/screen_name, no default)")
.takes_value(true) .num_args(1)
.display_order(2) .display_order(2)
) )
) )
.get_matches(); .get_matches();
match matches.subcommand() { match matches.subcommand() {
("register", Some(sub_m)) => { Some(("register", sub_m)) => {
register( register(
sub_m.value_of("host").unwrap(), sub_m.get_one::<String>("host").unwrap(),
sub_m.value_of("name").unwrap(), sub_m.get_one::<String>("name").unwrap(),
); );
return; return;
} }
("init", Some(sub_m)) => { Some(("init", sub_m)) => {
let config = parse_toml(sub_m.value_of("config").unwrap_or(DEFAULT_CONFIG_PATH)); let config = parse_toml(sub_m.get_one::<String>("config").unwrap());
init_db(&config.scootaloo.db_path).unwrap(); init_db(&config.scootaloo.db_path).unwrap();
return; return;
} }
("migrate", Some(sub_m)) => { Some(("migrate", sub_m)) => {
let config = parse_toml(sub_m.value_of("config").unwrap_or(DEFAULT_CONFIG_PATH)); let config = parse_toml(sub_m.get_one::<String>("config").unwrap());
let config_twitter_screen_name = let config_twitter_screen_name =
&config.mastodon.values().next().unwrap().twitter_screen_name; &config.mastodon.values().next().unwrap().twitter_screen_name;
migrate_db( migrate_db(
&config.scootaloo.db_path, &config.scootaloo.db_path,
sub_m.value_of("name").unwrap_or(config_twitter_screen_name), sub_m
.get_one::<String>("name")
.unwrap_or(config_twitter_screen_name),
) )
.unwrap(); .unwrap();
return; return;
@@ -123,20 +129,14 @@ fn main() {
_ => (), _ => (),
} }
if matches.is_present("log_level") { if let Some(level) = matches.get_one::<String>("log_level") {
match LevelFilter::from_str(matches.value_of("log_level").unwrap()) { SimpleLogger::new()
Ok(level) => SimpleLogger::new().with_level(level).init().unwrap(), .with_level(LevelFilter::from_str(level).unwrap())
Err(e) => { .init()
SimpleLogger::new() .unwrap();
.with_level(LevelFilter::Error)
.init()
.unwrap();
error!("Unknown log level filter: {}", e);
}
};
} }
let config = parse_toml(matches.value_of("config").unwrap_or(DEFAULT_CONFIG_PATH)); let config = parse_toml(matches.get_one::<String>("config").unwrap());
run(config); run(config);
} }

View File

@@ -87,7 +87,7 @@ pub fn migrate_db(d: &str, s: &str) -> Result<(), Box<dyn Error>> {
match res { match res {
Err(e) => match e.to_string().as_str() { Err(e) => match e.to_string().as_str() {
"duplicate column name: twitter_screen_name" => Ok(()), "duplicate column name: twitter_screen_name" => Ok(()),
_ => Err(Box::new(e)), _ => Err(e.into()),
}, },
_ => Ok(()), _ => Ok(()),
} }

View File

@@ -15,6 +15,8 @@ use tokio::{
io::copy, io::copy,
}; };
use futures::{stream, stream::StreamExt};
/// Generate associative table between media ids and tweet extended entities /// Generate associative table between media ids and tweet extended entities
pub async fn generate_media_ids( pub async fn generate_media_ids(
tweet: &Tweet, tweet: &Tweet,
@@ -25,63 +27,41 @@ pub async fn generate_media_ids(
let mut media_ids: Vec<String> = vec![]; let mut media_ids: Vec<String> = vec![];
if let Some(m) = &tweet.extended_entities { if let Some(m) = &tweet.extended_entities {
// create tasks list
let mut tasks = vec![];
// size of media_ids vector, should be equal to the media vector
media_ids.resize(m.media.len(), String::new());
info!("{} medias in tweet", m.media.len()); info!("{} medias in tweet", m.media.len());
for (i, media) in m.media.iter().enumerate() { let medias = m.media.clone();
// attribute media url
media_url = media.url.clone();
// clone everything we need let mut stream = stream::iter(medias)
let cache_path = String::from(cache_path); .map(|media| {
let media = media.clone(); // attribute media url
let mastodon = mastodon.clone(); media_url = media.url.clone();
let task = tokio::task::spawn(async move { // clone everything we need
info!("Start treating {}", media.media_url_https); let cache_path = String::from(cache_path);
// get the tweet embedded media let mastodon = mastodon.clone();
let local_tweet_media_path = match get_tweet_media(&media, &cache_path).await {
Ok(l) => l,
Err(e) => {
return Err(ScootalooError::new(&format!(
"Cannot get tweet media for {}: {}",
&media.url, e
)))
}
};
// upload media to Mastodon tokio::task::spawn(async move {
let mastodon_media = mastodon.media(Cow::from(local_tweet_media_path.to_owned())); info!("Start treating {}", media.media_url_https);
// at this point, we can safely erase the original file // get the tweet embedded media
// it doesnt matter if we cant remove, cache_media fn is idempotent let local_tweet_media_path = get_tweet_media(&media, &cache_path).await?;
remove_file(&local_tweet_media_path).await.ok();
let mastodon_media = match mastodon_media { // upload media to Mastodon
Ok(m) => m, let mastodon_media =
Err(e) => { mastodon.media(Cow::from(local_tweet_media_path.to_owned()))?;
return Err(ScootalooError::new(&format!( // at this point, we can safely erase the original file
"Attachment {} cannot be uploaded to Mastodon Instance: {}", // it doesnt matter if we cant remove, cache_media fn is idempotent
&local_tweet_media_path, e remove_file(&local_tweet_media_path).await.ok();
)))
}
};
Ok((i, mastodon_media.id)) Ok::<String, ScootalooError>(mastodon_media.id)
}); })
})
.buffered(4); // there are max four medias per tweet and they need to be treated in
// order
tasks.push(task); while let Some(result) = stream.next().await {
} match result {
Ok(Ok(v)) => media_ids.push(v),
for task in tasks { Ok(Err(e)) => warn!("Cannot treat media: {}", e),
match task.await {
// insert the media at the right place
Ok(Ok((i, v))) => media_ids[i] = v,
Ok(Err(e)) => warn!("{}", e),
Err(e) => error!("Something went wrong when joining the main thread: {}", e), Err(e) => error!("Something went wrong when joining the main thread: {}", e),
} }
} }