7 Commits

Author SHA1 Message Date
VC
b3e7ee9d84 Merge branch '5-migrate-from-tokio-loop-to-futures-stream' into 'master'
refactor: use futures instead of tokio for media upload

Closes #5

See merge request veretcle/scootaloo!28
2022-11-15 09:11:28 +00:00
VC
7f7219ea78 feat: turn tokio-based async logic into futures 2022-11-15 10:06:00 +01:00
VC
f371b8a297 feat: add default rate_limiting option 2022-11-15 10:06:00 +01:00
VC
ec3956eabb doc: add rate_limiting option 2022-11-15 10:06:00 +01:00
VC
ce84c05581 refactor: use futures instead of tokio for media upload 2022-11-15 10:05:57 +01:00
VC
b64621368b Merge branch '4-migrate-to-clap-v4' into 'master'
refactor: migrate from clap v2 to clap v4

Closes #4

See merge request veretcle/scootaloo!27
2022-11-14 19:57:32 +00:00
VC
89de1cf7a3 refactor: migrate from clap v2 to clap v4 2022-11-14 20:36:15 +01:00
7 changed files with 200 additions and 212 deletions

59
Cargo.lock generated
View File

@@ -46,15 +46,6 @@ dependencies = [
"libc",
]
[[package]]
name = "ansi_term"
version = "0.12.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "d52a9bb7ec0cf484c551830a7ce27bd20d67eac647e1befb56b0be4ee39a55d2"
dependencies = [
"winapi 0.3.9",
]
[[package]]
name = "atty"
version = "0.2.14"
@@ -261,9 +252,9 @@ checksum = "baf1de4339761588bc0619e3cbc0120ee582ebb74b53b4efbf79117bd2da40fd"
[[package]]
name = "chrono"
version = "0.4.22"
version = "0.4.23"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "bfd4d1b31faaa3a89d7934dbded3111da0d2ef28e3ebccdb4f0179f5929d1ef1"
checksum = "16b0a3d9ed01224b22057780a37bb8c5dbfe1be8ba48678e7bf57ec4b385411f"
dependencies = [
"iana-time-zone",
"js-sys",
@@ -277,17 +268,24 @@ dependencies = [
[[package]]
name = "clap"
version = "2.34.0"
version = "4.0.24"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "a0610544180c38b88101fecf2dd634b174a62eef6946f84dfc6a7127512b381c"
checksum = "60494cedb60cb47462c0ff7be53de32c0e42a6fc2c772184554fa12bd9489c03"
dependencies = [
"ansi_term",
"atty",
"bitflags",
"clap_lex",
"strsim",
"textwrap",
"unicode-width",
"vec_map",
"termcolor",
]
[[package]]
name = "clap_lex"
version = "0.3.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "0d4198f73e42b4936b35b5bb248d81d2b595ecb170da0bac7655c54eedfa8da8"
dependencies = [
"os_str_bytes",
]
[[package]]
@@ -1567,6 +1565,12 @@ dependencies = [
"vcpkg",
]
[[package]]
name = "os_str_bytes"
version = "6.4.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "7b5bf27447411e9ee3ff51186bf7a08e16c341efdde93f4d823e8844429bed7e"
[[package]]
name = "parking_lot"
version = "0.9.0"
@@ -2099,7 +2103,7 @@ dependencies = [
[[package]]
name = "scootaloo"
version = "0.8.2"
version = "0.9.0"
dependencies = [
"chrono",
"clap",
@@ -2368,9 +2372,9 @@ dependencies = [
[[package]]
name = "strsim"
version = "0.8.0"
version = "0.10.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "8ea5119cdb4c55b55d432abb513a0429384878c15dde60cc77b1c99de1a95a6a"
checksum = "73473c0e59e6d5812c5dfe2a064a6444949f089e20eec9a2e5506596494e4623"
[[package]]
name = "subtle"
@@ -2430,15 +2434,6 @@ dependencies = [
"winapi-util",
]
[[package]]
name = "textwrap"
version = "0.11.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "d326610f408c7a4eb6f51c37c330e496b08506c9457c9d34287ecc38809fb060"
dependencies = [
"unicode-width",
]
[[package]]
name = "thiserror"
version = "1.0.37"
@@ -2869,12 +2864,6 @@ version = "0.2.15"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "accd4ea62f7bb7a82fe23066fb0957d48ef677f6eeb8215f372f52e48bb32426"
[[package]]
name = "vec_map"
version = "0.8.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "f1bddf1187be692e79c5ffeab891132dfb0f236ed36a43c7ed39f1165ee20191"
[[package]]
name = "version_check"
version = "0.9.4"

View File

@@ -1,6 +1,6 @@
[package]
name = "scootaloo"
version = "0.8.2"
version = "0.9.0"
authors = ["VC <veretcle+framagit@mateu.be>"]
edition = "2021"
@@ -10,11 +10,11 @@ edition = "2021"
chrono = "^0.4"
serde = { version = "1.0", features = ["derive"] }
toml = "^0.5"
clap = "^2.34"
futures = "^0.3"
clap = "^4"
egg-mode = "^0.16"
rusqlite = "^0.27"
tokio = { version = "1", features = ["full"]}
tokio = { version = "^1", features = ["full"]}
futures = "^0.3"
elefren = "^0.22"
html-escape = "^0.2"
reqwest = "^0.11"

View File

@@ -18,6 +18,7 @@ First up, create a configuration file (default path is `/usr/local/etc/scootaloo
[scootaloo]
db_path = "/var/lib/scootaloo/scootaloo.sqlite" ## file containing the SQLite Tweet corresponding Toot DB, must be writeable
cache_path = "/tmp/scootaloo" ## a dir where the temporary files will be download, must be writeable
rate_limiting = 4 ## optional, default 4, number of accounts handled simultaneously
[twitter]
## Consumer/Access key for Twitter (can be generated at https://developer.twitter.com/en/apps)

View File

@@ -32,6 +32,7 @@ pub struct MastodonConfig {
pub struct ScootalooConfig {
pub db_path: String,
pub cache_path: String,
pub rate_limit: Option<usize>,
}
/// Parses the TOML file into a Config Struct

View File

@@ -25,12 +25,13 @@ use rusqlite::Connection;
use std::sync::Arc;
use tokio::{spawn, sync::Mutex};
use futures::StreamExt;
const DEFAULT_RATE_LIMIT: usize = 4;
/// This is where the magic happens
#[tokio::main]
pub async fn run(config: Config) {
// create the task vector for handling multiple accounts
let mut mtask = vec![];
// open the SQLite connection
let conn = Arc::new(Mutex::new(
Connection::open(&config.scootaloo.db_path).unwrap_or_else(|e| {
@@ -41,19 +42,20 @@ pub async fn run(config: Config) {
}),
));
for mastodon_config in config.mastodon.into_values() {
let mut stream = futures::stream::iter(config.mastodon.into_values())
.map(|mastodon_config| {
// create temporary value for each task
let scootaloo_cache_path = config.scootaloo.cache_path.clone();
let token = get_oauth2_token(&config.twitter);
let task_conn = conn.clone();
let task = spawn(async move {
spawn(async move {
info!("Starting treating {}", &mastodon_config.twitter_screen_name);
// retrieve the last tweet ID for the username
let lconn = task_conn.lock().await;
let last_tweet_id =
read_state(&lconn, &mastodon_config.twitter_screen_name, None)?.map(|r| r.tweet_id);
let last_tweet_id = read_state(&lconn, &mastodon_config.twitter_screen_name, None)?
.map(|r| r.tweet_id);
drop(lconn);
// get user timeline feed (Vec<tweet>)
@@ -137,15 +139,13 @@ pub async fn run(config: Config) {
drop(lconn);
}
Ok::<(), ScootalooError>(())
});
// push each task into the vec task
mtask.push(task);
}
})
})
.buffer_unordered(config.scootaloo.rate_limit.unwrap_or(DEFAULT_RATE_LIMIT));
// launch and wait for every handle
for handle in mtask {
match handle.await {
while let Some(result) = stream.next().await {
match result {
Ok(Err(e)) => eprintln!("Error within thread: {}", e),
Err(e) => eprintln!("Error with thread: {}", e),
_ => (),

View File

@@ -1,5 +1,5 @@
use clap::{App, Arg, SubCommand};
use log::{error, LevelFilter};
use clap::{Arg, Command};
use log::LevelFilter;
use scootaloo::*;
use simple_logger::SimpleLogger;
use std::str::FromStr;
@@ -7,115 +7,121 @@ use std::str::FromStr;
const DEFAULT_CONFIG_PATH: &str = "/usr/local/etc/scootaloo.toml";
fn main() {
let matches = App::new(env!("CARGO_PKG_NAME"))
let matches = Command::new(env!("CARGO_PKG_NAME"))
.version(env!("CARGO_PKG_VERSION"))
.about("A Twitter to Mastodon bot")
.arg(
Arg::with_name("config")
.short("c")
Arg::new("config")
.short('c')
.long("config")
.value_name("CONFIG_FILE")
.help(&format!(
"TOML config file for scootaloo (default {})",
DEFAULT_CONFIG_PATH
))
.takes_value(true)
.num_args(1)
.default_value(DEFAULT_CONFIG_PATH)
.display_order(1),
)
.arg(
Arg::with_name("log_level")
.short("l")
Arg::new("log_level")
.short('l')
.long("loglevel")
.value_name("LOGLEVEL")
.help("Log level.Valid values are: Off, Warn, Error, Info, Debug")
.takes_value(true)
.num_args(1)
.value_parser(["Off", "Warn", "Error", "Info", "Debug"])
.display_order(2),
)
.subcommand(
SubCommand::with_name("register")
Command::new("register")
.version(env!("CARGO_PKG_VERSION"))
.about("Command to register to a Mastodon Instance")
.arg(
Arg::with_name("host")
.short("H")
Arg::new("host")
.short('H')
.long("host")
.value_name("HOST")
.help("Base URL of the Mastodon instance to register to (no default)")
.takes_value(true)
.num_args(1)
.required(true)
.display_order(1)
)
.arg(
Arg::with_name("name")
.short("n")
Arg::new("name")
.short('n')
.long("name")
.help("Twitter Screen Name (like https://twitter.com/screen_name, no default)")
.takes_value(true)
.num_args(1)
.required(true)
.display_order(2)
),
)
.subcommand(
SubCommand::with_name("init")
Command::new("init")
.version(env!("CARGO_PKG_VERSION"))
.about("Command to init Scootaloo DB")
.arg(
Arg::with_name("config")
.short("c")
Arg::new("config")
.short('c')
.long("config")
.value_name("CONFIG_FILE")
.help(&format!(
"TOML config file for scootaloo (default {})",
DEFAULT_CONFIG_PATH
))
.takes_value(true)
.default_value(DEFAULT_CONFIG_PATH)
.num_args(1)
.display_order(1),
),
)
.subcommand(
SubCommand::with_name("migrate")
Command::new("migrate")
.version(env!("CARGO_PKG_VERSION"))
.about("Command to migrate Scootaloo DB")
.arg(
Arg::with_name("config")
.short("c")
Arg::new("config")
.short('c')
.long("config")
.value_name("CONFIG_FILE")
.help(&format!("TOML config file for scootaloo (default {})", DEFAULT_CONFIG_PATH))
.takes_value(true)
.default_value(DEFAULT_CONFIG_PATH)
.num_args(1)
.display_order(1),
)
.arg(
Arg::with_name("name")
.short("n")
Arg::new("name")
.short('n')
.long("name")
.help("Twitter Screen Name (like https://twitter.com/screen_name, no default)")
.takes_value(true)
.num_args(1)
.display_order(2)
)
)
.get_matches();
match matches.subcommand() {
("register", Some(sub_m)) => {
Some(("register", sub_m)) => {
register(
sub_m.value_of("host").unwrap(),
sub_m.value_of("name").unwrap(),
sub_m.get_one::<String>("host").unwrap(),
sub_m.get_one::<String>("name").unwrap(),
);
return;
}
("init", Some(sub_m)) => {
let config = parse_toml(sub_m.value_of("config").unwrap_or(DEFAULT_CONFIG_PATH));
Some(("init", sub_m)) => {
let config = parse_toml(sub_m.get_one::<String>("config").unwrap());
init_db(&config.scootaloo.db_path).unwrap();
return;
}
("migrate", Some(sub_m)) => {
let config = parse_toml(sub_m.value_of("config").unwrap_or(DEFAULT_CONFIG_PATH));
Some(("migrate", sub_m)) => {
let config = parse_toml(sub_m.get_one::<String>("config").unwrap());
let config_twitter_screen_name =
&config.mastodon.values().next().unwrap().twitter_screen_name;
migrate_db(
&config.scootaloo.db_path,
sub_m.value_of("name").unwrap_or(config_twitter_screen_name),
sub_m
.get_one::<String>("name")
.unwrap_or(config_twitter_screen_name),
)
.unwrap();
return;
@@ -123,20 +129,14 @@ fn main() {
_ => (),
}
if matches.is_present("log_level") {
match LevelFilter::from_str(matches.value_of("log_level").unwrap()) {
Ok(level) => SimpleLogger::new().with_level(level).init().unwrap(),
Err(e) => {
if let Some(level) = matches.get_one::<String>("log_level") {
SimpleLogger::new()
.with_level(LevelFilter::Error)
.with_level(LevelFilter::from_str(level).unwrap())
.init()
.unwrap();
error!("Unknown log level filter: {}", e);
}
};
}
let config = parse_toml(matches.value_of("config").unwrap_or(DEFAULT_CONFIG_PATH));
let config = parse_toml(matches.get_one::<String>("config").unwrap());
run(config);
}

View File

@@ -15,6 +15,8 @@ use tokio::{
io::copy,
};
use futures::{stream, stream::StreamExt};
/// Generate associative table between media ids and tweet extended entities
pub async fn generate_media_ids(
tweet: &Tweet,
@@ -25,24 +27,20 @@ pub async fn generate_media_ids(
let mut media_ids: Vec<String> = vec![];
if let Some(m) = &tweet.extended_entities {
// create tasks list
let mut tasks = vec![];
// size of media_ids vector, should be equal to the media vector
media_ids.resize(m.media.len(), String::new());
info!("{} medias in tweet", m.media.len());
for (i, media) in m.media.iter().enumerate() {
let medias = m.media.clone();
let mut stream = stream::iter(medias)
.map(|media| {
// attribute media url
media_url = media.url.clone();
// clone everything we need
let cache_path = String::from(cache_path);
let media = media.clone();
let mastodon = mastodon.clone();
let task = tokio::task::spawn(async move {
tokio::task::spawn(async move {
info!("Start treating {}", media.media_url_https);
// get the tweet embedded media
let local_tweet_media_path = get_tweet_media(&media, &cache_path).await?;
@@ -54,16 +52,15 @@ pub async fn generate_media_ids(
// it doesnt matter if we cant remove, cache_media fn is idempotent
remove_file(&local_tweet_media_path).await.ok();
Ok::<(usize, String), ScootalooError>((i, mastodon_media.id))
});
Ok::<String, ScootalooError>(mastodon_media.id)
})
})
.buffered(4); // there are max four medias per tweet and they need to be treated in
// order
tasks.push(task);
}
for task in tasks {
match task.await {
// insert the media at the right place
Ok(Ok((i, v))) => media_ids[i] = v,
while let Some(result) = stream.next().await {
match result {
Ok(Ok(v)) => media_ids.push(v),
Ok(Err(e)) => warn!("Cannot treat media: {}", e),
Err(e) => error!("Something went wrong when joining the main thread: {}", e),
}