17 Commits

Author SHA1 Message Date
VC
a854243cf6 Merge branch 'command_help' into 'master'
fix: remove unnecessary information in help commands

See merge request veretcle/scootaloo!31
2022-11-18 12:31:17 +00:00
VC
b33ffa4401 fix: remove unnecessary information in help commands 2022-11-18 13:27:18 +01:00
VC
77941e0b9a Merge branch 'filter_tweet' into 'master'
refactor: eliminate response tweet earlier

See merge request veretcle/scootaloo!30
2022-11-18 12:17:54 +00:00
VC
1489f89bdb chore: bump version 2022-11-18 13:12:24 +01:00
VC
93a27deae8 refactor: eliminate response tweet earlier 2022-11-18 12:48:40 +01:00
VC
fe3745d91f Merge branch '6-make-last-tweet-retrieved-configurable' into 'master'
test: add rate_limit

Closes #6

See merge request veretcle/scootaloo!29
2022-11-15 20:37:37 +00:00
VC
9a1e4c8e6c doc: add section about page_size 2022-11-15 21:24:34 +01:00
VC
8b12f83c5d chore: bump version 2022-11-15 21:14:21 +01:00
VC
f93bb5158b test: add page_size tests 2022-11-15 21:14:12 +01:00
VC
d5db8b0d85 feat: add customizable page_size to twitter timeline 2022-11-15 21:14:01 +01:00
VC
fe8e81b54d feat: add page_size both in twitter and mastodon config 2022-11-15 21:13:23 +01:00
VC
636ea8c85e test: add rate_limit 2022-11-15 19:36:48 +01:00
VC
b3e7ee9d84 Merge branch '5-migrate-from-tokio-loop-to-futures-stream' into 'master'
refactor: use futures instead of tokio for media upload

Closes #5

See merge request veretcle/scootaloo!28
2022-11-15 09:11:28 +00:00
VC
7f7219ea78 feat: turn tokio-based async logic into futures 2022-11-15 10:06:00 +01:00
VC
f371b8a297 feat: add default rate_limiting option 2022-11-15 10:06:00 +01:00
VC
ec3956eabb doc: add rate_limiting option 2022-11-15 10:06:00 +01:00
VC
ce84c05581 refactor: use futures instead of tokio for media upload 2022-11-15 10:05:57 +01:00
12 changed files with 273 additions and 142 deletions

3
Cargo.lock generated
View File

@@ -2103,12 +2103,13 @@ dependencies = [
[[package]] [[package]]
name = "scootaloo" name = "scootaloo"
version = "0.8.3" version = "0.9.3"
dependencies = [ dependencies = [
"chrono", "chrono",
"clap", "clap",
"egg-mode", "egg-mode",
"elefren", "elefren",
"futures 0.3.25",
"html-escape", "html-escape",
"log", "log",
"mime", "mime",

View File

@@ -1,6 +1,6 @@
[package] [package]
name = "scootaloo" name = "scootaloo"
version = "0.8.3" version = "0.9.3"
authors = ["VC <veretcle+framagit@mateu.be>"] authors = ["VC <veretcle+framagit@mateu.be>"]
edition = "2021" edition = "2021"
@@ -13,7 +13,8 @@ toml = "^0.5"
clap = "^4" clap = "^4"
egg-mode = "^0.16" egg-mode = "^0.16"
rusqlite = "^0.27" rusqlite = "^0.27"
tokio = { version = "1", features = ["full"]} tokio = { version = "^1", features = ["full"]}
futures = "^0.3"
elefren = "^0.22" elefren = "^0.22"
html-escape = "^0.2" html-escape = "^0.2"
reqwest = "^0.11" reqwest = "^0.11"

View File

@@ -18,9 +18,11 @@ First up, create a configuration file (default path is `/usr/local/etc/scootaloo
[scootaloo] [scootaloo]
db_path = "/var/lib/scootaloo/scootaloo.sqlite" ## file containing the SQLite Tweet corresponding Toot DB, must be writeable db_path = "/var/lib/scootaloo/scootaloo.sqlite" ## file containing the SQLite Tweet corresponding Toot DB, must be writeable
cache_path = "/tmp/scootaloo" ## a dir where the temporary files will be download, must be writeable cache_path = "/tmp/scootaloo" ## a dir where the temporary files will be download, must be writeable
rate_limiting = 4 ## optional, default 4, number of accounts handled simultaneously
[twitter] [twitter]
## Consumer/Access key for Twitter (can be generated at https://developer.twitter.com/en/apps) ## Consumer/Access key for Twitter (can be generated at https://developer.twitter.com/en/apps)
page_size = 20 ## optional, default 200, max number of tweet retrieved
consumer_key = "MYCONSUMERKEY" consumer_key = "MYCONSUMERKEY"
consumer_secret = "MYCONSUMERSECRET" consumer_secret = "MYCONSUMERSECRET"
access_key = "MYACCESSKEY" access_key = "MYACCESSKEY"
@@ -55,6 +57,11 @@ token = "MYTOKEN"
You can add other account if you like, after the `[mastodon]` moniker. Scootaloo would theorically support an unlimited number of accounts. You can add other account if you like, after the `[mastodon]` moniker. Scootaloo would theorically support an unlimited number of accounts.
You can also add a custom twitter page size in this section that would override the global (under the `twitter` moniker) and default one (200), like so:
```
twitter_page_size = 40
```
## Running ## Running
You can then run the application via `cron` for example. Here is the generic usage: You can then run the application via `cron` for example. Here is the generic usage:

View File

@@ -16,11 +16,13 @@ pub struct TwitterConfig {
pub consumer_secret: String, pub consumer_secret: String,
pub access_key: String, pub access_key: String,
pub access_secret: String, pub access_secret: String,
pub page_size: Option<i32>,
} }
#[derive(Debug, Deserialize)] #[derive(Debug, Deserialize)]
pub struct MastodonConfig { pub struct MastodonConfig {
pub twitter_screen_name: String, pub twitter_screen_name: String,
pub twitter_page_size: Option<i32>,
pub base: String, pub base: String,
pub client_id: String, pub client_id: String,
pub client_secret: String, pub client_secret: String,
@@ -32,6 +34,7 @@ pub struct MastodonConfig {
pub struct ScootalooConfig { pub struct ScootalooConfig {
pub db_path: String, pub db_path: String,
pub cache_path: String, pub cache_path: String,
pub rate_limit: Option<usize>,
} }
/// Parses the TOML file into a Config Struct /// Parses the TOML file into a Config Struct

View File

@@ -25,12 +25,14 @@ use rusqlite::Connection;
use std::sync::Arc; use std::sync::Arc;
use tokio::{spawn, sync::Mutex}; use tokio::{spawn, sync::Mutex};
use futures::StreamExt;
const DEFAULT_RATE_LIMIT: usize = 4;
const DEFAULT_PAGE_SIZE: i32 = 200;
/// This is where the magic happens /// This is where the magic happens
#[tokio::main] #[tokio::main]
pub async fn run(config: Config) { pub async fn run(config: Config) {
// create the task vector for handling multiple accounts
let mut mtask = vec![];
// open the SQLite connection // open the SQLite connection
let conn = Arc::new(Mutex::new( let conn = Arc::new(Mutex::new(
Connection::open(&config.scootaloo.db_path).unwrap_or_else(|e| { Connection::open(&config.scootaloo.db_path).unwrap_or_else(|e| {
@@ -41,111 +43,101 @@ pub async fn run(config: Config) {
}), }),
)); ));
for mastodon_config in config.mastodon.into_values() { let mut stream = futures::stream::iter(config.mastodon.into_values())
// create temporary value for each task .map(|mastodon_config| {
let scootaloo_cache_path = config.scootaloo.cache_path.clone(); // calculate Twitter page size
let token = get_oauth2_token(&config.twitter); let page_size = mastodon_config
let task_conn = conn.clone(); .twitter_page_size
.unwrap_or_else(|| config.twitter.page_size.unwrap_or(DEFAULT_PAGE_SIZE));
let task = spawn(async move { // create temporary value for each task
info!("Starting treating {}", &mastodon_config.twitter_screen_name); let scootaloo_cache_path = config.scootaloo.cache_path.clone();
let token = get_oauth2_token(&config.twitter);
let task_conn = conn.clone();
// retrieve the last tweet ID for the username spawn(async move {
let lconn = task_conn.lock().await; info!("Starting treating {}", &mastodon_config.twitter_screen_name);
let last_tweet_id =
read_state(&lconn, &mastodon_config.twitter_screen_name, None)?.map(|r| r.tweet_id);
drop(lconn);
// get user timeline feed (Vec<tweet>) // retrieve the last tweet ID for the username
let mut feed =
get_user_timeline(&mastodon_config.twitter_screen_name, &token, last_tweet_id)
.await?;
// empty feed -> exiting
if feed.is_empty() {
info!("Nothing to retrieve since last time, exiting…");
return Ok(());
}
// get Mastodon instance
let mastodon = get_mastodon_token(&mastodon_config);
// order needs to be chronological
feed.reverse();
for tweet in &feed {
info!("Treating Tweet {} inside feed", tweet.id);
// initiate the toot_reply_id var
let mut toot_reply_id: Option<String> = None;
// determine if the tweet is part of a thread (response to self) or a standard response
if let Some(r) = &tweet.in_reply_to_screen_name {
if r.to_lowercase() != mastodon_config.twitter_screen_name.to_lowercase() {
// we are responding not threading
info!("Tweet is a direct response, skipping");
continue;
}
info!("Tweet is a thread");
// get the corresponding toot id
let lconn = task_conn.lock().await;
toot_reply_id = read_state(
&lconn,
&mastodon_config.twitter_screen_name,
tweet.in_reply_to_status_id,
)
.unwrap_or(None)
.map(|s| s.toot_id);
drop(lconn);
};
// build basic status by just yielding text and dereferencing contained urls
let mut status_text = build_basic_status(tweet);
// building associative media list
let (media_url, status_medias) =
generate_media_ids(tweet, &scootaloo_cache_path, &mastodon).await;
status_text = status_text.replace(&media_url, "");
info!("Building corresponding Mastodon status");
let mut status_builder = StatusBuilder::new();
status_builder.status(&status_text).media_ids(status_medias);
if let Some(i) = toot_reply_id {
status_builder.in_reply_to(&i);
}
// can be activated for test purposes
// status_builder.visibility(elefren::status_builder::Visibility::Private);
let status = status_builder.build()?;
let published_status = mastodon.new_status(status)?;
// this will return if it cannot publish the status preventing the last_tweet from
// being written into db
let ttt_towrite = TweetToToot {
twitter_screen_name: mastodon_config.twitter_screen_name.clone(),
tweet_id: tweet.id,
toot_id: published_status.id,
};
// write the current state (tweet ID and toot ID) to avoid copying it another time
let lconn = task_conn.lock().await; let lconn = task_conn.lock().await;
write_state(&lconn, ttt_towrite)?; let last_tweet_id = read_state(&lconn, &mastodon_config.twitter_screen_name, None)?
.map(|r| r.tweet_id);
drop(lconn); drop(lconn);
}
Ok::<(), ScootalooError>(())
});
// push each task into the vec task // get reversed, curated user timeline
mtask.push(task); let feed = get_user_timeline(
} &mastodon_config.twitter_screen_name,
&token,
last_tweet_id,
page_size,
)
.await?;
// get Mastodon instance
let mastodon = get_mastodon_token(&mastodon_config);
for tweet in &feed {
info!("Treating Tweet {} inside feed", tweet.id);
let lconn = task_conn.lock().await;
// initiate the toot_reply_id var and retrieve the corresponding toot_id
let toot_reply_id: Option<String> = tweet.in_reply_to_user_id.and_then(|_| {
read_state(
&lconn,
&mastodon_config.twitter_screen_name,
tweet.in_reply_to_status_id,
)
.unwrap_or(None)
.map(|s| s.toot_id)
});
drop(lconn);
// build basic status by just yielding text and dereferencing contained urls
let mut status_text = build_basic_status(tweet);
// building associative media list
let (media_url, status_medias) =
generate_media_ids(tweet, &scootaloo_cache_path, &mastodon).await;
status_text = status_text.replace(&media_url, "");
info!("Building corresponding Mastodon status");
let mut status_builder = StatusBuilder::new();
status_builder.status(&status_text).media_ids(status_medias);
if let Some(i) = toot_reply_id {
status_builder.in_reply_to(&i);
}
// can be activated for test purposes
// status_builder.visibility(elefren::status_builder::Visibility::Private);
let status = status_builder.build()?;
let published_status = mastodon.new_status(status)?;
// this will return if it cannot publish the status preventing the last_tweet from
// being written into db
let ttt_towrite = TweetToToot {
twitter_screen_name: mastodon_config.twitter_screen_name.clone(),
tweet_id: tweet.id,
toot_id: published_status.id,
};
// write the current state (tweet ID and toot ID) to avoid copying it another time
let lconn = task_conn.lock().await;
write_state(&lconn, ttt_towrite)?;
drop(lconn);
}
Ok::<(), ScootalooError>(())
})
})
.buffer_unordered(config.scootaloo.rate_limit.unwrap_or(DEFAULT_RATE_LIMIT));
// launch and wait for every handle // launch and wait for every handle
for handle in mtask { while let Some(result) = stream.next().await {
match handle.await { match result {
Ok(Err(e)) => eprintln!("Error within thread: {}", e), Ok(Err(e)) => eprintln!("Error within thread: {}", e),
Err(e) => eprintln!("Error with thread: {}", e), Err(e) => eprintln!("Error with thread: {}", e),
_ => (), _ => (),

View File

@@ -15,10 +15,7 @@ fn main() {
.short('c') .short('c')
.long("config") .long("config")
.value_name("CONFIG_FILE") .value_name("CONFIG_FILE")
.help(&format!( .help("TOML config file for scootaloo")
"TOML config file for scootaloo (default {})",
DEFAULT_CONFIG_PATH
))
.num_args(1) .num_args(1)
.default_value(DEFAULT_CONFIG_PATH) .default_value(DEFAULT_CONFIG_PATH)
.display_order(1), .display_order(1),
@@ -28,7 +25,7 @@ fn main() {
.short('l') .short('l')
.long("loglevel") .long("loglevel")
.value_name("LOGLEVEL") .value_name("LOGLEVEL")
.help("Log level.Valid values are: Off, Warn, Error, Info, Debug") .help("Log level")
.num_args(1) .num_args(1)
.value_parser(["Off", "Warn", "Error", "Info", "Debug"]) .value_parser(["Off", "Warn", "Error", "Info", "Debug"])
.display_order(2), .display_order(2),

View File

@@ -27,19 +27,31 @@ pub fn get_oauth2_token(config: &TwitterConfig) -> Token {
} }
} }
/// Gets Twitter user timeline /// Gets Twitter user timeline, eliminate responses to others and reverse it
pub async fn get_user_timeline( pub async fn get_user_timeline(
screen_name: &str, screen_name: &str,
token: &Token, token: &Token,
lid: Option<u64>, lid: Option<u64>,
page_size: i32,
) -> Result<Vec<Tweet>, Box<dyn Error>> { ) -> Result<Vec<Tweet>, Box<dyn Error>> {
// fix the page size to 200 as it is the maximum Twitter authorizes // fix the page size to 200 as it is the maximum Twitter authorizes
let (_, feed) = user_timeline(UserID::from(screen_name.to_owned()), true, false, token) let (_, feed) = user_timeline(UserID::from(screen_name.to_owned()), true, false, token)
.with_page_size(200) .with_page_size(page_size)
.older(lid) .older(lid)
.await?; .await?;
Ok(feed.to_vec()) let mut feed: Vec<Tweet> = feed
.iter()
.cloned()
.filter(|t| match &t.in_reply_to_screen_name {
Some(r) => r.to_lowercase() == screen_name.to_lowercase(),
None => true,
})
.collect();
feed.reverse();
Ok(feed)
} }
/// Retrieves a single media from a tweet and store it in a temporary file /// Retrieves a single media from a tweet and store it in a temporary file

View File

@@ -15,6 +15,8 @@ use tokio::{
io::copy, io::copy,
}; };
use futures::{stream, stream::StreamExt};
/// Generate associative table between media ids and tweet extended entities /// Generate associative table between media ids and tweet extended entities
pub async fn generate_media_ids( pub async fn generate_media_ids(
tweet: &Tweet, tweet: &Tweet,
@@ -25,45 +27,40 @@ pub async fn generate_media_ids(
let mut media_ids: Vec<String> = vec![]; let mut media_ids: Vec<String> = vec![];
if let Some(m) = &tweet.extended_entities { if let Some(m) = &tweet.extended_entities {
// create tasks list
let mut tasks = vec![];
// size of media_ids vector, should be equal to the media vector
media_ids.resize(m.media.len(), String::new());
info!("{} medias in tweet", m.media.len()); info!("{} medias in tweet", m.media.len());
for (i, media) in m.media.iter().enumerate() { let medias = m.media.clone();
// attribute media url
media_url = media.url.clone();
// clone everything we need let mut stream = stream::iter(medias)
let cache_path = String::from(cache_path); .map(|media| {
let media = media.clone(); // attribute media url
let mastodon = mastodon.clone(); media_url = media.url.clone();
let task = tokio::task::spawn(async move { // clone everything we need
info!("Start treating {}", media.media_url_https); let cache_path = String::from(cache_path);
// get the tweet embedded media let mastodon = mastodon.clone();
let local_tweet_media_path = get_tweet_media(&media, &cache_path).await?;
// upload media to Mastodon tokio::task::spawn(async move {
let mastodon_media = info!("Start treating {}", media.media_url_https);
mastodon.media(Cow::from(local_tweet_media_path.to_owned()))?; // get the tweet embedded media
// at this point, we can safely erase the original file let local_tweet_media_path = get_tweet_media(&media, &cache_path).await?;
// it doesnt matter if we cant remove, cache_media fn is idempotent
remove_file(&local_tweet_media_path).await.ok();
Ok::<(usize, String), ScootalooError>((i, mastodon_media.id)) // upload media to Mastodon
}); let mastodon_media =
mastodon.media(Cow::from(local_tweet_media_path.to_owned()))?;
// at this point, we can safely erase the original file
// it doesnt matter if we cant remove, cache_media fn is idempotent
remove_file(&local_tweet_media_path).await.ok();
tasks.push(task); Ok::<String, ScootalooError>(mastodon_media.id)
} })
})
.buffered(4); // there are max four medias per tweet and they need to be treated in
// order
for task in tasks { while let Some(result) = stream.next().await {
match task.await { match result {
// insert the media at the right place Ok(Ok(v)) => media_ids.push(v),
Ok(Ok((i, v))) => media_ids[i] = v,
Ok(Err(e)) => warn!("Cannot treat media: {}", e), Ok(Err(e)) => warn!("Cannot treat media: {}", e),
Err(e) => error!("Something went wrong when joining the main thread: {}", e), Err(e) => error!("Something went wrong when joining the main thread: {}", e),
} }

View File

@@ -1,5 +1,57 @@
use scootaloo::parse_toml; use scootaloo::parse_toml;
#[test]
fn test_page_size() {
const DEFAULT_PAGE_SIZE: i32 = 200;
let toml = parse_toml("tests/page_size.toml");
assert_eq!(toml.twitter.page_size, Some(100));
assert_eq!(toml.mastodon.get("0").unwrap().twitter_page_size, None);
assert_eq!(toml.mastodon.get("1").unwrap().twitter_page_size, Some(42));
// this is the exact line that is used inside fn run() to determine the twitter page size
// passed to fn get_user_timeline()
let page_size_for_0 = toml
.mastodon
.get("0")
.unwrap()
.twitter_page_size
.unwrap_or_else(|| toml.twitter.page_size.unwrap_or(DEFAULT_PAGE_SIZE));
let page_size_for_1 = toml
.mastodon
.get("1")
.unwrap()
.twitter_page_size
.unwrap_or_else(|| toml.twitter.page_size.unwrap_or(DEFAULT_PAGE_SIZE));
assert_eq!(page_size_for_0, 100);
assert_eq!(page_size_for_1, 42);
let toml = parse_toml("tests/no_page_size.toml");
assert_eq!(toml.twitter.page_size, None);
assert_eq!(toml.mastodon.get("0").unwrap().twitter_page_size, None);
// and same here
let page_size_for_0 = toml
.mastodon
.get("0")
.unwrap()
.twitter_page_size
.unwrap_or_else(|| toml.twitter.page_size.unwrap_or(DEFAULT_PAGE_SIZE));
assert_eq!(page_size_for_0, 200);
}
#[test]
fn test_parse_good_toml_rate_limit() {
let parse_good_toml = parse_toml("tests/good_test_rate_limit.toml");
assert_eq!(parse_good_toml.scootaloo.rate_limit, Some(69 as usize));
}
#[test] #[test]
fn test_parse_good_toml() { fn test_parse_good_toml() {
let parse_good_toml = parse_toml("tests/good_test.toml"); let parse_good_toml = parse_toml("tests/good_test.toml");
@@ -9,6 +61,7 @@ fn test_parse_good_toml() {
"/var/random/scootaloo.sqlite" "/var/random/scootaloo.sqlite"
); );
assert_eq!(parse_good_toml.scootaloo.cache_path, "/tmp/scootaloo"); assert_eq!(parse_good_toml.scootaloo.cache_path, "/tmp/scootaloo");
assert_eq!(parse_good_toml.scootaloo.rate_limit, None);
assert_eq!(parse_good_toml.twitter.consumer_key, "rand consumer key"); assert_eq!(parse_good_toml.twitter.consumer_key, "rand consumer key");
assert_eq!(parse_good_toml.twitter.consumer_secret, "secret"); assert_eq!(parse_good_toml.twitter.consumer_secret, "secret");

View File

@@ -0,0 +1,20 @@
[scootaloo]
db_path="/var/random/scootaloo.sqlite"
cache_path="/tmp/scootaloo"
rate_limit=69
[twitter]
consumer_key="rand consumer key"
consumer_secret="secret"
access_key="rand access key"
access_secret="super secret"
[mastodon]
[mastodon.tamerelol]
twitter_screen_name="tamerelol"
base = "https://m.nintendojo.fr"
client_id = "rand client id"
client_secret = "secret"
redirect = "urn:ietf:wg:oauth:2.0:oob"
token = "super secret"

19
tests/no_page_size.toml Normal file
View File

@@ -0,0 +1,19 @@
[scootaloo]
db_path="/var/random/scootaloo.sqlite"
cache_path="/tmp/scootaloo"
[twitter]
consumer_key="rand consumer key"
consumer_secret="secret"
access_key="rand access key"
access_secret="super secret"
[mastodon]
[mastodon.0]
twitter_screen_name="tamerelol"
base = "https://m.nintendojo.fr"
client_id = "rand client id"
client_secret = "secret"
redirect = "urn:ietf:wg:oauth:2.0:oob"
token = "super secret"

29
tests/page_size.toml Normal file
View File

@@ -0,0 +1,29 @@
[scootaloo]
db_path="/var/random/scootaloo.sqlite"
cache_path="/tmp/scootaloo"
[twitter]
consumer_key="rand consumer key"
consumer_secret="secret"
access_key="rand access key"
access_secret="super secret"
page_size=100
[mastodon]
[mastodon.0]
twitter_screen_name="tamerelol"
base = "https://m.nintendojo.fr"
client_id = "rand client id"
client_secret = "secret"
redirect = "urn:ietf:wg:oauth:2.0:oob"
token = "super secret"
[mastodon.1]
twitter_screen_name="tonperemdr"
twitter_page_size=42
base = "https://m.nintendojo.fr"
client_id = "rand client id"
client_secret = "secret"
redirect = "urn:ietf:wg:oauth:2.0:oob"
token = "super secret"