79 Commits

Author SHA1 Message Date
VC
b3e7ee9d84 Merge branch '5-migrate-from-tokio-loop-to-futures-stream' into 'master'
refactor: use futures instead of tokio for media upload

Closes #5

See merge request veretcle/scootaloo!28
2022-11-15 09:11:28 +00:00
VC
7f7219ea78 feat: turn tokio-based async logic into futures 2022-11-15 10:06:00 +01:00
VC
f371b8a297 feat: add default rate_limiting option 2022-11-15 10:06:00 +01:00
VC
ec3956eabb doc: add rate_limiting option 2022-11-15 10:06:00 +01:00
VC
ce84c05581 refactor: use futures instead of tokio for media upload 2022-11-15 10:05:57 +01:00
VC
b64621368b Merge branch '4-migrate-to-clap-v4' into 'master'
refactor: migrate from clap v2 to clap v4

Closes #4

See merge request veretcle/scootaloo!27
2022-11-14 19:57:32 +00:00
VC
89de1cf7a3 refactor: migrate from clap v2 to clap v4 2022-11-14 20:36:15 +01:00
VC
ffbe98f838 Merge branch 'generate_media_flow' into 'master'
Better media flow

See merge request veretcle/scootaloo!26
2022-11-14 13:41:07 +00:00
VC
822f4044c6 chore: bump version 2022-11-14 14:33:42 +01:00
VC
78924f6eeb refactor: simpler error bubbling inside async block 2022-11-14 14:33:39 +01:00
VC
9c14636735 refactor: avoid Box::new syntax, prefer into() 2022-11-14 14:25:08 +01:00
VC
01bac63fb9 Merge branch 'reply_improvements' into 'master'
refactor: improve reply/thread management

See merge request veretcle/scootaloo!25
2022-11-09 20:26:20 +00:00
VC
4f5663b450 feature: better error implementation for ScootalooError inside async block 2022-11-09 19:36:00 +01:00
VC
9a9c4b4809 chore: cargo update 2022-11-09 18:33:04 +01:00
VC
9970968b47 refactor: avoid panicking into thread, bubble up errors to main thread to be handled 2022-11-09 18:23:06 +01:00
VC
291c86677e refactor: get mastodon token after ensuring feed is not empty 2022-11-09 08:40:04 +01:00
VC
31afb1cf7d Merge branch 'async_media_upload' into 'master'
Async media upload

See merge request veretcle/scootaloo!24
2022-11-08 13:35:06 +00:00
VC
4415c4ac12 refactor: better logic flow for uploading/deleting media 2022-11-08 10:54:42 +01:00
VC
89f1372f9f bump: version v0.8.0 2022-11-08 08:54:36 +01:00
VC
06904434c8 fix: indentation error when registering 2022-11-08 08:54:36 +01:00
VC
3c64df23bc refactor: add info/debug 2022-11-08 08:54:32 +01:00
VC
c62f67c3b3 refactor: simpler mtask var 2022-11-08 08:37:26 +01:00
VC
3b0e7234af refactor: downloads/uploads every media from a tweet async way 2022-11-08 08:37:17 +01:00
VC
62011b4b81 refactor: downloads/uploads every media from a tweet async way 2022-11-07 21:47:12 +01:00
VC
5ce3bde3e7 fix: remove unecessary \n in TOML conf 2022-11-07 18:25:55 +01:00
VC
ab4184c0ed Merge branch 'async_multi_account' into 'master'
feat: attempt for async treatment of all accounts

See merge request veretcle/scootaloo!23
2022-11-05 09:36:55 +00:00
VC
de758c7bda refactor: separate function for media ids 2022-11-05 10:23:21 +01:00
VC
df75520175 feat: async treatment of all accounts 2022-11-04 15:26:27 +01:00
VC
73244f9ecc Merge branch 'multi_account_scootaloo' into 'master'
Multi account scootaloo

See merge request veretcle/scootaloo!22
2022-11-03 22:38:26 +00:00
VC
dad49da090 feat: add multi-account ability 2022-11-03 23:30:50 +01:00
VC
44ec3edfe2 Merge branch 'rust_1_63' into 'master'
feat: adapt to rust 1.63

See merge request veretcle/scootaloo!21
2022-08-17 16:06:39 +00:00
VC
8673dd7866 feat: adapt to rust 1.63 2022-08-17 18:02:12 +02:00
VC
ff496b167d Merge branch 'fmt_clippy' into 'master'
style: fmt & clippy processed

See merge request veretcle/scootaloo!20
2022-08-11 13:29:22 +00:00
VC
97ab6f4925 feat: bump version 2022-08-11 15:26:36 +02:00
VC
5b512cb757 ci: common ci 2022-08-11 13:50:52 +02:00
VC
b11595bfca style: fmt & clippy processed 2022-08-11 12:33:05 +02:00
VC
dab8725f99 Merge branch 'refactor_error' into 'master'
refactor(error): remove deprecated description()

See merge request veretcle/scootaloo!19
2022-05-03 10:17:37 +00:00
VC
08368b2a73 refactor(error): remove deprecated description() 2022-05-03 12:14:23 +02:00
VC
c6cdaa21b8 Merge branch 'useless_crate' into 'master'
refactor: remove useless crate:: ref

See merge request veretcle/scootaloo!18
2022-04-25 09:30:52 +00:00
VC
99a6adc1f4 refactor: remove useless crate:: ref 2022-04-25 11:27:11 +02:00
VC
1afbdc1672 Merge branch 'thread_to_thread' into 'master'
Make thread do thread

See merge request veretcle/scootaloo!16
2022-04-24 12:41:23 +00:00
VC
905793af72 refactor(fmt): delete String::from() format in favor of .to_string()/to_owned() 2022-04-24 14:20:45 +02:00
VC
734f03f5a9 feature: add test for build_basic_status() fn 2022-04-24 14:06:46 +02:00
VC
6c0383d9d0 refactor: build better decode functions 2022-04-24 12:18:17 +02:00
VC
a90facae86 refactor: refactor run() fn to be more efficient/more clear 2022-04-24 11:14:32 +02:00
VC
22402f0f46 refactor: optimize import and last_tweet_id var 2022-04-24 11:01:46 +02:00
VC
26491f146f refactor: replace scootaloo_config with &str in init_db() 2022-04-24 10:40:52 +02:00
VC
13bb6d6f37 feature: make thread in Twitter thread in Mastodon 2022-04-24 09:42:26 +02:00
VC
abfb2ff50a feature: more tests 2022-04-24 09:42:26 +02:00
VC
8b0945cb48 refactor: more clear option 2022-04-24 09:42:26 +02:00
VC
48b8eaaa5b feature: state is held into a sqlite db 2022-04-24 09:42:22 +02:00
VC
6363c12460 feature(test): add tests 2022-04-24 09:39:29 +02:00
VC
080218f385 refactor: make everything a little more modular 2022-04-24 09:39:29 +02:00
VC
de375b9f28 Merge branch 'syntax-color-in-doc' into 'master'
Add syntax colors in documentation

See merge request veretcle/scootaloo!17
2022-04-24 07:12:37 +00:00
M
1babc2725d Enable color syntax in documentation 2022-04-23 13:02:05 +00:00
VC
11b629203b Merge branch 'bump_versions' into 'master'
Bump versions of all dependencies

See merge request veretcle/scootaloo!15
2022-04-22 08:31:36 +00:00
VC
16792e515a refactor(ci): refactor .gitlab-ci to add tests 2022-04-22 09:41:53 +02:00
VC
d228ceaaf6 refactor(dependencies): bump version of all dependencies 2022-04-22 09:41:48 +02:00
VC
bd7d4dbbb5 Merge branch 'fix/unused_struct' into 'master'
fix: remove unused struct

See merge request veretcle/scootaloo!13
2022-04-12 11:44:04 +00:00
VC
ff03b32f9d fix: remove unused struct 2022-04-12 12:55:40 +02:00
VC
533a40f2c2 Merge branch 'noasync' into 'master'
getting async in a reasonnable way

See merge request veretcle/scootaloo!12
2021-04-25 05:32:09 +00:00
VC
c301649d49 last correction 2022-04-12 12:03:49 +02:00
VC
fd9cc31848 Update src/lib.rs 2021-04-24 08:04:52 +00:00
VC
4ef58bda0a Deleted Cargo.lock 2021-04-24 07:58:52 +00:00
VC
912ee25c50 Merge branch 'noasync' of framagit.org:veretcle/scootaloo into noasync 2021-04-24 09:45:50 +02:00
VC
4f03a1a6f3 Adding Cargo.lock 2021-04-24 09:43:51 +02:00
VC
ac80b67c9f Merge branch 'master' into 'noasync'
# Conflicts:
#   .gitlab-ci.yml
#   Cargo.lock
#   Cargo.toml
#   src/lib.rs
2021-04-24 07:40:04 +00:00
VC
7aec8e0e33 adding strip 2021-04-24 09:34:37 +02:00
VC
f58edf3c75 Backporting changes on gitlab-ci.yml 2021-04-24 09:15:31 +02:00
VC
394ec5d1f3 I consider this a good compromise between all async (that works but is pretty complex and honestly a bit useless) and nothing async that is not the most optimal way to deal with things as reqwest and egg-mode are async by nature 2021-04-24 09:08:09 +02:00
VC
c10de76854 Adding logging facility 2021-04-24 09:07:06 +02:00
VC
020af69fe0 Adding the necessary bits and pieces to make things work better 2021-04-24 09:04:50 +02:00
VC
da808b0051 FUUUUUUUUUUUUUUU 2021-04-20 11:46:33 +02:00
VC
5a4dd5cb99 paths 2021-04-20 11:40:49 +02:00
VC
5b04bd27b9 Updating changelog 2021-04-20 11:39:43 +02:00
VC
c52fc52d23 Optimizing size of the final executable 2021-04-20 11:38:50 +02:00
VC
09ed837a1b Updating CHANGELOG to please @meduzen 2021-04-19 21:19:47 +02:00
VC
d4db2933ae Merge branch 'async_attempt2' into 'master'
Async version

See merge request veretcle/scootaloo!11
2021-04-18 17:00:45 +00:00
VC
2e052ebf6a Still a WIP: need to use async reqwest to respect the global context of usage (reqwest::blocking is using async inside so it does not really sync whatever) 2021-04-18 17:00:44 +00:00
16 changed files with 2481 additions and 1247 deletions

View File

@@ -1,10 +1,5 @@
stages:
- build
rust-latest:
stage: build
image: rust:latest
script:
- cargo build --verbose
- cargo test --verbose
---
include:
project: 'veretcle/ci-common'
ref: 'main'
file: 'ci_rust.yml'

View File

@@ -1,3 +1,23 @@
# v0.3.3
* optimizing the size of the final executable (now ⩽ 6MiB)
# v0.3.2
* 100% async version
* media are now downloaded in parallel thanks to async
* logs are introduced into the code for your viewing pleasure
# v0.2.3
* using the async version of `reqwest`
* introducing async functions and make `tokio` the de facto executor for everything async
# v0.2.1
* using `tokio-compat` to avoid having 3 different versions of `tokio` in the same executable
* encapsulating async calls inside blocking tokio runtime calls
# v0.1.8
* fix #1: mentions are treated like decoded urls (this is not really needed to push it this far but it would be easier in case you want to modify it)

1891
Cargo.lock generated

File diff suppressed because it is too large Load Diff

View File

@@ -1,25 +1,26 @@
[package]
name = "scootaloo"
version = "0.2.1"
version = "0.9.0"
authors = ["VC <veretcle+framagit@mateu.be>"]
edition = "2018"
edition = "2021"
# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html
[dependencies]
chrono = "^0.4"
serde = { version = "1.0", features = ["derive"] }
toml = "^0.5"
clap = "^2.33"
tokio = { version = "1", features = ["rt-multi-thread"]}
tokio-compat-02 = "0.2"
egg-mode = "^0.15"
clap = "^4"
egg-mode = "^0.16"
rusqlite = "^0.27"
tokio = { version = "^1", features = ["full"]}
futures = "^0.3"
elefren = "^0.22"
html-escape = "^0.2"
reqwest = "^0.11"
log = "^0.4"
simple_logger = "^2.1"
mime = "^0.3"
reqwest = { version="^0.11", features = ["blocking"] }
htmlescape = "^0.3"
[profile.release]
strip = true

View File

@@ -3,41 +3,50 @@ A Twitter to Mastodon copy bot written in Rust
It:
* copies the content (text) of the original Tweet
* dereferences the links
* gets every attach media (photo, video or gif)
* gets every attached media (photo, video or gif)
If any of the last steps failed, the Toot gets published with the exact same text as the Tweet.
RT are excluded, replies are included.but only the source threads are copied, not the actual replies to other Twitter users.
RT are excluded, replies are included when considered part of a thread (reply to self), not the actual replies to other Twitter users.
# Usage
## Configuring
First up, create a configuration file (default path is `/usr/local/etc/scootaloo.toml`). It will look like this:
```
```toml
[scootaloo]
last_tweet_path="/usr/local/etc/last_tweet" ## file containing the last tweet id received, must be writable
cache_path="/tmp/scootaloo" ## a dir where the temporary files will be download, must be writeable
db_path = "/var/lib/scootaloo/scootaloo.sqlite" ## file containing the SQLite Tweet corresponding Toot DB, must be writeable
cache_path = "/tmp/scootaloo" ## a dir where the temporary files will be downloaded, must be writeable
rate_limiting = 4 ## optional, default 4, number of accounts handled simultaneously
[twitter]
username="NintendojoFR" ## User Timeline to copy
## Consumer/Access key for Twitter (can be generated at https://developer.twitter.com/en/apps)
consumer_key="MYCONSUMERKEY"
consumer_secret="MYCONSUMERSECRET"
access_key="MYACCESSKEY"
access_secret="MYACCESSSECRET"
consumer_key = "MYCONSUMERKEY"
consumer_secret = "MYCONSUMERSECRET"
access_key = "MYACCESSKEY"
access_secret = "MYACCESSSECRET"
[mastodon]
```
Then run the command with the `init` subcommand to initiate the DB:
```sh
scootaloo init
```
This subcommand is completely idempotent.
Then run the command with the `register` subcommand:
```
```sh
scootaloo register --host https://m.nintendojo.fr
```
This will give you the end of the TOML file. It will look like this:
```
[mastodon]
```toml
[mastodon.nintendojofr] ## account
twitter_screen_name="NintendojoFR" ## User Timeline to copy
base = "https://m.nintendojo.fr"
client_id = "MYCLIENTID"
client_secret = "MYCLIENTSECRET"
@@ -45,9 +54,15 @@ redirect = "urn:ietf:wg:oauth:2.0:oob"
token = "MYTOKEN"
```
You can add other accounts if you like, after the `[mastodon]` moniker. Scootaloo would theoretically support an unlimited number of accounts.
## Running
You can then run the application via `cron` for example. Here is the generic usage:
```
```sh
A Twitter to Mastodon bot
USAGE:
scootaloo [OPTIONS] [SUBCOMMAND]
@@ -57,21 +72,38 @@ FLAGS:
OPTIONS:
-c, --config <CONFIG_FILE> TOML config file for scootaloo (default /usr/local/etc/scootaloo.toml)
-l, --loglevel <LOGLEVEL> Log level.Valid values are: Off, Warn, Error, Info, Debug
SUBCOMMANDS:
help Prints this message or the help of the given subcommand(s)
init Command to init Scootaloo DB
migrate Command to migrate Scootaloo DB
register Command to register to a Mastodon Instance
```
# Quirks
Scootaloo does not respect the spam limits imposed by Mastodon: it will make a 429 error if too much Tweets are converted to Toots in a short amount of time (and it will not recover from it). By default, it gets the last 200 tweets from the user timeline (which is a lot!). It is recommended to put a Tweet number into the `last_tweet` file before copying an old account.
Scootaloo does not respect the spam limits imposed by Mastodon: it will trigger a 429 error if too many Tweets are converted to Toots in a short amount of time (and it will not recover from it). By default, it gets the last 200 tweets from the user timeline (which is a lot!). It is recommended to put a Tweet number into the DB file before copying an old account.
You can do that with a command like:
```
echo -n '8189881949849' > last_tweet
You can insert that Tweet number by connecting to the DB you created:
```sh
sqlite3 /var/lib/scootaloo/scootaloo.sqlite
```
**This file should only contain the last tweet ID without any other char (no EOL or new line).**
And inserting the data:
Oh and everything is sync (and not async) so this does not run at a blazing speed…
```sql
INSERT INTO tweet_to_toot VALUES ("<twitter_screen_name>", 1383782580412030982, "<twitter_screen_name>");
```
The last value is supposed to be the Toot ID. It cannot be null, so you better initialize it with something unique, like the Twitter Screen Name for example.
# Migrating from Scootaloo ⩽ 0.6.1
The DB schema has changed between versions 0.6.x and 0.7.x (this is due to the multi-account nature of Scootaloo from 0.7.x onward). You need to migrate your DB. You can do so by issuing the command:
```
scootaloo migrate
```
You can optionally specify a screen name with the `--name` option. By default, it'll take the first screen name in the config file.

47
src/config.rs Normal file
View File

@@ -0,0 +1,47 @@
use std::{collections::HashMap, fs::read_to_string};
use serde::Deserialize;
/// General configuration Struct
///
/// Top-level shape of the TOML configuration file, as deserialized by
/// `parse_toml`.
#[derive(Debug, Deserialize)]
pub struct Config {
/// Content of the `[twitter]` table (API credentials)
pub twitter: TwitterConfig,
/// Content of the `[mastodon.<key>]` tables, one entry per account, keyed by
/// the name following `mastodon.` in the table header
pub mastodon: HashMap<String, MastodonConfig>,
/// Content of the `[scootaloo]` table (application-level settings)
pub scootaloo: ScootalooConfig,
}
/// Twitter credentials (the `[twitter]` table of the TOML file)
///
/// The README describes these as the Consumer/Access keys that can be
/// generated at https://developer.twitter.com/en/apps
#[derive(Debug, Deserialize)]
pub struct TwitterConfig {
/// Consumer key
pub consumer_key: String,
/// Consumer secret
pub consumer_secret: String,
/// Access key
pub access_key: String,
/// Access secret
pub access_secret: String,
}
/// One Mastodon account (a `[mastodon.<key>]` table of the TOML file)
///
/// Per the README, this table is the output of the `register` subcommand and
/// is meant to be pasted at the end of the configuration file.
#[derive(Debug, Deserialize)]
pub struct MastodonConfig {
/// Twitter screen name whose user timeline is copied to this account
pub twitter_screen_name: String,
/// Base URL of the Mastodon instance (e.g. `https://m.nintendojo.fr`)
pub base: String,
/// Client id of the registered application
pub client_id: String,
/// Client secret of the registered application
pub client_secret: String,
/// Redirect URI of the registered application
/// (`urn:ietf:wg:oauth:2.0:oob` in the README example)
pub redirect: String,
/// Token of the account
pub token: String,
}
#[derive(Debug, Deserialize)]
pub struct ScootalooConfig {
pub db_path: String,
pub cache_path: String,
pub rate_limit: Option<usize>,
}
/// Loads the file at `toml_file` and deserializes its TOML content into a
/// [`Config`].
///
/// # Panics
///
/// Panics if the file cannot be read, or if its content cannot be parsed into
/// a `Config`. The panic message names the file and the underlying error.
pub fn parse_toml(toml_file: &str) -> Config {
    let raw = match read_to_string(toml_file) {
        Ok(content) => content,
        Err(e) => panic!("Cannot open config file {}: {}", toml_file, e),
    };

    match toml::from_str::<Config>(&raw) {
        Ok(parsed) => parsed,
        Err(e) => panic!("Cannot parse TOML file {}: {}", toml_file, e),
    }
}

41
src/error.rs Normal file
View File

@@ -0,0 +1,41 @@
use std::{
boxed::Box,
convert::From,
error::Error,
fmt::{Display, Formatter, Result},
};
use elefren::Error as elefrenError;
/// Error type of the crate: a plain, human-readable message.
#[derive(Debug)]
pub struct ScootalooError {
    details: String,
}

impl ScootalooError {
    /// Builds an error holding an owned copy of `msg`.
    pub fn new(msg: &str) -> ScootalooError {
        let details = msg.to_owned();
        Self { details }
    }
}

// No trait method is overridden: the defaults of `Error` are used as-is.
impl Error for ScootalooError {}

impl Display for ScootalooError {
    /// Writes the stored message verbatim; width/padding flags of the caller's
    /// format spec are not applied.
    fn fmt(&self, f: &mut Formatter) -> Result {
        f.write_str(&self.details)
    }
}
impl From<Box<dyn Error>> for ScootalooError {
fn from(error: Box<dyn Error>) -> Self {
ScootalooError::new(&format!("Error in a subset crate: {}", error))
}
}
impl From<elefrenError> for ScootalooError {
fn from(error: elefrenError) -> Self {
ScootalooError::new(&format!("Error in elefren crate: {}", error))
}
}

View File

@@ -1,410 +1,154 @@
// std
use std::{
path::Path,
borrow::Cow,
collections::HashMap,
io::{stdin, copy},
fmt,
fs::{read_to_string, write, create_dir_all, File, remove_file},
error::Error,
};
mod error;
use error::ScootalooError;
//tokio
use tokio::runtime::Runtime;
use tokio_compat_02::FutureExt;
mod config;
pub use config::parse_toml;
use config::Config;
// toml
use serde::Deserialize;
mod mastodon;
pub use mastodon::register;
use mastodon::{build_basic_status, get_mastodon_token};
// egg-mode
use egg_mode::{
Token,
KeyPair,
entities::{UrlEntity, MediaEntity, MentionEntity, MediaType},
user::UserID,
tweet::{
Tweet,
user_timeline,
},
};
mod twitter;
use twitter::*;
// elefren
use elefren::{
prelude::*,
apps::App,
status_builder::StatusBuilder,
scopes::Scopes,
};
mod util;
use crate::util::generate_media_ids;
// reqwest
use reqwest::blocking::Client;
mod state;
pub use state::{init_db, migrate_db};
use state::{read_state, write_state, TweetToToot};
// htmlescape
use htmlescape::decode_html;
use elefren::{prelude::*, status_builder::StatusBuilder};
use log::info;
use rusqlite::Connection;
use std::sync::Arc;
use tokio::{spawn, sync::Mutex};
/**********
* Generic usage functions
***********/
/*
* Those functions are related to the Twitter side of things
*/
/// Read last tweet id from a file
fn read_state(s: &str) -> Option<u64> {
let state = read_to_string(s);
use futures::StreamExt;
if let Ok(s) = state {
return s.parse::<u64>().ok();
}
None
}
/// Write last treated tweet id to a file
fn write_state(f: &str, s: u64) -> Result<(), std::io::Error> {
write(f, format!("{}", s))
}
/// Get twitter oauth2 token
fn get_oauth2_token(config: &Config) -> Token {
let con_token = KeyPair::new(String::from(&config.twitter.consumer_key), String::from(&config.twitter.consumer_secret));
let access_token = KeyPair::new(String::from(&config.twitter.access_key), String::from(&config.twitter.access_secret));
Token::Access {
consumer: con_token,
access: access_token,
}
}
/// Get twitter user timeline
fn get_user_timeline(config: &Config, token: Token, lid: Option<u64>) -> Result<Vec<Tweet>, Box<dyn Error>> {
// fix the page size to 200 as it is the maximum Twitter authorizes
let rt = Runtime::new()?;
let (_timeline, feed) = rt.block_on(user_timeline(UserID::from(String::from(&config.twitter.username)), true, false, &token)
.with_page_size(200)
.older(lid)
.compat())?;
Ok(feed.to_vec())
}
/// decode urls from UrlEntities
fn decode_urls(urls: &Vec<UrlEntity>) -> HashMap<String, String> {
let mut decoded_urls = HashMap::new();
for url in urls {
if url.expanded_url.is_some() {
// unwrap is safe here as we just verified that there is something inside expanded_url
decoded_urls.insert(String::from(&url.url), String::from(url.expanded_url.as_deref().unwrap()));
}
}
decoded_urls
}
fn twitter_mentions(ums: &Vec<MentionEntity>) -> HashMap<String, String> {
let mut decoded_mentions = HashMap::new();
for um in ums {
decoded_mentions.insert(format!("@{}", um.screen_name), format!("@{}@twitter.com", um.screen_name));
}
decoded_mentions
}
/// Retrieve a single media from a tweet and store it in a temporary file
fn get_tweet_media(m: &MediaEntity, t: &str) -> Result<String, Box<dyn Error>> {
match m.media_type {
MediaType::Photo => {
return cache_media(&m.media_url_https, t);
},
_ => {
match &m.video_info {
Some(v) => {
for variant in &v.variants {
if variant.content_type == "video/mp4" {
return cache_media(&variant.url, t);
}
}
return Err(Box::new(ScootalooError::new(format!("Media Type for {} is video but no mp4 file URL is available", &m.url).as_str())));
},
None => {
return Err(Box::new(ScootalooError::new(format!("Media Type for {} is video but does not contain any video_info", &m.url).as_str())));
},
}
},
};
}
/*
* Those functions are related to the Mastodon side of things
*/
/// Get Mastodon Data
fn get_mastodon_token(masto: &MastodonConfig) -> Mastodon {
let data = Data {
base: Cow::from(String::from(&masto.base)),
client_id: Cow::from(String::from(&masto.client_id)),
client_secret: Cow::from(String::from(&masto.client_secret)),
redirect: Cow::from(String::from(&masto.redirect)),
token: Cow::from(String::from(&masto.token)),
};
Mastodon::from(data)
}
/// build toot text from tweet
fn build_basic_status(tweet: &Tweet) -> Result<String, Box<dyn Error>> {
let mut toot = String::from(&tweet.text);
let decoded_urls = decode_urls(&tweet.entities.urls);
for decoded_url in decoded_urls {
toot = toot.replace(&decoded_url.0, &decoded_url.1);
}
let decoded_mentions = twitter_mentions(&tweet.entities.user_mentions);
for decoded_mention in decoded_mentions {
toot = toot.replace(&decoded_mention.0, &decoded_mention.1);
}
if let Ok(t) = decode_html(&toot) {
toot = t;
}
Ok(toot)
}
/*
* Generic private functions
*/
fn cache_media(u: &str, t: &str) -> Result<String, Box<dyn Error>> {
// create dir
if !Path::new(t).is_dir() {
create_dir_all(t)?;
}
// get file
let client = Client::new();
let mut response = client.get(u).send()?;
// create local file
let dest_filename = match response.url()
.path_segments()
.and_then(|segments| segments.last()) {
Some(r) => r,
None => {
return Err(Box::new(ScootalooError::new(format!("Cannot determine the destination filename for {}", u).as_str())));
},
};
let dest_filepath = format!("{}/{}", t, dest_filename);
let mut dest_file = File::create(&dest_filepath)?;
copy(&mut response, &mut dest_file)?;
Ok(dest_filepath)
}
/**********
* local error handler
**********/
#[derive(Debug)]
struct ScootalooError {
details: String,
}
impl ScootalooError {
fn new(msg: &str) -> ScootalooError {
ScootalooError {
details: String::from(msg),
}
}
}
impl fmt::Display for ScootalooError {
fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
write!(f, "{}", self.details)
}
}
impl std::error::Error for ScootalooError {
fn description(&self) -> &str {
&self.details
}
}
/**********
* Config structure
***********/
/// General configuration Struct
#[derive(Debug, Deserialize)]
pub struct Config {
twitter: TwitterConfig,
mastodon: MastodonConfig,
scootaloo: ScootalooConfig,
}
#[derive(Debug, Deserialize)]
struct TwitterConfig {
username: String,
consumer_key: String,
consumer_secret: String,
access_key: String,
access_secret: String,
}
#[derive(Debug, Deserialize)]
struct MastodonConfig {
base: String,
client_id: String,
client_secret: String,
redirect: String,
token: String,
}
#[derive(Debug, Deserialize)]
struct ScootalooConfig {
last_tweet_path: String,
cache_path: String,
}
/*********
* Main functions
*********/
/// Parses the TOML file into a Config Struct
pub fn parse_toml(toml_file: &str) -> Config {
let toml_config = read_to_string(toml_file).unwrap_or_else(|e|
panic!("Cannot open config file {}: {}", toml_file, e)
);
let config: Config = toml::from_str(&toml_config).unwrap_or_else(|e|
panic!("Cannot parse TOML file {}: {}", toml_file, e)
);
config
}
/// Generic register function
/// As this function is supposed to be run only once, it will panic for every error it encounters
/// Most of this function is a direct copy/paste of the official `mammut` crate
pub fn register(host: &str) {
let mut builder = App::builder();
builder.client_name(Cow::from(String::from(env!("CARGO_PKG_NAME"))))
.redirect_uris(Cow::from(String::from("urn:ietf:wg:oauth:2.0:oob")))
.scopes(Scopes::write_all())
.website(Cow::from(String::from("https://framagit.org/veretcle/scootaloo")));
let app = builder.build().expect("Cannot build the app");
let registration = Registration::new(host).register(app).expect("Cannot build registration object");
let url = registration.authorize_url().expect("Cannot generate registration URI!");
println!("Click this link to authorize on Mastodon: {}", url);
println!("Paste the returned authorization code: ");
let mut input = String::new();
stdin().read_line(&mut input).expect("Unable to read back registration code!");
let code = input.trim();
let mastodon = registration.complete(code).expect("Unable to create access token!");
let toml = toml::to_string(&*mastodon).unwrap();
println!("Please insert the following block at the end of your configuration file:\n[mastodon]\n{}", toml);
}
const DEFAULT_RATE_LIMIT: usize = 4;
/// This is where the magic happens
pub fn run(config: Config) {
// retrieve the last tweet ID for the username
let last_tweet_id = read_state(&config.scootaloo.last_tweet_path);
#[tokio::main]
pub async fn run(config: Config) {
// open the SQLite connection
let conn = Arc::new(Mutex::new(
Connection::open(&config.scootaloo.db_path).unwrap_or_else(|e| {
panic!(
"Something went wrong when opening the DB {}: {}",
&config.scootaloo.db_path, e
)
}),
));
// get OAuth2 token
let token = get_oauth2_token(&config);
let mut stream = futures::stream::iter(config.mastodon.into_values())
.map(|mastodon_config| {
// create temporary value for each task
let scootaloo_cache_path = config.scootaloo.cache_path.clone();
let token = get_oauth2_token(&config.twitter);
let task_conn = conn.clone();
// get Mastodon instance
let mastodon = get_mastodon_token(&config.mastodon);
spawn(async move {
info!("Starting treating {}", &mastodon_config.twitter_screen_name);
// get user timeline feed (Vec<tweet>)
let mut feed = get_user_timeline(&config, token, last_tweet_id).unwrap_or_else(|e|
panic!("Something went wrong when trying to retrieve {}s timeline: {}", &config.twitter.username, e)
);
// retrieve the last tweet ID for the username
let lconn = task_conn.lock().await;
let last_tweet_id = read_state(&lconn, &mastodon_config.twitter_screen_name, None)?
.map(|r| r.tweet_id);
drop(lconn);
// empty feed -> exiting
if feed.is_empty() {
println!("Nothing to retrieve since last time, exiting…");
return;
}
// get user timeline feed (Vec<tweet>)
let mut feed =
get_user_timeline(&mastodon_config.twitter_screen_name, &token, last_tweet_id)
.await?;
// order needs to be chronological
feed.reverse();
for tweet in &feed {
// determine if the tweet is part of a thread (response to self) or a standard response
if let Some(r) = &tweet.in_reply_to_screen_name {
if &r.to_lowercase() != &config.twitter.username.to_lowercase() {
// we are responding not threading
continue;
// empty feed -> exiting
if feed.is_empty() {
info!("Nothing to retrieve since last time, exiting…");
return Ok(());
}
};
// build basic status by just yielding text and dereferencing contained urls
let mut status_text = match build_basic_status(tweet) {
Ok(t) => t,
Err(e) => {
println!("Could not create status from tweet {}: {}", tweet.id ,e);
continue;
},
};
// get Mastodon instance
let mastodon = get_mastodon_token(&mastodon_config);
let mut status_medias: Vec<String> = vec![];
// order needs to be chronological
feed.reverse();
// reupload the attachments if any
if let Some(m) = &tweet.extended_entities {
for media in &m.media {
let local_tweet_media_path = match get_tweet_media(&media, &config.scootaloo.cache_path) {
Ok(m) => m,
Err(e) => {
println!("Cannot get tweet media for {}: {}", &media.url, e);
continue;
},
};
for tweet in &feed {
info!("Treating Tweet {} inside feed", tweet.id);
// initiate the toot_reply_id var
let mut toot_reply_id: Option<String> = None;
// determine if the tweet is part of a thread (response to self) or a standard response
if let Some(r) = &tweet.in_reply_to_screen_name {
if r.to_lowercase() != mastodon_config.twitter_screen_name.to_lowercase() {
// we are responding not threading
info!("Tweet is a direct response, skipping");
continue;
}
info!("Tweet is a thread");
// get the corresponding toot id
let lconn = task_conn.lock().await;
toot_reply_id = read_state(
&lconn,
&mastodon_config.twitter_screen_name,
tweet.in_reply_to_status_id,
)
.unwrap_or(None)
.map(|s| s.toot_id);
drop(lconn);
};
let mastodon_media_ids = match mastodon.media(Cow::from(String::from(&local_tweet_media_path))) {
Ok(m) => {
remove_file(&local_tweet_media_path).unwrap_or_else(|e|
println!("Attachment for {} has been upload, but Im unable to remove the existing file: {}", &local_tweet_media_path, e)
);
m.id
},
Err(e) => {
println!("Cannot attach media {} to Mastodon Instance: {}", &local_tweet_media_path, e);
continue;
// build basic status by just yielding text and dereferencing contained urls
let mut status_text = build_basic_status(tweet);
// building associative media list
let (media_url, status_medias) =
generate_media_ids(tweet, &scootaloo_cache_path, &mastodon).await;
status_text = status_text.replace(&media_url, "");
info!("Building corresponding Mastodon status");
let mut status_builder = StatusBuilder::new();
status_builder.status(&status_text).media_ids(status_medias);
if let Some(i) = toot_reply_id {
status_builder.in_reply_to(&i);
}
};
status_medias.push(mastodon_media_ids);
// can be activated for test purposes
// status_builder.visibility(elefren::status_builder::Visibility::Private);
// last step, removing the reference to the media from with the toots text
status_text = status_text.replace(&media.url, "");
}
let status = status_builder.build()?;
let published_status = mastodon.new_status(status)?;
// this will return if it cannot publish the status preventing the last_tweet from
// being written into db
let ttt_towrite = TweetToToot {
twitter_screen_name: mastodon_config.twitter_screen_name.clone(),
tweet_id: tweet.id,
toot_id: published_status.id,
};
// write the current state (tweet ID and toot ID) to avoid copying it another time
let lconn = task_conn.lock().await;
write_state(&lconn, ttt_towrite)?;
drop(lconn);
}
Ok::<(), ScootalooError>(())
})
})
.buffer_unordered(config.scootaloo.rate_limit.unwrap_or(DEFAULT_RATE_LIMIT));
// launch and wait for every handle
while let Some(result) = stream.next().await {
match result {
Ok(Err(e)) => eprintln!("Error within thread: {}", e),
Err(e) => eprintln!("Error with thread: {}", e),
_ => (),
}
let status = StatusBuilder::new()
.status(&status_text)
.media_ids(status_medias)
.build()
.expect(format!("Cannot build status with text {}", &status_text).as_str());
// publish status
mastodon.new_status(status).unwrap();
// this will panic if it cannot publish the status, which is a good thing, it allows the
// last_tweet gathered not to be written
// write the current state (tweet ID) to avoid copying it another time
write_state(&config.scootaloo.last_tweet_path, tweet.id).unwrap_or_else(|e|
panic!("Cant write the last tweet retrieved: {}", e)
);
}
}

View File

@@ -1,38 +1,142 @@
// self
use clap::{Arg, Command};
use log::LevelFilter;
use scootaloo::*;
use simple_logger::SimpleLogger;
use std::str::FromStr;
// clap
use clap::{App, Arg, SubCommand};
const DEFAULT_CONFIG_PATH: &str = "/usr/local/etc/scootaloo.toml";
/// Command-line entry point: declares the CLI, handles the `register`, `init` and `migrate`
/// sub-commands, and otherwise parses the configuration file and hands it to `run`
// NOTE(review): this listing appears to carry two generations of the CLI set-up side by side
// (an `App`/`SubCommand` builder followed by a `Command` builder) — confirm which one is live
fn main() {
let matches = App::new(env!("CARGO_PKG_NAME"))
.version(env!("CARGO_PKG_VERSION"))
.about("A Twitter to Mastodon bot")
.arg(Arg::with_name("config")
.short("c")
.long("config")
.value_name("CONFIG_FILE")
.help("TOML config file for scootaloo (default /usr/local/etc/scootaloo.toml)")
.takes_value(true)
.display_order(1))
.subcommand(SubCommand::with_name("register")
.version(env!("CARGO_PKG_VERSION"))
.about("Command to register to a Mastodon Instance")
.arg(Arg::with_name("host")
.short("H")
.long("host")
.value_name("HOST")
.help("Base URL of the Mastodon instance to register to (no default)")
.takes_value(true)
.required(true)
.display_order(1)))
.get_matches();
if let Some(matches) = matches.subcommand_matches("register") {
register(matches.value_of("host").unwrap());
return;
// CLI declaration built with `Command`: global `--config` and `--loglevel` options plus the
// `register`, `init` and `migrate` sub-commands
let matches = Command::new(env!("CARGO_PKG_NAME"))
.version(env!("CARGO_PKG_VERSION"))
.about("A Twitter to Mastodon bot")
.arg(
Arg::new("config")
.short('c')
.long("config")
.value_name("CONFIG_FILE")
.help(&format!(
"TOML config file for scootaloo (default {})",
DEFAULT_CONFIG_PATH
))
.num_args(1)
.default_value(DEFAULT_CONFIG_PATH)
.display_order(1),
)
.arg(
Arg::new("log_level")
.short('l')
.long("loglevel")
.value_name("LOGLEVEL")
.help("Log level.Valid values are: Off, Warn, Error, Info, Debug")
.num_args(1)
.value_parser(["Off", "Warn", "Error", "Info", "Debug"])
.display_order(2),
)
.subcommand(
Command::new("register")
.version(env!("CARGO_PKG_VERSION"))
.about("Command to register to a Mastodon Instance")
.arg(
Arg::new("host")
.short('H')
.long("host")
.value_name("HOST")
.help("Base URL of the Mastodon instance to register to (no default)")
.num_args(1)
.required(true)
.display_order(1)
)
.arg(
Arg::new("name")
.short('n')
.long("name")
.help("Twitter Screen Name (like https://twitter.com/screen_name, no default)")
.num_args(1)
.required(true)
.display_order(2)
),
)
.subcommand(
Command::new("init")
.version(env!("CARGO_PKG_VERSION"))
.about("Command to init Scootaloo DB")
.arg(
Arg::new("config")
.short('c')
.long("config")
.value_name("CONFIG_FILE")
.help(&format!(
"TOML config file for scootaloo (default {})",
DEFAULT_CONFIG_PATH
))
.default_value(DEFAULT_CONFIG_PATH)
.num_args(1)
.display_order(1),
),
)
.subcommand(
Command::new("migrate")
.version(env!("CARGO_PKG_VERSION"))
.about("Command to migrate Scootaloo DB")
.arg(
Arg::new("config")
.short('c')
.long("config")
.value_name("CONFIG_FILE")
.help(&format!("TOML config file for scootaloo (default {})", DEFAULT_CONFIG_PATH))
.default_value(DEFAULT_CONFIG_PATH)
.num_args(1)
.display_order(1),
)
.arg(
Arg::new("name")
.short('n')
.long("name")
.help("Twitter Screen Name (like https://twitter.com/screen_name, no default)")
.num_args(1)
.display_order(2)
)
)
.get_matches();
// every sub-command does its work and returns; anything else falls through to the normal run
match matches.subcommand() {
Some(("register", sub_m)) => {
// both arguments are declared `required(true)` above, hence the unwraps
register(
sub_m.get_one::<String>("host").unwrap(),
sub_m.get_one::<String>("name").unwrap(),
);
return;
}
Some(("init", sub_m)) => {
let config = parse_toml(sub_m.get_one::<String>("config").unwrap());
init_db(&config.scootaloo.db_path).unwrap();
return;
}
Some(("migrate", sub_m)) => {
let config = parse_toml(sub_m.get_one::<String>("config").unwrap());
// fallback screen name: the first entry yielded by `config.mastodon.values()`;
// the unwrap panics when the configuration holds no mastodon entry at all
let config_twitter_screen_name =
&config.mastodon.values().next().unwrap().twitter_screen_name;
migrate_db(
&config.scootaloo.db_path,
sub_m
.get_one::<String>("name")
.unwrap_or(config_twitter_screen_name),
)
.unwrap();
return;
}
_ => (),
}
let config = parse_toml(matches.value_of("config").unwrap_or("/usr/local/etc/scootaloo.toml"));
// the logger is only initialised when `--loglevel` was given on the command line
if let Some(level) = matches.get_one::<String>("log_level") {
SimpleLogger::new()
.with_level(LevelFilter::from_str(level).unwrap())
.init()
.unwrap();
}
// `config` carries a default value, so `get_one` always yields something here
let config = parse_toml(matches.get_one::<String>("config").unwrap());
run(config);
}

228
src/mastodon.rs Normal file
View File

@@ -0,0 +1,228 @@
use crate::config::MastodonConfig;
use egg_mode::{
entities::{MentionEntity, UrlEntity},
tweet::Tweet,
};
use elefren::{apps::App, prelude::*, scopes::Scopes};
use html_escape::decode_html_entities;
use std::{borrow::Cow, collections::HashMap, io::stdin};
/// Maps every mentioned Twitter handle (`@name`) to the form it would take if Twitter were
/// part of the Fediverse (`@name@twitter.com`)
fn twitter_mentions(ums: &[MentionEntity]) -> HashMap<String, String> {
    let mut mentions = HashMap::new();
    for mention in ums {
        let handle = format!("@{}", mention.screen_name);
        let federated = format!("@{}@twitter.com", mention.screen_name);
        mentions.insert(handle, federated);
    }
    mentions
}
/// Builds a lookup table from each shortened URL to its expanded form
/// Entities that carry no expanded URL are left out of the table
fn decode_urls(urls: &[UrlEntity]) -> HashMap<String, String> {
    urls.iter()
        .filter_map(|entity| {
            entity
                .expanded_url
                .as_ref()
                .map(|expanded| (entity.url.clone(), expanded.clone()))
        })
        .collect()
}
/// Gets Mastodon Data
pub fn get_mastodon_token(masto: &MastodonConfig) -> Mastodon {
let data = Data {
base: Cow::from(masto.base.to_owned()),
client_id: Cow::from(masto.client_id.to_owned()),
client_secret: Cow::from(masto.client_secret.to_owned()),
redirect: Cow::from(masto.redirect.to_owned()),
token: Cow::from(masto.token.to_owned()),
};
Mastodon::from(data)
}
/// Turns the text of a tweet into the text of a toot: shortened links are expanded, mentions
/// get their `@twitter.com` suffix and HTML entities are decoded
pub fn build_basic_status(tweet: &Tweet) -> String {
    // NOTE(review): both lookup tables are HashMaps, so the replacements below are applied in
    // no particular order
    let with_urls = decode_urls(&tweet.entities.urls)
        .into_iter()
        .fold(tweet.text.clone(), |text, (short, expanded)| {
            text.replace(&short, &expanded)
        });

    let with_mentions = twitter_mentions(&tweet.entities.user_mentions)
        .into_iter()
        .fold(with_urls, |text, (handle, federated)| {
            text.replace(&handle, &federated)
        });

    decode_html_entities(&with_mentions).into_owned()
}
/// Interactive registration of the application against a Mastodon instance
///
/// `host` is the base URL of the instance, `screen_name` the Twitter account the resulting
/// configuration block is generated for.
/// Meant to be run once by hand: every failure ends in a panic.
/// Largely borrowed from the registration example of the `elefren` crate.
pub fn register(host: &str, screen_name: &str) {
    // describe the application to the instance
    let mut app_builder = App::builder();
    app_builder
        .client_name(Cow::from(env!("CARGO_PKG_NAME").to_string()))
        .redirect_uris(Cow::from("urn:ietf:wg:oauth:2.0:oob".to_string()))
        .scopes(Scopes::write_all())
        .website(Cow::from(
            "https://framagit.org/veretcle/scootaloo".to_string(),
        ));
    let app = app_builder.build().expect("Cannot build the app");

    let registration = Registration::new(host)
        .register(app)
        .expect("Cannot build registration object");
    let authorize_url = registration
        .authorize_url()
        .expect("Cannot generate registration URI!");

    // the user authorizes in a browser and pastes the code back on stdin
    println!("Click this link to authorize on Mastodon: {}", authorize_url);
    println!("Paste the returned authorization code: ");
    let mut answer = String::new();
    stdin()
        .read_line(&mut answer)
        .expect("Unable to read back registration code!");

    let mastodon = registration
        .complete(answer.trim())
        .expect("Unable to create access token!");

    // print the credentials as a ready-to-paste configuration block
    let credentials = toml::to_string(&*mastodon).unwrap();
    println!(
        "Please insert the following block at the end of your configuration file:
[mastodon.{}]
twitter_screen_name = \"{}\"
{}",
        screen_name.to_lowercase(),
        screen_name,
        credentials
    );
}
// Unit tests for the helpers that turn tweet entities and tweet text into toot text
#[cfg(test)]
mod tests {
use super::*;
use chrono::prelude::*;
use egg_mode::tweet::TweetEntities;
// a single mention must map `@name` to `@name@twitter.com`
#[test]
fn test_twitter_mentions() {
let mention_entity = MentionEntity {
id: 12345,
range: (1, 3),
name: "Ta Mere l0l".to_string(),
screen_name: "tamerelol".to_string(),
};
let twitter_ums = vec![mention_entity];
let mut expected_mentions = HashMap::new();
expected_mentions.insert(
"@tamerelol".to_string(),
"@tamerelol@twitter.com".to_string(),
);
let decoded_mentions = twitter_mentions(&twitter_ums);
assert_eq!(expected_mentions, decoded_mentions);
}
// two entities share the same short URL, only one of them has an expanded URL:
// the resulting map must hold exactly that one association
#[test]
fn test_decode_urls() {
let url_entity1 = UrlEntity {
display_url: "tamerelol".to_string(),
expanded_url: Some("https://www.nintendojo.fr/dojobar".to_string()),
range: (1, 3),
url: "https://t.me/tamerelol".to_string(),
};
let url_entity2 = UrlEntity {
display_url: "tamerelol".to_string(),
expanded_url: None,
range: (1, 3),
url: "https://t.me/tamerelol".to_string(),
};
let twitter_urls = vec![url_entity1, url_entity2];
let mut expected_urls = HashMap::new();
expected_urls.insert(
"https://t.me/tamerelol".to_string(),
"https://www.nintendojo.fr/dojobar".to_string(),
);
let decoded_urls = decode_urls(&twitter_urls);
assert_eq!(expected_urls, decoded_urls);
}
// full conversion of a tweet carrying one link, one mention and one HTML entity
#[test]
fn test_build_basic_status() {
let t = Tweet {
coordinates: None,
created_at: Utc::now(),
current_user_retweet: None,
display_text_range: None,
entities: TweetEntities {
hashtags: vec![],
symbols: vec![],
urls: vec![
UrlEntity {
display_url: "youtube.com/watch?v=w5TrSa…".to_string(),
expanded_url: Some("https://www.youtube.com/watch?v=w5TrSaoYmZ8".to_string()),
range: (93, 116),
url: "https://t.co/zXw0FfX2Nt".to_string(),
}
],
user_mentions: vec![
MentionEntity {
id: 491500016,
range: (80, 95),
name: "Nintendo France".to_string(),
screen_name: "NintendoFrance".to_string(),
}
],
media: None,
},
extended_entities: None,
favorite_count: 0,
favorited: None,
filter_level: None,
id: 1491541246984306693,
in_reply_to_user_id: None,
in_reply_to_screen_name: None,
in_reply_to_status_id: None,
lang: None,
place: None,
possibly_sensitive: None,
quoted_status: None,
quoted_status_id: None,
retweet_count: 0,
retweeted: None,
retweeted_status: None,
source: None,
text: "Mother 1 &amp; 2 sur le NES/SNES online !\nDispo maintenant. cc @NintendoFrance https://t.co/zXw0FfX2Nt".to_string(),
truncated: false,
user: None,
withheld_copyright: false,
withheld_in_countries: None,
withheld_scope: None,
};
let t_out = build_basic_status(&t);
// expected: `&amp;` decoded, mention suffixed with `@twitter.com`, short link expanded
assert_eq!(&t_out, "Mother 1 & 2 sur le NES/SNES online !\nDispo maintenant. cc @NintendoFrance@twitter.com https://www.youtube.com/watch?v=w5TrSaoYmZ8");
}
}

304
src/state.rs Normal file
View File

@@ -0,0 +1,304 @@
use std::error::Error;
use log::debug;
use rusqlite::{params, Connection, OptionalExtension};
/// One row of the `tweet_to_toot` table, as returned by `read_state` and stored by `write_state`
#[derive(Debug)]
pub struct TweetToToot {
/// value of the `twitter_screen_name` column
pub twitter_screen_name: String,
/// value of the `tweet_id` column
pub tweet_id: u64,
/// value of the `toot_id` column (presumably the toot published for that tweet — confirm
/// against the caller)
pub toot_id: String,
}
/// if None is passed, read the last tweet from DB
/// if a tweet_id is passed, read this particular tweet from DB
pub fn read_state(
conn: &Connection,
n: &str,
s: Option<u64>,
) -> Result<Option<TweetToToot>, Box<dyn Error>> {
debug!("Reading tweet_id {:?}", s);
let query: String = match s {
Some(i) => format!("SELECT * FROM tweet_to_toot WHERE tweet_id = {} and twitter_screen_name = \"{}\"", i, n),
None => format!("SELECT * FROM tweet_to_toot WHERE twitter_screen_name = \"{}\" ORDER BY tweet_id DESC LIMIT 1", n),
};
let mut stmt = conn.prepare(&query)?;
let t = stmt
.query_row([], |row| {
Ok(TweetToToot {
twitter_screen_name: row.get("twitter_screen_name")?,
tweet_id: row.get("tweet_id")?,
toot_id: row.get("toot_id")?,
})
})
.optional()?;
Ok(t)
}
/// Writes last treated tweet id and toot id to the db
pub fn write_state(conn: &Connection, t: TweetToToot) -> Result<(), Box<dyn Error>> {
debug!("Write struct {:?}", t);
conn.execute(
"INSERT INTO tweet_to_toot (twitter_screen_name, tweet_id, toot_id) VALUES (?1, ?2, ?3)",
params![t.twitter_screen_name, t.tweet_id, t.toot_id],
)?;
Ok(())
}
/// Opens the SQLite database found at path `d` and makes sure the `tweet_to_toot` table exists
/// Running it against an already initialized database leaves the existing table alone
pub fn init_db(d: &str) -> Result<(), Box<dyn Error>> {
    const CREATE_TABLE: &str = "CREATE TABLE IF NOT EXISTS tweet_to_toot (
twitter_screen_name TEXT NOT NULL,
tweet_id INTEGER PRIMARY KEY,
toot_id TEXT UNIQUE
)";

    debug!("Initializing DB for Scootaloo");

    // the connection only lives for the duration of the statement
    Connection::open(d)?.execute(CREATE_TABLE, [])?;

    Ok(())
}
/// Migrates the database found at path `d` from the 0.6.x layout to the 0.7.x one
///
/// The migration adds the `twitter_screen_name` column to `tweet_to_toot`; rows that already
/// exist get `s` as their screen name.
/// Running it on an already migrated database is not an error.
///
/// # Errors
/// Any SQLite error other than the column already being there is bubbled up.
pub fn migrate_db(d: &str, s: &str) -> Result<(), Box<dyn Error>> {
    debug!("Migrating DB for Scootaloo");

    let conn = Connection::open(d)?;

    // a DEFAULT clause cannot take a bound parameter, so the value has to be embedded in the
    // statement: use a proper single-quoted SQL string literal with inner quotes doubled
    // rather than a double-quoted token that SQLite may read as an identifier
    let default_name = s.replace('\'', "''");

    let res = conn.execute(
        &format!(
            "ALTER TABLE tweet_to_toot
ADD COLUMN twitter_screen_name TEXT NOT NULL
DEFAULT '{}'",
            default_name
        ),
        [],
    );

    match res {
        Ok(_) => Ok(()),
        // the column is already there: the DB has been migrated before
        Err(e) if e.to_string() == "duplicate column name: twitter_screen_name" => Ok(()),
        Err(e) => Err(e.into()),
    }
}
// Unit tests for the state DB helpers
// every test works on its own SQLite file under /tmp and removes it once done
#[cfg(test)]
mod tests {
use super::*;
use std::{fs::remove_file, path::Path};
// init_db must create the file and a queryable `tweet_to_toot` table
#[test]
fn test_init_db() {
let d = "/tmp/test_init_db.sqlite";
init_db(d).unwrap();
// check that file exist
assert!(Path::new(d).exists());
// open said file
let conn = Connection::open(d).unwrap();
conn.execute("SELECT * from tweet_to_toot;", []).unwrap();
remove_file(d).unwrap();
}
// a second init_db on a populated DB must succeed
#[test]
fn test_init_init_db() {
// init_db fn should be idempotent so lets test that
let d = "/tmp/test_init_init_db.sqlite";
init_db(d).unwrap();
let conn = Connection::open(d).unwrap();
conn.execute(
"INSERT INTO tweet_to_toot (twitter_screen_name, tweet_id, toot_id)
VALUES
('tamerelol', 100, 'A');",
[],
)
.unwrap();
init_db(d).unwrap();
remove_file(d).unwrap();
}
// what write_state stores must be readable back, field for field
#[test]
fn test_write_state() {
let d = "/tmp/test_write_state.sqlite";
init_db(d).unwrap();
let conn = Connection::open(d).unwrap();
let t_in = TweetToToot {
twitter_screen_name: "tamerelol".to_string(),
tweet_id: 123456789,
toot_id: "987654321".to_string(),
};
write_state(&conn, t_in).unwrap();
let mut stmt = conn.prepare("SELECT * FROM tweet_to_toot;").unwrap();
let t_out = stmt
.query_row([], |row| {
Ok(TweetToToot {
twitter_screen_name: row.get("twitter_screen_name").unwrap(),
tweet_id: row.get("tweet_id").unwrap(),
toot_id: row.get("toot_id").unwrap(),
})
})
.unwrap();
assert_eq!(&t_out.twitter_screen_name, "tamerelol");
assert_eq!(t_out.tweet_id, 123456789);
assert_eq!(&t_out.toot_id, "987654321");
remove_file(d).unwrap();
}
// without a tweet id, read_state must return the row with the highest tweet_id
#[test]
fn test_none_to_tweet_id_read_state() {
let d = "/tmp/test_none_to_tweet_id_read_state.sqlite";
init_db(d).unwrap();
let conn = Connection::open(d).unwrap();
conn.execute(
"INSERT INTO tweet_to_toot (twitter_screen_name, tweet_id, toot_id)
VALUES
('tamerelol', 101, 'A'),
('tamerelol', 102, 'B');",
[],
)
.unwrap();
let t_out = read_state(&conn, "tamerelol", None).unwrap().unwrap();
remove_file(d).unwrap();
assert_eq!(t_out.tweet_id, 102);
assert_eq!(t_out.toot_id, "B");
}
// without a tweet id and with an empty table, read_state must return None
#[test]
fn test_none_to_none_read_state() {
let d = "/tmp/test_none_to_none_read_state.sqlite";
init_db(d).unwrap();
let conn = Connection::open(d).unwrap();
let t_out = read_state(&conn, "tamerelol", None).unwrap();
remove_file(d).unwrap();
assert!(t_out.is_none());
}
// asking for a tweet id that is not in the table must return None
#[test]
fn test_tweet_id_to_none_read_state() {
let d = "/tmp/test_tweet_id_to_none_read_state.sqlite";
init_db(d).unwrap();
let conn = Connection::open(d).unwrap();
conn.execute(
"INSERT INTO tweet_to_toot (twitter_screen_name, tweet_id, toot_id)
VALUES
('tamerelol', 100, 'A');",
[],
)
.unwrap();
let t_out = read_state(&conn, "tamerelol", Some(101)).unwrap();
remove_file(d).unwrap();
assert!(t_out.is_none());
}
// asking for a tweet id that is in the table must return that row
#[test]
fn test_tweet_id_to_tweet_id_read_state() {
let d = "/tmp/test_tweet_id_to_tweet_id_read_state.sqlite";
init_db(d).unwrap();
let conn = Connection::open(d).unwrap();
conn.execute(
"INSERT INTO tweet_to_toot (twitter_screen_name, tweet_id, toot_id)
VALUES
('tamerelol', 100, 'A');",
[],
)
.unwrap();
let t_out = read_state(&conn, "tamerelol", Some(100)).unwrap().unwrap();
remove_file(d).unwrap();
assert_eq!(t_out.tweet_id, 100);
assert_eq!(t_out.toot_id, "A");
}
// migrating a table created without the screen name column must add it
#[test]
fn test_migrate_db_add_column() {
let d = "/tmp/test_migrate_db_add_column.sqlite";
let conn = Connection::open(d).unwrap();
conn.execute(
"CREATE TABLE IF NOT EXISTS tweet_to_toot (
tweet_id INTEGER PRIMARY KEY,
toot_id TEXT UNIQUE
)",
[],
)
.unwrap();
migrate_db(d, "tamerelol").unwrap();
let mut stmt = conn.prepare("PRAGMA table_info(tweet_to_toot);").unwrap();
let mut t = stmt.query([]).unwrap();
// the row whose first field is 2 must carry the new column name in its second field
// NOTE(review): nothing is asserted if no such row exists — confirm this is intended
while let Some(row) = t.next().unwrap() {
if row.get::<usize, u8>(0).unwrap() == 2 {
assert_eq!(
row.get::<usize, String>(1).unwrap(),
"twitter_screen_name".to_string()
);
}
}
remove_file(d).unwrap();
}
// migrating a DB that already has the column must succeed as well
#[test]
fn test_migrate_db_no_add_column() {
let d = "/tmp/test_migrate_db_no_add_column.sqlite";
init_db(d).unwrap();
migrate_db(d, "tamerelol").unwrap();
remove_file(d).unwrap();
}
}

181
src/twitter.rs Normal file
View File

@@ -0,0 +1,181 @@
use crate::config::TwitterConfig;
use crate::util::cache_media;
use crate::ScootalooError;
use egg_mode::{
entities::{MediaEntity, MediaType},
tweet::{user_timeline, Tweet},
user::UserID,
KeyPair, Token,
};
use std::error::Error;
/// Builds the Twitter access token out of the consumer and access key pairs found in the
/// configuration
pub fn get_oauth2_token(config: &TwitterConfig) -> Token {
    Token::Access {
        consumer: KeyPair::new(
            config.consumer_key.to_owned(),
            config.consumer_secret.to_owned(),
        ),
        access: KeyPair::new(
            config.access_key.to_owned(),
            config.access_secret.to_owned(),
        ),
    }
}
/// Fetches one page of the timeline of `screen_name`, bounded by the tweet id `lid` when one
/// is given
pub async fn get_user_timeline(
    screen_name: &str,
    token: &Token,
    lid: Option<u64>,
) -> Result<Vec<Tweet>, Box<dyn Error>> {
    let account = UserID::from(screen_name.to_owned());

    // 200 is the biggest page size Twitter allows
    // NOTE(review): the two flags presumably mean "with replies" and "without retweets" —
    // confirm against the egg_mode documentation
    let timeline = user_timeline(account, true, false, token).with_page_size(200);

    let (_, feed) = timeline.older(lid).await?;

    Ok(feed.to_vec())
}
/// Downloads a single tweet media into the cache directory `t` and returns the path of the
/// local copy
///
/// Photos are fetched from their https URL; for any other media type, the first `video/mp4`
/// variant is fetched. A non-photo media with no video information or no mp4 variant is an error.
pub async fn get_tweet_media(m: &MediaEntity, t: &str) -> Result<String, Box<dyn Error>> {
    // photos are the simple case: the media URL is the file itself
    if matches!(m.media_type, MediaType::Photo) {
        return cache_media(&m.media_url_https, t).await;
    }

    // everything else needs a video description…
    let video_info = m.video_info.as_ref().ok_or_else(|| {
        ScootalooError::new(&format!(
            "Media Type for {} is video but does not contain any video_info",
            &m.url
        ))
    })?;

    // …holding an mp4 rendition
    let mp4 = video_info
        .variants
        .iter()
        .find(|variant| variant.content_type == "video/mp4")
        .ok_or_else(|| {
            ScootalooError::new(&format!(
                "Media Type for {} is video but no mp4 file URL is available",
                &m.url
            ))
        })?;

    cache_media(&mp4.url, t).await
}
// Unit test for get_tweet_media
// NOTE(review): get_tweet_media goes through cache_media, which performs real HTTP requests,
// so this test presumably needs network access — confirm
#[cfg(test)]
mod tests {
use super::*;
use egg_mode::entities::{
MediaSize, MediaSizes,
MediaType::{Gif, Photo},
ResizeMode::Crop,
ResizeMode::Fit,
VideoInfo, VideoVariant,
};
use std::fs::remove_dir_all;
// cache directory used by the test, removed once the test is over
const TMP_DIR: &'static str = "/tmp/scootaloo_get_tweet_media_test";
// one photo and one GIF (carrying a single mp4 variant) must both end up in TMP_DIR under
// the last path segment of the URL they were fetched from
#[tokio::test]
async fn test_get_tweet_media() {
let m_photo = MediaEntity {
display_url: "pic.twitter.com/sHrwmP69Yv".to_string(),
expanded_url: "https://twitter.com/NintendojoFR/status/1555473821121056771/photo/1"
.to_string(),
id: 1555473771280080896,
range: (91, 114),
media_url: "http://pbs.twimg.com/media/FZYnJ1qWIAAReHt.jpg".to_string(),
media_url_https: "https://pbs.twimg.com/media/FZYnJ1qWIAAReHt.jpg"
.to_string(),
sizes: MediaSizes {
thumb: MediaSize {
w: 150,
h: 150,
resize: Crop
},
small: MediaSize {
w: 680,
h: 510,
resize: Fit
},
medium: MediaSize {
w: 1200,
h: 900,
resize: Fit
},
large: MediaSize {
w: 1280,
h: 960,
resize: Fit
}
},
source_status_id: None,
media_type: Photo,
url: "https://t.co/sHrwmP69Yv".to_string(),
video_info: None,
ext_alt_text: Some("Le menu «\u{a0}Classes » du jeu vidéo Xenoblade Chronicles 3 (Switch). Laffinité du personnage pour la classe est notée par quatre lettres : C, A, C, A (caca)."
.to_string())
};
let m_video = MediaEntity {
display_url: "pic.twitter.com/xDln0RrkjU".to_string(),
expanded_url: "https://twitter.com/NintendojoFR/status/1551822196833673218/photo/1"
.to_string(),
id: 1551822189711790081,
range: (275, 298),
media_url: "http://pbs.twimg.com/tweet_video_thumb/FYkuD0RXEAE-iDx.jpg".to_string(),
media_url_https: "https://pbs.twimg.com/tweet_video_thumb/FYkuD0RXEAE-iDx.jpg"
.to_string(),
sizes: MediaSizes {
thumb: MediaSize {
w: 150,
h: 150,
resize: Crop,
},
small: MediaSize {
w: 320,
h: 240,
resize: Fit,
},
medium: MediaSize {
w: 320,
h: 240,
resize: Fit,
},
large: MediaSize {
w: 320,
h: 240,
resize: Fit,
},
},
source_status_id: None,
media_type: Gif,
url: "https://t.co/xDln0RrkjU".to_string(),
video_info: Some(VideoInfo {
aspect_ratio: (4, 3),
duration_millis: None,
variants: vec![VideoVariant {
bitrate: Some(0),
content_type: "video/mp4".parse::<mime::Mime>().unwrap(),
url: "https://video.twimg.com/tweet_video/FYkuD0RXEAE-iDx.mp4".to_string(),
}],
}),
ext_alt_text: Some("Scared Nintendo GIF".to_string()),
};
let tweet_media_photo = get_tweet_media(&m_photo, TMP_DIR).await.unwrap();
let tweet_media_video = get_tweet_media(&m_video, TMP_DIR).await.unwrap();
// the photo keeps the file name of its https media URL
assert_eq!(
tweet_media_photo,
format!("{}/FZYnJ1qWIAAReHt.jpg", TMP_DIR)
);
// the GIF is stored under the file name of its mp4 variant, not of its thumbnail
assert_eq!(
tweet_media_video,
format!("{}/FYkuD0RXEAE-iDx.mp4", TMP_DIR)
);
remove_dir_all(TMP_DIR).unwrap();
}
}

136
src/util.rs Normal file
View File

@@ -0,0 +1,136 @@
use crate::{twitter::get_tweet_media, ScootalooError};
use std::{borrow::Cow, error::Error};
use egg_mode::tweet::Tweet;
use elefren::prelude::*;
use log::{error, info, warn};
use reqwest::Url;
use tokio::{
fs::{create_dir_all, remove_file, File},
io::copy,
};
use futures::{stream, stream::StreamExt};
/// Uploads every media attached to a tweet to Mastodon
///
/// Returns the URL of the last media found in the tweet (an empty string when there is none)
/// along with the ids of the medias that were successfully uploaded.
/// A media that cannot be downloaded or uploaded is logged and skipped.
pub async fn generate_media_ids(
    tweet: &Tweet,
    cache_path: &str,
    mastodon: &Mastodon,
) -> (String, Vec<String>) {
    let mut media_url = String::new();
    let mut media_ids: Vec<String> = Vec::new();

    match &tweet.extended_entities {
        Some(entities) => {
            info!("{} medias in tweet", entities.media.len());

            let mut uploads = stream::iter(entities.media.clone())
                .map(|media| {
                    // remember the media url
                    media_url = media.url.clone();

                    // the spawned task needs to own everything it touches
                    let cache_path = cache_path.to_owned();
                    let mastodon = mastodon.clone();

                    tokio::task::spawn(async move {
                        info!("Start treating {}", media.media_url_https);

                        // download the media…
                        let local_path = get_tweet_media(&media, &cache_path).await?;

                        // …push it to Mastodon…
                        let uploaded = mastodon.media(Cow::from(local_path.to_owned()))?;

                        // …and drop the local copy: failing to do so is harmless as
                        // cache_media overwrites whatever is already there
                        remove_file(&local_path).await.ok();

                        Ok::<String, ScootalooError>(uploaded.id)
                    })
                })
                // a tweet holds four medias at most and their order must be kept
                .buffered(4);

            while let Some(joined) = uploads.next().await {
                match joined {
                    Err(e) => error!("Something went wrong when joining the main thread: {}", e),
                    Ok(Err(e)) => warn!("Cannot treat media: {}", e),
                    Ok(Ok(id)) => media_ids.push(id),
                }
            }
        }
        None => {
            info!("No media in tweet");
        }
    }

    // get rid of any empty id
    media_ids.retain(|id| !id.is_empty());

    (media_url, media_ids)
}
/// Downloads the media found at URL `u` into the directory `t` and returns the path of the
/// local file
///
/// The local file is named after the last path segment of the URL; the directory is created
/// when it does not exist.
///
/// # Errors
/// Fails when the URL cannot be parsed or has no path segment to derive a file name from,
/// when the server answers with an HTTP error status, or on any network / file system error.
pub async fn cache_media(u: &str, t: &str) -> Result<String, Box<dyn Error>> {
    // work out the destination first: a URL we cannot derive a file name from is rejected
    // before any directory is created or any request is sent
    let url = Url::parse(u)?;
    let dest_filename = url
        .path_segments()
        .and_then(|segments| segments.last())
        .ok_or_else(|| {
            ScootalooError::new(&format!(
                "Cannot determine the destination filename for {}",
                u
            ))
        })?;
    let dest_filepath = format!("{}/{}", t, dest_filename);

    // create dir
    create_dir_all(t).await?;

    // get file: a 4xx/5xx answer is an error, its body must not be cached (and later
    // uploaded) as if it were the media
    let mut response = reqwest::get(u).await?.error_for_status()?;

    // create local file and fill it chunk by chunk
    let mut dest_file = File::create(&dest_filepath).await?;
    while let Some(chunk) = response.chunk().await? {
        copy(&mut &*chunk, &mut dest_file).await?;
    }

    Ok(dest_filepath)
}
// Unit test for cache_media
#[cfg(test)]
mod tests {
use super::*;
use std::{fs::remove_dir_all, path::Path};
// cache directory used by the test, removed once the test is over
const TMP_DIR: &'static str = "/tmp/scootaloo_test";
// fetches a real URL (cache_media goes through reqwest) and checks that the returned path
// exists on disk
#[tokio::test]
async fn test_cache_media() {
let dest = cache_media(
"https://forum.nintendojo.fr/styles/prosilver/theme/images/ndfr_casual.png",
TMP_DIR,
)
.await
.unwrap();
assert!(Path::new(&dest).exists());
remove_dir_all(TMP_DIR).unwrap();
}
}

1
tests/bad_test.toml Normal file
View File

@@ -0,0 +1 @@
blah

66
tests/config.rs Normal file
View File

@@ -0,0 +1,66 @@
use scootaloo::parse_toml;
/// A well-formed configuration file must be parsed into the expected values, section by section
#[test]
fn test_parse_good_toml() {
    let config = parse_toml("tests/good_test.toml");

    // [scootaloo]
    let scootaloo = &config.scootaloo;
    assert_eq!(scootaloo.db_path, "/var/random/scootaloo.sqlite");
    assert_eq!(scootaloo.cache_path, "/tmp/scootaloo");

    // [twitter]
    let twitter = &config.twitter;
    assert_eq!(twitter.consumer_key, "rand consumer key");
    assert_eq!(twitter.consumer_secret, "secret");
    assert_eq!(twitter.access_key, "rand access key");
    assert_eq!(twitter.access_secret, "super secret");

    // [mastodon.tamerelol]
    let masto = config.mastodon.get("tamerelol").unwrap();
    assert_eq!(&masto.twitter_screen_name, "tamerelol");
    assert_eq!(&masto.base, "https://m.nintendojo.fr");
    assert_eq!(&masto.client_id, "rand client id");
    assert_eq!(&masto.client_secret, "secret");
    assert_eq!(&masto.redirect, "urn:ietf:wg:oauth:2.0:oob");
    assert_eq!(&masto.token, "super secret");
}
/// Pointing parse_toml at a file that does not exist must panic with an explicit message
#[test]
#[should_panic(
    expected = "Cannot open config file tests/no_file.toml: No such file or directory (os error 2)"
)]
fn test_parse_no_toml() {
    // only the panic matters, the returned value is irrelevant
    let _ = parse_toml("tests/no_file.toml");
}
/// Feeding parse_toml a file that is not valid TOML must panic with the parser diagnostic
#[test]
#[should_panic(
    expected = "Cannot parse TOML file tests/bad_test.toml: expected an equals, found a newline at line 1 column 5"
)]
fn test_parse_bad_toml() {
    // only the panic matters, the returned value is irrelevant
    let _ = parse_toml("tests/bad_test.toml");
}

19
tests/good_test.toml Normal file
View File

@@ -0,0 +1,19 @@
[scootaloo]
db_path="/var/random/scootaloo.sqlite"
cache_path="/tmp/scootaloo"
[twitter]
consumer_key="rand consumer key"
consumer_secret="secret"
access_key="rand access key"
access_secret="super secret"
[mastodon]
[mastodon.tamerelol]
twitter_screen_name="tamerelol"
base = "https://m.nintendojo.fr"
client_id = "rand client id"
client_secret = "secret"
redirect = "urn:ietf:wg:oauth:2.0:oob"
token = "super secret"