From 080218f3855978aaa47349a0dfa6c6ad39b54fa7 Mon Sep 17 00:00:00 2001 From: VC Date: Fri, 22 Apr 2022 13:36:02 +0200 Subject: [PATCH 01/12] refactor: make everything a little more modular --- Cargo.lock | 81 +++++++++++- Cargo.toml | 3 +- README.md | 9 +- src/config.rs | 51 ++++++++ src/error.rs | 27 ++++ src/lib.rs | 337 ++++-------------------------------------------- src/main.rs | 29 ++++- src/mastodon.rs | 106 +++++++++++++++ src/state.rs | 38 ++++++ src/twitter.rs | 83 ++++++++++++ src/util.rs | 37 ++++++ 11 files changed, 478 insertions(+), 323 deletions(-) create mode 100644 src/config.rs create mode 100644 src/error.rs create mode 100644 src/mastodon.rs create mode 100644 src/state.rs create mode 100644 src/twitter.rs create mode 100644 src/util.rs diff --git a/Cargo.lock b/Cargo.lock index 53f7909..3924ec5 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -17,6 +17,17 @@ version = "1.0.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "f26201604c87b1e01bd3d98f8d5d9a8fcbb815e8cedb41ffccbeb4bf593a35fe" +[[package]] +name = "ahash" +version = "0.7.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "fcb51a0695d8f838b1ee009b3fbf66bda078cd64590202a864a8f3e8c4315c47" +dependencies = [ + "getrandom 0.2.6", + "once_cell", + "version_check", +] + [[package]] name = "aho-corasick" version = "0.7.15" @@ -550,6 +561,18 @@ version = "0.1.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "e88a8acf291dafb59c2d96e8f59828f3838bb1a70398823ade51a84de6a6deed" +[[package]] +name = "fallible-iterator" +version = "0.2.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "4443176a9f2c162692bd3d352d745ef9413eec5782a80d8fd6f8a1ac692a07f7" + +[[package]] +name = "fallible-streaming-iterator" +version = "0.1.9" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7360491ce676a36bf9bb3c56c1aa791658183a54d2744120f27285738d90465a" + [[package]] name = "flate2" version = "1.0.20" @@ -755,9 +778,9 @@ dependencies = [ [[package]] name = "getrandom" -version = "0.2.2" +version = "0.2.6" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c9495705279e7140bf035dde1f6e750c162df8b625267cd52cc44e0b156732c8" +checksum = "9be70c98951c83b8d2f8f60d7065fa6d5146873094452a1008da8c2f1e4205ad" dependencies = [ "cfg-if 1.0.0", "libc", @@ -819,6 +842,24 @@ version = "0.9.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "d7afe4a420e3fe79967a00898cc1f4db7c8a49a9333a29f8a4bd76a253d5cd04" +[[package]] +name = "hashbrown" +version = "0.11.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ab5ef0d4909ef3724cc8cce6ccc8572c5c817592e9285f5464f8e86f8bd3726e" +dependencies = [ + "ahash", +] + +[[package]] +name = "hashlink" +version = "0.7.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7249a3129cbc1ffccd74857f81464a323a152173cdb134e0fd81bc803b29facf" +dependencies = [ + "hashbrown 0.11.2", +] + [[package]] name = "hermit-abi" version = "0.1.18" @@ -1027,7 +1068,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "824845a0bf897a9042383849b02c1bc219c2383772efcd5c6f9766fa4b81aef3" dependencies = [ "autocfg 1.0.1", - "hashbrown", + "hashbrown 0.9.1", ] [[package]] @@ -1123,6 +1164,16 @@ version = "0.2.124" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "21a41fed9d98f27ab1c6d161da622a4fa35e8a54a8adc24bbf3ddd0ef70b0e50" +[[package]] +name = "libsqlite3-sys" +version = "0.24.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "898745e570c7d0453cc1fbc4a701eb6c662ed54e8fec8b7d14be137ebeeb9d14" +dependencies = [ + "pkg-config", + "vcpkg", +] + [[package]] name = "lock_api" version = "0.3.4" @@ -1340,9 +1391,9 @@ checksum = "a9a7ab5d64814df0fe4a4b5ead45ed6c5f181ee3ff04ba344313a6c80446c5d4" [[package]] name = "once_cell" -version = "1.7.2" +version = "1.10.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "af8b08b04175473088b46763e51ee54da5f9a164bc162f615b91bc179dbf15a3" +checksum = "87f3e037eac156d1775da914196f0f37741a274155e34a0b7e427c35d2a2ecb9" [[package]] name = "opaque-debug" @@ -1698,7 +1749,7 @@ version = "0.6.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "34cf66eb183df1c5876e2dcf6b13d57340741e8dc255b48e40a26de954d06ae7" dependencies = [ - "getrandom 0.2.2", + "getrandom 0.2.6", ] [[package]] @@ -1899,6 +1950,21 @@ dependencies = [ "winreg 0.7.0", ] +[[package]] +name = "rusqlite" +version = "0.27.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "85127183a999f7db96d1a976a309eebbfb6ea3b0b400ddd8340190129de6eb7a" +dependencies = [ + "bitflags", + "fallible-iterator", + "fallible-streaming-iterator", + "hashlink", + "libsqlite3-sys", + "memchr", + "smallvec 1.6.1", +] + [[package]] name = "rustc-demangle" version = "0.1.18" @@ -1947,7 +2013,7 @@ dependencies = [ [[package]] name = "scootaloo" -version = "0.4.2" +version = "0.5.0" dependencies = [ "clap", "egg-mode", @@ -1956,6 +2022,7 @@ dependencies = [ "htmlescape", "log", "reqwest 0.11.3", + "rusqlite", "serde", "simple_logger", "tokio 1.5.0", diff --git a/Cargo.toml b/Cargo.toml index 724e30f..2aefd55 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -1,6 +1,6 @@ [package] name = "scootaloo" -version = "0.4.2" +version = "0.5.0" authors = ["VC "] edition = "2021" @@ -12,6 +12,7 @@ toml = "^0.5" clap = "^2.34" futures = "^0.3" egg-mode = "^0.16" +rusqlite = "^0.27" tokio = { version = "1", features = ["full"]} elefren = "^0.22" htmlescape = "^0.3" diff --git a/README.md b/README.md index 538ac71..f6f53ed 100644 --- a/README.md +++ b/README.md @@ -7,7 +7,7 @@ It: If any of the last steps failed, the Toot gets published with the exact same text as the Tweet. -RT are excluded, replies are included.but only the source threads are copied, not the actual replies to other Twitter users. +RT are excluded, replies are included when considered part of a thread (reply to self), not the actual replies to other Twitter users. # Usage @@ -16,7 +16,7 @@ First up, create a configuration file (default path is `/usr/local/etc/scootaloo ```toml [scootaloo] -last_tweet_path="/usr/local/etc/last_tweet" ## file containing the last tweet id received, must be writable +db_path="/var/lib/scootaloo/scootaloo.sqlite" ## file containing the SQLite Tweet corresponding Toot DB, must be writeable cache_path="/tmp/scootaloo" ## a dir where the temporary files will be download, must be writeable [twitter] @@ -29,6 +29,11 @@ access_key="MYACCESSKEY" access_secret="MYACCESSSECRET" ``` +Then run the command with the `init` subcommand to initiate the DB: +``` +scootaloo init +``` + Then run the command with the `register` subcommand: ```sh scootaloo register --host https://m.nintendojo.fr diff --git a/src/config.rs b/src/config.rs new file mode 100644 index 0000000..d0e2881 --- /dev/null +++ b/src/config.rs @@ -0,0 +1,51 @@ +// std +use std::fs::read_to_string; + +// toml +use serde::Deserialize; + +/// General configuration Struct +#[derive(Debug, Deserialize)] +pub struct Config { + pub twitter: TwitterConfig, + pub mastodon: MastodonConfig, + pub scootaloo: ScootalooConfig, +} + +#[derive(Debug, Deserialize)] +pub struct TwitterConfig { + pub username: String, + pub consumer_key: String, + pub consumer_secret: String, + pub access_key: String, + pub access_secret: String, +} + +#[derive(Debug, Deserialize)] +pub struct MastodonConfig { + pub base: String, + pub client_id: String, + pub client_secret: String, + pub redirect: String, + pub token: String, +} + +#[derive(Debug, Deserialize)] +pub struct ScootalooConfig { + pub db_path: String, + pub cache_path: String, +} + +/// Parses the TOML file into a Config Struct +pub fn parse_toml(toml_file: &str) -> Config { + let toml_config = read_to_string(toml_file).unwrap_or_else(|e| + panic!("Cannot open config file {}: {}", toml_file, e) + ); + + let config: Config = toml::from_str(&toml_config).unwrap_or_else(|e| + panic!("Cannot parse TOML file {}: {}", toml_file, e) + ); + + config +} + diff --git a/src/error.rs b/src/error.rs new file mode 100644 index 0000000..015294c --- /dev/null +++ b/src/error.rs @@ -0,0 +1,27 @@ +use std::fmt; + +#[derive(Debug)] +pub struct ScootalooError { + details: String, +} + +impl ScootalooError { + pub fn new(msg: &str) -> ScootalooError { + ScootalooError { + details: String::from(msg), + } + } +} + +impl fmt::Display for ScootalooError { + fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { + write!(f, "{}", self.details) + } +} + +impl std::error::Error for ScootalooError { + fn description(&self) -> &str { + &self.details + } +} + diff --git a/src/lib.rs b/src/lib.rs index 810e944..c2b2e9d 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -1,334 +1,53 @@ +// auto-imports +mod error; +use error::ScootalooError; + +mod config; +use config::Config; +pub use config::parse_toml; + +mod mastodon; +use mastodon::{get_mastodon_token, build_basic_status}; +pub use mastodon::register; + +mod twitter; +use twitter::*; + +mod util; + +mod state; +use state::{read_state, write_state}; +pub use state::init_db; + // std -use std::{ - borrow::Cow, - collections::HashMap, - io::stdin, - fmt, - fs::{read_to_string, write}, - error::Error, -}; +use std::borrow::Cow; -// toml -use serde::Deserialize; - -// egg-mode -use egg_mode::{ - Token, - KeyPair, - entities::{UrlEntity, MediaEntity, MentionEntity, MediaType}, - user::UserID, - tweet::{ - Tweet, - user_timeline, - }, -}; +// tokio +use tokio::fs::remove_file; // elefren use elefren::{ prelude::*, - apps::App, status_builder::StatusBuilder, - scopes::Scopes, }; -// reqwest -use reqwest::Url; - -// tokio -use tokio::{ - io::copy, - fs::{File, create_dir_all, remove_file}, -}; - -// htmlescape -use htmlescape::decode_html; - // log use log::{info, warn, error, debug}; -/********** - * Generic usage functions -***********/ -/* - * Those functions are related to the Twitter side of things - */ -/// Reads last tweet id from a file -fn read_state(s: &str) -> Option { - let state = read_to_string(s); - - if let Ok(s) = state { - debug!("Last Tweet ID (from file): {}", &s); - return s.parse::().ok(); - } - - None -} - -/// Writes last treated tweet id to a file -fn write_state(f: &str, s: u64) -> Result<(), std::io::Error> { - write(f, format!("{}", s)) -} - -/// Gets Twitter oauth2 token -fn get_oauth2_token(config: &Config) -> Token { - let con_token = KeyPair::new(String::from(&config.twitter.consumer_key), String::from(&config.twitter.consumer_secret)); - let access_token = KeyPair::new(String::from(&config.twitter.access_key), String::from(&config.twitter.access_secret)); - - Token::Access { - consumer: con_token, - access: access_token, - } -} - -/// Gets Twitter user timeline -async fn get_user_timeline(config: &Config, token: Token, lid: Option) -> Result, Box> { - // fix the page size to 200 as it is the maximum Twitter authorizes - let (_, feed) = user_timeline(UserID::from(String::from(&config.twitter.username)), true, false, &token) - .with_page_size(200) - .older(lid) - .await?; - - Ok(feed.to_vec()) -} - -/// Decodes urls from UrlEntities -fn decode_urls(urls: &Vec) -> HashMap { - let mut decoded_urls = HashMap::new(); - - for url in urls { - if url.expanded_url.is_some() { - // unwrap is safe here as we just verified that there is something inside expanded_url - decoded_urls.insert(String::from(&url.url), String::from(url.expanded_url.as_deref().unwrap())); - } - } - - decoded_urls -} - -/// Decodes the Twitter mention to something that will make sense once Twitter has joined the -/// Fediverse -fn twitter_mentions(ums: &Vec) -> HashMap { - let mut decoded_mentions = HashMap::new(); - - for um in ums { - decoded_mentions.insert(format!("@{}", um.screen_name), format!("@{}@twitter.com", um.screen_name)); - } - - decoded_mentions -} - -/// Retrieves a single media from a tweet and store it in a temporary file -async fn get_tweet_media(m: &MediaEntity, t: &str) -> Result> { - match m.media_type { - MediaType::Photo => { - return cache_media(&m.media_url_https, t).await; - }, - _ => { - match &m.video_info { - Some(v) => { - for variant in &v.variants { - if variant.content_type == "video/mp4" { - return cache_media(&variant.url, t).await; - } - } - return Err(ScootalooError::new(&format!("Media Type for {} is video but no mp4 file URL is available", &m.url)).into()); - }, - None => { - return Err(ScootalooError::new(&format!("Media Type for {} is video but does not contain any video_info", &m.url)).into()); - }, - } - }, - }; -} - -/* - * Those functions are related to the Mastodon side of things - */ -/// Gets Mastodon Data -fn get_mastodon_token(masto: &MastodonConfig) -> Mastodon { - let data = Data { - base: Cow::from(String::from(&masto.base)), - client_id: Cow::from(String::from(&masto.client_id)), - client_secret: Cow::from(String::from(&masto.client_secret)), - redirect: Cow::from(String::from(&masto.redirect)), - token: Cow::from(String::from(&masto.token)), - }; - - Mastodon::from(data) -} - -/// Builds toot text from tweet -fn build_basic_status(tweet: &Tweet) -> Result> { - let mut toot = String::from(&tweet.text); - - let decoded_urls = decode_urls(&tweet.entities.urls); - - for decoded_url in decoded_urls { - toot = toot.replace(&decoded_url.0, &decoded_url.1); - } - - let decoded_mentions = twitter_mentions(&tweet.entities.user_mentions); - - for decoded_mention in decoded_mentions { - toot = toot.replace(&decoded_mention.0, &decoded_mention.1); - } - - if let Ok(t) = decode_html(&toot) { - toot = t; - } - - Ok(toot) -} - -/* - * Generic private functions - */ -/// Gets and caches Twitter Media inside the determined temp dir -async fn cache_media(u: &str, t: &str) -> Result> { - // create dir - create_dir_all(t).await?; - - // get file - let mut response = reqwest::get(u).await?; - - // create local file - let url = Url::parse(u)?; - let dest_filename = url.path_segments().ok_or_else(|| ScootalooError::new(&format!("Cannot determine the destination filename for {}", u)))? - .last().ok_or_else(|| ScootalooError::new(&format!("Cannot determine the destination filename for {}", u)))?; - - let dest_filepath = format!("{}/{}", t, dest_filename); - - let mut dest_file = File::create(&dest_filepath).await?; - - while let Some(chunk) = response.chunk().await? { - copy(&mut &*chunk, &mut dest_file).await?; - } - - Ok(dest_filepath) -} - -/********** - * local error handler -**********/ -#[derive(Debug)] -struct ScootalooError { - details: String, -} - -impl ScootalooError { - fn new(msg: &str) -> ScootalooError { - ScootalooError { - details: String::from(msg), - } - } -} - -impl fmt::Display for ScootalooError { - fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { - write!(f, "{}", self.details) - } -} - -impl std::error::Error for ScootalooError { - fn description(&self) -> &str { - &self.details - } -} - -/********** - * Config structure -***********/ -/// General configuration Struct -#[derive(Debug, Deserialize)] -pub struct Config { - twitter: TwitterConfig, - mastodon: MastodonConfig, - scootaloo: ScootalooConfig, -} - -#[derive(Debug, Deserialize)] -struct TwitterConfig { - username: String, - consumer_key: String, - consumer_secret: String, - access_key: String, - access_secret: String, -} - -#[derive(Debug, Deserialize)] -struct MastodonConfig { - base: String, - client_id: String, - client_secret: String, - redirect: String, - token: String, -} - -#[derive(Debug, Deserialize)] -struct ScootalooConfig { - last_tweet_path: String, - cache_path: String, -} - -/********* - * Main functions -*********/ -/// Parses the TOML file into a Config Struct -pub fn parse_toml(toml_file: &str) -> Config { - let toml_config = read_to_string(toml_file).unwrap_or_else(|e| - panic!("Cannot open config file {}: {}", toml_file, e) - ); - - let config: Config = toml::from_str(&toml_config).unwrap_or_else(|e| - panic!("Cannot parse TOML file {}: {}", toml_file, e) - ); - - config -} - -/// Generic register function -/// As this function is supposed to be run only once, it will panic for every error it encounters -/// Most of this function is a direct copy/paste of the official `elefren` crate -pub fn register(host: &str) { - let mut builder = App::builder(); - builder.client_name(Cow::from(String::from(env!("CARGO_PKG_NAME")))) - .redirect_uris(Cow::from(String::from("urn:ietf:wg:oauth:2.0:oob"))) - .scopes(Scopes::write_all()) - .website(Cow::from(String::from("https://framagit.org/veretcle/scootaloo"))); - - let app = builder.build().expect("Cannot build the app"); - - let registration = Registration::new(host).register(app).expect("Cannot build registration object"); - let url = registration.authorize_url().expect("Cannot generate registration URI!"); - - println!("Click this link to authorize on Mastodon: {}", url); - println!("Paste the returned authorization code: "); - - let mut input = String::new(); - stdin().read_line(&mut input).expect("Unable to read back registration code!"); - - let code = input.trim(); - let mastodon = registration.complete(code).expect("Unable to create access token!"); - - let toml = toml::to_string(&*mastodon).unwrap(); - - println!("Please insert the following block at the end of your configuration file:\n[mastodon]\n{}", toml); -} - /// This is where the magic happens #[tokio::main] pub async fn run(config: Config) { // retrieve the last tweet ID for the username - let last_tweet_id = read_state(&config.scootaloo.last_tweet_path); + let last_tweet_id = read_state(&config.scootaloo.db_path); // get OAuth2 token - let token = get_oauth2_token(&config); + let token = get_oauth2_token(&config.twitter); // get Mastodon instance let mastodon = get_mastodon_token(&config.mastodon); // get user timeline feed (Vec) - let mut feed = get_user_timeline(&config, token, last_tweet_id) + let mut feed = get_user_timeline(&config.twitter, token, last_tweet_id) .await .unwrap_or_else(|e| panic!("Something went wrong when trying to retrieve {}’s timeline: {}", &config.twitter.username, e) @@ -411,7 +130,7 @@ pub async fn run(config: Config) { // last_tweet gathered not to be written // write the current state (tweet ID) to avoid copying it another time - write_state(&config.scootaloo.last_tweet_path, tweet.id).unwrap_or_else(|e| + write_state(&config.scootaloo.db_path, tweet.id).unwrap_or_else(|e| panic!("Can’t write the last tweet retrieved: {}", e) ); } diff --git a/src/main.rs b/src/main.rs index 529faa7..77bb07d 100644 --- a/src/main.rs +++ b/src/main.rs @@ -11,6 +11,8 @@ use simple_logger::SimpleLogger; // std use std::str::FromStr; +const DEFAULT_CONFIG_PATH: &'static str = "/usr/local/etc/scootaloo.toml"; + fn main() { let matches = App::new(env!("CARGO_PKG_NAME")) .version(env!("CARGO_PKG_VERSION")) @@ -40,10 +42,29 @@ fn main() { .takes_value(true) .required(true) .display_order(1))) + .subcommand(SubCommand::with_name("init") + .version(env!("CARGO_PKG_VERSION")) + .about("Command to init Scootaloo DB") + .arg(Arg::with_name("config") + .short("c") + .long("config") + .value_name("CONFIG_FILE") + .help("TOML config file for scootaloo (default /usr/local/etc/scootaloo.toml") + .takes_value(true) + .display_order(1))) .get_matches(); - if let Some(matches) = matches.subcommand_matches("register") { - register(matches.value_of("host").unwrap()); - return; + + match matches.subcommand() { + ("register", Some(sub_m)) => { + register(sub_m.value_of("host").unwrap()); + return; + }, + ("init", Some(sub_m)) => { + let config = parse_toml(sub_m.value_of("config").unwrap_or(DEFAULT_CONFIG_PATH)); + init_db(&config).unwrap(); + return; + }, + _ => (), } if matches.is_present("log_level") { @@ -56,7 +77,7 @@ fn main() { }; } - let config = parse_toml(matches.value_of("config").unwrap_or("/usr/local/etc/scootaloo.toml")); + let config = parse_toml(matches.value_of("config").unwrap_or(DEFAULT_CONFIG_PATH)); run(config); } diff --git a/src/mastodon.rs b/src/mastodon.rs new file mode 100644 index 0000000..3f93e9d --- /dev/null +++ b/src/mastodon.rs @@ -0,0 +1,106 @@ +// auto imports +use crate::config::MastodonConfig; +use crate::twitter::decode_urls; + +// std +use std::{ + borrow::Cow, + error::Error, + collections::HashMap, + io::stdin, +}; + +// htmlescape +use htmlescape::decode_html; + +// egg-mode +use egg_mode::{ + tweet::Tweet, + entities::MentionEntity, +}; + +// elefren +use elefren::{ + prelude::*, + apps::App, + scopes::Scopes, +}; + + +/// Decodes the Twitter mention to something that will make sense once Twitter has joined the +/// Fediverse +fn twitter_mentions(ums: &Vec) -> HashMap { + let mut decoded_mentions = HashMap::new(); + + for um in ums { + decoded_mentions.insert(format!("@{}", um.screen_name), format!("@{}@twitter.com", um.screen_name)); + } + + decoded_mentions +} + +/// Gets Mastodon Data +pub fn get_mastodon_token(masto: &MastodonConfig) -> Mastodon { + let data = Data { + base: Cow::from(String::from(&masto.base)), + client_id: Cow::from(String::from(&masto.client_id)), + client_secret: Cow::from(String::from(&masto.client_secret)), + redirect: Cow::from(String::from(&masto.redirect)), + token: Cow::from(String::from(&masto.token)), + }; + + Mastodon::from(data) +} + +/// Builds toot text from tweet +pub fn build_basic_status(tweet: &Tweet) -> Result> { + let mut toot = String::from(&tweet.text); + + let decoded_urls = decode_urls(&tweet.entities.urls); + + for decoded_url in decoded_urls { + toot = toot.replace(&decoded_url.0, &decoded_url.1); + } + + let decoded_mentions = twitter_mentions(&tweet.entities.user_mentions); + + for decoded_mention in decoded_mentions { + toot = toot.replace(&decoded_mention.0, &decoded_mention.1); + } + + if let Ok(t) = decode_html(&toot) { + toot = t; + } + + Ok(toot) +} + +/// Generic register function +/// As this function is supposed to be run only once, it will panic for every error it encounters +/// Most of this function is a direct copy/paste of the official `elefren` crate +pub fn register(host: &str) { + let mut builder = App::builder(); + builder.client_name(Cow::from(String::from(env!("CARGO_PKG_NAME")))) + .redirect_uris(Cow::from(String::from("urn:ietf:wg:oauth:2.0:oob"))) + .scopes(Scopes::write_all()) + .website(Cow::from(String::from("https://framagit.org/veretcle/scootaloo"))); + + let app = builder.build().expect("Cannot build the app"); + + let registration = Registration::new(host).register(app).expect("Cannot build registration object"); + let url = registration.authorize_url().expect("Cannot generate registration URI!"); + + println!("Click this link to authorize on Mastodon: {}", url); + println!("Paste the returned authorization code: "); + + let mut input = String::new(); + stdin().read_line(&mut input).expect("Unable to read back registration code!"); + + let code = input.trim(); + let mastodon = registration.complete(code).expect("Unable to create access token!"); + + let toml = toml::to_string(&*mastodon).unwrap(); + + println!("Please insert the following block at the end of your configuration file:\n[mastodon]\n{}", toml); +} + diff --git a/src/state.rs b/src/state.rs new file mode 100644 index 0000000..bc9f482 --- /dev/null +++ b/src/state.rs @@ -0,0 +1,38 @@ +// auto-imports +use crate::config::Config; + +// std +use std::{ + fs::{read_to_string, write}, + error::Error, +}; + +// log +use log::debug; + +/// Reads last tweet id from a file +pub fn read_state(s: &str) -> Option { + let state = read_to_string(s); + + if let Ok(s) = state { + debug!("Last Tweet ID (from file): {}", &s); + return s.parse::().ok(); + } + + None +} + +/// Writes last treated tweet id to a file +pub fn write_state(f: &str, s: u64) -> Result<(), std::io::Error> { + write(f, format!("{}", s)) +} + +/********* + * Main functions +*********/ +/// Initiates the DB from path +pub fn init_db(config: &Config) -> Result<(), Box> { + println!("config.scootaloo.db_path: {}", config.scootaloo.db_path); + Ok(()) +} + diff --git a/src/twitter.rs b/src/twitter.rs new file mode 100644 index 0000000..f44c071 --- /dev/null +++ b/src/twitter.rs @@ -0,0 +1,83 @@ +// auto-imports +use crate::ScootalooError; +use crate::config::TwitterConfig; +use crate::util::cache_media; + +// std +use std::{ + error::Error, + collections::HashMap, +}; + +// egg-mode +use egg_mode::{ + Token, + KeyPair, + entities::{UrlEntity, MediaEntity, MediaType}, + user::UserID, + tweet::{ + Tweet, + user_timeline, + }, +}; + +/// Gets Twitter oauth2 token +pub fn get_oauth2_token(config: &TwitterConfig) -> Token { + let con_token = KeyPair::new(String::from(&config.consumer_key), String::from(&config.consumer_secret)); + let access_token = KeyPair::new(String::from(&config.access_key), String::from(&config.access_secret)); + + Token::Access { + consumer: con_token, + access: access_token, + } +} + +/// Gets Twitter user timeline +pub async fn get_user_timeline(config: &TwitterConfig, token: Token, lid: Option) -> Result, Box> { + // fix the page size to 200 as it is the maximum Twitter authorizes + let (_, feed) = user_timeline(UserID::from(String::from(&config.username)), true, false, &token) + .with_page_size(200) + .older(lid) + .await?; + + Ok(feed.to_vec()) +} + +/// Decodes urls from UrlEntities +pub fn decode_urls(urls: &Vec) -> HashMap { + let mut decoded_urls = HashMap::new(); + + for url in urls { + if url.expanded_url.is_some() { + // unwrap is safe here as we just verified that there is something inside expanded_url + decoded_urls.insert(String::from(&url.url), String::from(url.expanded_url.as_deref().unwrap())); + } + } + + decoded_urls +} + +/// Retrieves a single media from a tweet and store it in a temporary file +pub async fn get_tweet_media(m: &MediaEntity, t: &str) -> Result> { + match m.media_type { + MediaType::Photo => { + return cache_media(&m.media_url_https, t).await; + }, + _ => { + match &m.video_info { + Some(v) => { + for variant in &v.variants { + if variant.content_type == "video/mp4" { + return cache_media(&variant.url, t).await; + } + } + return Err(ScootalooError::new(&format!("Media Type for {} is video but no mp4 file URL is available", &m.url)).into()); + }, + None => { + return Err(ScootalooError::new(&format!("Media Type for {} is video but does not contain any video_info", &m.url)).into()); + }, + } + }, + }; +} + diff --git a/src/util.rs b/src/util.rs new file mode 100644 index 0000000..6f7e838 --- /dev/null +++ b/src/util.rs @@ -0,0 +1,37 @@ +// std +use std::error::Error; +use crate::ScootalooError; + +// reqwest +use reqwest::Url; + +// tokio +use tokio::{ + io::copy, + fs::{File, create_dir_all}, +}; + +/// Gets and caches Twitter Media inside the determined temp dir +pub async fn cache_media(u: &str, t: &str) -> Result> { + // create dir + create_dir_all(t).await?; + + // get file + let mut response = reqwest::get(u).await?; + + // create local file + let url = Url::parse(u)?; + let dest_filename = url.path_segments().ok_or_else(|| ScootalooError::new(&format!("Cannot determine the destination filename for {}", u)))? + .last().ok_or_else(|| ScootalooError::new(&format!("Cannot determine the destination filename for {}", u)))?; + + let dest_filepath = format!("{}/{}", t, dest_filename); + + let mut dest_file = File::create(&dest_filepath).await?; + + while let Some(chunk) = response.chunk().await? { + copy(&mut &*chunk, &mut dest_file).await?; + } + + Ok(dest_filepath) +} + From 6363c1246010304a82a8d77f53c4863f98e76ed0 Mon Sep 17 00:00:00 2001 From: VC Date: Fri, 22 Apr 2022 17:10:08 +0200 Subject: [PATCH 02/12] feature(test): add tests --- config.rs | 35 ++++++++++++++++++++++ src/lib.rs | 2 +- src/main.rs | 2 +- src/mastodon.rs | 66 ++++++++++++++++++++++++++++++++++++++++-- src/state.rs | 69 ++++++++++++++++++++++++++++++++++++++++++-- src/twitter.rs | 21 ++------------ tests/bad_test.toml | 1 + tests/config.rs | 33 +++++++++++++++++++++ tests/good_test.toml | 19 ++++++++++++ 9 files changed, 222 insertions(+), 26 deletions(-) create mode 100644 config.rs create mode 100644 tests/bad_test.toml create mode 100644 tests/config.rs create mode 100644 tests/good_test.toml diff --git a/config.rs b/config.rs new file mode 100644 index 0000000..0fa7025 --- /dev/null +++ b/config.rs @@ -0,0 +1,35 @@ +use scootaloo::parse_toml; + +#[test] +fn parse_good_toml() { + let tConfig = TwitterConfig { + username: "test", + consumer_key: "foo", + consumer_secret: "bar", + access_key: "secret", + access_secret: "super secret", + }; + + let mConfig = MastodonConfig { + base: "https://www.example.com", + client_id: "my_id", + client_secret: "this is secret", + redirect: "ooo:oooo:o", + token: "super secret", + }; + + let sConfig = ScootalooConfig { + db_path: "/tmp/scootaloo/scootaloo.db", + cache_path: "/tmp", + }; + + let test_config = Config { + twitter: tConfig, + mastodon: mConfig, + scootaloo: sConfig, + }; + + let parsed_config = parse_toml("tests/right_config.toml"); + + assert_eq!(parsed_config, test_config); +} diff --git a/src/lib.rs b/src/lib.rs index c2b2e9d..643ffd5 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -1,6 +1,6 @@ // auto-imports mod error; -use error::ScootalooError; +use crate::error::ScootalooError; mod config; use config::Config; diff --git a/src/main.rs b/src/main.rs index 77bb07d..e700c1e 100644 --- a/src/main.rs +++ b/src/main.rs @@ -61,7 +61,7 @@ fn main() { }, ("init", Some(sub_m)) => { let config = parse_toml(sub_m.value_of("config").unwrap_or(DEFAULT_CONFIG_PATH)); - init_db(&config).unwrap(); + init_db(&config.scootaloo).unwrap(); return; }, _ => (), diff --git a/src/mastodon.rs b/src/mastodon.rs index 3f93e9d..d81a963 100644 --- a/src/mastodon.rs +++ b/src/mastodon.rs @@ -1,6 +1,5 @@ // auto imports use crate::config::MastodonConfig; -use crate::twitter::decode_urls; // std use std::{ @@ -16,7 +15,7 @@ use htmlescape::decode_html; // egg-mode use egg_mode::{ tweet::Tweet, - entities::MentionEntity, + entities::{UrlEntity, MentionEntity}, }; // elefren @@ -39,6 +38,20 @@ fn twitter_mentions(ums: &Vec) -> HashMap { decoded_mentions } +/// Decodes urls from UrlEntities +fn decode_urls(urls: &Vec) -> HashMap { + let mut decoded_urls = HashMap::new(); + + for url in urls { + if url.expanded_url.is_some() { + // unwrap is safe here as we just verified that there is something inside expanded_url + decoded_urls.insert(String::from(&url.url), String::from(url.expanded_url.as_deref().unwrap())); + } + } + + decoded_urls +} + /// Gets Mastodon Data pub fn get_mastodon_token(masto: &MastodonConfig) -> Mastodon { let data = Data { @@ -104,3 +117,52 @@ pub fn register(host: &str) { println!("Please insert the following block at the end of your configuration file:\n[mastodon]\n{}", toml); } +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn test_twitter_mentions() { + let mention_entity = MentionEntity { + id: 12345, + range: (1, 3), + name: String::from("Ta Mere l0l"), + screen_name: String::from("tamerelol"), + }; + + let twitter_ums = vec![mention_entity]; + + let mut expected_mentions = HashMap::new(); + expected_mentions.insert(String::from("@tamerelol"), String::from("@tamerelol@twitter.com")); + + let decoded_mentions = twitter_mentions(&twitter_ums); + + assert_eq!(expected_mentions, decoded_mentions); + } + + #[test] + fn test_decode_urls() { + let url_entity1 = UrlEntity { + display_url: String::from("tamerelol"), + expanded_url: Some(String::from("https://www.nintendojo.fr/dojobar")), + range: (1, 3), + url: String::from("https://t.me/tamerelol"), + }; + + let url_entity2 = UrlEntity { + display_url: String::from("tamerelol"), + expanded_url: None, + range: (1, 3), + url: String::from("https://t.me/tamerelol"), + }; + + let twitter_urls = vec![url_entity1, url_entity2]; + + let mut expected_urls = HashMap::new(); + expected_urls.insert(String::from("https://t.me/tamerelol"), String::from("https://www.nintendojo.fr/dojobar")); + + let decoded_urls = decode_urls(&twitter_urls); + + assert_eq!(expected_urls, decoded_urls); + } +} diff --git a/src/state.rs b/src/state.rs index bc9f482..42b2f16 100644 --- a/src/state.rs +++ b/src/state.rs @@ -1,5 +1,5 @@ // auto-imports -use crate::config::Config; +use crate::config::ScootalooConfig; // std use std::{ @@ -10,6 +10,9 @@ use std::{ // log use log::debug; +// rusqlite +use rusqlite::{Connection, OpenFlags, params}; + /// Reads last tweet id from a file pub fn read_state(s: &str) -> Option { let state = read_to_string(s); @@ -31,8 +34,68 @@ pub fn write_state(f: &str, s: u64) -> Result<(), std::io::Error> { * Main functions *********/ /// Initiates the DB from path -pub fn init_db(config: &Config) -> Result<(), Box> { - println!("config.scootaloo.db_path: {}", config.scootaloo.db_path); +pub fn init_db(config: &ScootalooConfig) -> Result<(), Box> { + let conn = Connection::open(&config.db_path)?; + + conn.execute( + "CREATE TABLE IF NOT EXISTS tweet_to_toot ( + tweet_id INTEGER PRIMARY KEY, + toot_id TEXT UNIQUE + )", + [], + )?; + Ok(()) } +#[cfg(test)] +mod tests { + use super::*; + use std::{ + fs::remove_file, + path::Path, + }; + + #[test] + fn test_init_db() { + let scootaloo_config = ScootalooConfig { + db_path: String::from("/tmp/test_init_db.sqlite"), + cache_path: String::from("/tmp/scootaloo"), + }; + + init_db(&scootaloo_config).unwrap(); + + // check that file exist + assert!(Path::new(&scootaloo_config.db_path).exists()); + + // open said file + let conn = Connection::open_with_flags(&scootaloo_config.db_path, OpenFlags::SQLITE_OPEN_READ_ONLY).unwrap(); + + conn.execute( + "SELECT * from tweet_to_toot;", + [], + ).unwrap(); + + conn.close().unwrap(); + remove_file(&scootaloo_config.db_path).unwrap(); + } + + #[test] + fn test_read_state() { + let scootaloo_config = ScootalooConfig { + db_path: String::from("/tmp/test_read_state.sqlite"), + cache_path: String::from("/tmp/scootaloo"), + }; + + init_db(&scootaloo_config).unwrap(); + + let conn = Connection::open(&scootaloo_config.db_path).unwrap(); + + conn.execute( + "INSERT INTO tweet_to_toot (tweet_id, toot_id) VALUES (?1, ?2)", + params![123456789 as u64, String::from("987654321")], + ).unwrap(); + + + } +} diff --git a/src/twitter.rs b/src/twitter.rs index f44c071..5821bb0 100644 --- a/src/twitter.rs +++ b/src/twitter.rs @@ -4,16 +4,13 @@ use crate::config::TwitterConfig; use crate::util::cache_media; // std -use std::{ - error::Error, - collections::HashMap, -}; +use std::error::Error; // egg-mode use egg_mode::{ Token, KeyPair, - entities::{UrlEntity, MediaEntity, MediaType}, + entities::{MediaEntity, MediaType}, user::UserID, tweet::{ Tweet, @@ -43,20 +40,6 @@ pub async fn get_user_timeline(config: &TwitterConfig, token: Token, lid: Option Ok(feed.to_vec()) } -/// Decodes urls from UrlEntities -pub fn decode_urls(urls: &Vec) -> HashMap { - let mut decoded_urls = HashMap::new(); - - for url in urls { - if url.expanded_url.is_some() { - // unwrap is safe here as we just verified that there is something inside expanded_url - decoded_urls.insert(String::from(&url.url), String::from(url.expanded_url.as_deref().unwrap())); - } - } - - decoded_urls -} - /// Retrieves a single media from a tweet and store it in a temporary file pub async fn get_tweet_media(m: &MediaEntity, t: &str) -> Result> { match m.media_type { diff --git a/tests/bad_test.toml b/tests/bad_test.toml new file mode 100644 index 0000000..907b308 --- /dev/null +++ b/tests/bad_test.toml @@ -0,0 +1 @@ +blah diff --git a/tests/config.rs b/tests/config.rs new file mode 100644 index 0000000..ce1e1a7 --- /dev/null +++ b/tests/config.rs @@ -0,0 +1,33 @@ +use scootaloo::parse_toml; + +#[test] +fn test_parse_good_toml() { + let parse_good_toml = parse_toml("tests/good_test.toml"); + + assert_eq!(parse_good_toml.scootaloo.db_path, "/var/random/scootaloo.sqlite"); + assert_eq!(parse_good_toml.scootaloo.cache_path, "/tmp/scootaloo"); + + assert_eq!(parse_good_toml.twitter.username, "tamerelol"); + assert_eq!(parse_good_toml.twitter.consumer_key, "rand consumer key"); + assert_eq!(parse_good_toml.twitter.consumer_secret, "secret"); + assert_eq!(parse_good_toml.twitter.access_key, "rand access key"); + assert_eq!(parse_good_toml.twitter.access_secret, "super secret"); + + assert_eq!(parse_good_toml.mastodon.base, "https://m.nintendojo.fr"); + assert_eq!(parse_good_toml.mastodon.client_id, "rand client id"); + assert_eq!(parse_good_toml.mastodon.client_secret, "secret"); + assert_eq!(parse_good_toml.mastodon.redirect, "urn:ietf:wg:oauth:2.0:oob"); + assert_eq!(parse_good_toml.mastodon.token, "super secret"); +} + +#[test] +#[should_panic(expected = "Cannot open config file tests/no_file.toml: No such file or directory (os error 2)")] +fn test_parse_no_toml() { + let _parse_no_toml = parse_toml("tests/no_file.toml"); +} + +#[test] +#[should_panic(expected = "Cannot parse TOML file tests/bad_test.toml: expected an equals, found a newline at line 1 column 5")] +fn test_parse_bad_toml() { + let _parse_bad_toml = parse_toml("tests/bad_test.toml"); +} diff --git a/tests/good_test.toml b/tests/good_test.toml new file mode 100644 index 0000000..f29d112 --- /dev/null +++ b/tests/good_test.toml @@ -0,0 +1,19 @@ +[scootaloo] + +db_path="/var/random/scootaloo.sqlite" +cache_path="/tmp/scootaloo" + +[twitter] +username="tamerelol" + +consumer_key="rand consumer key" +consumer_secret="secret" +access_key="rand access key" +access_secret="super secret" + +[mastodon] +base = "https://m.nintendojo.fr" +client_id = "rand client id" +client_secret = "secret" +redirect = "urn:ietf:wg:oauth:2.0:oob" +token = "super secret" From 48b8eaaa5beeafbb4f95a208f1e51ef65c267d20 Mon Sep 17 00:00:00 2001 From: VC Date: Sat, 23 Apr 2022 10:01:00 +0200 Subject: [PATCH 03/12] feature: state is held into a sqlite db --- README.md | 19 ++++---- config.rs | 35 -------------- src/lib.rs | 21 ++++++-- src/state.rs | 133 +++++++++++++++++++++++++++++++++++---------------- 4 files changed, 120 insertions(+), 88 deletions(-) delete mode 100644 config.rs diff --git a/README.md b/README.md index f6f53ed..0a17168 100644 --- a/README.md +++ b/README.md @@ -30,7 +30,7 @@ access_secret="MYACCESSSECRET" ``` Then run the command with the `init` subcommand to initiate the DB: -``` +```sh scootaloo init ``` @@ -52,7 +52,9 @@ token = "MYTOKEN" You can then run the application via `cron` for example. Here is the generic usage: -``` +```sh +A Twitter to Mastodon bot + USAGE: scootaloo [OPTIONS] [SUBCOMMAND] @@ -62,20 +64,21 @@ FLAGS: OPTIONS: -c, --config TOML config file for scootaloo (default /usr/local/etc/scootaloo.toml) + -l, --loglevel Log level. Valid values are: Off, Warn, Error, Info, Debug SUBCOMMANDS: help Prints this message or the help of the given subcommand(s) + init Command to init Scootaloo DB register Command to register to a Mastodon Instance ``` # Quirks -Scootaloo does not respect the spam limits imposed by Mastodon: it will make a 429 error if too much Tweets are converted to Toots in a short amount of time (and it will not recover from it). By default, it gets the last 200 tweets from the user timeline (which is a lot!). It is recommended to put a Tweet number into the `last_tweet` file before copying an old account. +Scootaloo does not respect the spam limits imposed by Mastodon: it will make a 429 error if too much Tweets are converted to Toots in a short amount of time (and it will not recover from it). By default, it gets the last 200 tweets from the user timeline (which is a lot!). It is recommended to put a Tweet number into the DB file before copying an old account. -You can do that with a command like: +You can can insert it like this: ```sh -echo -n '8189881949849' > last_tweet +sqlite3 /var/lib/scootaloo/scootaloo.sqlite +INSERT INTO tweet_to_toot VALUES (1383782580412030982, ""); +.quit ``` - -**This file should only contain the last tweet ID without any other char (no EOL or new line).** - diff --git a/config.rs b/config.rs deleted file mode 100644 index 0fa7025..0000000 --- a/config.rs +++ /dev/null @@ -1,35 +0,0 @@ -use scootaloo::parse_toml; - -#[test] -fn parse_good_toml() { - let tConfig = TwitterConfig { - username: "test", - consumer_key: "foo", - consumer_secret: "bar", - access_key: "secret", - access_secret: "super secret", - }; - - let mConfig = MastodonConfig { - base: "https://www.example.com", - client_id: "my_id", - client_secret: "this is secret", - redirect: "ooo:oooo:o", - token: "super secret", - }; - - let sConfig = ScootalooConfig { - db_path: "/tmp/scootaloo/scootaloo.db", - cache_path: "/tmp", - }; - - let test_config = Config { - twitter: tConfig, - mastodon: mConfig, - scootaloo: sConfig, - }; - - let parsed_config = parse_toml("tests/right_config.toml"); - - assert_eq!(parsed_config, test_config); -} diff --git a/src/lib.rs b/src/lib.rs index 643ffd5..f7834f6 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -16,7 +16,7 @@ use twitter::*; mod util; mod state; -use state::{read_state, write_state}; +use state::{read_state, write_state, TweetToToot}; pub use state::init_db; // std @@ -34,11 +34,19 @@ use elefren::{ // log use log::{info, warn, error, debug}; +// rusqlite +use rusqlite::Connection; + /// This is where the magic happens #[tokio::main] pub async fn run(config: Config) { + // open the SQLite connection + let conn = Connection::open(&config.scootaloo.db_path).unwrap(); // retrieve the last tweet ID for the username - let last_tweet_id = read_state(&config.scootaloo.db_path); + let last_tweet_id = match read_state(&conn, None).unwrap() { + Some(i) => Some(i.tweet_id), + None => None, + }; // get OAuth2 token let token = get_oauth2_token(&config.twitter); @@ -125,12 +133,17 @@ pub async fn run(config: Config) { // publish status // again unwrap is safe here as we are in the main thread - mastodon.new_status(status).unwrap(); + let published_status = mastodon.new_status(status).unwrap(); // this will panic if it cannot publish the status, which is a good thing, it allows the // last_tweet gathered not to be written + let ttt_towrite = TweetToToot { + tweet_id: tweet.id, + toot_id: published_status.id, + }; + // write the current state (tweet ID) to avoid copying it another time - write_state(&config.scootaloo.db_path, tweet.id).unwrap_or_else(|e| + write_state(&conn, ttt_towrite).unwrap_or_else(|e| panic!("Can’t write the last tweet retrieved: {}", e) ); } diff --git a/src/state.rs b/src/state.rs index 42b2f16..d4e91f6 100644 --- a/src/state.rs +++ b/src/state.rs @@ -2,32 +2,52 @@ use crate::config::ScootalooConfig; // std -use std::{ - fs::{read_to_string, write}, - error::Error, -}; +use std::error::Error; // log use log::debug; // rusqlite -use rusqlite::{Connection, OpenFlags, params}; +use rusqlite::{Connection, params, OptionalExtension}; -/// Reads last tweet id from a file -pub fn read_state(s: &str) -> Option { - let state = read_to_string(s); - - if let Ok(s) = state { - debug!("Last Tweet ID (from file): {}", &s); - return s.parse::().ok(); - } - - None +/// Struct for each query line +#[derive(Debug)] +pub struct TweetToToot { + pub tweet_id: u64, + pub toot_id: String, } -/// Writes last treated tweet id to a file -pub fn write_state(f: &str, s: u64) -> Result<(), std::io::Error> { - write(f, format!("{}", s)) +/// if None is passed, read the last tweet from DB +/// if a tweet_id is passed, read this particular tweet from DB +pub fn read_state(conn: &Connection, s: Option) -> Result, Box> { + debug!("Reading tweet_id {:?}", s); + let query: String; + match s { + Some(i) => query = format!("SELECT * FROM tweet_to_toot WHERE tweet_id = {}", i), + None => query = String::from("SELECT * FROM tweet_to_toot ORDER BY tweet_id DESC LIMIT 1"), + }; + + let mut stmt = conn.prepare(&query)?; + + let t = stmt.query_row([], |row| { + Ok(TweetToToot { + tweet_id: row.get(0)?, + toot_id: row.get(1)?, + }) + }).optional()?; + + Ok(t) +} + +/// Writes last treated tweet id and toot id to the db +pub fn write_state(conn: &Connection, t: TweetToToot) -> Result<(), Box> { + debug!("Write struct {:?}", t); + conn.execute( + "INSERT INTO tweet_to_toot (tweet_id, toot_id) VALUES (?1, ?2)", + params![t.tweet_id, t.toot_id], + )?; + + Ok(()) } /********* @@ -35,6 +55,7 @@ pub fn write_state(f: &str, s: u64) -> Result<(), std::io::Error> { *********/ /// Initiates the DB from path pub fn init_db(config: &ScootalooConfig) -> Result<(), Box> { + debug!("Initializing DB for Scootaloo"); let conn = Connection::open(&config.db_path)?; conn.execute( @@ -57,7 +78,7 @@ mod tests { }; #[test] - fn test_init_db() { + fn test_db() { let scootaloo_config = ScootalooConfig { db_path: String::from("/tmp/test_init_db.sqlite"), cache_path: String::from("/tmp/scootaloo"), @@ -69,33 +90,63 @@ mod tests { assert!(Path::new(&scootaloo_config.db_path).exists()); // open said file - let conn = Connection::open_with_flags(&scootaloo_config.db_path, OpenFlags::SQLITE_OPEN_READ_ONLY).unwrap(); - + let conn = Connection::open(&scootaloo_config.db_path).unwrap(); conn.execute( "SELECT * from tweet_to_toot;", [], ).unwrap(); - conn.close().unwrap(); + // write a state to DB + let t = TweetToToot { + tweet_id: 123456789, + toot_id: String::from("987654321"), + }; + write_state(&conn, t).unwrap(); + + let mut stmt = conn.prepare("SELECT * FROM tweet_to_toot limit 1;").unwrap(); + let mut rows = stmt.query([]).unwrap(); + + while let Some(row) = rows.next().unwrap() { + assert_eq!(123456789 as u64, row.get::<_, u64>(0).unwrap()); + assert_eq!("987654321", row.get::<_, String>(1).unwrap()); + } + + // write several other states + let (t1, t2) = ( + TweetToToot { + tweet_id: 11111111, + toot_id: String::from("tamerelol"), + }, + TweetToToot { + tweet_id: 1123456789, + toot_id: String::from("tonperemdr"), + }); + + write_state(&conn, t1).unwrap(); + write_state(&conn, t2).unwrap(); + + match read_state(&conn, None).unwrap() { + Some(i) => { + assert_eq!(1123456789, i.tweet_id); + assert_eq!("tonperemdr", &i.toot_id); + }, + None => panic!("This should not happen!"), + } + + match read_state(&conn, Some(11111111)).unwrap() { + Some(i) => { + assert_eq!(11111111, i.tweet_id); + assert_eq!("tamerelol", &i.toot_id); + }, + None => panic!("This should not happen!"), + } + + match read_state(&conn, Some(0000000)).unwrap() { + Some(_) => panic!("This should not happen"), + _ => (), + } + remove_file(&scootaloo_config.db_path).unwrap(); } - - #[test] - fn test_read_state() { - let scootaloo_config = ScootalooConfig { - db_path: String::from("/tmp/test_read_state.sqlite"), - cache_path: String::from("/tmp/scootaloo"), - }; - - init_db(&scootaloo_config).unwrap(); - - let conn = Connection::open(&scootaloo_config.db_path).unwrap(); - - conn.execute( - "INSERT INTO tweet_to_toot (tweet_id, toot_id) VALUES (?1, ?2)", - params![123456789 as u64, String::from("987654321")], - ).unwrap(); - - - } } + From 8b0945cb48f5bd16fa5f5f32a5405da236bf6119 Mon Sep 17 00:00:00 2001 From: VC Date: Sat, 23 Apr 2022 10:15:29 +0200 Subject: [PATCH 04/12] refactor: more clear option --- src/main.rs | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/main.rs b/src/main.rs index e700c1e..80c9bfb 100644 --- a/src/main.rs +++ b/src/main.rs @@ -21,7 +21,7 @@ fn main() { .short("c") .long("config") .value_name("CONFIG_FILE") - .help("TOML config file for scootaloo (default /usr/local/etc/scootaloo.toml)") + .help(&*format!("TOML config file for scootaloo (default {})", DEFAULT_CONFIG_PATH)) .takes_value(true) .display_order(1)) .arg(Arg::with_name("log_level") @@ -49,7 +49,7 @@ fn main() { .short("c") .long("config") .value_name("CONFIG_FILE") - .help("TOML config file for scootaloo (default /usr/local/etc/scootaloo.toml") + .help(&*format!("TOML config file for scootaloo (default {})", DEFAULT_CONFIG_PATH)) .takes_value(true) .display_order(1))) .get_matches(); From abfb2ff50a8f3b9d91c0327eff3a1797a4893f17 Mon Sep 17 00:00:00 2001 From: VC Date: Sat, 23 Apr 2022 10:15:47 +0200 Subject: [PATCH 05/12] feature: more tests --- src/mastodon.rs | 1 + src/util.rs | 21 +++++++++++++++++++++ 2 files changed, 22 insertions(+) diff --git a/src/mastodon.rs b/src/mastodon.rs index d81a963..68d3353 100644 --- a/src/mastodon.rs +++ b/src/mastodon.rs @@ -166,3 +166,4 @@ mod tests { assert_eq!(expected_urls, decoded_urls); } } + diff --git a/src/util.rs b/src/util.rs index 6f7e838..7e220fe 100644 --- a/src/util.rs +++ b/src/util.rs @@ -35,3 +35,24 @@ pub async fn cache_media(u: &str, t: &str) -> Result> { Ok(dest_filepath) } +#[cfg(test)] +mod tests { + use super::*; + + use std::{ + path::Path, + fs::remove_dir_all, + }; + + const TMP_DIR: &'static str = "/tmp/scootaloo_test"; + + #[tokio::test] + async fn test_cache_media() { + let dest = cache_media("https://forum.nintendojo.fr/styles/prosilver/theme/images/ndfr_casual.png", TMP_DIR).await.unwrap(); + + assert!(Path::new(&dest).exists()); + + remove_dir_all(TMP_DIR).unwrap(); + } +} + From 13bb6d6f376d9a8ed8313d85d1214034590def6b Mon Sep 17 00:00:00 2001 From: VC Date: Sat, 23 Apr 2022 13:39:41 +0200 Subject: [PATCH 06/12] feature: make thread in Twitter thread in Mastodon --- src/config.rs | 3 - src/lib.rs | 32 +++++---- src/main.rs | 11 +-- src/mastodon.rs | 8 --- src/state.rs | 174 +++++++++++++++++++++++++++++++++--------------- 5 files changed, 142 insertions(+), 86 deletions(-) diff --git a/src/config.rs b/src/config.rs index d0e2881..ba7a60c 100644 --- a/src/config.rs +++ b/src/config.rs @@ -1,7 +1,4 @@ -// std use std::fs::read_to_string; - -// toml use serde::Deserialize; /// General configuration Struct diff --git a/src/lib.rs b/src/lib.rs index f7834f6..779cb7e 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -1,4 +1,3 @@ -// auto-imports mod error; use crate::error::ScootalooError; @@ -19,22 +18,13 @@ mod state; use state::{read_state, write_state, TweetToToot}; pub use state::init_db; -// std use std::borrow::Cow; - -// tokio use tokio::fs::remove_file; - -// elefren use elefren::{ prelude::*, status_builder::StatusBuilder, }; - -// log use log::{info, warn, error, debug}; - -// rusqlite use rusqlite::Connection; /// This is where the magic happens @@ -72,6 +62,8 @@ pub async fn run(config: Config) { for tweet in &feed { debug!("Treating Tweet {} inside feed", tweet.id); + // initiate the toot_reply_id var + let mut toot_reply_id: Option = None; // determine if the tweet is part of a thread (response to self) or a standard response if let Some(r) = &tweet.in_reply_to_screen_name { if &r.to_lowercase() != &config.twitter.username.to_lowercase() { @@ -79,6 +71,11 @@ pub async fn run(config: Config) { info!("Tweet is a direct response, skipping"); continue; } + + let searched_toot = read_state(&conn, tweet.in_reply_to_status_id).unwrap_or(None); + if let Some(i) = searched_toot { + toot_reply_id = Some(i.toot_id); + }; }; // build basic status by just yielding text and dereferencing contained urls @@ -125,10 +122,17 @@ pub async fn run(config: Config) { // finished reuploading attachments, now let’s do the toot baby! debug!("Building corresponding Mastodon status"); - let status = StatusBuilder::new() - .status(&status_text) - .media_ids(status_medias) - .build() + + let mut status_builder = StatusBuilder::new(); + + status_builder.status(&status_text) + .media_ids(status_medias); + + if let Some(i) = toot_reply_id { + status_builder.in_reply_to(&i); + } + + let status = status_builder.build() .expect(&format!("Cannot build status with text {}", &status_text)); // publish status diff --git a/src/main.rs b/src/main.rs index 80c9bfb..60d22ad 100644 --- a/src/main.rs +++ b/src/main.rs @@ -1,14 +1,7 @@ -// self use scootaloo::*; - -// clap use clap::{App, Arg, SubCommand}; - -// log use log::{LevelFilter, error}; use simple_logger::SimpleLogger; - -// std use std::str::FromStr; const DEFAULT_CONFIG_PATH: &'static str = "/usr/local/etc/scootaloo.toml"; @@ -21,7 +14,7 @@ fn main() { .short("c") .long("config") .value_name("CONFIG_FILE") - .help(&*format!("TOML config file for scootaloo (default {})", DEFAULT_CONFIG_PATH)) + .help(&format!("TOML config file for scootaloo (default {})", DEFAULT_CONFIG_PATH)) .takes_value(true) .display_order(1)) .arg(Arg::with_name("log_level") @@ -49,7 +42,7 @@ fn main() { .short("c") .long("config") .value_name("CONFIG_FILE") - .help(&*format!("TOML config file for scootaloo (default {})", DEFAULT_CONFIG_PATH)) + .help(&format!("TOML config file for scootaloo (default {})", DEFAULT_CONFIG_PATH)) .takes_value(true) .display_order(1))) .get_matches(); diff --git a/src/mastodon.rs b/src/mastodon.rs index 68d3353..d2a51a1 100644 --- a/src/mastodon.rs +++ b/src/mastodon.rs @@ -1,24 +1,16 @@ -// auto imports use crate::config::MastodonConfig; -// std use std::{ borrow::Cow, error::Error, collections::HashMap, io::stdin, }; - -// htmlescape use htmlescape::decode_html; - -// egg-mode use egg_mode::{ tweet::Tweet, entities::{UrlEntity, MentionEntity}, }; - -// elefren use elefren::{ prelude::*, apps::App, diff --git a/src/state.rs b/src/state.rs index d4e91f6..93c103d 100644 --- a/src/state.rs +++ b/src/state.rs @@ -1,13 +1,7 @@ -// auto-imports use crate::config::ScootalooConfig; -// std use std::error::Error; - -// log use log::debug; - -// rusqlite use rusqlite::{Connection, params, OptionalExtension}; /// Struct for each query line @@ -50,9 +44,6 @@ pub fn write_state(conn: &Connection, t: TweetToToot) -> Result<(), Box Result<(), Box> { debug!("Initializing DB for Scootaloo"); @@ -78,7 +69,7 @@ mod tests { }; #[test] - fn test_db() { + fn test_init_db() { let scootaloo_config = ScootalooConfig { db_path: String::from("/tmp/test_init_db.sqlite"), cache_path: String::from("/tmp/scootaloo"), @@ -96,57 +87,136 @@ mod tests { [], ).unwrap(); - // write a state to DB - let t = TweetToToot { + remove_file(scootaloo_config.db_path).unwrap(); + } + + #[test] + fn test_write_state() { + let scootaloo_config = ScootalooConfig { + db_path: String::from("/tmp/test_write_state.sqlite"), + cache_path: String::from("/tmp/scootaloo"), + }; + + init_db(&scootaloo_config).unwrap(); + + let conn = Connection::open(&scootaloo_config.db_path).unwrap(); + + let t_in = TweetToToot { tweet_id: 123456789, toot_id: String::from("987654321"), }; - write_state(&conn, t).unwrap(); - let mut stmt = conn.prepare("SELECT * FROM tweet_to_toot limit 1;").unwrap(); - let mut rows = stmt.query([]).unwrap(); + write_state(&conn, t_in).unwrap(); - while let Some(row) = rows.next().unwrap() { - assert_eq!(123456789 as u64, row.get::<_, u64>(0).unwrap()); - assert_eq!("987654321", row.get::<_, String>(1).unwrap()); - } + let mut stmt = conn.prepare("SELECT * FROM tweet_to_toot;").unwrap(); - // write several other states - let (t1, t2) = ( - TweetToToot { - tweet_id: 11111111, - toot_id: String::from("tamerelol"), - }, - TweetToToot { - tweet_id: 1123456789, - toot_id: String::from("tonperemdr"), - }); + let t_out = stmt.query_row([], |row| { + Ok(TweetToToot { + tweet_id: row.get(0).unwrap(), + toot_id: row.get(1).unwrap(), + }) + }).unwrap(); - write_state(&conn, t1).unwrap(); - write_state(&conn, t2).unwrap(); - - match read_state(&conn, None).unwrap() { - Some(i) => { - assert_eq!(1123456789, i.tweet_id); - assert_eq!("tonperemdr", &i.toot_id); - }, - None => panic!("This should not happen!"), - } - - match read_state(&conn, Some(11111111)).unwrap() { - Some(i) => { - assert_eq!(11111111, i.tweet_id); - assert_eq!("tamerelol", &i.toot_id); - }, - None => panic!("This should not happen!"), - } - - match read_state(&conn, Some(0000000)).unwrap() { - Some(_) => panic!("This should not happen"), - _ => (), - } + assert_eq!(t_out.tweet_id, 123456789); + assert_eq!(t_out.toot_id, String::from("987654321")); remove_file(&scootaloo_config.db_path).unwrap(); } + + #[test] + fn test_none_to_tweet_id_read_state() { + let scootaloo_config = ScootalooConfig { + db_path: String::from("/tmp/test_none_to_tweet_id_read_state.sqlite"), + cache_path: String::from("/tmp/scootaloo"), + }; + + init_db(&scootaloo_config).unwrap(); + + let conn = Connection::open(&scootaloo_config.db_path).unwrap(); + + conn.execute( + "INSERT INTO tweet_to_toot (tweet_id, toot_id) + VALUES + (101, 'A'), + (102, 'B');", + [], + ).unwrap(); + + let t_out = read_state(&conn, None).unwrap().unwrap(); + + remove_file(&scootaloo_config.db_path).unwrap(); + + assert_eq!(t_out.tweet_id, 102); + assert_eq!(t_out.toot_id, "B"); + } + + #[test] + fn test_none_to_none_read_state() { + let scootaloo_config = ScootalooConfig { + db_path: String::from("/tmp/test_none_to_none_read_state.sqlite"), + cache_path: String::from("/tmp/scootaloo"), + }; + + init_db(&scootaloo_config).unwrap(); + + let conn = Connection::open(&scootaloo_config.db_path).unwrap(); + + let t_out = read_state(&conn, None).unwrap(); + + remove_file(&scootaloo_config.db_path).unwrap(); + + assert!(t_out.is_none()); + } + + #[test] + fn test_tweet_id_to_none_read_state() { + let scootaloo_config = ScootalooConfig { + db_path: String::from("/tmp/test_tweet_id_to_none_read_state.sqlite"), + cache_path: String::from("/tmp/scootaloo"), + }; + + init_db(&scootaloo_config).unwrap(); + + let conn = Connection::open(&scootaloo_config.db_path).unwrap(); + + conn.execute( + "INSERT INTO tweet_to_toot (tweet_id, toot_id) + VALUES + (100, 'A');", + [], + ).unwrap(); + + let t_out = read_state(&conn, Some(101)).unwrap(); + + remove_file(&scootaloo_config.db_path).unwrap(); + + assert!(t_out.is_none()); + } + + #[test] + fn test_tweet_id_to_tweet_id_read_state() { + let scootaloo_config = ScootalooConfig { + db_path: String::from("/tmp/test_tweet_id_to_tweet_id_read_state.sqlite"), + cache_path: String::from("/tmp/scootaloo"), + }; + + init_db(&scootaloo_config).unwrap(); + + let conn = Connection::open(&scootaloo_config.db_path).unwrap(); + + conn.execute( + "INSERT INTO tweet_to_toot (tweet_id, toot_id) + VALUES + (100, 'A');", + [], + ).unwrap(); + + let t_out = read_state(&conn, Some(100)).unwrap().unwrap(); + + remove_file(&scootaloo_config.db_path).unwrap(); + + assert_eq!(t_out.tweet_id, 100); + assert_eq!(t_out.toot_id, "A"); + } } From 26491f146f6d366c49f548560ae2106cb25e3dc6 Mon Sep 17 00:00:00 2001 From: VC Date: Sun, 24 Apr 2022 10:02:45 +0200 Subject: [PATCH 07/12] refactor: replace scootaloo_config with &str in init_db() --- README.md | 12 +++++-- src/main.rs | 2 +- src/state.rs | 95 ++++++++++++++++++++++++++-------------------------- 3 files changed, 58 insertions(+), 51 deletions(-) diff --git a/README.md b/README.md index 0a17168..2a92071 100644 --- a/README.md +++ b/README.md @@ -34,6 +34,8 @@ Then run the command with the `init` subcommand to initiate the DB: scootaloo init ``` +This subcommand is completely idempotent. + Then run the command with the `register` subcommand: ```sh scootaloo register --host https://m.nintendojo.fr @@ -76,9 +78,13 @@ SUBCOMMANDS: Scootaloo does not respect the spam limits imposed by Mastodon: it will make a 429 error if too much Tweets are converted to Toots in a short amount of time (and it will not recover from it). By default, it gets the last 200 tweets from the user timeline (which is a lot!). It is recommended to put a Tweet number into the DB file before copying an old account. -You can can insert it like this: +You can insert that Tweet number, by connecting to the DB you created: ```sh sqlite3 /var/lib/scootaloo/scootaloo.sqlite -INSERT INTO tweet_to_toot VALUES (1383782580412030982, ""); -.quit +``` + +And inserting the data: + +```sql +INSERT INTO tweet_to_toot VALUES (1383782580412030982, ""); ``` diff --git a/src/main.rs b/src/main.rs index 60d22ad..d31f57c 100644 --- a/src/main.rs +++ b/src/main.rs @@ -54,7 +54,7 @@ fn main() { }, ("init", Some(sub_m)) => { let config = parse_toml(sub_m.value_of("config").unwrap_or(DEFAULT_CONFIG_PATH)); - init_db(&config.scootaloo).unwrap(); + init_db(&config.scootaloo.db_path).unwrap(); return; }, _ => (), diff --git a/src/state.rs b/src/state.rs index 93c103d..8b9b569 100644 --- a/src/state.rs +++ b/src/state.rs @@ -1,5 +1,3 @@ -use crate::config::ScootalooConfig; - use std::error::Error; use log::debug; use rusqlite::{Connection, params, OptionalExtension}; @@ -45,9 +43,9 @@ pub fn write_state(conn: &Connection, t: TweetToToot) -> Result<(), Box Result<(), Box> { +pub fn init_db(d: &str) -> Result<(), Box> { debug!("Initializing DB for Scootaloo"); - let conn = Connection::open(&config.db_path)?; + let conn = Connection::open(d)?; conn.execute( "CREATE TABLE IF NOT EXISTS tweet_to_toot ( @@ -70,36 +68,51 @@ mod tests { #[test] fn test_init_db() { - let scootaloo_config = ScootalooConfig { - db_path: String::from("/tmp/test_init_db.sqlite"), - cache_path: String::from("/tmp/scootaloo"), - }; + let d = "/tmp/test_init_db.sqlite"; - init_db(&scootaloo_config).unwrap(); + init_db(d).unwrap(); // check that file exist - assert!(Path::new(&scootaloo_config.db_path).exists()); + assert!(Path::new(d).exists()); // open said file - let conn = Connection::open(&scootaloo_config.db_path).unwrap(); + let conn = Connection::open(d).unwrap(); conn.execute( "SELECT * from tweet_to_toot;", [], ).unwrap(); - remove_file(scootaloo_config.db_path).unwrap(); + remove_file(d).unwrap(); + } + + #[test] + fn test_init_init_db() { + // init_db fn should be idempotent so let’s test that + let d = "/tmp/test_init_init_db.sqlite"; + + init_db(d).unwrap(); + + let conn = Connection::open(d).unwrap(); + + conn.execute( + "INSERT INTO tweet_to_toot + VALUES + (100, 'A');", + [], + ).unwrap(); + + init_db(d).unwrap(); + + remove_file(d).unwrap(); } #[test] fn test_write_state() { - let scootaloo_config = ScootalooConfig { - db_path: String::from("/tmp/test_write_state.sqlite"), - cache_path: String::from("/tmp/scootaloo"), - }; + let d = "/tmp/test_write_state.sqlite"; - init_db(&scootaloo_config).unwrap(); + init_db(d).unwrap(); - let conn = Connection::open(&scootaloo_config.db_path).unwrap(); + let conn = Connection::open(d).unwrap(); let t_in = TweetToToot { tweet_id: 123456789, @@ -120,19 +133,16 @@ mod tests { assert_eq!(t_out.tweet_id, 123456789); assert_eq!(t_out.toot_id, String::from("987654321")); - remove_file(&scootaloo_config.db_path).unwrap(); + remove_file(d).unwrap(); } #[test] fn test_none_to_tweet_id_read_state() { - let scootaloo_config = ScootalooConfig { - db_path: String::from("/tmp/test_none_to_tweet_id_read_state.sqlite"), - cache_path: String::from("/tmp/scootaloo"), - }; + let d = "/tmp/test_none_to_tweet_id_read_state.sqlite"; - init_db(&scootaloo_config).unwrap(); + init_db(d).unwrap(); - let conn = Connection::open(&scootaloo_config.db_path).unwrap(); + let conn = Connection::open(d).unwrap(); conn.execute( "INSERT INTO tweet_to_toot (tweet_id, toot_id) @@ -144,7 +154,7 @@ mod tests { let t_out = read_state(&conn, None).unwrap().unwrap(); - remove_file(&scootaloo_config.db_path).unwrap(); + remove_file(d).unwrap(); assert_eq!(t_out.tweet_id, 102); assert_eq!(t_out.toot_id, "B"); @@ -152,32 +162,26 @@ mod tests { #[test] fn test_none_to_none_read_state() { - let scootaloo_config = ScootalooConfig { - db_path: String::from("/tmp/test_none_to_none_read_state.sqlite"), - cache_path: String::from("/tmp/scootaloo"), - }; + let d = "/tmp/test_none_to_none_read_state.sqlite"; - init_db(&scootaloo_config).unwrap(); + init_db(d).unwrap(); - let conn = Connection::open(&scootaloo_config.db_path).unwrap(); + let conn = Connection::open(d).unwrap(); let t_out = read_state(&conn, None).unwrap(); - remove_file(&scootaloo_config.db_path).unwrap(); + remove_file(d).unwrap(); assert!(t_out.is_none()); } #[test] fn test_tweet_id_to_none_read_state() { - let scootaloo_config = ScootalooConfig { - db_path: String::from("/tmp/test_tweet_id_to_none_read_state.sqlite"), - cache_path: String::from("/tmp/scootaloo"), - }; + let d = "/tmp/test_tweet_id_to_none_read_state.sqlite"; - init_db(&scootaloo_config).unwrap(); + init_db(d).unwrap(); - let conn = Connection::open(&scootaloo_config.db_path).unwrap(); + let conn = Connection::open(d).unwrap(); conn.execute( "INSERT INTO tweet_to_toot (tweet_id, toot_id) @@ -188,21 +192,18 @@ mod tests { let t_out = read_state(&conn, Some(101)).unwrap(); - remove_file(&scootaloo_config.db_path).unwrap(); + remove_file(d).unwrap(); assert!(t_out.is_none()); } #[test] fn test_tweet_id_to_tweet_id_read_state() { - let scootaloo_config = ScootalooConfig { - db_path: String::from("/tmp/test_tweet_id_to_tweet_id_read_state.sqlite"), - cache_path: String::from("/tmp/scootaloo"), - }; + let d = "/tmp/test_tweet_id_to_tweet_id_read_state.sqlite"; - init_db(&scootaloo_config).unwrap(); + init_db(d).unwrap(); - let conn = Connection::open(&scootaloo_config.db_path).unwrap(); + let conn = Connection::open(d).unwrap(); conn.execute( "INSERT INTO tweet_to_toot (tweet_id, toot_id) @@ -213,7 +214,7 @@ mod tests { let t_out = read_state(&conn, Some(100)).unwrap().unwrap(); - remove_file(&scootaloo_config.db_path).unwrap(); + remove_file(d).unwrap(); assert_eq!(t_out.tweet_id, 100); assert_eq!(t_out.toot_id, "A"); From 22402f0f46a353deb345391df02ec2c99efd6c2a Mon Sep 17 00:00:00 2001 From: VC Date: Sun, 24 Apr 2022 11:01:46 +0200 Subject: [PATCH 08/12] refactor: optimize import and last_tweet_id var --- src/lib.rs | 5 +---- src/mastodon.rs | 1 - src/twitter.rs | 4 ---- src/util.rs | 5 ----- 4 files changed, 1 insertion(+), 14 deletions(-) diff --git a/src/lib.rs b/src/lib.rs index 779cb7e..bb50934 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -33,10 +33,7 @@ pub async fn run(config: Config) { // open the SQLite connection let conn = Connection::open(&config.scootaloo.db_path).unwrap(); // retrieve the last tweet ID for the username - let last_tweet_id = match read_state(&conn, None).unwrap() { - Some(i) => Some(i.tweet_id), - None => None, - }; + let last_tweet_id = read_state(&conn, None).unwrap().map(|s| s.tweet_id); // get OAuth2 token let token = get_oauth2_token(&config.twitter); diff --git a/src/mastodon.rs b/src/mastodon.rs index d2a51a1..55f319e 100644 --- a/src/mastodon.rs +++ b/src/mastodon.rs @@ -17,7 +17,6 @@ use elefren::{ scopes::Scopes, }; - /// Decodes the Twitter mention to something that will make sense once Twitter has joined the /// Fediverse fn twitter_mentions(ums: &Vec) -> HashMap { diff --git a/src/twitter.rs b/src/twitter.rs index 5821bb0..5475f68 100644 --- a/src/twitter.rs +++ b/src/twitter.rs @@ -1,12 +1,8 @@ -// auto-imports use crate::ScootalooError; use crate::config::TwitterConfig; use crate::util::cache_media; -// std use std::error::Error; - -// egg-mode use egg_mode::{ Token, KeyPair, diff --git a/src/util.rs b/src/util.rs index 7e220fe..f665882 100644 --- a/src/util.rs +++ b/src/util.rs @@ -1,11 +1,6 @@ -// std use std::error::Error; use crate::ScootalooError; - -// reqwest use reqwest::Url; - -// tokio use tokio::{ io::copy, fs::{File, create_dir_all}, From a90facae866ec7825f02509c982e9afc6d8396e7 Mon Sep 17 00:00:00 2001 From: VC Date: Sun, 24 Apr 2022 11:14:32 +0200 Subject: [PATCH 09/12] refactor: refactor run() fn to be more efficient/more clear --- src/lib.rs | 31 +++++++++++++++++++------------ 1 file changed, 19 insertions(+), 12 deletions(-) diff --git a/src/lib.rs b/src/lib.rs index bb50934..49f454c 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -31,9 +31,16 @@ use rusqlite::Connection; #[tokio::main] pub async fn run(config: Config) { // open the SQLite connection - let conn = Connection::open(&config.scootaloo.db_path).unwrap(); + let conn = Connection::open(&config.scootaloo.db_path) + .unwrap_or_else(|e| + panic!("Something went wrong when opening the DB {}: {}", &config.scootaloo.db_path, e) + ); // retrieve the last tweet ID for the username - let last_tweet_id = read_state(&conn, None).unwrap().map(|s| s.tweet_id); + let last_tweet_id = read_state(&conn, None) + .unwrap_or_else(|e| + panic!("Cannot retrieve last_tweet_id: {}", e) + ) + .map(|s| s.tweet_id); // get OAuth2 token let token = get_oauth2_token(&config.twitter); @@ -68,11 +75,10 @@ pub async fn run(config: Config) { info!("Tweet is a direct response, skipping"); continue; } - - let searched_toot = read_state(&conn, tweet.in_reply_to_status_id).unwrap_or(None); - if let Some(i) = searched_toot { - toot_reply_id = Some(i.toot_id); - }; + info!("Tweet is a thread"); + toot_reply_id = read_state(&conn, tweet.in_reply_to_status_id) + .unwrap_or(None) + .map(|s| s.toot_id); }; // build basic status by just yielding text and dereferencing contained urls @@ -85,7 +91,6 @@ pub async fn run(config: Config) { }; let mut status_medias: Vec = vec![]; - // reupload the attachments if any if let Some(m) = &tweet.extended_entities { for media in &m.media { @@ -99,9 +104,11 @@ pub async fn run(config: Config) { let mastodon_media_ids = match mastodon.media(Cow::from(String::from(&local_tweet_media_path))) { Ok(m) => { - remove_file(&local_tweet_media_path).await.unwrap_or_else(|e| - warn!("Attachment for {} has been uploaded, but I’m unable to remove the existing file: {}", &local_tweet_media_path, e) - ); + remove_file(&local_tweet_media_path) + .await + .unwrap_or_else(|e| + warn!("Attachment for {} has been uploaded, but I’m unable to remove the existing file: {}", &local_tweet_media_path, e) + ); m.id }, Err(e) => { @@ -143,7 +150,7 @@ pub async fn run(config: Config) { toot_id: published_status.id, }; - // write the current state (tweet ID) to avoid copying it another time + // write the current state (tweet ID and toot ID) to avoid copying it another time write_state(&conn, ttt_towrite).unwrap_or_else(|e| panic!("Can’t write the last tweet retrieved: {}", e) ); From 6c0383d9d0b3f0770a13a661127b08ec45d234c3 Mon Sep 17 00:00:00 2001 From: VC Date: Sun, 24 Apr 2022 11:57:40 +0200 Subject: [PATCH 10/12] refactor: build better decode functions --- src/mastodon.rs | 27 ++++++++------------------- 1 file changed, 8 insertions(+), 19 deletions(-) diff --git a/src/mastodon.rs b/src/mastodon.rs index 55f319e..ec079c3 100644 --- a/src/mastodon.rs +++ b/src/mastodon.rs @@ -20,27 +20,18 @@ use elefren::{ /// Decodes the Twitter mention to something that will make sense once Twitter has joined the /// Fediverse fn twitter_mentions(ums: &Vec) -> HashMap { - let mut decoded_mentions = HashMap::new(); - - for um in ums { - decoded_mentions.insert(format!("@{}", um.screen_name), format!("@{}@twitter.com", um.screen_name)); - } - - decoded_mentions + ums.iter().map(|s| + (format!("@{}", s.screen_name), format!("@{}@twitter.com", s.screen_name)) + ).collect() } /// Decodes urls from UrlEntities fn decode_urls(urls: &Vec) -> HashMap { - let mut decoded_urls = HashMap::new(); - - for url in urls { - if url.expanded_url.is_some() { - // unwrap is safe here as we just verified that there is something inside expanded_url - decoded_urls.insert(String::from(&url.url), String::from(url.expanded_url.as_deref().unwrap())); - } - } - - decoded_urls + urls.iter() + .filter(|s| s.expanded_url.is_some()) + .map(|s| + (String::from(&s.url), String::from(s.expanded_url.as_deref().unwrap())) + ).collect() } /// Gets Mastodon Data @@ -61,13 +52,11 @@ pub fn build_basic_status(tweet: &Tweet) -> Result> { let mut toot = String::from(&tweet.text); let decoded_urls = decode_urls(&tweet.entities.urls); - for decoded_url in decoded_urls { toot = toot.replace(&decoded_url.0, &decoded_url.1); } let decoded_mentions = twitter_mentions(&tweet.entities.user_mentions); - for decoded_mention in decoded_mentions { toot = toot.replace(&decoded_mention.0, &decoded_mention.1); } From 734f03f5a9afbf925ab86b41dd91377be2d17ff3 Mon Sep 17 00:00:00 2001 From: VC Date: Sun, 24 Apr 2022 14:06:46 +0200 Subject: [PATCH 11/12] feature: add test for build_basic_status() fn --- Cargo.lock | 18 +++++++++--- Cargo.toml | 3 +- src/lib.rs | 8 +---- src/mastodon.rs | 78 +++++++++++++++++++++++++++++++++++++++++-------- 4 files changed, 83 insertions(+), 24 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 3924ec5..5344a3b 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -880,10 +880,13 @@ dependencies = [ ] [[package]] -name = "htmlescape" -version = "0.3.1" +name = "html-escape" +version = "0.2.11" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e9025058dae765dee5070ec375f591e2ba14638c63feff74f13805a72e523163" +checksum = "b8e7479fa1ef38eb49fb6a42c426be515df2d063f06cb8efd3e50af073dbc26c" +dependencies = [ + "utf8-width", +] [[package]] name = "http" @@ -2015,11 +2018,12 @@ dependencies = [ name = "scootaloo" version = "0.5.0" dependencies = [ + "chrono", "clap", "egg-mode", "elefren", "futures 0.3.14", - "htmlescape", + "html-escape", "log", "reqwest 0.11.3", "rusqlite", @@ -2744,6 +2748,12 @@ version = "0.7.6" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "09cc8ee72d2a9becf2f2febe0205bbed8fc6615b7cb429ad062dc7b7ddd036a9" +[[package]] +name = "utf8-width" +version = "0.1.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5190c9442dcdaf0ddd50f37420417d219ae5261bbf5db120d0f9bab996c9cba1" + [[package]] name = "uuid" version = "0.7.4" diff --git a/Cargo.toml b/Cargo.toml index 2aefd55..4de7cf6 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -7,6 +7,7 @@ edition = "2021" # See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html [dependencies] +chrono = "^0.4" serde = { version = "1.0", features = ["derive"] } toml = "^0.5" clap = "^2.34" @@ -15,7 +16,7 @@ egg-mode = "^0.16" rusqlite = "^0.27" tokio = { version = "1", features = ["full"]} elefren = "^0.22" -htmlescape = "^0.3" +html-escape = "^0.2" reqwest = "^0.11" log = "^0.4" simple_logger = "^2.1" diff --git a/src/lib.rs b/src/lib.rs index 49f454c..4d98a9a 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -82,13 +82,7 @@ pub async fn run(config: Config) { }; // build basic status by just yielding text and dereferencing contained urls - let mut status_text = match build_basic_status(tweet) { - Ok(t) => t, - Err(e) => { - error!("Could not create status from tweet {}: {}", tweet.id ,e); - continue; - }, - }; + let mut status_text = build_basic_status(tweet); let mut status_medias: Vec = vec![]; // reupload the attachments if any diff --git a/src/mastodon.rs b/src/mastodon.rs index ec079c3..5033728 100644 --- a/src/mastodon.rs +++ b/src/mastodon.rs @@ -2,11 +2,10 @@ use crate::config::MastodonConfig; use std::{ borrow::Cow, - error::Error, collections::HashMap, io::stdin, }; -use htmlescape::decode_html; +use html_escape::decode_html_entities; use egg_mode::{ tweet::Tweet, entities::{UrlEntity, MentionEntity}, @@ -48,24 +47,18 @@ pub fn get_mastodon_token(masto: &MastodonConfig) -> Mastodon { } /// Builds toot text from tweet -pub fn build_basic_status(tweet: &Tweet) -> Result> { +pub fn build_basic_status(tweet: &Tweet) -> String { let mut toot = String::from(&tweet.text); - let decoded_urls = decode_urls(&tweet.entities.urls); - for decoded_url in decoded_urls { + for decoded_url in decode_urls(&tweet.entities.urls) { toot = toot.replace(&decoded_url.0, &decoded_url.1); } - let decoded_mentions = twitter_mentions(&tweet.entities.user_mentions); - for decoded_mention in decoded_mentions { + for decoded_mention in twitter_mentions(&tweet.entities.user_mentions) { toot = toot.replace(&decoded_mention.0, &decoded_mention.1); } - if let Ok(t) = decode_html(&toot) { - toot = t; - } - - Ok(toot) + decode_html_entities(&toot).to_string() } /// Generic register function @@ -101,6 +94,9 @@ pub fn register(host: &str) { mod tests { use super::*; + use egg_mode::tweet::TweetEntities; + use chrono::prelude::*; + #[test] fn test_twitter_mentions() { let mention_entity = MentionEntity { @@ -145,5 +141,63 @@ mod tests { assert_eq!(expected_urls, decoded_urls); } + + #[test] + fn test_build_basic_status() { + let t = Tweet { + coordinates: None, + created_at: Utc::now(), + current_user_retweet: None, + display_text_range: None, + entities: TweetEntities { + hashtags: vec![], + symbols: vec![], + urls: vec![ + UrlEntity { + display_url: "youtube.com/watch?v=w5TrSa…".to_string(), + expanded_url: Some("https://www.youtube.com/watch?v=w5TrSaoYmZ8".to_string()), + range: (93, 116), + url: "https://t.co/zXw0FfX2Nt".to_string(), + } + ], + user_mentions: vec![ + MentionEntity { + id: 491500016, + range: (80, 95), + name: "Nintendo France".to_string(), + screen_name: "NintendoFrance".to_string(), + } + ], + media: None, + }, + extended_entities: None, + favorite_count: 0, + favorited: None, + filter_level: None, + id: 1491541246984306693, + in_reply_to_user_id: None, + in_reply_to_screen_name: None, + in_reply_to_status_id: None, + lang: None, + place: None, + possibly_sensitive: None, + quoted_status: None, + quoted_status_id: None, + retweet_count: 0, + retweeted: None, + retweeted_status: None, + source: None, + text: "Mother 1 & 2 sur le NES/SNES online !\nDispo maintenant. cc @NintendoFrance https://t.co/zXw0FfX2Nt".to_string(), + truncated: false, + user: None, + withheld_copyright: false, + withheld_in_countries: None, + withheld_scope: None, + }; + + let t_out = build_basic_status(&t); + + assert_eq!(&t_out, "Mother 1 & 2 sur le NES/SNES online !\nDispo maintenant. cc @NintendoFrance@twitter.com https://www.youtube.com/watch?v=w5TrSaoYmZ8"); + } } From 905793af722d04d2a348b612c727215bfe002e24 Mon Sep 17 00:00:00 2001 From: VC Date: Sun, 24 Apr 2022 14:20:45 +0200 Subject: [PATCH 12/12] refactor(fmt): delete String::from() format in favor of .to_string()/to_owned() --- src/error.rs | 2 +- src/lib.rs | 2 +- src/mastodon.rs | 38 +++++++++++++++++++------------------- src/state.rs | 6 +++--- src/twitter.rs | 6 +++--- 5 files changed, 27 insertions(+), 27 deletions(-) diff --git a/src/error.rs b/src/error.rs index 015294c..a44482c 100644 --- a/src/error.rs +++ b/src/error.rs @@ -8,7 +8,7 @@ pub struct ScootalooError { impl ScootalooError { pub fn new(msg: &str) -> ScootalooError { ScootalooError { - details: String::from(msg), + details: msg.to_string(), } } } diff --git a/src/lib.rs b/src/lib.rs index 4d98a9a..732d231 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -96,7 +96,7 @@ pub async fn run(config: Config) { }, }; - let mastodon_media_ids = match mastodon.media(Cow::from(String::from(&local_tweet_media_path))) { + let mastodon_media_ids = match mastodon.media(Cow::from(local_tweet_media_path.to_owned())) { Ok(m) => { remove_file(&local_tweet_media_path) .await diff --git a/src/mastodon.rs b/src/mastodon.rs index 5033728..e46e79a 100644 --- a/src/mastodon.rs +++ b/src/mastodon.rs @@ -29,18 +29,18 @@ fn decode_urls(urls: &Vec) -> HashMap { urls.iter() .filter(|s| s.expanded_url.is_some()) .map(|s| - (String::from(&s.url), String::from(s.expanded_url.as_deref().unwrap())) + (s.url.to_owned(), s.expanded_url.as_deref().unwrap().to_owned()) ).collect() } /// Gets Mastodon Data pub fn get_mastodon_token(masto: &MastodonConfig) -> Mastodon { let data = Data { - base: Cow::from(String::from(&masto.base)), - client_id: Cow::from(String::from(&masto.client_id)), - client_secret: Cow::from(String::from(&masto.client_secret)), - redirect: Cow::from(String::from(&masto.redirect)), - token: Cow::from(String::from(&masto.token)), + base: Cow::from(masto.base.to_owned()), + client_id: Cow::from(masto.client_id.to_owned()), + client_secret: Cow::from(masto.client_secret.to_owned()), + redirect: Cow::from(masto.redirect.to_owned()), + token: Cow::from(masto.token.to_owned()), }; Mastodon::from(data) @@ -48,7 +48,7 @@ pub fn get_mastodon_token(masto: &MastodonConfig) -> Mastodon { /// Builds toot text from tweet pub fn build_basic_status(tweet: &Tweet) -> String { - let mut toot = String::from(&tweet.text); + let mut toot = tweet.text.to_owned(); for decoded_url in decode_urls(&tweet.entities.urls) { toot = toot.replace(&decoded_url.0, &decoded_url.1); @@ -66,10 +66,10 @@ pub fn build_basic_status(tweet: &Tweet) -> String { /// Most of this function is a direct copy/paste of the official `elefren` crate pub fn register(host: &str) { let mut builder = App::builder(); - builder.client_name(Cow::from(String::from(env!("CARGO_PKG_NAME")))) - .redirect_uris(Cow::from(String::from("urn:ietf:wg:oauth:2.0:oob"))) + builder.client_name(Cow::from(env!("CARGO_PKG_NAME").to_string())) + .redirect_uris(Cow::from("urn:ietf:wg:oauth:2.0:oob".to_string())) .scopes(Scopes::write_all()) - .website(Cow::from(String::from("https://framagit.org/veretcle/scootaloo"))); + .website(Cow::from("https://framagit.org/veretcle/scootaloo".to_string())); let app = builder.build().expect("Cannot build the app"); @@ -102,14 +102,14 @@ mod tests { let mention_entity = MentionEntity { id: 12345, range: (1, 3), - name: String::from("Ta Mere l0l"), - screen_name: String::from("tamerelol"), + name: "Ta Mere l0l".to_string(), + screen_name: "tamerelol".to_string(), }; let twitter_ums = vec![mention_entity]; let mut expected_mentions = HashMap::new(); - expected_mentions.insert(String::from("@tamerelol"), String::from("@tamerelol@twitter.com")); + expected_mentions.insert("@tamerelol".to_string(), "@tamerelol@twitter.com".to_string()); let decoded_mentions = twitter_mentions(&twitter_ums); @@ -119,23 +119,23 @@ mod tests { #[test] fn test_decode_urls() { let url_entity1 = UrlEntity { - display_url: String::from("tamerelol"), - expanded_url: Some(String::from("https://www.nintendojo.fr/dojobar")), + display_url: "tamerelol".to_string(), + expanded_url: Some("https://www.nintendojo.fr/dojobar".to_string()), range: (1, 3), - url: String::from("https://t.me/tamerelol"), + url: "https://t.me/tamerelol".to_string(), }; let url_entity2 = UrlEntity { - display_url: String::from("tamerelol"), + display_url: "tamerelol".to_string(), expanded_url: None, range: (1, 3), - url: String::from("https://t.me/tamerelol"), + url: "https://t.me/tamerelol".to_string(), }; let twitter_urls = vec![url_entity1, url_entity2]; let mut expected_urls = HashMap::new(); - expected_urls.insert(String::from("https://t.me/tamerelol"), String::from("https://www.nintendojo.fr/dojobar")); + expected_urls.insert("https://t.me/tamerelol".to_string(), "https://www.nintendojo.fr/dojobar".to_string()); let decoded_urls = decode_urls(&twitter_urls); diff --git a/src/state.rs b/src/state.rs index 8b9b569..011a3ad 100644 --- a/src/state.rs +++ b/src/state.rs @@ -16,7 +16,7 @@ pub fn read_state(conn: &Connection, s: Option) -> Result query = format!("SELECT * FROM tweet_to_toot WHERE tweet_id = {}", i), - None => query = String::from("SELECT * FROM tweet_to_toot ORDER BY tweet_id DESC LIMIT 1"), + None => query = "SELECT * FROM tweet_to_toot ORDER BY tweet_id DESC LIMIT 1".to_string(), }; let mut stmt = conn.prepare(&query)?; @@ -116,7 +116,7 @@ mod tests { let t_in = TweetToToot { tweet_id: 123456789, - toot_id: String::from("987654321"), + toot_id: "987654321".to_string(), }; write_state(&conn, t_in).unwrap(); @@ -131,7 +131,7 @@ mod tests { }).unwrap(); assert_eq!(t_out.tweet_id, 123456789); - assert_eq!(t_out.toot_id, String::from("987654321")); + assert_eq!(t_out.toot_id, "987654321".to_string()); remove_file(d).unwrap(); } diff --git a/src/twitter.rs b/src/twitter.rs index 5475f68..cdeb66b 100644 --- a/src/twitter.rs +++ b/src/twitter.rs @@ -16,8 +16,8 @@ use egg_mode::{ /// Gets Twitter oauth2 token pub fn get_oauth2_token(config: &TwitterConfig) -> Token { - let con_token = KeyPair::new(String::from(&config.consumer_key), String::from(&config.consumer_secret)); - let access_token = KeyPair::new(String::from(&config.access_key), String::from(&config.access_secret)); + let con_token = KeyPair::new(config.consumer_key.to_owned(),config.consumer_secret.to_owned()); + let access_token = KeyPair::new(config.access_key.to_owned(), config.access_secret.to_owned()); Token::Access { consumer: con_token, @@ -28,7 +28,7 @@ pub fn get_oauth2_token(config: &TwitterConfig) -> Token { /// Gets Twitter user timeline pub async fn get_user_timeline(config: &TwitterConfig, token: Token, lid: Option) -> Result, Box> { // fix the page size to 200 as it is the maximum Twitter authorizes - let (_, feed) = user_timeline(UserID::from(String::from(&config.username)), true, false, &token) + let (_, feed) = user_timeline(UserID::from(config.username.to_owned()), true, false, &token) .with_page_size(200) .older(lid) .await?;