24 Commits

Author SHA1 Message Date
VC
11b629203b Merge branch 'bump_versions' into 'master'
Bump versions of all dependencies

See merge request veretcle/scootaloo!15
2022-04-22 08:31:36 +00:00
VC
16792e515a refactor(ci): refactor .gitlab-ci to add tests 2022-04-22 09:41:53 +02:00
VC
d228ceaaf6 refactor(dependencies): bump version of all dependencies 2022-04-22 09:41:48 +02:00
VC
bd7d4dbbb5 Merge branch 'fix/unused_struct' into 'master'
fix: remove unused struct

See merge request veretcle/scootaloo!13
2022-04-12 11:44:04 +00:00
VC
ff03b32f9d fix: remove unused struct 2022-04-12 12:55:40 +02:00
VC
533a40f2c2 Merge branch 'noasync' into 'master'
getting async in a reasonnable way

See merge request veretcle/scootaloo!12
2021-04-25 05:32:09 +00:00
VC
c301649d49 last correction 2022-04-12 12:03:49 +02:00
VC
fd9cc31848 Update src/lib.rs 2021-04-24 08:04:52 +00:00
VC
4ef58bda0a Deleted Cargo.lock 2021-04-24 07:58:52 +00:00
VC
912ee25c50 Merge branch 'noasync' of framagit.org:veretcle/scootaloo into noasync 2021-04-24 09:45:50 +02:00
VC
4f03a1a6f3 Adding Cargo.lock 2021-04-24 09:43:51 +02:00
VC
ac80b67c9f Merge branch 'master' into 'noasync'
# Conflicts:
#   .gitlab-ci.yml
#   Cargo.lock
#   Cargo.toml
#   src/lib.rs
2021-04-24 07:40:04 +00:00
VC
7aec8e0e33 adding strip 2021-04-24 09:34:37 +02:00
VC
f58edf3c75 Backporting changes on gitlab-ci.yml 2021-04-24 09:15:31 +02:00
VC
394ec5d1f3 I consider this a good compromise between all async (that works but is pretty complex and honestly a bit useless) and nothing async that is not the most optimal way to deal with things as reqwest and egg-mode are async by nature 2021-04-24 09:08:09 +02:00
VC
c10de76854 Adding logging facility 2021-04-24 09:07:06 +02:00
VC
020af69fe0 Adding the necessary bits and pieces to make things work better 2021-04-24 09:04:50 +02:00
VC
da808b0051 FUUUUUUUUUUUUUUU 2021-04-20 11:46:33 +02:00
VC
5a4dd5cb99 paths 2021-04-20 11:40:49 +02:00
VC
5b04bd27b9 Updating changelog 2021-04-20 11:39:43 +02:00
VC
c52fc52d23 Optimizing size of the final executable 2021-04-20 11:38:50 +02:00
VC
09ed837a1b Updating CHANGELOG to please @meduzen 2021-04-19 21:19:47 +02:00
VC
d4db2933ae Merge branch 'async_attempt2' into 'master'
Async version

See merge request veretcle/scootaloo!11
2021-04-18 17:00:45 +00:00
VC
2e052ebf6a Still a WIP: need to use async reqwest to respect the global context of usage (reqwest::blocking is using async inside so it does not really sync whatever) 2021-04-18 17:00:44 +00:00
7 changed files with 794 additions and 695 deletions

View File

@@ -1,10 +1,15 @@
---
stages:
- build
- build
rust-latest:
stage: build
image: rust:latest
script:
- cargo build --verbose
- cargo test --verbose
stage: build
artifacts:
paths:
- target/release/scootaloo
image: rust:latest
script:
- cargo test
- cargo build --release --verbose
- strip target/release/${CI_PROJECT_NAME}

View File

@@ -1,3 +1,23 @@
# v0.3.3
* optimizing the size of the final executable (now ⩽ 6MiB)
# v0.3.2
* 100% async version
* media are now downloaded in parallel thanks to async
* logging is introduced into the code for your viewing pleasure
# v0.2.3
* using the async version of `reqwest`
* introducing async functions and make `tokio` the de facto executor for everything async
# v0.2.1
* using `tokio-compat` to avoid having 3 different versions of `tokio` in the same executable
* encapsulating async calls inside blocking tokio runtime calls
# v0.1.8
* fix #1: mentions are treated like decoded urls (pushing it this far is not strictly needed, but it makes things easier in case you want to modify it)

1283
Cargo.lock generated

File diff suppressed because it is too large Load Diff

View File

@@ -1,25 +1,21 @@
[package]
name = "scootaloo"
version = "0.2.1"
version = "0.4.2"
authors = ["VC <veretcle+framagit@mateu.be>"]
edition = "2018"
edition = "2021"
# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html
[dependencies]
serde = { version = "1.0", features = ["derive"] }
toml = "^0.5"
clap = "^2.33"
tokio = { version = "1", features = ["rt-multi-thread"]}
tokio-compat-02 = "0.2"
egg-mode = "^0.15"
clap = "^2.34"
futures = "^0.3"
egg-mode = "^0.16"
tokio = { version = "1", features = ["full"]}
elefren = "^0.22"
reqwest = { version="^0.11", features = ["blocking"] }
htmlescape = "^0.3"
reqwest = "^0.11"
log = "^0.4"
simple_logger = "^2.1"

View File

@@ -3,7 +3,7 @@ A Twitter to Mastodon copy bot written in Rust
It:
* copies the content (text) of the original Tweet
* dereferences the links
* gets every attach media (photo, video or gif)
* gets every attached media (photo, video or gif)
If any of the last steps failed, the Toot gets published with the exact same text as the Tweet.
@@ -74,4 +74,3 @@ echo -n '8189881949849' > last_tweet
**This file should only contain the last tweet ID without any other char (no EOL or new line).**
Oh and everything is sync (and not async) so this does not run at a blazing speed…

View File

@@ -1,18 +1,13 @@
// std
use std::{
path::Path,
borrow::Cow,
collections::HashMap,
io::{stdin, copy},
io::stdin,
fmt,
fs::{read_to_string, write, create_dir_all, File, remove_file},
fs::{read_to_string, write},
error::Error,
};
//tokio
use tokio::runtime::Runtime;
use tokio_compat_02::FutureExt;
// toml
use serde::Deserialize;
@@ -37,34 +32,44 @@ use elefren::{
};
// reqwest
use reqwest::blocking::Client;
use reqwest::Url;
// tokio
use tokio::{
io::copy,
fs::{File, create_dir_all, remove_file},
};
// htmlescape
use htmlescape::decode_html;
// log
use log::{info, warn, error, debug};
/**********
* Generic usage functions
***********/
/*
* Those functions are related to the Twitter side of things
*/
/// Read last tweet id from a file
/// Reads the last processed tweet id from the state file at path `s`.
///
/// Returns `None` when the file cannot be read or when its content is not a
/// valid `u64`. Surrounding whitespace (for example a trailing newline left by
/// a text editor or a plain `echo`) is ignored so that it does not invalidate
/// an otherwise valid id.
fn read_state(s: &str) -> Option<u64> {
    let content = read_to_string(s).ok()?;
    debug!("Last Tweet ID (from file): {}", &content);

    // `u64::from_str` rejects any whitespace, so trim before parsing
    content.trim().parse::<u64>().ok()
}
/// Write last treated tweet id to a file
/// Persists the id of the last treated tweet `s` into the file at path `f`.
///
/// The id is stored as plain decimal text without any trailing newline; any
/// I/O failure is handed back to the caller.
fn write_state(f: &str, s: u64) -> Result<(), std::io::Error> {
    let serialized = s.to_string();
    write(f, serialized)
}
/// Get twitter oauth2 token
/// Gets Twitter oauth2 token
fn get_oauth2_token(config: &Config) -> Token {
let con_token = KeyPair::new(String::from(&config.twitter.consumer_key), String::from(&config.twitter.consumer_secret));
let access_token = KeyPair::new(String::from(&config.twitter.access_key), String::from(&config.twitter.access_secret));
@@ -75,19 +80,18 @@ fn get_oauth2_token(config: &Config) -> Token {
}
}
/// Get twitter user timeline
fn get_user_timeline(config: &Config, token: Token, lid: Option<u64>) -> Result<Vec<Tweet>, Box<dyn Error>> {
/// Gets Twitter user timeline
async fn get_user_timeline(config: &Config, token: Token, lid: Option<u64>) -> Result<Vec<Tweet>, Box<dyn Error>> {
// fix the page size to 200 as it is the maximum Twitter authorizes
let rt = Runtime::new()?;
let (_timeline, feed) = rt.block_on(user_timeline(UserID::from(String::from(&config.twitter.username)), true, false, &token)
let (_, feed) = user_timeline(UserID::from(String::from(&config.twitter.username)), true, false, &token)
.with_page_size(200)
.older(lid)
.compat())?;
.await?;
Ok(feed.to_vec())
}
/// decode urls from UrlEntities
/// Decodes urls from UrlEntities
fn decode_urls(urls: &Vec<UrlEntity>) -> HashMap<String, String> {
let mut decoded_urls = HashMap::new();
@@ -101,6 +105,8 @@ fn decode_urls(urls: &Vec<UrlEntity>) -> HashMap<String, String> {
decoded_urls
}
/// Decodes the Twitter mention to something that will make sense once Twitter has joined the
/// Fediverse
fn twitter_mentions(ums: &Vec<MentionEntity>) -> HashMap<String, String> {
let mut decoded_mentions = HashMap::new();
@@ -111,24 +117,24 @@ fn twitter_mentions(ums: &Vec<MentionEntity>) -> HashMap<String, String> {
decoded_mentions
}
/// Retrieve a single media from a tweet and store it in a temporary file
fn get_tweet_media(m: &MediaEntity, t: &str) -> Result<String, Box<dyn Error>> {
/// Retrieves a single media from a tweet and stores it in a temporary file
async fn get_tweet_media(m: &MediaEntity, t: &str) -> Result<String, Box<dyn Error>> {
match m.media_type {
MediaType::Photo => {
return cache_media(&m.media_url_https, t);
return cache_media(&m.media_url_https, t).await;
},
_ => {
match &m.video_info {
Some(v) => {
for variant in &v.variants {
if variant.content_type == "video/mp4" {
return cache_media(&variant.url, t);
return cache_media(&variant.url, t).await;
}
}
return Err(Box::new(ScootalooError::new(format!("Media Type for {} is video but no mp4 file URL is available", &m.url).as_str())));
return Err(ScootalooError::new(&format!("Media Type for {} is video but no mp4 file URL is available", &m.url)).into());
},
None => {
return Err(Box::new(ScootalooError::new(format!("Media Type for {} is video but does not contain any video_info", &m.url).as_str())));
return Err(ScootalooError::new(&format!("Media Type for {} is video but does not contain any video_info", &m.url)).into());
},
}
},
@@ -138,7 +144,7 @@ fn get_tweet_media(m: &MediaEntity, t: &str) -> Result<String, Box<dyn Error>> {
/*
* Those functions are related to the Mastodon side of things
*/
/// Get Mastodon Data
/// Gets Mastodon Data
fn get_mastodon_token(masto: &MastodonConfig) -> Mastodon {
let data = Data {
base: Cow::from(String::from(&masto.base)),
@@ -151,7 +157,7 @@ fn get_mastodon_token(masto: &MastodonConfig) -> Mastodon {
Mastodon::from(data)
}
/// build toot text from tweet
/// Builds toot text from tweet
fn build_basic_status(tweet: &Tweet) -> Result<String, Box<dyn Error>> {
let mut toot = String::from(&tweet.text);
@@ -177,31 +183,26 @@ fn build_basic_status(tweet: &Tweet) -> Result<String, Box<dyn Error>> {
/*
* Generic private functions
*/
fn cache_media(u: &str, t: &str) -> Result<String, Box<dyn Error>> {
/// Gets and caches Twitter Media inside the determined temp dir
async fn cache_media(u: &str, t: &str) -> Result<String, Box<dyn Error>> {
// create dir
if !Path::new(t).is_dir() {
create_dir_all(t)?;
}
create_dir_all(t).await?;
// get file
let client = Client::new();
let mut response = client.get(u).send()?;
let mut response = reqwest::get(u).await?;
// create local file
let dest_filename = match response.url()
.path_segments()
.and_then(|segments| segments.last()) {
Some(r) => r,
None => {
return Err(Box::new(ScootalooError::new(format!("Cannot determine the destination filename for {}", u).as_str())));
},
};
let url = Url::parse(u)?;
let dest_filename = url.path_segments().ok_or_else(|| ScootalooError::new(&format!("Cannot determine the destination filename for {}", u)))?
.last().ok_or_else(|| ScootalooError::new(&format!("Cannot determine the destination filename for {}", u)))?;
let dest_filepath = format!("{}/{}", t, dest_filename);
let mut dest_file = File::create(&dest_filepath)?;
let mut dest_file = File::create(&dest_filepath).await?;
copy(&mut response, &mut dest_file)?;
while let Some(chunk) = response.chunk().await? {
copy(&mut &*chunk, &mut dest_file).await?;
}
Ok(dest_filepath)
}
@@ -287,7 +288,7 @@ pub fn parse_toml(toml_file: &str) -> Config {
/// Generic register function
/// As this function is supposed to be run only once, it will panic for every error it encounters
/// Most of this function is a direct copy/paste of the official `mammut` crate
/// Most of this function is a direct copy/paste of the official `elefren` crate
pub fn register(host: &str) {
let mut builder = App::builder();
builder.client_name(Cow::from(String::from(env!("CARGO_PKG_NAME"))))
@@ -315,7 +316,8 @@ pub fn register(host: &str) {
}
/// This is where the magic happens
pub fn run(config: Config) {
#[tokio::main]
pub async fn run(config: Config) {
// retrieve the last tweet ID for the username
let last_tweet_id = read_state(&config.scootaloo.last_tweet_path);
@@ -326,13 +328,15 @@ pub fn run(config: Config) {
let mastodon = get_mastodon_token(&config.mastodon);
// get user timeline feed (Vec<tweet>)
let mut feed = get_user_timeline(&config, token, last_tweet_id).unwrap_or_else(|e|
let mut feed = get_user_timeline(&config, token, last_tweet_id)
.await
.unwrap_or_else(|e|
panic!("Something went wrong when trying to retrieve {}s timeline: {}", &config.twitter.username, e)
);
// empty feed -> exiting
if feed.is_empty() {
println!("Nothing to retrieve since last time, exiting…");
info!("Nothing to retrieve since last time, exiting…");
return;
}
@@ -340,10 +344,12 @@ pub fn run(config: Config) {
feed.reverse();
for tweet in &feed {
debug!("Treating Tweet {} inside feed", tweet.id);
// determine if the tweet is part of a thread (response to self) or a standard response
if let Some(r) = &tweet.in_reply_to_screen_name {
if &r.to_lowercase() != &config.twitter.username.to_lowercase() {
// we are responding not threading
info!("Tweet is a direct response, skipping");
continue;
}
};
@@ -352,33 +358,33 @@ pub fn run(config: Config) {
let mut status_text = match build_basic_status(tweet) {
Ok(t) => t,
Err(e) => {
println!("Could not create status from tweet {}: {}", tweet.id ,e);
error!("Could not create status from tweet {}: {}", tweet.id ,e);
continue;
},
};
let mut status_medias: Vec<String> = vec![];
// reupload the attachments if any
// reupload the attachments if any
if let Some(m) = &tweet.extended_entities {
for media in &m.media {
let local_tweet_media_path = match get_tweet_media(&media, &config.scootaloo.cache_path) {
let local_tweet_media_path = match get_tweet_media(&media, &config.scootaloo.cache_path).await {
Ok(m) => m,
Err(e) => {
println!("Cannot get tweet media for {}: {}", &media.url, e);
error!("Cannot get tweet media for {}: {}", &media.url, e);
continue;
},
};
let mastodon_media_ids = match mastodon.media(Cow::from(String::from(&local_tweet_media_path))) {
Ok(m) => {
remove_file(&local_tweet_media_path).unwrap_or_else(|e|
println!("Attachment for {} has been upload, but Im unable to remove the existing file: {}", &local_tweet_media_path, e)
remove_file(&local_tweet_media_path).await.unwrap_or_else(|e|
warn!("Attachment for {} has been uploaded, but Im unable to remove the existing file: {}", &local_tweet_media_path, e)
);
m.id
},
Err(e) => {
println!("Cannot attach media {} to Mastodon Instance: {}", &local_tweet_media_path, e);
error!("Attachment {} cannot be uploaded to Mastodon Instance: {}", &local_tweet_media_path, e);
continue;
}
};
@@ -389,14 +395,17 @@ pub fn run(config: Config) {
status_text = status_text.replace(&media.url, "");
}
}
// finished reuploading attachments, now lets do the toot baby!
debug!("Building corresponding Mastodon status");
let status = StatusBuilder::new()
.status(&status_text)
.media_ids(status_medias)
.build()
.expect(format!("Cannot build status with text {}", &status_text).as_str());
.expect(&format!("Cannot build status with text {}", &status_text));
// publish status
// again unwrap is safe here as we are in the main thread
mastodon.new_status(status).unwrap();
// this will panic if it cannot publish the status, which is a good thing, it allows the
// last_tweet gathered not to be written

View File

@@ -4,6 +4,13 @@ use scootaloo::*;
// clap
use clap::{App, Arg, SubCommand};
// log
use log::{LevelFilter, error};
use simple_logger::SimpleLogger;
// std
use std::str::FromStr;
fn main() {
let matches = App::new(env!("CARGO_PKG_NAME"))
.version(env!("CARGO_PKG_VERSION"))
@@ -15,6 +22,13 @@ fn main() {
.help("TOML config file for scootaloo (default /usr/local/etc/scootaloo.toml)")
.takes_value(true)
.display_order(1))
.arg(Arg::with_name("log_level")
.short("l")
.long("loglevel")
.value_name("LOGLEVEL")
.help("Log level.Valid values are: Off, Warn, Error, Info, Debug")
.takes_value(true)
.display_order(2))
.subcommand(SubCommand::with_name("register")
.version(env!("CARGO_PKG_VERSION"))
.about("Command to register to a Mastodon Instance")
@@ -32,7 +46,18 @@ fn main() {
return;
}
if matches.is_present("log_level") {
match LevelFilter::from_str(matches.value_of("log_level").unwrap()) {
Ok(level) => { SimpleLogger::new().with_level(level).init().unwrap()},
Err(e) => {
SimpleLogger::new().with_level(LevelFilter::Error).init().unwrap();
error!("Unknown log level filter: {}", e);
}
};
}
let config = parse_toml(matches.value_of("config").unwrap_or("/usr/local/etc/scootaloo.toml"));
run(config);
}