mirror of
https://framagit.org/veretcle/scootaloo.git
synced 2025-07-20 17:11:19 +02:00
Compare commits
24 Commits
Author | SHA1 | Date | |
---|---|---|---|
![]() |
11b629203b | ||
![]() |
16792e515a | ||
![]() |
d228ceaaf6 | ||
![]() |
bd7d4dbbb5 | ||
![]() |
ff03b32f9d | ||
![]() |
533a40f2c2 | ||
![]() |
c301649d49 | ||
![]() |
fd9cc31848 | ||
![]() |
4ef58bda0a | ||
![]() |
912ee25c50 | ||
![]() |
4f03a1a6f3 | ||
![]() |
ac80b67c9f | ||
![]() |
7aec8e0e33 | ||
![]() |
f58edf3c75 | ||
![]() |
394ec5d1f3 | ||
![]() |
c10de76854 | ||
![]() |
020af69fe0 | ||
![]() |
da808b0051 | ||
![]() |
5a4dd5cb99 | ||
![]() |
5b04bd27b9 | ||
![]() |
c52fc52d23 | ||
![]() |
09ed837a1b | ||
![]() |
d4db2933ae | ||
![]() |
2e052ebf6a |
@@ -1,10 +1,15 @@
|
||||
---
|
||||
|
||||
stages:
|
||||
- build
|
||||
- build
|
||||
|
||||
rust-latest:
|
||||
stage: build
|
||||
image: rust:latest
|
||||
script:
|
||||
- cargo build --verbose
|
||||
- cargo test --verbose
|
||||
|
||||
stage: build
|
||||
artifacts:
|
||||
paths:
|
||||
- target/release/scootaloo
|
||||
image: rust:latest
|
||||
script:
|
||||
- cargo test
|
||||
- cargo build --release --verbose
|
||||
- strip target/release/${CI_PROJECT_NAME}
|
||||
|
20
CHANGELOG
20
CHANGELOG
@@ -1,3 +1,23 @@
|
||||
# v0.3.3
|
||||
|
||||
* optimizing the size of the final executable (now ⩽ 6MiB)
|
||||
|
||||
# v0.3.2
|
||||
|
||||
* 100% async version
|
||||
* media are now downloaded in parallel thanks to async
|
||||
* logs are introduced into the code for your viewing pleasure
|
||||
|
||||
# v0.2.3
|
||||
|
||||
* using the async version of `reqwest`
|
||||
* introducing async functions and make `tokio` the de facto executor for everything async
|
||||
|
||||
# v0.2.1
|
||||
|
||||
* using `tokio-compat` to avoid having 3 different versions of `tokio` in the same executable
|
||||
* encapsulating async calls inside blocking tokio runtime calls
|
||||
|
||||
# v0.1.8
|
||||
|
||||
* fix #1: mentions are treated like decoded urls (pushing it this far is not strictly needed, but it makes things easier in case you want to modify it)
|
||||
|
1283
Cargo.lock
generated
1283
Cargo.lock
generated
File diff suppressed because it is too large
Load Diff
24
Cargo.toml
24
Cargo.toml
@@ -1,25 +1,21 @@
|
||||
[package]
|
||||
name = "scootaloo"
|
||||
version = "0.2.1"
|
||||
version = "0.4.2"
|
||||
authors = ["VC <veretcle+framagit@mateu.be>"]
|
||||
edition = "2018"
|
||||
edition = "2021"
|
||||
|
||||
# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html
|
||||
|
||||
[dependencies]
|
||||
serde = { version = "1.0", features = ["derive"] }
|
||||
|
||||
toml = "^0.5"
|
||||
|
||||
clap = "^2.33"
|
||||
|
||||
tokio = { version = "1", features = ["rt-multi-thread"]}
|
||||
tokio-compat-02 = "0.2"
|
||||
|
||||
egg-mode = "^0.15"
|
||||
|
||||
clap = "^2.34"
|
||||
futures = "^0.3"
|
||||
egg-mode = "^0.16"
|
||||
tokio = { version = "1", features = ["full"]}
|
||||
elefren = "^0.22"
|
||||
|
||||
reqwest = { version="^0.11", features = ["blocking"] }
|
||||
|
||||
htmlescape = "^0.3"
|
||||
reqwest = "^0.11"
|
||||
log = "^0.4"
|
||||
simple_logger = "^2.1"
|
||||
|
||||
|
@@ -3,7 +3,7 @@ A Twitter to Mastodon copy bot written in Rust
|
||||
It:
|
||||
* copies the content (text) of the original Tweet
|
||||
* dereferences the links
|
||||
* gets every attach media (photo, video or gif)
|
||||
* gets every attached media (photo, video or gif)
|
||||
|
||||
If any of the last steps fails, the Toot gets published with the exact same text as the Tweet.
|
||||
|
||||
@@ -74,4 +74,3 @@ echo -n '8189881949849' > last_tweet
|
||||
|
||||
**This file should only contain the last tweet ID without any other char (no EOL or new line).**
|
||||
|
||||
Oh and everything is sync (and not async) so this does not run at a blazing speed…
|
||||
|
115
src/lib.rs
115
src/lib.rs
@@ -1,18 +1,13 @@
|
||||
// std
|
||||
use std::{
|
||||
path::Path,
|
||||
borrow::Cow,
|
||||
collections::HashMap,
|
||||
io::{stdin, copy},
|
||||
io::stdin,
|
||||
fmt,
|
||||
fs::{read_to_string, write, create_dir_all, File, remove_file},
|
||||
fs::{read_to_string, write},
|
||||
error::Error,
|
||||
};
|
||||
|
||||
//tokio
|
||||
use tokio::runtime::Runtime;
|
||||
use tokio_compat_02::FutureExt;
|
||||
|
||||
// toml
|
||||
use serde::Deserialize;
|
||||
|
||||
@@ -37,34 +32,44 @@ use elefren::{
|
||||
};
|
||||
|
||||
// reqwest
|
||||
use reqwest::blocking::Client;
|
||||
use reqwest::Url;
|
||||
|
||||
// tokio
|
||||
use tokio::{
|
||||
io::copy,
|
||||
fs::{File, create_dir_all, remove_file},
|
||||
};
|
||||
|
||||
// htmlescape
|
||||
use htmlescape::decode_html;
|
||||
|
||||
// log
|
||||
use log::{info, warn, error, debug};
|
||||
|
||||
/**********
|
||||
* Generic usage functions
|
||||
***********/
|
||||
/*
|
||||
* Those functions are related to the Twitter side of things
|
||||
*/
|
||||
/// Read last tweet id from a file
|
||||
/// Reads last tweet id from a file
|
||||
fn read_state(s: &str) -> Option<u64> {
|
||||
let state = read_to_string(s);
|
||||
|
||||
if let Ok(s) = state {
|
||||
debug!("Last Tweet ID (from file): {}", &s);
|
||||
return s.parse::<u64>().ok();
|
||||
}
|
||||
|
||||
None
|
||||
}
|
||||
|
||||
/// Write last treated tweet id to a file
|
||||
/// Writes last treated tweet id to a file
|
||||
fn write_state(f: &str, s: u64) -> Result<(), std::io::Error> {
|
||||
write(f, format!("{}", s))
|
||||
}
|
||||
|
||||
/// Get twitter oauth2 token
|
||||
/// Gets Twitter oauth2 token
|
||||
fn get_oauth2_token(config: &Config) -> Token {
|
||||
let con_token = KeyPair::new(String::from(&config.twitter.consumer_key), String::from(&config.twitter.consumer_secret));
|
||||
let access_token = KeyPair::new(String::from(&config.twitter.access_key), String::from(&config.twitter.access_secret));
|
||||
@@ -75,19 +80,18 @@ fn get_oauth2_token(config: &Config) -> Token {
|
||||
}
|
||||
}
|
||||
|
||||
/// Get twitter user timeline
|
||||
fn get_user_timeline(config: &Config, token: Token, lid: Option<u64>) -> Result<Vec<Tweet>, Box<dyn Error>> {
|
||||
/// Gets Twitter user timeline
|
||||
async fn get_user_timeline(config: &Config, token: Token, lid: Option<u64>) -> Result<Vec<Tweet>, Box<dyn Error>> {
|
||||
// fix the page size to 200 as it is the maximum Twitter authorizes
|
||||
let rt = Runtime::new()?;
|
||||
let (_timeline, feed) = rt.block_on(user_timeline(UserID::from(String::from(&config.twitter.username)), true, false, &token)
|
||||
let (_, feed) = user_timeline(UserID::from(String::from(&config.twitter.username)), true, false, &token)
|
||||
.with_page_size(200)
|
||||
.older(lid)
|
||||
.compat())?;
|
||||
.await?;
|
||||
|
||||
Ok(feed.to_vec())
|
||||
}
|
||||
|
||||
/// decode urls from UrlEntities
|
||||
/// Decodes urls from UrlEntities
|
||||
fn decode_urls(urls: &Vec<UrlEntity>) -> HashMap<String, String> {
|
||||
let mut decoded_urls = HashMap::new();
|
||||
|
||||
@@ -101,6 +105,8 @@ fn decode_urls(urls: &Vec<UrlEntity>) -> HashMap<String, String> {
|
||||
decoded_urls
|
||||
}
|
||||
|
||||
/// Decodes the Twitter mention to something that will make sense once Twitter has joined the
|
||||
/// Fediverse
|
||||
fn twitter_mentions(ums: &Vec<MentionEntity>) -> HashMap<String, String> {
|
||||
let mut decoded_mentions = HashMap::new();
|
||||
|
||||
@@ -111,24 +117,24 @@ fn twitter_mentions(ums: &Vec<MentionEntity>) -> HashMap<String, String> {
|
||||
decoded_mentions
|
||||
}
|
||||
|
||||
/// Retrieve a single media from a tweet and store it in a temporary file
|
||||
fn get_tweet_media(m: &MediaEntity, t: &str) -> Result<String, Box<dyn Error>> {
|
||||
/// Retrieves a single media from a tweet and stores it in a temporary file
|
||||
async fn get_tweet_media(m: &MediaEntity, t: &str) -> Result<String, Box<dyn Error>> {
|
||||
match m.media_type {
|
||||
MediaType::Photo => {
|
||||
return cache_media(&m.media_url_https, t);
|
||||
return cache_media(&m.media_url_https, t).await;
|
||||
},
|
||||
_ => {
|
||||
match &m.video_info {
|
||||
Some(v) => {
|
||||
for variant in &v.variants {
|
||||
if variant.content_type == "video/mp4" {
|
||||
return cache_media(&variant.url, t);
|
||||
return cache_media(&variant.url, t).await;
|
||||
}
|
||||
}
|
||||
return Err(Box::new(ScootalooError::new(format!("Media Type for {} is video but no mp4 file URL is available", &m.url).as_str())));
|
||||
return Err(ScootalooError::new(&format!("Media Type for {} is video but no mp4 file URL is available", &m.url)).into());
|
||||
},
|
||||
None => {
|
||||
return Err(Box::new(ScootalooError::new(format!("Media Type for {} is video but does not contain any video_info", &m.url).as_str())));
|
||||
return Err(ScootalooError::new(&format!("Media Type for {} is video but does not contain any video_info", &m.url)).into());
|
||||
},
|
||||
}
|
||||
},
|
||||
@@ -138,7 +144,7 @@ fn get_tweet_media(m: &MediaEntity, t: &str) -> Result<String, Box<dyn Error>> {
|
||||
/*
|
||||
* Those functions are related to the Mastodon side of things
|
||||
*/
|
||||
/// Get Mastodon Data
|
||||
/// Gets Mastodon Data
|
||||
fn get_mastodon_token(masto: &MastodonConfig) -> Mastodon {
|
||||
let data = Data {
|
||||
base: Cow::from(String::from(&masto.base)),
|
||||
@@ -151,7 +157,7 @@ fn get_mastodon_token(masto: &MastodonConfig) -> Mastodon {
|
||||
Mastodon::from(data)
|
||||
}
|
||||
|
||||
/// build toot text from tweet
|
||||
/// Builds toot text from tweet
|
||||
fn build_basic_status(tweet: &Tweet) -> Result<String, Box<dyn Error>> {
|
||||
let mut toot = String::from(&tweet.text);
|
||||
|
||||
@@ -177,31 +183,26 @@ fn build_basic_status(tweet: &Tweet) -> Result<String, Box<dyn Error>> {
|
||||
/*
|
||||
* Generic private functions
|
||||
*/
|
||||
fn cache_media(u: &str, t: &str) -> Result<String, Box<dyn Error>> {
|
||||
/// Gets and caches Twitter Media inside the determined temp dir
|
||||
async fn cache_media(u: &str, t: &str) -> Result<String, Box<dyn Error>> {
|
||||
// create dir
|
||||
if !Path::new(t).is_dir() {
|
||||
create_dir_all(t)?;
|
||||
}
|
||||
create_dir_all(t).await?;
|
||||
|
||||
// get file
|
||||
let client = Client::new();
|
||||
let mut response = client.get(u).send()?;
|
||||
let mut response = reqwest::get(u).await?;
|
||||
|
||||
// create local file
|
||||
let dest_filename = match response.url()
|
||||
.path_segments()
|
||||
.and_then(|segments| segments.last()) {
|
||||
Some(r) => r,
|
||||
None => {
|
||||
return Err(Box::new(ScootalooError::new(format!("Cannot determine the destination filename for {}", u).as_str())));
|
||||
},
|
||||
};
|
||||
let url = Url::parse(u)?;
|
||||
let dest_filename = url.path_segments().ok_or_else(|| ScootalooError::new(&format!("Cannot determine the destination filename for {}", u)))?
|
||||
.last().ok_or_else(|| ScootalooError::new(&format!("Cannot determine the destination filename for {}", u)))?;
|
||||
|
||||
let dest_filepath = format!("{}/{}", t, dest_filename);
|
||||
|
||||
let mut dest_file = File::create(&dest_filepath)?;
|
||||
let mut dest_file = File::create(&dest_filepath).await?;
|
||||
|
||||
copy(&mut response, &mut dest_file)?;
|
||||
while let Some(chunk) = response.chunk().await? {
|
||||
copy(&mut &*chunk, &mut dest_file).await?;
|
||||
}
|
||||
|
||||
Ok(dest_filepath)
|
||||
}
|
||||
@@ -287,7 +288,7 @@ pub fn parse_toml(toml_file: &str) -> Config {
|
||||
|
||||
/// Generic register function
|
||||
/// As this function is supposed to be run only once, it will panic for every error it encounters
|
||||
/// Most of this function is a direct copy/paste of the official `mammut` crate
|
||||
/// Most of this function is a direct copy/paste of the official `elefren` crate
|
||||
pub fn register(host: &str) {
|
||||
let mut builder = App::builder();
|
||||
builder.client_name(Cow::from(String::from(env!("CARGO_PKG_NAME"))))
|
||||
@@ -315,7 +316,8 @@ pub fn register(host: &str) {
|
||||
}
|
||||
|
||||
/// This is where the magic happens
|
||||
pub fn run(config: Config) {
|
||||
#[tokio::main]
|
||||
pub async fn run(config: Config) {
|
||||
// retrieve the last tweet ID for the username
|
||||
let last_tweet_id = read_state(&config.scootaloo.last_tweet_path);
|
||||
|
||||
@@ -326,13 +328,15 @@ pub fn run(config: Config) {
|
||||
let mastodon = get_mastodon_token(&config.mastodon);
|
||||
|
||||
// get user timeline feed (Vec<tweet>)
|
||||
let mut feed = get_user_timeline(&config, token, last_tweet_id).unwrap_or_else(|e|
|
||||
let mut feed = get_user_timeline(&config, token, last_tweet_id)
|
||||
.await
|
||||
.unwrap_or_else(|e|
|
||||
panic!("Something went wrong when trying to retrieve {}’s timeline: {}", &config.twitter.username, e)
|
||||
);
|
||||
|
||||
// empty feed -> exiting
|
||||
if feed.is_empty() {
|
||||
println!("Nothing to retrieve since last time, exiting…");
|
||||
info!("Nothing to retrieve since last time, exiting…");
|
||||
return;
|
||||
}
|
||||
|
||||
@@ -340,10 +344,12 @@ pub fn run(config: Config) {
|
||||
feed.reverse();
|
||||
|
||||
for tweet in &feed {
|
||||
debug!("Treating Tweet {} inside feed", tweet.id);
|
||||
// determine if the tweet is part of a thread (response to self) or a standard response
|
||||
if let Some(r) = &tweet.in_reply_to_screen_name {
|
||||
if &r.to_lowercase() != &config.twitter.username.to_lowercase() {
|
||||
// we are responding not threading
|
||||
info!("Tweet is a direct response, skipping");
|
||||
continue;
|
||||
}
|
||||
};
|
||||
@@ -352,33 +358,33 @@ pub fn run(config: Config) {
|
||||
let mut status_text = match build_basic_status(tweet) {
|
||||
Ok(t) => t,
|
||||
Err(e) => {
|
||||
println!("Could not create status from tweet {}: {}", tweet.id ,e);
|
||||
error!("Could not create status from tweet {}: {}", tweet.id ,e);
|
||||
continue;
|
||||
},
|
||||
};
|
||||
|
||||
let mut status_medias: Vec<String> = vec![];
|
||||
|
||||
// reupload the attachments if any
|
||||
// reupload the attachments if any
|
||||
if let Some(m) = &tweet.extended_entities {
|
||||
for media in &m.media {
|
||||
let local_tweet_media_path = match get_tweet_media(&media, &config.scootaloo.cache_path) {
|
||||
let local_tweet_media_path = match get_tweet_media(&media, &config.scootaloo.cache_path).await {
|
||||
Ok(m) => m,
|
||||
Err(e) => {
|
||||
println!("Cannot get tweet media for {}: {}", &media.url, e);
|
||||
error!("Cannot get tweet media for {}: {}", &media.url, e);
|
||||
continue;
|
||||
},
|
||||
};
|
||||
|
||||
let mastodon_media_ids = match mastodon.media(Cow::from(String::from(&local_tweet_media_path))) {
|
||||
Ok(m) => {
|
||||
remove_file(&local_tweet_media_path).unwrap_or_else(|e|
|
||||
println!("Attachment for {} has been upload, but I’m unable to remove the existing file: {}", &local_tweet_media_path, e)
|
||||
remove_file(&local_tweet_media_path).await.unwrap_or_else(|e|
|
||||
warn!("Attachment for {} has been uploaded, but I’m unable to remove the existing file: {}", &local_tweet_media_path, e)
|
||||
);
|
||||
m.id
|
||||
},
|
||||
Err(e) => {
|
||||
println!("Cannot attach media {} to Mastodon Instance: {}", &local_tweet_media_path, e);
|
||||
error!("Attachment {} cannot be uploaded to Mastodon Instance: {}", &local_tweet_media_path, e);
|
||||
continue;
|
||||
}
|
||||
};
|
||||
@@ -389,14 +395,17 @@ pub fn run(config: Config) {
|
||||
status_text = status_text.replace(&media.url, "");
|
||||
}
|
||||
}
|
||||
// finished reuploading attachments, now let’s do the toot baby!
|
||||
|
||||
debug!("Building corresponding Mastodon status");
|
||||
let status = StatusBuilder::new()
|
||||
.status(&status_text)
|
||||
.media_ids(status_medias)
|
||||
.build()
|
||||
.expect(format!("Cannot build status with text {}", &status_text).as_str());
|
||||
.expect(&format!("Cannot build status with text {}", &status_text));
|
||||
|
||||
// publish status
|
||||
// again unwrap is safe here as we are in the main thread
|
||||
mastodon.new_status(status).unwrap();
|
||||
// this will panic if it cannot publish the status, which is a good thing, it allows the
|
||||
// last_tweet gathered not to be written
|
||||
|
25
src/main.rs
25
src/main.rs
@@ -4,6 +4,13 @@ use scootaloo::*;
|
||||
// clap
|
||||
use clap::{App, Arg, SubCommand};
|
||||
|
||||
// log
|
||||
use log::{LevelFilter, error};
|
||||
use simple_logger::SimpleLogger;
|
||||
|
||||
// std
|
||||
use std::str::FromStr;
|
||||
|
||||
fn main() {
|
||||
let matches = App::new(env!("CARGO_PKG_NAME"))
|
||||
.version(env!("CARGO_PKG_VERSION"))
|
||||
@@ -15,6 +22,13 @@ fn main() {
|
||||
.help("TOML config file for scootaloo (default /usr/local/etc/scootaloo.toml)")
|
||||
.takes_value(true)
|
||||
.display_order(1))
|
||||
.arg(Arg::with_name("log_level")
|
||||
.short("l")
|
||||
.long("loglevel")
|
||||
.value_name("LOGLEVEL")
|
||||
.help("Log level. Valid values are: Off, Warn, Error, Info, Debug")
|
||||
.takes_value(true)
|
||||
.display_order(2))
|
||||
.subcommand(SubCommand::with_name("register")
|
||||
.version(env!("CARGO_PKG_VERSION"))
|
||||
.about("Command to register to a Mastodon Instance")
|
||||
@@ -32,7 +46,18 @@ fn main() {
|
||||
return;
|
||||
}
|
||||
|
||||
if matches.is_present("log_level") {
|
||||
match LevelFilter::from_str(matches.value_of("log_level").unwrap()) {
|
||||
Ok(level) => { SimpleLogger::new().with_level(level).init().unwrap()},
|
||||
Err(e) => {
|
||||
SimpleLogger::new().with_level(LevelFilter::Error).init().unwrap();
|
||||
error!("Unknown log level filter: {}", e);
|
||||
}
|
||||
};
|
||||
}
|
||||
|
||||
let config = parse_toml(matches.value_of("config").unwrap_or("/usr/local/etc/scootaloo.toml"));
|
||||
|
||||
run(config);
|
||||
}
|
||||
|
||||
|
Reference in New Issue
Block a user