17 Commits

Author SHA1 Message Date
VC
13039f5e76 Merge branch 'noasync' into 'master'
getting async in a reasonable way

See merge request veretcle/scootaloo!12
2021-04-25 05:32:09 +00:00
Clément VERET
59005e8aee last correction 2021-04-25 07:28:17 +02:00
VC
fd9cc31848 Update src/lib.rs 2021-04-24 08:04:52 +00:00
VC
4ef58bda0a Deleted Cargo.lock 2021-04-24 07:58:52 +00:00
VC
912ee25c50 Merge branch 'noasync' of framagit.org:veretcle/scootaloo into noasync 2021-04-24 09:45:50 +02:00
VC
4f03a1a6f3 Adding Cargo.lock 2021-04-24 09:43:51 +02:00
VC
ac80b67c9f Merge branch 'master' into 'noasync'
# Conflicts:
#   .gitlab-ci.yml
#   Cargo.lock
#   Cargo.toml
#   src/lib.rs
2021-04-24 07:40:04 +00:00
VC
7aec8e0e33 adding strip 2021-04-24 09:34:37 +02:00
VC
f58edf3c75 Backporting changes on gitlab-ci.yml 2021-04-24 09:15:31 +02:00
VC
394ec5d1f3 I consider this a good compromise between all async (that works but is pretty complex and honestly a bit useless) and nothing async that is not the most optimal way to deal with things as reqwest and egg-mode are async by nature 2021-04-24 09:08:09 +02:00
VC
c10de76854 Adding logging facility 2021-04-24 09:07:06 +02:00
VC
020af69fe0 Adding the necessary bits and pieces to make things work better 2021-04-24 09:04:50 +02:00
VC
da808b0051 FUUUUUUUUUUUUUUU 2021-04-20 11:46:33 +02:00
VC
5a4dd5cb99 paths 2021-04-20 11:40:49 +02:00
VC
5b04bd27b9 Updating changelog 2021-04-20 11:39:43 +02:00
VC
c52fc52d23 Optimizing size of the final executable 2021-04-20 11:38:50 +02:00
VC
09ed837a1b Updating CHANGELOG to please @meduzen 2021-04-19 21:19:47 +02:00
5 changed files with 516 additions and 694 deletions

View File

@@ -3,10 +3,11 @@ stages:
rust-latest:
stage: build
artifacts:
paths:
- target/release/scootaloo
image: rust:latest
script:
- cargo build --verbose
- cargo build --release --verbose
- strip target/release/${CI_PROJECT_NAME}
- du -h target/release/${CI_PROJECT_NAME}

View File

@@ -1,3 +1,23 @@
# v0.3.3
* optimizing the size of the final executable (now ⩽ 6MiB)
# v0.3.2
* 100% async version
* media are now downloaded in parallel thanks to async
* logs are introduced into the code for your viewing pleasure
# v0.2.3
* using the async version of `reqwest`
* introducing async functions and make `tokio` the de facto executor for everything async
# v0.2.1
* using `tokio-compat` to avoid having 3 different versions of `tokio` in the same executable
* encapsulating async calls inside blocking tokio runtime calls
# v0.1.8
* fix #1: mentions are treated like decoded urls (it is not strictly necessary to push it this far, but it makes things easier in case you want to modify it)

1089
Cargo.lock generated

File diff suppressed because it is too large Load Diff

View File

@@ -1,6 +1,6 @@
[package]
name = "scootaloo"
version = "0.3.2"
version = "0.4.0"
authors = ["VC <veretcle+framagit@mateu.be>"]
edition = "2018"
@@ -10,10 +10,12 @@ edition = "2018"
serde = { version = "1.0", features = ["derive"] }
toml = "^0.5"
clap = "^2.33"
futures = "^0.3"
egg-mode = { git = "https://github.com/egg-mode-rs/egg-mode", rev = "6b81073eba9c3b123ca0e80bdb5ef61d1758f131" }
elefren = "^0.22"
tokio = { version = "1", features = ["full"]}
reqwest = "^0.11"
elefren = "^0.20"
htmlescape = "^0.3"
reqwest = "^0.11"
log = "^0.4"
simple_logger = "^1.11"

View File

@@ -6,7 +6,6 @@ use std::{
fmt,
fs::{read_to_string, write},
error::Error,
sync::{Arc, Mutex},
};
// toml
@@ -39,7 +38,6 @@ use reqwest::Url;
use tokio::{
io::copy,
fs::{File, create_dir_all, remove_file},
sync::mpsc,
};
// htmlescape
@@ -133,10 +131,10 @@ async fn get_tweet_media(m: &MediaEntity, t: &str) -> Result<String, Box<dyn Err
return cache_media(&variant.url, t).await;
}
}
return Err(Box::new(ScootalooError::new(format!("Media Type for {} is video but no mp4 file URL is available", &m.url).as_str())));
return Err(ScootalooError::new(&format!("Media Type for {} is video but no mp4 file URL is available", &m.url)).into());
},
None => {
return Err(Box::new(ScootalooError::new(format!("Media Type for {} is video but does not contain any video_info", &m.url).as_str())));
return Err(ScootalooError::new(&format!("Media Type for {} is video but does not contain any video_info", &m.url)).into());
},
}
},
@@ -185,7 +183,6 @@ fn build_basic_status(tweet: &Tweet) -> Result<String, Box<dyn Error>> {
/*
* Generic private functions
*/
/// Gets and caches Twitter Media inside the determined temp dir
async fn cache_media(u: &str, t: &str) -> Result<String, Box<dyn Error>> {
// create dir
@@ -196,8 +193,8 @@ async fn cache_media(u: &str, t: &str) -> Result<String, Box<dyn Error>> {
// create local file
let url = Url::parse(u)?;
let dest_filename = url.path_segments().ok_or_else(|| Box::new(ScootalooError::new(format!("Cannot determine the destination filename for {}", u).as_str())))?
.last().ok_or_else(|| Box::new(ScootalooError::new(format!("Cannot determine the destination filename for {}", u).as_str())))?;
let dest_filename = url.path_segments().ok_or_else(|| ScootalooError::new(&format!("Cannot determine the destination filename for {}", u)))?
.last().ok_or_else(|| ScootalooError::new(&format!("Cannot determine the destination filename for {}", u)))?;
let dest_filepath = format!("{}/{}", t, dest_filename);
@@ -337,7 +334,7 @@ pub async fn run(config: Config) {
let token = get_oauth2_token(&config);
// get Mastodon instance
let mastodon = Arc::new(Mutex::new(get_mastodon_token(&config.mastodon)));
let mastodon = get_mastodon_token(&config.mastodon);
// get user timeline feed (Vec<tweet>)
let mut feed = get_user_timeline(&config, token, last_tweet_id)
@@ -377,61 +374,34 @@ pub async fn run(config: Config) {
let mut status_medias: Vec<String> = vec![];
// reupload the attachments if any
// reupload the attachments if any
if let Some(m) = &tweet.extended_entities {
let (tx, mut rx) = mpsc::channel(4);
for media in &m.media {
// creating a new tx for this initial loop
let tx = tx.clone();
// creating a new mastodon from the original mutex
let mastodon = mastodon.clone();
// unfortunately for this to be thread safe, we need to clone a lot of structures
let media = media.clone();
let cache_path = config.scootaloo.cache_path.clone();
let local_tweet_media_path = match get_tweet_media(&media, &config.scootaloo.cache_path).await {
Ok(m) => m,
Err(e) => {
error!("Cannot get tweet media for {}: {}", &media.url, e);
continue;
},
};
tokio::spawn(async move {
debug!("Spawing new async thread to treat {}", &media.id);
let local_tweet_media_path = match get_tweet_media(&media, &cache_path).await {
Ok(m) => m,
Err(e) => {
// we could have panicked here, no issue, but I'm not comfortable using
// that for now
warn!("Cannot get tweet media for {}: {}", &media.url, e);
return;
}
};
// we cannot directly do all the stuff inside here because mastodon lock can
// live outside this
let mas_result = mastodon.lock().unwrap().media(Cow::from(String::from(&local_tweet_media_path)));
match mas_result {
Ok(m) => {
remove_file(&local_tweet_media_path).await.unwrap_or_else(|e|
warn!("Attachment {} has been uploaded but Im unable to remove the existing file: {}", &local_tweet_media_path, e)
);
// we can unwrap here because were in a thread
tx.send(ScootalooSpawnResponse {
mastodon_media_id: m.id.clone(),
twitter_media_url: local_tweet_media_path.clone()
}).await.unwrap();
},
Err(e) => {
error!("Attachment {} cannot be uploaded to Mastodon Instance: {}", &local_tweet_media_path, e);
}
let mastodon_media_ids = match mastodon.media(Cow::from(String::from(&local_tweet_media_path))) {
Ok(m) => {
remove_file(&local_tweet_media_path).await.unwrap_or_else(|e|
warn!("Attachment for {} has been uploaded, but Im unable to remove the existing file: {}", &local_tweet_media_path, e)
);
m.id
},
Err(e) => {
error!("Attachment {} cannot be uploaded to Mastodon Instance: {}", &local_tweet_media_path, e);
continue;
}
});
}
};
// dropping the last tx otherwise recv() will wait indefinitely
drop(tx);
status_medias.push(mastodon_media_ids);
while let Some(i) = rx.recv().await {
// pushes the media into the media vec
status_medias.push(i.mastodon_media_id);
// removes the URL from the original Tweet text
status_text = status_text.replace(&i.twitter_media_url, "");
// last step, removing the reference to the media from within the toot's text
status_text = status_text.replace(&media.url, "");
}
}
// finished reuploading attachments, now lets do the toot baby!
@@ -441,11 +411,11 @@ pub async fn run(config: Config) {
.status(&status_text)
.media_ids(status_medias)
.build()
.expect(format!("Cannot build status with text {}", &status_text).as_str());
.expect(&format!("Cannot build status with text {}", &status_text));
// publish status
// again unwrap is safe here as we are in the main thread
mastodon.lock().unwrap().new_status(status).unwrap();
mastodon.new_status(status).unwrap();
// this will panic if it cannot publish the status, which is a good thing, it allows the
// last_tweet gathered not to be written