Files
scootaloo/src/lib.rs

231 lines
9.4 KiB
Rust
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

mod error;
use error::ScootalooError;
mod config;
pub use config::parse_toml;
use config::Config;
mod mastodon;
pub use mastodon::register;
use mastodon::*;
mod twitter;
use twitter::*;
mod util;
use crate::util::generate_media_ids;
mod state;
pub use state::{init_db, migrate_db};
use state::{read_state, write_state, TweetToToot};
use elefren::{prelude::*, status_builder::StatusBuilder, Language};
use futures::StreamExt;
use html_escape::decode_html_entities;
use log::info;
use regex::Regex;
use rusqlite::Connection;
use std::sync::Arc;
use tokio::{spawn, sync::Mutex};
const DEFAULT_RATE_LIMIT: usize = 4;
const DEFAULT_PAGE_SIZE: i32 = 200;
/// This is where the magic happens
#[tokio::main]
pub async fn run(config: Config) {
// open the SQLite connection
let conn = Arc::new(Mutex::new(
Connection::open(&config.scootaloo.db_path).unwrap_or_else(|e| {
panic!(
"Something went wrong when opening the DB {}: {}",
&config.scootaloo.db_path, e
)
}),
));
let global_mastodon_config = Arc::new(Mutex::new(config.mastodon.clone()));
let display_url_re = config
.scootaloo
.show_url_as_display_url_for
.as_ref()
.map(|r|
// we want to panic in case the RE is not valid
Regex::new(r).unwrap());
let mut stream = futures::stream::iter(config.mastodon.into_values())
.map(|mastodon_config| {
// calculate Twitter page size
let page_size = mastodon_config
.twitter_page_size
.unwrap_or_else(|| config.twitter.page_size.unwrap_or(DEFAULT_PAGE_SIZE));
// create temporary value for each task
let scootaloo_cache_path = config.scootaloo.cache_path.clone();
let scootaloo_alt_services = config.scootaloo.alternative_services_for.clone();
let display_url_re = display_url_re.clone();
let token = get_oauth2_token(&config.twitter);
let task_conn = conn.clone();
let global_mastodon_config = global_mastodon_config.clone();
spawn(async move {
info!("Starting treating {}", &mastodon_config.twitter_screen_name);
// retrieve the last tweet ID for the username
let lconn = task_conn.lock().await;
let last_tweet_id = read_state(&lconn, &mastodon_config.twitter_screen_name, None)?
.map(|r| r.tweet_id);
drop(lconn);
// get reversed, curated user timeline
let feed = get_user_timeline(
&mastodon_config.twitter_screen_name,
&token,
last_tweet_id,
page_size,
)
.await?;
// get Mastodon instance
let mastodon = get_mastodon_token(&mastodon_config);
for tweet in &feed {
info!("Treating Tweet {} inside feed", tweet.id);
// basic toot text
let mut status_text = tweet.text.clone();
// add mentions and smart mentions
if !&tweet.entities.user_mentions.is_empty() {
info!("Tweet contains mentions, add them!");
let global_mastodon_config = global_mastodon_config.lock().await;
twitter_mentions(
&mut status_text,
&tweet.entities.user_mentions,
&global_mastodon_config,
);
drop(global_mastodon_config);
}
if !&tweet.entities.urls.is_empty() {
info!("Tweet contains links, add them!");
let mut associated_urls =
associate_urls(&tweet.entities.urls, &display_url_re);
if let Some(q) = &tweet.quoted_status {
if let Some(u) = &q.user {
info!(
"Tweet {} contains a quote, we try to find it within the DB",
tweet.id
);
// we know we have a quote and a user, we can lock both the
// connection to DB and global_config
// we will release them manually as soon as theyre useless
let lconn = task_conn.lock().await;
let global_mastodon_config = global_mastodon_config.lock().await;
if let Ok(Some(r)) = read_state(&lconn, &u.screen_name, Some(q.id))
{
info!("We have found the associated toot({})", &r.toot_id);
// drop conn immediately after the request: we wont need it
// any more and the treatment there might be time-consuming
drop(lconn);
if let Some((m, t)) =
find_mastodon_screen_name_by_twitter_screen_name(
&r.twitter_screen_name,
&global_mastodon_config,
)
{
// drop the global conf, we have all we required, no need
// to block it further
drop(global_mastodon_config);
replace_tweet_by_toot(
&mut associated_urls,
&r.twitter_screen_name,
q.id,
&m,
&t,
&r.toot_id,
);
}
}
}
}
if let Some(a) = &scootaloo_alt_services {
replace_alt_services(&mut associated_urls, a);
}
decode_urls(&mut status_text, &associated_urls);
}
// building associative media list
let (media_url, status_medias) =
generate_media_ids(tweet, &scootaloo_cache_path, &mastodon).await;
status_text = status_text.replace(&media_url, "");
// now that the text wont be altered anymore, we can safely remove HTML
// entities
status_text = decode_html_entities(&status_text).to_string();
info!("Building corresponding Mastodon status");
let mut status_builder = StatusBuilder::new();
status_builder.status(status_text).media_ids(status_medias);
// thread if necessary
if tweet.in_reply_to_user_id.is_some() {
let lconn = task_conn.lock().await;
if let Ok(Some(r)) = read_state(
&lconn,
&mastodon_config.twitter_screen_name,
tweet.in_reply_to_status_id,
) {
status_builder.in_reply_to(&r.toot_id);
}
drop(lconn);
}
// language if any
if let Some(l) = &tweet.lang {
if let Some(r) = Language::from_639_1(l) {
status_builder.language(r);
}
}
// can be activated for test purposes
// status_builder.visibility(elefren::status_builder::Visibility::Private);
let status = status_builder.build()?;
let published_status = mastodon.new_status(status)?;
// this will return if it cannot publish the status preventing the last_tweet from
// being written into db
let ttt_towrite = TweetToToot {
twitter_screen_name: mastodon_config.twitter_screen_name.clone(),
tweet_id: tweet.id,
toot_id: published_status.id,
};
// write the current state (tweet ID and toot ID) to avoid copying it another time
let lconn = task_conn.lock().await;
write_state(&lconn, ttt_towrite)?;
drop(lconn);
}
Ok::<(), ScootalooError>(())
})
})
.buffer_unordered(config.scootaloo.rate_limit.unwrap_or(DEFAULT_RATE_LIMIT));
// launch and wait for every handle
while let Some(result) = stream.next().await {
match result {
Ok(Err(e)) => eprintln!("Error within thread: {}", e),
Err(e) => eprintln!("Error with thread: {}", e),
_ => (),
}
}
}