19 Commits

Author SHA1 Message Date
VC
47d7fdbd42 Merge branch 'feat/refactor_embeds' into 'main'
♻️: better handle of quotes and media embedded into quotes

See merge request veretcle/oolatoocs!39
2025-12-01 12:07:42 +00:00
VC
7334fb3d09 ♻️: better handle of quotes and media embedded into quotes 2025-12-01 11:37:58 +01:00
VC
79ac915347 Merge branch 'feat/megalodon_1_1' into 'main'
⬆️: upgrade megalodon v1.1

See merge request veretcle/oolatoocs!38
2025-11-27 08:13:17 +00:00
VC
e89e6e51ec ⬆️: upgrade megalodon v1.1 2025-11-27 09:08:27 +01:00
VC
7b21a0e3a7 Merge branch 'feat/add_quotes' into 'main'
Add quotes

See merge request veretcle/oolatoocs!37
2025-11-26 06:36:08 +00:00
VC
43aa6dcd99 : add mastodon quotes 2025-11-25 21:42:10 +01:00
VC
cf5fe11b56 Merge branch 'feat_rust_1_90' into 'main'
⬆️: rust 1.90

See merge request veretcle/oolatoocs!36
2025-09-26 13:41:15 +00:00
VC
7bd0843cf6 ⬆️: rust 1.90 2025-09-26 15:33:47 +02:00
VC
402fcffc75 Merge branch 'doc_update' into 'main'
📝: update README.md

See merge request veretcle/oolatoocs!35
2025-06-21 11:01:28 +00:00
VC
b295cc5b94 📝: update README.md 2025-06-21 12:56:09 +02:00
VC
a882aaa59d Merge branch '15-feat-make-hashtags-removal-optional' into 'main'
: hashtag removal is now optional

Closes #15

See merge request veretcle/oolatoocs!34
2025-06-21 10:53:43 +00:00
VC
259032a7b9 : hashtag removal is now optional 2025-06-21 12:48:59 +02:00
VC
e7f0c9c6f5 Merge branch '14-image-size-find-a-way-to-optimize-image-more' into 'main'
🐛: add progressively more compression to WebP to avoid getting rejected with...

Closes #14

See merge request veretcle/oolatoocs!33
2025-06-20 12:33:21 +00:00
VC
83c8da46e8 🐛: add progressively more compression to WebP to avoid getting rejected with 1Mb limit image file size 2025-06-20 14:26:12 +02:00
VC
823f80729f Merge branch '13-update-bsky-sdk-dependency' into 'main'
⬆: bsky-sdk v0.1.20 + atrium_api v0.25.4

Closes #13

See merge request veretcle/oolatoocs!32
2025-06-16 06:28:38 +00:00
VC
5969e3a56a ⬆: bsky-sdk v0.1.20 + atrium_api v0.25.4 2025-06-12 15:16:35 +02:00
VC
3ea2478512 fix: count 26 chars per url each time 2025-06-12 14:37:02 +02:00
VC
5606d00da2 Merge branch '10-better-embed-links-for-bsky' into 'main'
: add embed card when available

Closes #10

See merge request veretcle/oolatoocs!29
2025-01-26 08:50:43 +00:00
VC
4cb80b0607 : add embed card when available 2025-01-26 09:33:20 +01:00
10 changed files with 1444 additions and 1083 deletions

1
.gitignore vendored
View File

@@ -2,3 +2,4 @@
.last_tweet
.config.toml
.config.json
.bsky.json

2250
Cargo.lock generated

File diff suppressed because it is too large Load Diff

View File

@@ -1,6 +1,6 @@
[package]
name = "oolatoocs"
version = "4.1.4"
version = "4.4.2"
edition = "2021"
# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html
@@ -12,7 +12,6 @@ env_logger = "^0.11"
futures = "^0.3"
html-escape = "^0.2"
log = "^0.4"
megalodon = "^1.0"
oauth1-request = "^0.6"
regex = "^1.10"
reqwest = { version = "^0.12", features = ["json", "stream", "multipart"] }
@@ -21,9 +20,10 @@ serde = { version = "^1.0", features = ["derive"] }
tokio = { version = "^1.33", features = ["rt-multi-thread", "macros"] }
toml = "^0.8"
bsky-sdk = "^0.1"
atrium-api = "^0.24"
atrium-api = { version = "^0.25", features = ["namespace-appbsky"] }
image = "^0.25"
webp = "^0.3"
megalodon = "^1.1"
[profile.release]
strip = true

View File

@@ -16,10 +16,13 @@ Since 2025-01-20, Twitter is now longer supported.
What it can do:
* Reproduces the Toot content into the Record;
* Cuts (poorly) the Toot in half if it's too long for Bluesky and threads it (this is cut using a word count, not the best method, but it gets the job done);
* Reuploads images/gifs/videos from Mastodon to Bluesky
* Reuploads images/gifs/videos/webcards from Mastodon to Bluesky
* ⚠️ Bluesky does not support mixing images and videos. You can have up to 4 images on a Bsky record **or** 1 video but not mix around. If you do so, only the video will be posted on Bluesky.
* ⚠️ Bluesky does not support images greater than 1Mb (that is 1,000,000,000 bytes or 976.6 KiB). I might incorporate soon a image quality reducer or WebP transcoding to avoid this issue.
* ⚠️ Bluesky does not support images greater than 1Mb (that is 1,000,000 bytes or 976.6 KiB), so Oolatoocs converts the image to WebP and progressively reduces the quality to fit that limitation.
* ⚠️ Bluesky does not support webcards with any other media/quote, so webcards have the last priority
* Can reproduce threads from Mastodon to Bluesky
* Can reproduce (self-)quotes from Mastodon to Bluesky
* ⚠️ Bluesky can't do quotes with webcards, you can only embed images **or** a video with quotes
* ⚠️ Bluesky does not support polls for now. So the poll itself is just presented as text from Mastodon instead which is not the most elegant.
* Can prevent a Toot from being recorded to Bluesky by using the #NoTweet (case-insensitive) hashtag in Mastodon
@@ -30,6 +33,7 @@ The configuration is relatively easy to follow:
```toml
[oolatoocs]
db_path = "/var/lib/oolatoocs/db.sqlite3" # the path to the DB where toots/tweets/records are stored
remove_hashtags = false # optional, default to false
[mastodon] # This part can be generated, see below
base = "https://m.nintendojo.fr"

View File

@@ -1,7 +1,7 @@
use crate::config::BlueskyConfig;
use crate::{config::BlueskyConfig, OolatoocsError};
use atrium_api::{
app::bsky::feed::post::RecordData, com::atproto::repo::upload_blob::Output,
types::string::Datetime, types::string::Language,
types::string::Datetime, types::string::Language, types::string::RecordKey,
};
use bsky_sdk::{
agent::config::{Config, FileStore},
@@ -11,7 +11,10 @@ use bsky_sdk::{
use futures::{stream, StreamExt};
use image::ImageReader;
use log::{debug, error, warn};
use megalodon::entities::attachment::{Attachment, AttachmentType};
use megalodon::entities::{
attachment::{Attachment, AttachmentType},
card::Card,
};
use regex::Regex;
use std::{error::Error, fs::exists, io::Cursor};
use webp::*;
@@ -136,7 +139,7 @@ async fn get_record(
cid: None,
collection: atrium_api::types::string::Nsid::new("app.bsky.feed.post".to_string())?,
repo: atrium_api::types::string::Handle::new(config.to_string())?.into(),
rkey: rkey.to_string(),
rkey: RecordKey::new(rkey.to_string())?,
}
.into(),
)
@@ -145,15 +148,64 @@ async fn get_record(
Ok(record)
}
// its ugly af but it gets the job done for now
/// Generate a quote embed record pointing at an existing Bluesky record.
///
/// `quote_id` is turned into a record key with `rkey` and the matching record is
/// fetched for `config.handle`; the embed is a strong reference (URI + CID) to it.
///
/// # Errors
///
/// Returns an error when the record cannot be fetched or when the fetched record
/// carries no CID. Callers that want a best-effort embed can convert the result
/// with `.ok()`.
pub async fn generate_quote_records(
    config: &BlueskyConfig,
    quote_id: &str,
) -> Result<atrium_api::app::bsky::feed::post::RecordEmbedRefs, Box<dyn Error>> {
    let quote_record = get_record(&config.handle, &rkey(quote_id)).await?;

    // a strong reference cannot be built without a CID: report it instead of panicking
    let cid = quote_record
        .data
        .cid
        .ok_or("quoted record has no CID")?;

    Ok(
        atrium_api::app::bsky::feed::post::RecordEmbedRefs::AppBskyEmbedRecordMain(Box::new(
            atrium_api::app::bsky::embed::record::MainData {
                record: atrium_api::com::atproto::repo::strong_ref::MainData {
                    cid,
                    uri: quote_record.data.uri.to_owned(),
                }
                .into(),
            }
            .into(),
        )),
    )
}
/// Generate an embed webcard record into Bsky
/// If the preview image does not exist or fails to upload, it is simply ignored
pub async fn generate_webcard_records(
bsky: &BskyAgent,
card: &Card,
) -> Result<atrium_api::app::bsky::feed::post::RecordEmbedRefs, Box<dyn Error + Send + Sync>> {
let blob = match &card.image {
Some(url) => upload_media(true, bsky, url).await?.blob.clone().into(),
None => None,
};
let record_card = atrium_api::app::bsky::embed::external::ExternalData {
description: card.description.clone(),
thumb: blob,
title: card.title.clone(),
uri: card.url.clone(),
};
Ok(
atrium_api::app::bsky::feed::post::RecordEmbedRefs::AppBskyEmbedExternalMain(Box::new(
atrium_api::app::bsky::embed::external::MainData {
external: record_card.into(),
}
.into(),
)),
)
}
/// Generate an array of Bsky media records
/// As Bsky does not support multiple video in a record or mix of video and images, video has the
/// highest priority
pub async fn generate_media_records(
bsky: &BskyAgent,
media_attach: &[Attachment],
) -> Option<atrium_api::types::Union<atrium_api::app::bsky::feed::post::RecordEmbedRefs>> {
let mut embed: Option<
atrium_api::types::Union<atrium_api::app::bsky::feed::post::RecordEmbedRefs>,
> = None;
) -> Result<atrium_api::app::bsky::feed::post::RecordEmbedRefs, Box<dyn Error + Send + Sync>> {
let image_media_attach: Vec<_> = media_attach
.iter()
.filter(|x| x.r#type == AttachmentType::Image)
@@ -161,7 +213,7 @@ pub async fn generate_media_records(
.collect();
let video_media_attach: Vec<_> = media_attach
.iter()
.filter(|x| (x.r#type == AttachmentType::Video || x.r#type == AttachmentType::Gifv))
.filter(|x| x.r#type == AttachmentType::Video || x.r#type == AttachmentType::Gifv)
.cloned()
.collect();
@@ -169,9 +221,9 @@ pub async fn generate_media_records(
if !video_media_attach.is_empty() {
// treat only the very first video, ignore the rest
let media = &video_media_attach[0];
let blob = upload_media(false, bsky, &media.url).await.unwrap();
let blob = upload_media(false, bsky, &media.url).await?;
embed = Some(atrium_api::types::Union::Refs(
return Ok(
atrium_api::app::bsky::feed::post::RecordEmbedRefs::AppBskyEmbedVideoMain(Box::new(
atrium_api::app::bsky::embed::video::MainData {
alt: media.description.clone(),
@@ -181,12 +233,10 @@ pub async fn generate_media_records(
}
.into(),
)),
));
// returns immediately, we dont want to treat the other medias
return embed;
);
}
// It wasnt a video, then its an image or a gallery of 4 images
let mut stream = stream::iter(image_media_attach)
.map(|media| {
let bsky = bsky.clone();
@@ -217,14 +267,14 @@ pub async fn generate_media_records(
}
if !images.is_empty() {
embed = Some(atrium_api::types::Union::Refs(
return Ok(
atrium_api::app::bsky::feed::post::RecordEmbedRefs::AppBskyEmbedImagesMain(Box::new(
atrium_api::app::bsky::embed::images::MainData { images }.into(),
)),
));
);
}
embed
Err(OolatoocsError::new("Cannot embed media").into())
}
async fn upload_media(
@@ -239,11 +289,20 @@ async fn upload_media(
} else {
// this is an image and its over 1Mb long
debug!("Img file too large: {}", content_length);
// defaults to 95% quality for WebP compression
let mut default_quality = 95f32;
let img = ImageReader::new(Cursor::new(dl.bytes().await?))
.with_guessed_format()?
.decode()?;
let encoder: Encoder = Encoder::from_image(&img)?;
let webp: WebPMemory = encoder.encode(90f32);
let mut webp: WebPMemory = encoder.encode(default_quality);
while webp.len() > 1_000_000 {
debug!("Img file too large at {}%, reducing…", default_quality);
default_quality -= 5.0;
webp = encoder.encode(default_quality);
}
webp.to_vec()
};

View File

@@ -11,6 +11,17 @@ pub struct Config {
/// Application-level settings, read as `config.oolatoocs`.
#[derive(Debug, Deserialize)]
pub struct OolatoocsConfig {
/// Path to the database file.
pub db_path: String,
/// Whether the toot's hashtags are stripped from the content; optional in the
/// configuration, `false` when the key is absent (serde default for `bool`).
#[serde(default)]
pub remove_hashtags: bool,
}
impl Default for OolatoocsConfig {
fn default() -> Self {
OolatoocsConfig {
db_path: "/var/lib/oolatoocs/db".to_string(),
remove_hashtags: false,
}
}
}
#[derive(Debug, Deserialize)]

View File

@@ -18,7 +18,10 @@ mod utils;
use utils::{generate_multi_tweets, strip_everything};
mod bsky;
use bsky::{build_post_record, generate_media_records, get_session, BskyReply};
use bsky::{
build_post_record, generate_media_records, generate_quote_records, generate_webcard_records,
get_session, BskyReply,
};
use rusqlite::Connection;
@@ -86,7 +89,14 @@ pub async fn run(config: &Config) {
}
// form tweet_content and strip everything useless in it
let Ok(mut tweet_content) = strip_everything(&toot.content, &toot.tags) else {
let toot_tags: Vec<megalodon::entities::status::Tag> =
match &config.oolatoocs.remove_hashtags {
true => toot.tags.clone(),
false => vec![],
};
let Ok(mut tweet_content) =
strip_everything(&toot.content, &toot_tags, &config.mastodon.base)
else {
continue; // skip in case we cant strip something
};
@@ -151,15 +161,76 @@ pub async fn run(config: &Config) {
});
};
// treats medias
let record_medias = generate_media_records(&bluesky, &toot.media_attachments).await;
// handle quote if any
let quote_embed = match toot.reblog {
Some(r) => {
let quote_record = read_state(&conn, Some(r.id.parse::<u64>().unwrap()));
match quote_record {
Ok(Some(q)) => generate_quote_records(&config.bluesky, &q.record_uri)
.await
.ok(),
_ => None,
}
}
None => None,
};
// handle medias if any
let media_embed = if toot.media_attachments.len() > usize::from(0u8) {
generate_media_records(&bluesky, &toot.media_attachments)
.await
.ok()
} else {
None
};
// handle webcard if any
let webcard_embed = match toot.card {
Some(t) => generate_webcard_records(&bluesky, &t).await.ok(),
None => None,
};
let record_embed = if quote_embed.is_some() {
if media_embed.is_some() {
let medias_mapped = match media_embed.unwrap() {
atrium_api::app::bsky::feed::post::RecordEmbedRefs::AppBskyEmbedImagesMain(a) => atrium_api::app::bsky::embed::record_with_media::MainMediaRefs::AppBskyEmbedImagesMain(a),
atrium_api::app::bsky::feed::post::RecordEmbedRefs::AppBskyEmbedVideoMain(a) => atrium_api::app::bsky::embed::record_with_media::MainMediaRefs::AppBskyEmbedVideoMain(a),
_ => continue, // this should NEVER happen as Media are either Video or
// Images at this point
};
let quote_mapped = match quote_embed.unwrap() {
atrium_api::app::bsky::feed::post::RecordEmbedRefs::AppBskyEmbedRecordMain(
a,
) => a,
_ => continue, // again, this should NEVER happen
};
Some(atrium_api::types::Union::Refs(
atrium_api::app::bsky::feed::post::RecordEmbedRefs::AppBskyEmbedRecordWithMediaMain(
Box::new(
atrium_api::app::bsky::embed::record_with_media::MainData {
media: atrium_api::types::Union::Refs(medias_mapped),
record: (*quote_mapped),
}.into()
)
)
))
} else {
quote_embed.map(atrium_api::types::Union::Refs)
}
} else if media_embed.is_some() {
media_embed.map(atrium_api::types::Union::Refs)
} else if webcard_embed.is_some() {
webcard_embed.map(atrium_api::types::Union::Refs)
} else {
None
};
// posts corresponding tweet
let record = build_post_record(
&config.bluesky,
&tweet_content,
&toot.language,
record_medias,
record_embed,
&record_reply_to,
)
.await

View File

@@ -1,7 +1,7 @@
use crate::config::MastodonConfig;
use chrono::{DateTime, Utc};
use megalodon::{
entities::{Status, StatusVisibility},
entities::{QuotedStatus, Status, StatusVisibility},
generator,
mastodon::mastodon::Mastodon,
megalodon::AppInputOptions,
@@ -55,9 +55,19 @@ pub async fn get_mastodon_timeline_since(
.clone()
.is_some_and(|r| r == t.account.id)
})
.filter(|t| t.visibility == StatusVisibility::Public) // excludes everything that isnt
// public
.filter(|t| t.reblog.is_none()) // excludes reblogs
.filter(|t| t.visibility == StatusVisibility::Public) // excludes everything that isnt public
.filter(|t| t.reblog.is_none()) // exclude reblogs
.filter(|t| {
// exclude quotes that arent ours
t.quote.is_none()
|| t.quote.clone().is_some_and(|r| match r {
QuotedStatus::Quote(q) => q
.quoted_status
.clone()
.is_some_and(|iq| iq.account.id == t.account.id),
_ => false,
})
})
.cloned()
.collect();

View File

@@ -82,10 +82,7 @@ pub fn write_state(conn: &Connection, t: TootRecord) -> Result<(), Box<dyn Error
/// Initiates the DB from path
pub fn init_db(d: &str) -> Result<(), Box<dyn Error>> {
debug!(
"{}",
format!("Initializing DB for {}", env!("CARGO_PKG_NAME"))
);
debug!("Initializing DB for {}", env!("CARGO_PKG_NAME"));
let conn = Connection::open(d)?;
conn.execute(

View File

@@ -38,7 +38,13 @@ fn twitter_count(content: &str) -> usize {
for word in split_content {
if word.starts_with("http://") || word.starts_with("https://") {
count += 23;
// Its not that simple. Bsky adapts itself to the URL.
// https://github.com -> 10 chars
// https://github.com/ -> 10 chars
// https://github.com/NVNTLabs -> 19 chars
// https://github.com/NVNTLabs/ -> 20 chars
// so taking the maximum here to simplify things
count += 26;
} else {
count += word.chars().count();
}
@@ -47,10 +53,16 @@ fn twitter_count(content: &str) -> usize {
count
}
pub fn strip_everything(content: &str, tags: &Vec<Tag>) -> Result<String, Box<dyn Error>> {
pub fn strip_everything(
content: &str,
tags: &Vec<Tag>,
mastodon_base: &str,
) -> Result<String, Box<dyn Error>> {
let mut res = strip_html_tags(&content.replace("</p><p>", "\n\n").replace("<br />", "\n"));
strip_mastodon_tags(&mut res, tags).unwrap();
strip_quote_header(&mut res, mastodon_base)?;
strip_mastodon_tags(&mut res, tags)?;
res = res.trim_end_matches('\n').trim_end_matches(' ').to_string();
res = decode_html_entities(&res).to_string();
@@ -58,6 +70,16 @@ pub fn strip_everything(content: &str, tags: &Vec<Tag>) -> Result<String, Box<dy
Ok(res)
}
/// Removes the leading `RE: <link>` header from a quoting toot.
///
/// The header is only removed when `content` starts with `RE: `, immediately followed by
/// `mastodon_base`, at least one non-whitespace character (the rest of the link) and an
/// empty line (`\n\n`). Any other content is left untouched.
///
/// `mastodon_base` is matched literally, so characters that are special in regular
/// expressions (`+`, `?`, `[`, …) cannot alter the match or make it fail.
///
/// # Errors
///
/// Never fails at the moment; the `Result` is kept for the existing callers using `?`.
fn strip_quote_header(content: &mut String, mastodon_base: &str) -> Result<(), Box<dyn Error>> {
    let body = content
        .strip_prefix("RE: ")
        .and_then(|rest| rest.strip_prefix(mastodon_base))
        .and_then(|rest| {
            // the link runs up to the first whitespace and must not be empty
            let link_len = rest.find(char::is_whitespace)?;
            if link_len == 0 {
                return None;
            }
            // the header is only recognised when directly followed by an empty line
            rest[link_len..].strip_prefix("\n\n")
        })
        .map(str::to_owned);

    if let Some(body) = body {
        *content = body;
    }

    Ok(())
}
fn strip_mastodon_tags(content: &mut String, tags: &Vec<Tag>) -> Result<(), Box<dyn Error>> {
for tag in tags {
let re = Regex::new(&format!("(?i)(#{} ?)", &tag.name))?;
@@ -100,11 +122,11 @@ mod tests {
let content = "Shoot out to https://y.ml/ !";
assert_eq!(twitter_count(content), 38);
assert_eq!(twitter_count(content), 41);
let content = "this is the link https://www.google.com/tamerelol/youpi/tonperemdr/tarace.html if you like! What if I shit a final";
assert_eq!(twitter_count(content), 76);
assert_eq!(twitter_count(content), 79);
let content = "multi ple space";
@@ -112,7 +134,7 @@ mod tests {
let content = "This link is LEEEEET\n\nhttps://www.factornews.com/actualites/ca-sent-le-sapin-pour-free-radical-design-49985.html";
assert_eq!(twitter_count(content), 45);
assert_eq!(twitter_count(content), 48);
}
#[test]
@@ -180,9 +202,19 @@ mod tests {
#[test]
fn test_strip_everything() {
// a classic toot
let content = "<p>Ce soir à 21h, c&#39;est le Dojobar ! Au programme ce soir, une rétrospective sur la série Mario &amp; Luigi.<br />Comme d&#39;hab, le Twitch sera ici : <a href=\"https://twitch.tv/nintendojofr\" target=\"_blank\" rel=\"nofollow noopener noreferrer\" translate=\"no\"><span class=\"invisible\">https://</span><span class=\"\">twitch.tv/nintendojofr</span><span class=\"invisible\"></span></a><br />Ou juste l&#39;audio là : <a href=\"https://nintendojo.fr/dojobar\" target=\"_blank\" rel=\"nofollow noopener noreferrer\" translate=\"no\"><span class=\"invisible\">https://</span><span class=\"\">nintendojo.fr/dojobar</span><span class=\"invisible\"></span></a><br />A toute !</p>";
let expected_result = "Ce soir à 21h, c'est le Dojobar ! Au programme ce soir, une rétrospective sur la série Mario & Luigi.\nComme d'hab, le Twitch sera ici : https://twitch.tv/nintendojofr\nOu juste l'audio là : https://nintendojo.fr/dojobar\nA toute !".to_string();
let result = strip_everything(content, &vec![]).unwrap();
let result = strip_everything(content, &vec![], "https://m.nintendojo.fr").unwrap();
assert_eq!(result, expected_result);
// a quoted toot
let content = "<p class=\"quote-inline\">RE: <a href=\"https://m.nintendojo.fr/@nintendojofr/115446347351491651\" target=\"_blank\" rel=\"nofollow noopener\" translate=\"no\"><span class=\"invisible\">https://</span><span class=\"ellipsis\">m.nintendojo.fr/@nintendojofr/</span><span class=\"invisible\">115446347351491651</span></a></p><p>Assassins Creed Shadows pèsera environ 62,8 Go sur Switch 2 (et un peu plus de 100 Go sur les autres supports), soit tout juste pour rentrer sur une cartouche de 64 Go.</p><p>Ou pas, pour rappel…</p><p><a href=\"https://m.nintendojo.fr/tags/AssassinsCreedShadows\" class=\"mention hashtag\" rel=\"tag\">#<span>AssassinsCreedShadows</span></a> <a href=\"https://m.nintendojo.fr/tags/Ubisoft\" class=\"mention hashtag\" rel=\"tag\">#<span>Ubisoft</span></a> <a href=\"https://m.nintendojo.fr/tags/NintendoSwitch2\" class=\"mention hashtag\" rel=\"tag\">#<span>NintendoSwitch2</span></a></p>";
let expected_result = "Assassins Creed Shadows pèsera environ 62,8 Go sur Switch 2 (et un peu plus de 100 Go sur les autres supports), soit tout juste pour rentrer sur une cartouche de 64 Go.\n\nOu pas, pour rappel…\n\n#AssassinsCreedShadows #Ubisoft #NintendoSwitch2";
let result = strip_everything(content, &vec![], "https://m.nintendojo.fr").unwrap();
assert_eq!(result, expected_result);
}