mirror of
https://framagit.org/veretcle/scootaloo.git
synced 2025-07-21 17:34:37 +02:00
Merge branch 'fix_french_inclusive_writing' into 'master'
feat: allow user to remove some links, replace some links by others See merge request veretcle/scootaloo!38
This commit is contained in:
3
Cargo.lock
generated
3
Cargo.lock
generated
@@ -2103,7 +2103,7 @@ dependencies = [
|
|||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "scootaloo"
|
name = "scootaloo"
|
||||||
version = "0.10.1"
|
version = "0.11.0"
|
||||||
dependencies = [
|
dependencies = [
|
||||||
"chrono",
|
"chrono",
|
||||||
"clap",
|
"clap",
|
||||||
@@ -2113,6 +2113,7 @@ dependencies = [
|
|||||||
"html-escape",
|
"html-escape",
|
||||||
"log",
|
"log",
|
||||||
"mime",
|
"mime",
|
||||||
|
"regex",
|
||||||
"reqwest 0.11.12",
|
"reqwest 0.11.12",
|
||||||
"rusqlite",
|
"rusqlite",
|
||||||
"serde",
|
"serde",
|
||||||
|
@@ -1,6 +1,6 @@
|
|||||||
[package]
|
[package]
|
||||||
name = "scootaloo"
|
name = "scootaloo"
|
||||||
version = "0.10.1"
|
version = "0.11.0"
|
||||||
authors = ["VC <veretcle+framagit@mateu.be>"]
|
authors = ["VC <veretcle+framagit@mateu.be>"]
|
||||||
edition = "2021"
|
edition = "2021"
|
||||||
|
|
||||||
@@ -8,6 +8,7 @@ edition = "2021"
|
|||||||
|
|
||||||
[dependencies]
|
[dependencies]
|
||||||
chrono = "^0.4"
|
chrono = "^0.4"
|
||||||
|
regex = "^1"
|
||||||
serde = { version = "1.0", features = ["derive"] }
|
serde = { version = "1.0", features = ["derive"] }
|
||||||
toml = "^0.5"
|
toml = "^0.5"
|
||||||
clap = "^4"
|
clap = "^4"
|
||||||
|
13
README.md
13
README.md
@@ -19,6 +19,19 @@ First up, create a configuration file (default path is `/usr/local/etc/scootaloo
|
|||||||
db_path = "/var/lib/scootaloo/scootaloo.sqlite" ## file containing the SQLite Tweet corresponding Toot DB, must be writeable
|
db_path = "/var/lib/scootaloo/scootaloo.sqlite" ## file containing the SQLite Tweet corresponding Toot DB, must be writeable
|
||||||
cache_path = "/tmp/scootaloo" ## a dir where the temporary files will be download, must be writeable
|
cache_path = "/tmp/scootaloo" ## a dir where the temporary files will be download, must be writeable
|
||||||
rate_limiting = 4 ## optional, default 4, number of accounts handled simultaneously
|
rate_limiting = 4 ## optional, default 4, number of accounts handled simultaneously
|
||||||
|
## optional, this should be omitted the majority of the time
|
||||||
|
## sometimes, Twitter users try to use French inclusive writing, but instead of using `·` (middle dot), they use `.`
|
||||||
|
## this makes twitter interpret it as a URL, which is wrong
|
||||||
|
## this parameter allows you to catch such URLs and apply the `display_url` (i.e. `tout.es`) instead of the `expanded_url` (i.e. `http://tout.es`)
|
||||||
|
## in those particular cases
|
||||||
|
## (!) use with caution, it might have some undesired effects
|
||||||
|
show_url_as_display_url_for = "^https?://(.+)\\.es$"
|
||||||
|
## optional, this allows you to replace the host for popular services such as YouTube or Twitter, or any other
|
||||||
|
## with their more freely accessible equivalent
|
||||||
|
[scootaloo.alternative_services_for]
|
||||||
|
"tamere.lol" = "tonpere.mdr" ## quotes are necessary for both parameters
|
||||||
|
"you.pi" = "you.pla"
|
||||||
|
"www.you.pi" = "you.pla" ## this is an exact match, so you’ll need to lay out all the possibilities
|
||||||
|
|
||||||
[twitter]
|
[twitter]
|
||||||
## Consumer/Access key for Twitter (can be generated at https://developer.twitter.com/en/apps)
|
## Consumer/Access key for Twitter (can be generated at https://developer.twitter.com/en/apps)
|
||||||
|
@@ -36,6 +36,8 @@ pub struct ScootalooConfig {
|
|||||||
pub db_path: String,
|
pub db_path: String,
|
||||||
pub cache_path: String,
|
pub cache_path: String,
|
||||||
pub rate_limit: Option<usize>,
|
pub rate_limit: Option<usize>,
|
||||||
|
pub show_url_as_display_url_for: Option<String>,
|
||||||
|
pub alternative_services_for: Option<HashMap<String, String>>,
|
||||||
}
|
}
|
||||||
|
|
||||||
/// Parses the TOML file into a Config Struct
|
/// Parses the TOML file into a Config Struct
|
||||||
|
18
src/lib.rs
18
src/lib.rs
@@ -22,6 +22,7 @@ use state::{read_state, write_state, TweetToToot};
|
|||||||
use elefren::{prelude::*, status_builder::StatusBuilder, Language};
|
use elefren::{prelude::*, status_builder::StatusBuilder, Language};
|
||||||
use futures::StreamExt;
|
use futures::StreamExt;
|
||||||
use log::info;
|
use log::info;
|
||||||
|
use regex::Regex;
|
||||||
use rusqlite::Connection;
|
use rusqlite::Connection;
|
||||||
use std::{collections::HashMap, sync::Arc};
|
use std::{collections::HashMap, sync::Arc};
|
||||||
use tokio::{spawn, sync::Mutex};
|
use tokio::{spawn, sync::Mutex};
|
||||||
@@ -58,6 +59,14 @@ pub async fn run(config: Config) {
|
|||||||
})
|
})
|
||||||
.collect();
|
.collect();
|
||||||
|
|
||||||
|
let display_url_re = config
|
||||||
|
.scootaloo
|
||||||
|
.show_url_as_display_url_for
|
||||||
|
.as_ref()
|
||||||
|
.map(|r|
|
||||||
|
// we want to panic in case the RE is not valid
|
||||||
|
Regex::new(r).unwrap());
|
||||||
|
|
||||||
let mut stream = futures::stream::iter(config.mastodon.into_values())
|
let mut stream = futures::stream::iter(config.mastodon.into_values())
|
||||||
.map(|mastodon_config| {
|
.map(|mastodon_config| {
|
||||||
// calculate Twitter page size
|
// calculate Twitter page size
|
||||||
@@ -68,6 +77,8 @@ pub async fn run(config: Config) {
|
|||||||
// create temporary value for each task
|
// create temporary value for each task
|
||||||
let scootaloo_cache_path = config.scootaloo.cache_path.clone();
|
let scootaloo_cache_path = config.scootaloo.cache_path.clone();
|
||||||
let scootaloo_mentions = scootaloo_mentions.clone();
|
let scootaloo_mentions = scootaloo_mentions.clone();
|
||||||
|
let scootaloo_alt_services = config.scootaloo.alternative_services_for.clone();
|
||||||
|
let display_url_re = display_url_re.clone();
|
||||||
let token = get_oauth2_token(&config.twitter);
|
let token = get_oauth2_token(&config.twitter);
|
||||||
let task_conn = conn.clone();
|
let task_conn = conn.clone();
|
||||||
|
|
||||||
@@ -108,7 +119,12 @@ pub async fn run(config: Config) {
|
|||||||
drop(lconn);
|
drop(lconn);
|
||||||
|
|
||||||
// build basic status by just yielding text and dereferencing contained urls
|
// build basic status by just yielding text and dereferencing contained urls
|
||||||
let mut status_text = build_basic_status(tweet, &scootaloo_mentions);
|
let mut status_text = build_basic_status(
|
||||||
|
tweet,
|
||||||
|
&scootaloo_mentions,
|
||||||
|
&display_url_re,
|
||||||
|
&scootaloo_alt_services,
|
||||||
|
);
|
||||||
|
|
||||||
// building associative media list
|
// building associative media list
|
||||||
let (media_url, status_medias) =
|
let (media_url, status_medias) =
|
||||||
|
@@ -6,6 +6,7 @@ use egg_mode::{
|
|||||||
};
|
};
|
||||||
use elefren::{apps::App, prelude::*, scopes::Read, scopes::Scopes, scopes::Write};
|
use elefren::{apps::App, prelude::*, scopes::Read, scopes::Scopes, scopes::Write};
|
||||||
use html_escape::decode_html_entities;
|
use html_escape::decode_html_entities;
|
||||||
|
use regex::Regex;
|
||||||
use std::{borrow::Cow, collections::HashMap, io::stdin};
|
use std::{borrow::Cow, collections::HashMap, io::stdin};
|
||||||
|
|
||||||
/// Decodes the Twitter mention to something that will make sense once Twitter has joined the
|
/// Decodes the Twitter mention to something that will make sense once Twitter has joined the
|
||||||
@@ -22,14 +23,34 @@ fn twitter_mentions(ums: &[MentionEntity]) -> HashMap<String, String> {
|
|||||||
}
|
}
|
||||||
|
|
||||||
/// Decodes urls from UrlEntities
|
/// Decodes urls from UrlEntities
|
||||||
fn decode_urls(urls: &[UrlEntity]) -> HashMap<String, String> {
|
fn decode_urls(
|
||||||
|
urls: &[UrlEntity],
|
||||||
|
re: &Option<Regex>,
|
||||||
|
alt_urls: &Option<HashMap<String, String>>,
|
||||||
|
) -> HashMap<String, String> {
|
||||||
urls.iter()
|
urls.iter()
|
||||||
.filter(|s| s.expanded_url.is_some())
|
.filter(|s| s.expanded_url.is_some())
|
||||||
.map(|s| {
|
.map(|s| {
|
||||||
(
|
(s.url.to_owned(), {
|
||||||
s.url.to_owned(),
|
let mut def = s.expanded_url.as_deref().unwrap().to_owned();
|
||||||
s.expanded_url.as_deref().unwrap().to_owned(),
|
|
||||||
)
|
if let Some(a) = &alt_urls {
|
||||||
|
for (url_source, url_destination) in a {
|
||||||
|
def = def.replace(
|
||||||
|
&format!("https://{}", url_source),
|
||||||
|
&format!("https://{}", url_destination),
|
||||||
|
);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
if let Some(r) = &re {
|
||||||
|
if r.is_match(s.expanded_url.as_deref().unwrap()) {
|
||||||
|
def = s.display_url.clone();
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
def
|
||||||
|
})
|
||||||
})
|
})
|
||||||
.collect()
|
.collect()
|
||||||
}
|
}
|
||||||
@@ -48,10 +69,15 @@ pub fn get_mastodon_token(masto: &MastodonConfig) -> Mastodon {
|
|||||||
}
|
}
|
||||||
|
|
||||||
/// Builds toot text from tweet
|
/// Builds toot text from tweet
|
||||||
pub fn build_basic_status(tweet: &Tweet, mentions: &HashMap<String, String>) -> String {
|
pub fn build_basic_status(
|
||||||
|
tweet: &Tweet,
|
||||||
|
mentions: &HashMap<String, String>,
|
||||||
|
url_regex_filter: &Option<Regex>,
|
||||||
|
url_alt_services: &Option<HashMap<String, String>>,
|
||||||
|
) -> String {
|
||||||
let mut toot = tweet.text.to_owned();
|
let mut toot = tweet.text.to_owned();
|
||||||
|
|
||||||
for decoded_url in decode_urls(&tweet.entities.urls) {
|
for decoded_url in decode_urls(&tweet.entities.urls, url_regex_filter, url_alt_services) {
|
||||||
toot = toot.replace(&decoded_url.0, &decoded_url.1);
|
toot = toot.replace(&decoded_url.0, &decoded_url.1);
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -170,15 +196,51 @@ mod tests {
|
|||||||
url: "https://t.me/tamerelol".to_string(),
|
url: "https://t.me/tamerelol".to_string(),
|
||||||
};
|
};
|
||||||
|
|
||||||
let twitter_urls = vec![url_entity1, url_entity2];
|
let wrong_url_entity = UrlEntity {
|
||||||
|
display_url: "invité.es".to_string(),
|
||||||
|
expanded_url: Some("http://xn--invit-fsa.es".to_string()),
|
||||||
|
range: (85, 108),
|
||||||
|
url: "https://t.co/WAUgnpHLmo".to_string(),
|
||||||
|
};
|
||||||
|
|
||||||
let mut expected_urls = HashMap::new();
|
let rewritten_url_entity = UrlEntity {
|
||||||
expected_urls.insert(
|
display_url: "youtu.be/w5TrSaoYmZ8".to_string(),
|
||||||
|
expanded_url: Some("https://youtu.be/w5TrSaoYmZ8".to_string()),
|
||||||
|
range: (0, 23),
|
||||||
|
url: "https://t.co/fUVYXuF7tg".to_string(),
|
||||||
|
};
|
||||||
|
|
||||||
|
let re = Regex::new("(.+)\\.es$").ok();
|
||||||
|
|
||||||
|
let alt: HashMap<String, String> = HashMap::from([
|
||||||
|
("youtube.com".to_string(), "invidio.us".to_string()),
|
||||||
|
("youtu.be".to_string(), "invidio.us".to_string()),
|
||||||
|
("www.youtube.com".to_string(), "invidio.us".to_string()),
|
||||||
|
]);
|
||||||
|
|
||||||
|
let twitter_urls = vec![
|
||||||
|
url_entity1,
|
||||||
|
url_entity2,
|
||||||
|
wrong_url_entity,
|
||||||
|
rewritten_url_entity,
|
||||||
|
];
|
||||||
|
|
||||||
|
let expected_urls = HashMap::from([
|
||||||
|
(
|
||||||
"https://t.me/tamerelol".to_string(),
|
"https://t.me/tamerelol".to_string(),
|
||||||
"https://www.nintendojo.fr/dojobar".to_string(),
|
"https://www.nintendojo.fr/dojobar".to_string(),
|
||||||
);
|
),
|
||||||
|
(
|
||||||
|
"https://t.co/WAUgnpHLmo".to_string(),
|
||||||
|
"invité.es".to_string(),
|
||||||
|
),
|
||||||
|
(
|
||||||
|
"https://t.co/fUVYXuF7tg".to_string(),
|
||||||
|
"https://invidio.us/w5TrSaoYmZ8".to_string(),
|
||||||
|
),
|
||||||
|
]);
|
||||||
|
|
||||||
let decoded_urls = decode_urls(&twitter_urls);
|
let decoded_urls = decode_urls(&twitter_urls, &re, &Some(alt));
|
||||||
|
|
||||||
assert_eq!(expected_urls, decoded_urls);
|
assert_eq!(expected_urls, decoded_urls);
|
||||||
}
|
}
|
||||||
@@ -247,7 +309,7 @@ mod tests {
|
|||||||
"@WillyWonka@chocolatefactory.org".to_string(),
|
"@WillyWonka@chocolatefactory.org".to_string(),
|
||||||
)]);
|
)]);
|
||||||
|
|
||||||
let t_out = build_basic_status(&t, &s);
|
let t_out = build_basic_status(&t, &s, &None, &None);
|
||||||
|
|
||||||
assert_eq!(&t_out, "Mother 1 & 2 sur le NES/SNES online !\nDispo maintenant. cc @NintendoFrance@twitter.com @WillyWonka@chocolatefactory.org https://www.youtube.com/watch?v=w5TrSaoYmZ8");
|
assert_eq!(&t_out, "Mother 1 & 2 sur le NES/SNES online !\nDispo maintenant. cc @NintendoFrance@twitter.com @WillyWonka@chocolatefactory.org https://www.youtube.com/watch?v=w5TrSaoYmZ8");
|
||||||
}
|
}
|
||||||
|
@@ -1,4 +1,33 @@
|
|||||||
use scootaloo::parse_toml;
|
use scootaloo::parse_toml;
|
||||||
|
use std::collections::HashMap;
|
||||||
|
|
||||||
|
#[test]
|
||||||
|
fn test_alt_services() {
|
||||||
|
let toml = parse_toml("tests/no_test_alt_services.toml");
|
||||||
|
assert_eq!(toml.scootaloo.alternative_services_for, None);
|
||||||
|
|
||||||
|
let toml = parse_toml("tests/test_alt_services.toml");
|
||||||
|
assert_eq!(
|
||||||
|
toml.scootaloo.alternative_services_for,
|
||||||
|
Some(HashMap::from([
|
||||||
|
("tamere.lol".to_string(), "tonpere.mdr".to_string()),
|
||||||
|
("you.pi".to_string(), "you.pla".to_string())
|
||||||
|
]))
|
||||||
|
);
|
||||||
|
}
|
||||||
|
|
||||||
|
#[test]
|
||||||
|
fn test_re_display() {
|
||||||
|
let toml = parse_toml("tests/no_show_url_as_display_url_for.toml");
|
||||||
|
assert_eq!(toml.scootaloo.show_url_as_display_url_for, None);
|
||||||
|
|
||||||
|
let toml = parse_toml("tests/show_url_as_display_url_for.toml");
|
||||||
|
|
||||||
|
assert_eq!(
|
||||||
|
toml.scootaloo.show_url_as_display_url_for,
|
||||||
|
Some("^(.+)\\.es$".to_string())
|
||||||
|
);
|
||||||
|
}
|
||||||
|
|
||||||
#[test]
|
#[test]
|
||||||
fn test_page_size() {
|
fn test_page_size() {
|
||||||
|
19
tests/no_show_url_as_display_url_for.toml
Normal file
19
tests/no_show_url_as_display_url_for.toml
Normal file
@@ -0,0 +1,19 @@
|
|||||||
|
[scootaloo]
|
||||||
|
|
||||||
|
db_path="/var/random/scootaloo.sqlite"
|
||||||
|
cache_path="/tmp/scootaloo"
|
||||||
|
|
||||||
|
[twitter]
|
||||||
|
consumer_key="rand consumer key"
|
||||||
|
consumer_secret="secret"
|
||||||
|
access_key="rand access key"
|
||||||
|
access_secret="super secret"
|
||||||
|
|
||||||
|
[mastodon]
|
||||||
|
[mastodon.tamerelol]
|
||||||
|
twitter_screen_name="tamerelol"
|
||||||
|
base = "https://m.nintendojo.fr"
|
||||||
|
client_id = "rand client id"
|
||||||
|
client_secret = "secret"
|
||||||
|
redirect = "urn:ietf:wg:oauth:2.0:oob"
|
||||||
|
token = "super secret"
|
19
tests/no_test_alt_services.toml
Normal file
19
tests/no_test_alt_services.toml
Normal file
@@ -0,0 +1,19 @@
|
|||||||
|
[scootaloo]
|
||||||
|
|
||||||
|
db_path="/var/random/scootaloo.sqlite"
|
||||||
|
cache_path="/tmp/scootaloo"
|
||||||
|
|
||||||
|
[twitter]
|
||||||
|
consumer_key="rand consumer key"
|
||||||
|
consumer_secret="secret"
|
||||||
|
access_key="rand access key"
|
||||||
|
access_secret="super secret"
|
||||||
|
|
||||||
|
[mastodon]
|
||||||
|
[mastodon.tamerelol]
|
||||||
|
twitter_screen_name="tamerelol"
|
||||||
|
base = "https://m.nintendojo.fr"
|
||||||
|
client_id = "rand client id"
|
||||||
|
client_secret = "secret"
|
||||||
|
redirect = "urn:ietf:wg:oauth:2.0:oob"
|
||||||
|
token = "super secret"
|
20
tests/show_url_as_display_url_for.toml
Normal file
20
tests/show_url_as_display_url_for.toml
Normal file
@@ -0,0 +1,20 @@
|
|||||||
|
[scootaloo]
|
||||||
|
|
||||||
|
db_path="/var/random/scootaloo.sqlite"
|
||||||
|
cache_path="/tmp/scootaloo"
|
||||||
|
show_url_as_display_url_for = "^(.+)\\.es$"
|
||||||
|
|
||||||
|
[twitter]
|
||||||
|
consumer_key="rand consumer key"
|
||||||
|
consumer_secret="secret"
|
||||||
|
access_key="rand access key"
|
||||||
|
access_secret="super secret"
|
||||||
|
|
||||||
|
[mastodon]
|
||||||
|
[mastodon.tamerelol]
|
||||||
|
twitter_screen_name="tamerelol"
|
||||||
|
base = "https://m.nintendojo.fr"
|
||||||
|
client_id = "rand client id"
|
||||||
|
client_secret = "secret"
|
||||||
|
redirect = "urn:ietf:wg:oauth:2.0:oob"
|
||||||
|
token = "super secret"
|
22
tests/test_alt_services.toml
Normal file
22
tests/test_alt_services.toml
Normal file
@@ -0,0 +1,22 @@
|
|||||||
|
[scootaloo]
|
||||||
|
|
||||||
|
db_path="/var/random/scootaloo.sqlite"
|
||||||
|
cache_path="/tmp/scootaloo"
|
||||||
|
[scootaloo.alternative_services_for]
|
||||||
|
"tamere.lol" = "tonpere.mdr"
|
||||||
|
"you.pi" = "you.pla"
|
||||||
|
|
||||||
|
[twitter]
|
||||||
|
consumer_key="rand consumer key"
|
||||||
|
consumer_secret="secret"
|
||||||
|
access_key="rand access key"
|
||||||
|
access_secret="super secret"
|
||||||
|
|
||||||
|
[mastodon]
|
||||||
|
[mastodon.tamerelol]
|
||||||
|
twitter_screen_name="tamerelol"
|
||||||
|
base = "https://m.nintendojo.fr"
|
||||||
|
client_id = "rand client id"
|
||||||
|
client_secret = "secret"
|
||||||
|
redirect = "urn:ietf:wg:oauth:2.0:oob"
|
||||||
|
token = "super secret"
|
Reference in New Issue
Block a user