24 Commits

Author SHA1 Message Date
VC
31aea7e1a6 Merge branch 'fix_french_inclusive_writing' into 'master'
feat: allow user to remove some links, replace some links by others

See merge request veretcle/scootaloo!38
2022-11-23 12:49:46 +00:00
Clément VERET
851f95d516 doc: regexp + alt services 2022-11-23 13:17:00 +01:00
Clément VERET
ffb9522ce2 feat: main logic for regex + url filtering 2022-11-23 13:16:56 +01:00
Clément VERET
b6df8c6230 test: add tests for scootaloo alt services + regexp 2022-11-23 13:09:03 +01:00
VC
9227850c99 feat: add necessary changes to decode_urls fn 2022-11-23 13:09:00 +01:00
VC
64d72ea69d chore: bump version + add regex 2022-11-23 09:00:44 +01:00
VC
9dd6ab8370 Merge branch 'fix_smart_mentions' into 'master'
Fix smart mentions

See merge request veretcle/scootaloo!37
2022-11-21 20:33:32 +00:00
VC
4679578101 chore: bump version 2022-11-21 21:28:09 +01:00
VC
2501d5990f fix: typo in the scootaloo_mentions var 2022-11-21 21:27:40 +01:00
VC
cb36730151 Merge branch 'fix_docs' into 'master'
docs: add mastodon_screen_name

See merge request veretcle/scootaloo!36
2022-11-21 09:31:56 +00:00
VC
a9942fad5c docs: add mastodon_screen_name 2022-11-21 10:31:38 +01:00
VC
522d4e3ea5 Merge branch '7-implement-smart-mentions' into 'master'
Implement smart mentions

Closes #7

See merge request veretcle/scootaloo!35
2022-11-21 09:29:05 +00:00
VC
91e3cd04a0 chore: bump version 2022-11-21 10:18:32 +01:00
VC
87a7574d42 feat: add mastodon_screen_name automatically/revise necessary permissions 2022-11-21 10:03:03 +01:00
VC
18e8b9d306 feat: add scootaloo_mentions hash from config file to be inserted into mentions 2022-11-21 08:40:52 +01:00
VC
1e9c768a74 test: add tests for mastodon_screen_name in config struct 2022-11-21 08:40:52 +01:00
VC
83a133bb86 feat: add mastodon_screen_name to config struct 2022-11-21 08:40:52 +01:00
VC
92d5fdffad Merge branch 'fix_lang' into 'master'
fix: visibility

See merge request veretcle/scootaloo!33
2022-11-19 16:46:06 +00:00
VC
331adec60f fix: visibility 2022-11-19 17:45:52 +01:00
VC
9a341310da Merge branch 'fix_lang' into 'master'
Fix lang

See merge request veretcle/scootaloo!32
2022-11-19 16:39:05 +00:00
VC
2c77a0e5fc chore: bump version 2022-11-19 17:34:09 +01:00
VC
032e3cf8dd fix: lang is not the default one anymore 2022-11-19 17:33:50 +01:00
VC
a854243cf6 Merge branch 'command_help' into 'master'
fix: remove unnecessary information in help commands

See merge request veretcle/scootaloo!31
2022-11-18 12:31:17 +00:00
VC
b33ffa4401 fix: remove unnecessary information in help commands 2022-11-18 13:27:18 +01:00
13 changed files with 341 additions and 37 deletions

3
Cargo.lock generated
View File

@@ -2103,7 +2103,7 @@ dependencies = [
[[package]]
name = "scootaloo"
version = "0.9.2"
version = "0.11.0"
dependencies = [
"chrono",
"clap",
@@ -2113,6 +2113,7 @@ dependencies = [
"html-escape",
"log",
"mime",
"regex",
"reqwest 0.11.12",
"rusqlite",
"serde",

View File

@@ -1,6 +1,6 @@
[package]
name = "scootaloo"
version = "0.9.2"
version = "0.11.0"
authors = ["VC <veretcle+framagit@mateu.be>"]
edition = "2021"
@@ -8,6 +8,7 @@ edition = "2021"
[dependencies]
chrono = "^0.4"
regex = "^1"
serde = { version = "1.0", features = ["derive"] }
toml = "^0.5"
clap = "^4"

View File

@@ -19,6 +19,19 @@ First up, create a configuration file (default path is `/usr/local/etc/scootaloo
db_path = "/var/lib/scootaloo/scootaloo.sqlite" ## SQLite DB file mapping each Tweet to its corresponding Toot, must be writable
cache_path = "/tmp/scootaloo" ## a dir where the temporary files will be downloaded, must be writable
rate_limit = 4 ## optional, default 4, number of accounts handled simultaneously
## optional, this should be omitted the majority of the time
## sometimes, Twitter users try to use French inclusive writing, but instead of using `·` (median point), they're using `.`
## this makes twitter interpret it as a URL, which is wrong
## this parameter allows you to catch such URLs and apply the `display_url` (i.e. `tout.es`) instead of the `expanded_url` (i.e. `http://tout.es`)
## in those particular cases
## (!) use with caution, it might have some undesired effects
show_url_as_display_url_for = "^https?://(.+)\\.es$"
## optional, this allows you to replace the host of popular services such as YouTube or Twitter, or any other,
## with their more freely accessible equivalent
[scootaloo.alternative_services_for]
"tamere.lol" = "tonpere.mdr" ## quotes are necessary for both parameters
"you.pi" = "you.pla"
"www.you.pi" = "you.pla" ## this is an exact match, so you'll need to lay out all the possibilities
[twitter]
## Consumer/Access key for Twitter (can be generated at https://developer.twitter.com/en/apps)
@@ -48,6 +61,7 @@ This will give you the end of the TOML file. It will look like this:
```toml
[mastodon.nintendojofr] ## account
twitter_screen_name = "NintendojoFR" ## User Timeline to copy
mastodon_screen_name = "nintendojofr" ## optional, Mastodon account name used for smart mentions
base = "https://m.nintendojo.fr"
client_id = "MYCLIENTID"
client_secret = "MYCLIENTSECRET"

View File

@@ -22,6 +22,7 @@ pub struct TwitterConfig {
#[derive(Debug, Deserialize)]
pub struct MastodonConfig {
pub twitter_screen_name: String,
pub mastodon_screen_name: Option<String>,
pub twitter_page_size: Option<i32>,
pub base: String,
pub client_id: String,
@@ -35,6 +36,8 @@ pub struct ScootalooConfig {
pub db_path: String,
pub cache_path: String,
pub rate_limit: Option<usize>,
pub show_url_as_display_url_for: Option<String>,
pub alternative_services_for: Option<HashMap<String, String>>,
}
/// Parses the TOML file into a Config Struct

View File

@@ -19,13 +19,13 @@ mod state;
pub use state::{init_db, migrate_db};
use state::{read_state, write_state, TweetToToot};
use elefren::{prelude::*, status_builder::StatusBuilder};
use log::info;
use rusqlite::Connection;
use std::sync::Arc;
use tokio::{spawn, sync::Mutex};
use elefren::{prelude::*, status_builder::StatusBuilder, Language};
use futures::StreamExt;
use log::info;
use regex::Regex;
use rusqlite::Connection;
use std::{collections::HashMap, sync::Arc};
use tokio::{spawn, sync::Mutex};
const DEFAULT_RATE_LIMIT: usize = 4;
const DEFAULT_PAGE_SIZE: i32 = 200;
@@ -43,6 +43,30 @@ pub async fn run(config: Config) {
}),
));
let scootaloo_mentions: HashMap<String, String> = config
.mastodon
.values()
.filter(|s| s.mastodon_screen_name.is_some())
.map(|s| {
(
format!("@{}", s.twitter_screen_name),
format!(
"@{}@{}",
s.mastodon_screen_name.as_ref().unwrap(),
s.base.split('/').last().unwrap()
),
)
})
.collect();
let display_url_re = config
.scootaloo
.show_url_as_display_url_for
.as_ref()
.map(|r|
// we want to panic in case the RE is not valid
Regex::new(r).unwrap());
let mut stream = futures::stream::iter(config.mastodon.into_values())
.map(|mastodon_config| {
// calculate Twitter page size
@@ -52,12 +76,14 @@ pub async fn run(config: Config) {
// create temporary value for each task
let scootaloo_cache_path = config.scootaloo.cache_path.clone();
let scootaloo_mentions = scootaloo_mentions.clone();
let scootaloo_alt_services = config.scootaloo.alternative_services_for.clone();
let display_url_re = display_url_re.clone();
let token = get_oauth2_token(&config.twitter);
let task_conn = conn.clone();
spawn(async move {
info!("Starting treating {}", &mastodon_config.twitter_screen_name);
// retrieve the last tweet ID for the username
let lconn = task_conn.lock().await;
let last_tweet_id = read_state(&lconn, &mastodon_config.twitter_screen_name, None)?
@@ -78,6 +104,7 @@ pub async fn run(config: Config) {
for tweet in &feed {
info!("Treating Tweet {} inside feed", tweet.id);
let lconn = task_conn.lock().await;
// initiate the toot_reply_id var and retrieve the corresponding toot_id
let toot_reply_id: Option<String> = tweet.in_reply_to_user_id.and_then(|_| {
@@ -92,7 +119,12 @@ pub async fn run(config: Config) {
drop(lconn);
// build basic status by just yielding text and dereferencing contained urls
let mut status_text = build_basic_status(tweet);
let mut status_text = build_basic_status(
tweet,
&scootaloo_mentions,
&display_url_re,
&scootaloo_alt_services,
);
// building associative media list
let (media_url, status_medias) =
@@ -106,10 +138,18 @@ pub async fn run(config: Config) {
status_builder.status(&status_text).media_ids(status_medias);
// thread if necessary
if let Some(i) = toot_reply_id {
status_builder.in_reply_to(&i);
}
// language if any
if let Some(l) = &tweet.lang {
if let Some(r) = Language::from_639_1(l) {
status_builder.language(r);
}
}
// can be activated for test purposes
// status_builder.visibility(elefren::status_builder::Visibility::Private);

View File

@@ -15,10 +15,7 @@ fn main() {
.short('c')
.long("config")
.value_name("CONFIG_FILE")
.help(&format!(
"TOML config file for scootaloo (default {})",
DEFAULT_CONFIG_PATH
))
.help("TOML config file for scootaloo")
.num_args(1)
.default_value(DEFAULT_CONFIG_PATH)
.display_order(1),
@@ -28,7 +25,7 @@ fn main() {
.short('l')
.long("loglevel")
.value_name("LOGLEVEL")
.help("Log level.Valid values are: Off, Warn, Error, Info, Debug")
.help("Log level")
.num_args(1)
.value_parser(["Off", "Warn", "Error", "Info", "Debug"])
.display_order(2),

View File

@@ -4,8 +4,9 @@ use egg_mode::{
entities::{MentionEntity, UrlEntity},
tweet::Tweet,
};
use elefren::{apps::App, prelude::*, scopes::Scopes};
use elefren::{apps::App, prelude::*, scopes::Read, scopes::Scopes, scopes::Write};
use html_escape::decode_html_entities;
use regex::Regex;
use std::{borrow::Cow, collections::HashMap, io::stdin};
/// Decodes the Twitter mention to something that will make sense once Twitter has joined the
@@ -22,14 +23,34 @@ fn twitter_mentions(ums: &[MentionEntity]) -> HashMap<String, String> {
}
/// Decodes urls from UrlEntities
fn decode_urls(urls: &[UrlEntity]) -> HashMap<String, String> {
fn decode_urls(
urls: &[UrlEntity],
re: &Option<Regex>,
alt_urls: &Option<HashMap<String, String>>,
) -> HashMap<String, String> {
urls.iter()
.filter(|s| s.expanded_url.is_some())
.map(|s| {
(
s.url.to_owned(),
s.expanded_url.as_deref().unwrap().to_owned(),
)
(s.url.to_owned(), {
let mut def = s.expanded_url.as_deref().unwrap().to_owned();
if let Some(a) = &alt_urls {
for (url_source, url_destination) in a {
def = def.replace(
&format!("https://{}", url_source),
&format!("https://{}", url_destination),
);
}
}
if let Some(r) = &re {
if r.is_match(s.expanded_url.as_deref().unwrap()) {
def = s.display_url.clone();
}
}
def
})
})
.collect()
}
@@ -48,14 +69,23 @@ pub fn get_mastodon_token(masto: &MastodonConfig) -> Mastodon {
}
/// Builds toot text from tweet
pub fn build_basic_status(tweet: &Tweet) -> String {
pub fn build_basic_status(
tweet: &Tweet,
mentions: &HashMap<String, String>,
url_regex_filter: &Option<Regex>,
url_alt_services: &Option<HashMap<String, String>>,
) -> String {
let mut toot = tweet.text.to_owned();
for decoded_url in decode_urls(&tweet.entities.urls) {
for decoded_url in decode_urls(&tweet.entities.urls, url_regex_filter, url_alt_services) {
toot = toot.replace(&decoded_url.0, &decoded_url.1);
}
for decoded_mention in twitter_mentions(&tweet.entities.user_mentions) {
for decoded_mention in twitter_mentions(&tweet.entities.user_mentions)
.into_iter()
.chain(mentions.to_owned())
.collect::<HashMap<String, String>>()
{
toot = toot.replace(&decoded_mention.0, &decoded_mention.1);
}
@@ -70,7 +100,12 @@ pub fn register(host: &str, screen_name: &str) {
builder
.client_name(Cow::from(env!("CARGO_PKG_NAME").to_string()))
.redirect_uris(Cow::from("urn:ietf:wg:oauth:2.0:oob".to_string()))
.scopes(Scopes::write_all())
.scopes(
Scopes::write(Write::Accounts)
.and(Scopes::write(Write::Media))
.and(Scopes::write(Write::Statuses))
.and(Scopes::read(Read::Accounts)),
)
.website(Cow::from(
"https://framagit.org/veretcle/scootaloo".to_string(),
));
@@ -99,13 +134,19 @@ pub fn register(host: &str, screen_name: &str) {
let toml = toml::to_string(&*mastodon).unwrap();
let current_account = mastodon
.verify_credentials()
.expect("Unable to access account information!");
println!(
"Please insert the following block at the end of your configuration file:
[mastodon.{}]
twitter_screen_name = \"{}\"
mastodon_screen_name = \"{}\"
{}",
screen_name.to_lowercase(),
screen_name,
current_account.username,
toml
);
}
@@ -155,15 +196,51 @@ mod tests {
url: "https://t.me/tamerelol".to_string(),
};
let twitter_urls = vec![url_entity1, url_entity2];
let wrong_url_entity = UrlEntity {
display_url: "invité.es".to_string(),
expanded_url: Some("http://xn--invit-fsa.es".to_string()),
range: (85, 108),
url: "https://t.co/WAUgnpHLmo".to_string(),
};
let mut expected_urls = HashMap::new();
expected_urls.insert(
let rewritten_url_entity = UrlEntity {
display_url: "youtu.be/w5TrSaoYmZ8".to_string(),
expanded_url: Some("https://youtu.be/w5TrSaoYmZ8".to_string()),
range: (0, 23),
url: "https://t.co/fUVYXuF7tg".to_string(),
};
let re = Regex::new("(.+)\\.es$").ok();
let alt: HashMap<String, String> = HashMap::from([
("youtube.com".to_string(), "invidio.us".to_string()),
("youtu.be".to_string(), "invidio.us".to_string()),
("www.youtube.com".to_string(), "invidio.us".to_string()),
]);
let twitter_urls = vec![
url_entity1,
url_entity2,
wrong_url_entity,
rewritten_url_entity,
];
let expected_urls = HashMap::from([
(
"https://t.me/tamerelol".to_string(),
"https://www.nintendojo.fr/dojobar".to_string(),
);
),
(
"https://t.co/WAUgnpHLmo".to_string(),
"invité.es".to_string(),
),
(
"https://t.co/fUVYXuF7tg".to_string(),
"https://invidio.us/w5TrSaoYmZ8".to_string(),
),
]);
let decoded_urls = decode_urls(&twitter_urls);
let decoded_urls = decode_urls(&twitter_urls, &re, &Some(alt));
assert_eq!(expected_urls, decoded_urls);
}
@@ -192,7 +269,13 @@ mod tests {
range: (80, 95),
name: "Nintendo France".to_string(),
screen_name: "NintendoFrance".to_string(),
}
},
MentionEntity {
id: 999999999,
range: (80, 95),
name: "Willy Wonka".to_string(),
screen_name: "WillyWonka".to_string(),
},
],
media: None,
},
@@ -213,7 +296,7 @@ mod tests {
retweeted: None,
retweeted_status: None,
source: None,
text: "Mother 1 &amp; 2 sur le NES/SNES online !\nDispo maintenant. cc @NintendoFrance https://t.co/zXw0FfX2Nt".to_string(),
text: "Mother 1 &amp; 2 sur le NES/SNES online !\nDispo maintenant. cc @NintendoFrance @WillyWonka https://t.co/zXw0FfX2Nt".to_string(),
truncated: false,
user: None,
withheld_copyright: false,
@@ -221,8 +304,13 @@ mod tests {
withheld_scope: None,
};
let t_out = build_basic_status(&t);
let s: HashMap<String, String> = HashMap::from([(
"@WillyWonka".to_string(),
"@WillyWonka@chocolatefactory.org".to_string(),
)]);
assert_eq!(&t_out, "Mother 1 & 2 sur le NES/SNES online !\nDispo maintenant. cc @NintendoFrance@twitter.com https://www.youtube.com/watch?v=w5TrSaoYmZ8");
let t_out = build_basic_status(&t, &s, &None, &None);
assert_eq!(&t_out, "Mother 1 & 2 sur le NES/SNES online !\nDispo maintenant. cc @NintendoFrance@twitter.com @WillyWonka@chocolatefactory.org https://www.youtube.com/watch?v=w5TrSaoYmZ8");
}
}

View File

@@ -1,4 +1,33 @@
use scootaloo::parse_toml;
use std::collections::HashMap;
/// Checks how `alternative_services_for` is parsed from two fixture files:
/// the first one is expected to yield `None`, the second one the exact
/// host-to-host table asserted below.
#[test]
fn test_alt_services() {
    let without_table = parse_toml("tests/no_test_alt_services.toml");
    assert_eq!(without_table.scootaloo.alternative_services_for, None);

    // build the expected table from plain string pairs
    let expected: HashMap<String, String> = [("tamere.lol", "tonpere.mdr"), ("you.pi", "you.pla")]
        .iter()
        .map(|&(source, destination)| (source.to_owned(), destination.to_owned()))
        .collect();

    let with_table = parse_toml("tests/test_alt_services.toml");
    assert_eq!(with_table.scootaloo.alternative_services_for, Some(expected));
}
/// Checks how `show_url_as_display_url_for` is parsed from two fixture files:
/// the first one is expected to yield `None`, the second one the raw
/// pattern string asserted below (the pattern is not compiled here).
#[test]
fn test_re_display() {
    let without_pattern = parse_toml("tests/no_show_url_as_display_url_for.toml");
    assert_eq!(without_pattern.scootaloo.show_url_as_display_url_for, None);

    let with_pattern = parse_toml("tests/show_url_as_display_url_for.toml");
    let expected_pattern = String::from("^(.+)\\.es$");
    assert_eq!(
        with_pattern.scootaloo.show_url_as_display_url_for,
        Some(expected_pattern)
    );
}
#[test]
fn test_page_size() {
@@ -52,6 +81,29 @@ fn test_parse_good_toml_rate_limit() {
assert_eq!(parse_good_toml.scootaloo.rate_limit, Some(69 as usize));
}
/// Checks `mastodon_screen_name` for the two accounts of the fixture file:
/// account `"0"` is expected to carry `"tarace"`, account `"1"` is expected
/// to have no screen name at all.
#[test]
fn test_parse_good_toml_mastodon_screen_name() {
    let config = parse_toml("tests/good_test_mastodon_screen_name.toml");

    // (account key, expected `mastodon_screen_name`), checked in order
    let expectations = [("0", Some("tarace".to_string())), ("1", None)];

    for (account, expected) in expectations.iter() {
        let account_config = config.mastodon.get(*account).unwrap();
        assert_eq!(&account_config.mastodon_screen_name, expected);
    }
}
#[test]
fn test_parse_good_toml() {
let parse_good_toml = parse_toml("tests/good_test.toml");

View File

@@ -0,0 +1,28 @@
[scootaloo]
db_path="/var/random/scootaloo.sqlite"
cache_path="/tmp/scootaloo"
[twitter]
consumer_key="rand consumer key"
consumer_secret="secret"
access_key="rand access key"
access_secret="super secret"
[mastodon]
[mastodon.0]
twitter_screen_name="tamerelol"
mastodon_screen_name="tarace"
base = "https://m.nintendojo.fr"
client_id = "rand client id"
client_secret = "secret"
redirect = "urn:ietf:wg:oauth:2.0:oob"
token = "super secret"
[mastodon.1]
twitter_screen_name="tamerelol"
base = "https://m.nintendojo.fr"
client_id = "rand client id"
client_secret = "secret"
redirect = "urn:ietf:wg:oauth:2.0:oob"
token = "super secret"

View File

@@ -0,0 +1,19 @@
[scootaloo]
db_path="/var/random/scootaloo.sqlite"
cache_path="/tmp/scootaloo"
[twitter]
consumer_key="rand consumer key"
consumer_secret="secret"
access_key="rand access key"
access_secret="super secret"
[mastodon]
[mastodon.tamerelol]
twitter_screen_name="tamerelol"
base = "https://m.nintendojo.fr"
client_id = "rand client id"
client_secret = "secret"
redirect = "urn:ietf:wg:oauth:2.0:oob"
token = "super secret"

View File

@@ -0,0 +1,19 @@
[scootaloo]
db_path="/var/random/scootaloo.sqlite"
cache_path="/tmp/scootaloo"
[twitter]
consumer_key="rand consumer key"
consumer_secret="secret"
access_key="rand access key"
access_secret="super secret"
[mastodon]
[mastodon.tamerelol]
twitter_screen_name="tamerelol"
base = "https://m.nintendojo.fr"
client_id = "rand client id"
client_secret = "secret"
redirect = "urn:ietf:wg:oauth:2.0:oob"
token = "super secret"

View File

@@ -0,0 +1,20 @@
[scootaloo]
db_path="/var/random/scootaloo.sqlite"
cache_path="/tmp/scootaloo"
show_url_as_display_url_for = "^(.+)\\.es$"
[twitter]
consumer_key="rand consumer key"
consumer_secret="secret"
access_key="rand access key"
access_secret="super secret"
[mastodon]
[mastodon.tamerelol]
twitter_screen_name="tamerelol"
base = "https://m.nintendojo.fr"
client_id = "rand client id"
client_secret = "secret"
redirect = "urn:ietf:wg:oauth:2.0:oob"
token = "super secret"

View File

@@ -0,0 +1,22 @@
[scootaloo]
db_path="/var/random/scootaloo.sqlite"
cache_path="/tmp/scootaloo"
[scootaloo.alternative_services_for]
"tamere.lol" = "tonpere.mdr"
"you.pi" = "you.pla"
[twitter]
consumer_key="rand consumer key"
consumer_secret="secret"
access_key="rand access key"
access_secret="super secret"
[mastodon]
[mastodon.tamerelol]
twitter_screen_name="tamerelol"
base = "https://m.nintendojo.fr"
client_id = "rand client id"
client_secret = "secret"
redirect = "urn:ietf:wg:oauth:2.0:oob"
token = "super secret"