13 Commits

Author SHA1 Message Date
VC
6e23e0ab14 Merge branch 'cargo_lock' into 'master'
chore: bump version

See merge request veretcle/scootaloo!46
2022-11-25 20:00:55 +00:00
VC
c3862fea55 chore: bump version 2022-11-25 21:00:35 +01:00
VC
c0ae9dc52f Merge branch '10-implement-smart-quoting' into 'master'
feat: when quoting another Scootaloo instance user, try to find the...

Closes #10

See merge request veretcle/scootaloo!45
2022-11-25 19:50:41 +00:00
VC
2ae87b2767 feat: when quoting another Scootaloo instance user, try to find the corresponding toot to replace it inside the url entities 2022-11-25 19:57:04 +01:00
VC
0399623cfa chore: bump version 2022-11-25 19:57:02 +01:00
VC
895c41c75f chore: update crate + bump version 2022-11-25 09:53:01 +01:00
VC
63830be0d5 Merge branch '9-refactor-text-status-building' into 'master'
Refactor text status building

Closes #9

See merge request veretcle/scootaloo!44
2022-11-24 22:14:55 +00:00
VC
5633bf9187 refactor: twitter threading no longer on top 2022-11-24 23:10:30 +01:00
VC
f42aa8cbb6 refactor: build status text more progressively 2022-11-24 15:41:50 +01:00
VC
1132f41b9e chore: bump version 2022-11-24 15:25:17 +01:00
VC
70f8c14e99 doc: regexp + alt services 2022-11-24 08:00:46 +01:00
VC
faab50d1ea feat: main logic for regex + url filtering 2022-11-24 08:00:45 +01:00
VC
9cafa2bf07 test: add tests for scootaloo alt services + regexp 2022-11-24 08:00:43 +01:00
11 changed files with 696 additions and 270 deletions

192
Cargo.lock generated
View File

@@ -30,9 +30,9 @@ dependencies = [
[[package]]
name = "aho-corasick"
version = "0.7.19"
version = "0.7.20"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "b4f55bd91a0978cbfd91c457a164bab8b4001c833b7f323132c0a4e1922dd44e"
checksum = "cc936419f96fa211c1b9166887b38e5e40b19958e5b895be7c1f93adec7071ac"
dependencies = [
"memchr",
]
@@ -52,7 +52,7 @@ version = "0.2.14"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "d9b39be18770d11421cdb1b9947a45dd3f37e93092cbf377614828a319d5fee8"
dependencies = [
"hermit-abi",
"hermit-abi 0.1.19",
"libc",
"winapi 0.3.9",
]
@@ -82,7 +82,7 @@ dependencies = [
"cc",
"cfg-if 1.0.0",
"libc",
"miniz_oxide",
"miniz_oxide 0.5.4",
"object",
"rustc-demangle",
]
@@ -197,9 +197,9 @@ checksum = "0e4cec68f03f32e44924783795810fa50a7035d8c8ebe78580ad7e6c703fba38"
[[package]]
name = "bytes"
version = "1.2.1"
version = "1.3.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "ec8a7b6a70fde80372154c65702f00a0f56f3e1c36abbc6c440484be248856db"
checksum = "dfb24e866b15a1af2a1b663f10c6b6b8f397a84aadb828f12e5b289ec23a3a3c"
[[package]]
name = "camino"
@@ -234,9 +234,9 @@ dependencies = [
[[package]]
name = "cc"
version = "1.0.76"
version = "1.0.77"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "76a284da2e6fe2092f2353e51713435363112dfd60030e22add80be333fb928f"
checksum = "e9f73505338f7d905b19d18738976aae232eb46b8efc15554ffc56deb5d9ebe4"
[[package]]
name = "cfg-if"
@@ -268,13 +268,13 @@ dependencies = [
[[package]]
name = "clap"
version = "4.0.24"
version = "4.0.27"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "60494cedb60cb47462c0ff7be53de32c0e42a6fc2c772184554fa12bd9489c03"
checksum = "0acbd8d28a0a60d7108d7ae850af6ba34cf2d1257fc646980e5f97ce14275966"
dependencies = [
"atty",
"bitflags",
"clap_lex",
"is-terminal",
"strsim",
"termcolor",
]
@@ -446,9 +446,9 @@ dependencies = [
[[package]]
name = "cxx"
version = "1.0.81"
version = "1.0.82"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "97abf9f0eca9e52b7f81b945524e76710e6cb2366aead23b7d4fbf72e281f888"
checksum = "d4a41a86530d0fe7f5d9ea779916b7cadd2d4f9add748b99c2c029cbbdfaf453"
dependencies = [
"cc",
"cxxbridge-flags",
@@ -458,9 +458,9 @@ dependencies = [
[[package]]
name = "cxx-build"
version = "1.0.81"
version = "1.0.82"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "7cc32cc5fea1d894b77d269ddb9f192110069a8a9c1f1d441195fba90553dea3"
checksum = "06416d667ff3e3ad2df1cd8cd8afae5da26cf9cec4d0825040f88b5ca659a2f0"
dependencies = [
"cc",
"codespan-reporting",
@@ -473,15 +473,15 @@ dependencies = [
[[package]]
name = "cxxbridge-flags"
version = "1.0.81"
version = "1.0.82"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "8ca220e4794c934dc6b1207c3b42856ad4c302f2df1712e9f8d2eec5afaacf1f"
checksum = "820a9a2af1669deeef27cb271f476ffd196a2c4b6731336011e0ba63e2c7cf71"
[[package]]
name = "cxxbridge-macro"
version = "1.0.81"
version = "1.0.82"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "b846f081361125bfc8dc9d3940c84e1fd83ba54bbca7b17cd29483c828be0704"
checksum = "a08a6e2fcc370a089ad3b4aaf54db3b1b4cee38ddabce5896b33eb693275f470"
dependencies = [
"proc-macro2",
"quote",
@@ -560,7 +560,7 @@ dependencies = [
"serde_json",
"sha-1 0.9.8",
"thiserror",
"tokio 1.21.2",
"tokio 1.22.0",
"url 2.3.1",
]
@@ -603,6 +603,27 @@ dependencies = [
"cfg-if 1.0.0",
]
[[package]]
name = "errno"
version = "0.2.8"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "f639046355ee4f37944e44f60642c6f3a7efa3cf6b78c78a0d989a8ce6c396a1"
dependencies = [
"errno-dragonfly",
"libc",
"winapi 0.3.9",
]
[[package]]
name = "errno-dragonfly"
version = "0.1.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "aa68f1b12764fab894d2755d2518754e71b4fd80ecfb822714a1206c2aab39bf"
dependencies = [
"cc",
"libc",
]
[[package]]
name = "error-chain"
version = "0.12.4"
@@ -664,12 +685,12 @@ dependencies = [
[[package]]
name = "flate2"
version = "1.0.24"
version = "1.0.25"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "f82b0f4c27ad9f8bfd1f3208d882da2b09c301bc1c828fd3a00d0216d2fbbff6"
checksum = "a8a2db397cb1c8772f31494cb8917e48cd1e64f0fa7efac59fbd741a0a8ce841"
dependencies = [
"crc32fast",
"miniz_oxide",
"miniz_oxide 0.6.2",
]
[[package]]
@@ -906,7 +927,7 @@ version = "0.3.15"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "5f9f29bc9dda355256b2916cf526ab02ce0aeaaaf2bad60d65ef3f12f11dd0f4"
dependencies = [
"bytes 1.2.1",
"bytes 1.3.0",
"fnv",
"futures-core",
"futures-sink",
@@ -914,7 +935,7 @@ dependencies = [
"http 0.2.8",
"indexmap",
"slab",
"tokio 1.21.2",
"tokio 1.22.0",
"tokio-util",
"tracing",
]
@@ -952,6 +973,15 @@ dependencies = [
"libc",
]
[[package]]
name = "hermit-abi"
version = "0.2.6"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "ee512640fe35acbfb4bb779db6f0d80704c2cacfa2e39b601ef3e3f47d1ae4c7"
dependencies = [
"libc",
]
[[package]]
name = "hmac"
version = "0.11.0"
@@ -988,7 +1018,7 @@ version = "0.2.8"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "75f43d41e26995c17e71ee126451dd3941010b0514a81a9d11f3b341debc2399"
dependencies = [
"bytes 1.2.1",
"bytes 1.3.0",
"fnv",
"itoa 1.0.4",
]
@@ -1011,7 +1041,7 @@ version = "0.4.5"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "d5f38f16d184e36f2408a55281cd658ecbd3ca05cce6d6510a176eca393e26d1"
dependencies = [
"bytes 1.2.1",
"bytes 1.3.0",
"http 0.2.8",
"pin-project-lite",
]
@@ -1064,7 +1094,7 @@ version = "0.14.23"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "034711faac9d2166cb1baf1a2fb0b60b1f277f8492fd72176c17f3515e1abd3c"
dependencies = [
"bytes 1.2.1",
"bytes 1.3.0",
"futures-channel",
"futures-core",
"futures-util",
@@ -1076,7 +1106,7 @@ dependencies = [
"itoa 1.0.4",
"pin-project-lite",
"socket2",
"tokio 1.21.2",
"tokio 1.22.0",
"tower-service",
"tracing",
"want 0.3.0",
@@ -1118,10 +1148,10 @@ version = "0.5.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "d6183ddfa99b85da61a140bea0efc93fdf56ceaa041b37d553518030827f9905"
dependencies = [
"bytes 1.2.1",
"bytes 1.3.0",
"hyper 0.14.23",
"native-tls",
"tokio 1.21.2",
"tokio 1.22.0",
"tokio-native-tls",
]
@@ -1183,9 +1213,9 @@ dependencies = [
[[package]]
name = "indexmap"
version = "1.9.1"
version = "1.9.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "10a35a97730320ffe8e2d410b5d3b69279b98d2c14bdb8b70ea89ecf7888d41e"
checksum = "1885e79c1fc4b10f0e172c475f458b7f7b93061064d98c3293e98c5ba0c8b399"
dependencies = [
"autocfg 1.1.0",
"hashbrown 0.12.3",
@@ -1209,6 +1239,16 @@ dependencies = [
"cfg-if 1.0.0",
]
[[package]]
name = "io-lifetimes"
version = "1.0.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "a7d367024b3f3414d8e01f437f704f41a9f64ab36f9067fa73e526ad4c763c87"
dependencies = [
"libc",
"windows-sys 0.42.0",
]
[[package]]
name = "iovec"
version = "0.1.4"
@@ -1224,6 +1264,18 @@ version = "2.5.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "f88c5561171189e69df9d98bcf18fd5f9558300f7ea7b801eb8a0fd748bd8745"
[[package]]
name = "is-terminal"
version = "0.4.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "aae5bc6e2eb41c9def29a3e0f1306382807764b9b53112030eff57435667352d"
dependencies = [
"hermit-abi 0.2.6",
"io-lifetimes",
"rustix",
"windows-sys 0.42.0",
]
[[package]]
name = "isolang"
version = "1.0.0"
@@ -1303,6 +1355,12 @@ dependencies = [
"cc",
]
[[package]]
name = "linux-raw-sys"
version = "0.1.3"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "8f9f08d8963a6c613f4b1a78f4f4a4dbfadf8e6545b2d72861731e4858b8b47f"
[[package]]
name = "lock_api"
version = "0.3.4"
@@ -1383,6 +1441,15 @@ dependencies = [
"adler",
]
[[package]]
name = "miniz_oxide"
version = "0.6.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "b275950c28b37e794e8c55d88aeb5e139d0ce23fdbbeda68f8d7174abdf9e8fa"
dependencies = [
"adler",
]
[[package]]
name = "mio"
version = "0.6.23"
@@ -1480,7 +1547,7 @@ version = "1.14.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "f6058e64324c71e02bc2b150e4f3bc8286db6c83092132ffa3f6b1eab0f9def5"
dependencies = [
"hermit-abi",
"hermit-abi 0.1.19",
"libc",
]
@@ -1522,9 +1589,9 @@ checksum = "624a8340c38c1b80fd549087862da4ba43e08858af025b236e509b6649fc13d5"
[[package]]
name = "openssl"
version = "0.10.42"
version = "0.10.43"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "12fc0523e3bd51a692c8850d075d74dc062ccf251c0110668cbd921917118a13"
checksum = "020433887e44c27ff16365eaa2d380547a94544ad509aff6eb5b6e3e0b27b376"
dependencies = [
"bitflags",
"cfg-if 1.0.0",
@@ -1554,9 +1621,9 @@ checksum = "ff011a302c396a5197692431fc1948019154afc178baf7d8e37367442a4601cf"
[[package]]
name = "openssl-sys"
version = "0.9.77"
version = "0.9.78"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "b03b84c3b2d099b81f0953422b4d4ad58761589d0229b5506356afca05a3670a"
checksum = "07d5c8cb6e57b3a3612064d7b18b117912b4ce70955c2504d4b741c9e244b132"
dependencies = [
"autocfg 1.1.0",
"cc",
@@ -1567,9 +1634,9 @@ dependencies = [
[[package]]
name = "os_str_bytes"
version = "6.4.0"
version = "6.4.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "7b5bf27447411e9ee3ff51186bf7a08e16c341efdde93f4d823e8844429bed7e"
checksum = "9b7820b9daea5457c9f21c69448905d723fbd21136ccf521748f23fd49e723ee"
[[package]]
name = "parking_lot"
@@ -1996,12 +2063,12 @@ dependencies = [
[[package]]
name = "reqwest"
version = "0.11.12"
version = "0.11.13"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "431949c384f4e2ae07605ccaa56d1d9d2ecdb5cadd4f9577ccfab29f2e5149fc"
checksum = "68cc60575865c7831548863cc02356512e3f1dc2f3f82cb837d7fc4cc8f3c97c"
dependencies = [
"base64 0.13.1",
"bytes 1.2.1",
"bytes 1.3.0",
"encoding_rs",
"futures-core",
"futures-util",
@@ -2021,7 +2088,7 @@ dependencies = [
"serde",
"serde_json",
"serde_urlencoded 0.7.1",
"tokio 1.21.2",
"tokio 1.22.0",
"tokio-native-tls",
"tower-service",
"url 2.3.1",
@@ -2070,6 +2137,20 @@ dependencies = [
"semver 1.0.14",
]
[[package]]
name = "rustix"
version = "0.36.3"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "0b1fbb4dfc4eb1d390c02df47760bb19a84bb80b301ecc947ab5406394d8223e"
dependencies = [
"bitflags",
"errno",
"io-lifetimes",
"libc",
"linux-raw-sys",
"windows-sys 0.42.0",
]
[[package]]
name = "ryu"
version = "1.0.11"
@@ -2103,9 +2184,8 @@ dependencies = [
[[package]]
name = "scootaloo"
version = "0.11.0"
version = "0.12.2"
dependencies = [
"chrono",
"clap",
"egg-mode",
"elefren",
@@ -2114,11 +2194,11 @@ dependencies = [
"log",
"mime",
"regex",
"reqwest 0.11.12",
"reqwest 0.11.13",
"rusqlite",
"serde",
"simple_logger",
"tokio 1.21.2",
"tokio 1.22.0",
"toml",
]
@@ -2203,9 +2283,9 @@ dependencies = [
[[package]]
name = "serde_json"
version = "1.0.87"
version = "1.0.89"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "6ce777b7b150d76b9cf60d28b55f5847135a003f7d7350c6be7a773508ce7d45"
checksum = "020ff22c755c2ed3f8cf162dbb41a7268d934702f3ed3631656ea597e08fc3db"
dependencies = [
"itoa 1.0.4",
"ryu",
@@ -2531,12 +2611,12 @@ dependencies = [
[[package]]
name = "tokio"
version = "1.21.2"
version = "1.22.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "a9e03c497dc955702ba729190dc4aac6f2a0ce97f913e5b1b5912fc5039d9099"
checksum = "d76ce4a75fb488c605c54bf610f221cea8b0dafb53333c1a67e8ee199dcd2ae3"
dependencies = [
"autocfg 1.1.0",
"bytes 1.2.1",
"bytes 1.3.0",
"libc",
"memchr",
"mio 0.8.5",
@@ -2609,7 +2689,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "f7d995660bd2b7f8c1568414c1126076c13fbb725c40112dc0120b78eb9b717b"
dependencies = [
"native-tls",
"tokio 1.21.2",
"tokio 1.22.0",
]
[[package]]
@@ -2690,11 +2770,11 @@ version = "0.7.4"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "0bb2e075f03b3d66d8d8785356224ba688d2906a371015e225beeb65ca92c740"
dependencies = [
"bytes 1.2.1",
"bytes 1.3.0",
"futures-core",
"futures-sink",
"pin-project-lite",
"tokio 1.21.2",
"tokio 1.22.0",
"tracing",
]

View File

@@ -1,13 +1,12 @@
[package]
name = "scootaloo"
version = "0.11.0"
version = "0.12.2"
authors = ["VC <veretcle+framagit@mateu.be>"]
edition = "2021"
# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html
[dependencies]
chrono = "^0.4"
regex = "^1"
serde = { version = "1.0", features = ["derive"] }
toml = "^0.5"

View File

@@ -19,6 +19,19 @@ First up, create a configuration file (default path is `/usr/local/etc/scootaloo
db_path = "/var/lib/scootaloo/scootaloo.sqlite" ## file containing the SQLite Tweet corresponding Toot DB, must be writeable
cache_path = "/tmp/scootaloo" ## a dir where the temporary files will be download, must be writeable
rate_limiting = 4 ## optional, default 4, number of accounts handled simultaneously
## optional, this should be omitted the majority of the time
## sometimes, twitter try to use french inclusive writting, but instead of using `·` (median point), theyre using `.`
## this makes twitter interpret it as a URL, which is wrong
## this parameter allows you to catch such URLs and apply the `display_url` (i.e. `tout.es`) instead of the `expanded_url` (i.e. `http://tout.es`)
## in those particular cases
## (!) use with caution, it might have some undesired effects
show_url_as_display_url_for = "^http(s)://(.+)\\.es$"
## optional, this allows you to replace the host for popular services such as YouTube of Twitter, or any other
## with their more freely accessible equivalent
[scootaloo.alternative_services_for]
"tamere.lol" = "tonpere.mdr" ## quotes are necessary for both parameters
"you.pi" = "you.pla"
"www.you.pi" = "you.pla" ## this is an exact match, so youll need to lay out all the possibilities
[twitter]
## Consumer/Access key for Twitter (can be generated at https://developer.twitter.com/en/apps)

View File

@@ -19,7 +19,7 @@ pub struct TwitterConfig {
pub page_size: Option<i32>,
}
#[derive(Debug, Deserialize)]
#[derive(Debug, Deserialize, Clone)]
pub struct MastodonConfig {
pub twitter_screen_name: String,
pub mastodon_screen_name: Option<String>,
@@ -36,6 +36,8 @@ pub struct ScootalooConfig {
pub db_path: String,
pub cache_path: String,
pub rate_limit: Option<usize>,
pub show_url_as_display_url_for: Option<String>,
pub alternative_services_for: Option<HashMap<String, String>>,
}
/// Parses the TOML file into a Config Struct

View File

@@ -7,7 +7,7 @@ use config::Config;
mod mastodon;
pub use mastodon::register;
use mastodon::{build_basic_status, get_mastodon_token};
use mastodon::*;
mod twitter;
use twitter::*;
@@ -21,9 +21,11 @@ use state::{read_state, write_state, TweetToToot};
use elefren::{prelude::*, status_builder::StatusBuilder, Language};
use futures::StreamExt;
use html_escape::decode_html_entities;
use log::info;
use regex::Regex;
use rusqlite::Connection;
use std::{collections::HashMap, sync::Arc};
use std::sync::Arc;
use tokio::{spawn, sync::Mutex};
const DEFAULT_RATE_LIMIT: usize = 4;
@@ -42,21 +44,15 @@ pub async fn run(config: Config) {
}),
));
let scootaloo_mentions: HashMap<String, String> = config
.mastodon
.values()
.filter(|s| s.mastodon_screen_name.is_some())
.map(|s| {
(
format!("@{}", s.twitter_screen_name),
format!(
"@{}@{}",
s.mastodon_screen_name.as_ref().unwrap(),
s.base.split('/').last().unwrap()
),
)
})
.collect();
let global_mastodon_config = Arc::new(Mutex::new(config.mastodon.clone()));
let display_url_re = config
.scootaloo
.show_url_as_display_url_for
.as_ref()
.map(|r|
// we want to panic in case the RE is not valid
Regex::new(r).unwrap());
let mut stream = futures::stream::iter(config.mastodon.into_values())
.map(|mastodon_config| {
@@ -67,9 +63,11 @@ pub async fn run(config: Config) {
// create temporary value for each task
let scootaloo_cache_path = config.scootaloo.cache_path.clone();
let scootaloo_mentions = scootaloo_mentions.clone();
let scootaloo_alt_services = config.scootaloo.alternative_services_for.clone();
let display_url_re = display_url_re.clone();
let token = get_oauth2_token(&config.twitter);
let task_conn = conn.clone();
let global_mastodon_config = global_mastodon_config.clone();
spawn(async move {
info!("Starting treating {}", &mastodon_config.twitter_screen_name);
@@ -94,21 +92,71 @@ pub async fn run(config: Config) {
for tweet in &feed {
info!("Treating Tweet {} inside feed", tweet.id);
let lconn = task_conn.lock().await;
// initiate the toot_reply_id var and retrieve the corresponding toot_id
let toot_reply_id: Option<String> = tweet.in_reply_to_user_id.and_then(|_| {
read_state(
&lconn,
&mastodon_config.twitter_screen_name,
tweet.in_reply_to_status_id,
)
.unwrap_or(None)
.map(|s| s.toot_id)
});
drop(lconn);
// basic toot text
let mut status_text = tweet.text.clone();
// build basic status by just yielding text and dereferencing contained urls
let mut status_text = build_basic_status(tweet, &scootaloo_mentions);
// add mentions and smart mentions
if !&tweet.entities.user_mentions.is_empty() {
info!("Tweet contains mentions, add them!");
let global_mastodon_config = global_mastodon_config.lock().await;
twitter_mentions(
&mut status_text,
&tweet.entities.user_mentions,
&global_mastodon_config,
);
drop(global_mastodon_config);
}
if !&tweet.entities.urls.is_empty() {
info!("Tweet contains links, add them!");
let mut associated_urls =
associate_urls(&tweet.entities.urls, &display_url_re);
if let Some(q) = &tweet.quoted_status {
if let Some(u) = &q.user {
info!(
"Tweet {} contains a quote, we try to find it within the DB",
tweet.id
);
// we know we have a quote and a user, we can lock both the
// connection to DB and global_config
// we will release them manually as soon as theyre useless
let lconn = task_conn.lock().await;
let global_mastodon_config = global_mastodon_config.lock().await;
if let Ok(Some(r)) = read_state(&lconn, &u.screen_name, Some(q.id))
{
info!("We have found the associated toot({})", &r.toot_id);
// drop conn immediately after the request: we wont need it
// any more and the treatment there might be time-consuming
drop(lconn);
if let Some((m, t)) =
find_mastodon_screen_name_by_twitter_screen_name(
&r.twitter_screen_name,
&global_mastodon_config,
)
{
// drop the global conf, we have all we required, no need
// to block it further
drop(global_mastodon_config);
replace_tweet_by_toot(
&mut associated_urls,
&r.twitter_screen_name,
q.id,
&m,
&t,
&r.toot_id,
);
}
}
}
}
if let Some(a) = &scootaloo_alt_services {
replace_alt_services(&mut associated_urls, a);
}
decode_urls(&mut status_text, &associated_urls);
}
// building associative media list
let (media_url, status_medias) =
@@ -116,15 +164,27 @@ pub async fn run(config: Config) {
status_text = status_text.replace(&media_url, "");
// now that the text wont be altered anymore, we can safely remove HTML
// entities
status_text = decode_html_entities(&status_text).to_string();
info!("Building corresponding Mastodon status");
let mut status_builder = StatusBuilder::new();
status_builder.status(&status_text).media_ids(status_medias);
status_builder.status(status_text).media_ids(status_medias);
// theard if necessary
if let Some(i) = toot_reply_id {
status_builder.in_reply_to(&i);
// thread if necessary
if tweet.in_reply_to_user_id.is_some() {
let lconn = task_conn.lock().await;
if let Ok(Some(r)) = read_state(
&lconn,
&mastodon_config.twitter_screen_name,
tweet.in_reply_to_status_id,
) {
status_builder.in_reply_to(&r.toot_id);
}
drop(lconn);
}
// language if any

View File

@@ -1,58 +1,121 @@
use crate::config::MastodonConfig;
use egg_mode::{
entities::{MentionEntity, UrlEntity},
tweet::Tweet,
};
use egg_mode::entities::{MentionEntity, UrlEntity};
use elefren::{apps::App, prelude::*, scopes::Read, scopes::Scopes, scopes::Write};
use html_escape::decode_html_entities;
use regex::Regex;
use std::{borrow::Cow, collections::HashMap, io::stdin};
/// Decodes the Twitter mention to something that will make sense once Twitter has joined the
/// Fediverse
fn twitter_mentions(ums: &[MentionEntity]) -> HashMap<String, String> {
ums.iter()
/// Fediverse. Users in the global user list of Scootaloo are rewritten, as they are Mastodon users
/// as well
pub fn twitter_mentions(
toot: &mut String,
ums: &[MentionEntity],
masto: &HashMap<String, MastodonConfig>,
) {
let tm: HashMap<String, String> = ums
.iter()
.map(|s| {
(
format!("@{}", s.screen_name),
format!("@{}@twitter.com", s.screen_name),
)
})
.collect()
.chain(
masto
.values()
.filter(|s| s.mastodon_screen_name.is_some())
.map(|s| {
(
format!("@{}", s.twitter_screen_name),
format!(
"@{}@{}",
s.mastodon_screen_name.as_ref().unwrap(),
s.base.split('/').last().unwrap()
),
)
})
.collect::<HashMap<String, String>>(),
)
.collect();
for (k, v) in tm {
*toot = toot.replace(&k, &v);
}
}
/// Decodes urls from UrlEntities
fn decode_urls(
urls: &[UrlEntity],
re: &Option<Regex>,
alt_urls: &Option<HashMap<String, String>>,
) -> HashMap<String, String> {
/// Decodes urls in toot
pub fn decode_urls(toot: &mut String, urls: &HashMap<String, String>) {
for (k, v) in urls {
*toot = toot.replace(k, v);
}
}
/// Reassociates source url with destination url for rewritting
/// this takes a Teet UrlEntity and an optional Regex
pub fn associate_urls(urls: &[UrlEntity], re: &Option<Regex>) -> HashMap<String, String> {
urls.iter()
.filter(|s| s.expanded_url.is_some())
.map(|s| {
(s.url.to_owned(), {
let mut def = s.expanded_url.as_deref().unwrap().to_owned();
if let Some(a) = &alt_urls {
for (url_source, url_destination) in a {
def = def.replace(
&format!("https://{}", url_source),
&format!("https://{}", url_destination),
);
}
}
if let Some(r) = &re {
if let Some(r) = re {
if r.is_match(s.expanded_url.as_deref().unwrap()) {
def = s.display_url.clone();
def = s.display_url.to_owned();
}
}
def
})
})
.collect()
.collect::<HashMap<String, String>>()
}
/// Replaces the commonly used services by mirrors, if asked to
pub fn replace_alt_services(urls: &mut HashMap<String, String>, alts: &HashMap<String, String>) {
for val in urls.values_mut() {
for (k, v) in alts {
*val = val.replace(&format!("/{}/", k), &format!("/{}/", v));
}
}
}
/// Finds a Mastodon screen_name/base_url from a MastodonConfig
pub fn find_mastodon_screen_name_by_twitter_screen_name(
twitter_screen_name: &str,
masto: &HashMap<String, MastodonConfig>,
) -> Option<(String, String)> {
masto.iter().find_map(|(_, v)| {
if twitter_screen_name == v.twitter_screen_name && v.mastodon_screen_name.is_some() {
Some((
v.mastodon_screen_name.as_ref().unwrap().to_owned(),
v.base.to_owned(),
))
} else {
None
}
})
}
/// Replaces the original quoted tweet by the corresponding toot
pub fn replace_tweet_by_toot(
urls: &mut HashMap<String, String>,
twitter_screen_name: &str,
tweet_id: u64,
mastodon_screen_name: &str,
base_url: &str,
toot_id: &str,
) {
for val in urls.values_mut() {
if val.to_lowercase().starts_with(&format!(
"https://twitter.com/{}/status/{}",
twitter_screen_name.to_lowercase(),
tweet_id
)) {
*val = format!("{}/@{}/{}", base_url, mastodon_screen_name, toot_id);
}
}
}
/// Gets Mastodon Data
@@ -68,30 +131,6 @@ pub fn get_mastodon_token(masto: &MastodonConfig) -> Mastodon {
Mastodon::from(data)
}
/// Builds toot text from tweet
pub fn build_basic_status(
tweet: &Tweet,
mentions: &HashMap<String, String>,
url_regex_filter: &Option<Regex>,
url_alt_services: &Option<HashMap<String, String>>,
) -> String {
let mut toot = tweet.text.to_owned();
for decoded_url in decode_urls(&tweet.entities.urls, url_regex_filter, url_alt_services) {
toot = toot.replace(&decoded_url.0, &decoded_url.1);
}
for decoded_mention in twitter_mentions(&tweet.entities.user_mentions)
.into_iter()
.chain(mentions.to_owned())
.collect::<HashMap<String, String>>()
{
toot = toot.replace(&decoded_mention.0, &decoded_mention.1);
}
decode_html_entities(&toot).to_string()
}
/// Generic register function
/// As this function is supposed to be run only once, it will panic for every error it encounters
/// Most of this function is a direct copy/paste of the official `elefren` crate
@@ -155,74 +194,209 @@ mastodon_screen_name = \"{}\"
mod tests {
use super::*;
use chrono::prelude::*;
use egg_mode::tweet::TweetEntities;
#[test]
fn test_replace_tweet_by_toot() {
let mut associated_urls = HashMap::from([
(
"https://t.co/perdudeouf".to_string(),
"https://www.perdu.com".to_string(),
),
(
"https://t.co/realquoteshere".to_string(),
"https://twitter.com/nintendojofr/status/1590047921633755136".to_string(),
),
(
"https://t.co/almostthere".to_string(),
"https://twitter.com/NintendojoFR/status/nope".to_string(),
),
(
"http://t.co/yetanotherone".to_string(),
"https://twitter.com/NINTENDOJOFR/status/1590047921633755136".to_string(),
),
]);
let expected_urls = HashMap::from([
(
"https://t.co/perdudeouf".to_string(),
"https://www.perdu.com".to_string(),
),
(
"https://t.co/realquoteshere".to_string(),
"https://m.nintendojo.fr/@nintendojofr/109309605486908797".to_string(),
),
(
"https://t.co/almostthere".to_string(),
"https://twitter.com/NintendojoFR/status/nope".to_string(),
),
(
"http://t.co/yetanotherone".to_string(),
"https://m.nintendojo.fr/@nintendojofr/109309605486908797".to_string(),
),
]);
replace_tweet_by_toot(
&mut associated_urls,
"NintendojoFR",
1590047921633755136,
"nintendojofr",
"https://m.nintendojo.fr",
"109309605486908797",
);
assert_eq!(associated_urls, expected_urls);
}
#[test]
fn test_find_mastodon_screen_name_by_twitter_screen_name() {
let masto_config = HashMap::from([
(
"test".to_string(),
MastodonConfig {
twitter_screen_name: "tonpere".to_string(),
mastodon_screen_name: Some("lalali".to_string()),
twitter_page_size: None,
base: "https://mstdn.net".to_string(),
client_id: "".to_string(),
client_secret: "".to_string(),
redirect: "".to_string(),
token: "".to_string(),
},
),
(
"test2".to_string(),
MastodonConfig {
twitter_screen_name: "tamerelol".to_string(),
mastodon_screen_name: None,
twitter_page_size: None,
base: "https://mastoot.fr".to_string(),
client_id: "".to_string(),
client_secret: "".to_string(),
redirect: "".to_string(),
token: "".to_string(),
},
),
(
"test3".to_string(),
MastodonConfig {
twitter_screen_name: "NintendojoFR".to_string(),
mastodon_screen_name: Some("nintendojofr".to_string()),
twitter_page_size: None,
base: "https://m.nintendojo.fr".to_string(),
client_id: "".to_string(),
client_secret: "".to_string(),
redirect: "".to_string(),
token: "".to_string(),
},
),
]);
// case sensitiveness, to avoid any mistake
assert_eq!(
None,
find_mastodon_screen_name_by_twitter_screen_name("nintendojofr", &masto_config)
);
assert_eq!(
Some((
"nintendojofr".to_string(),
"https://m.nintendojo.fr".to_string()
)),
find_mastodon_screen_name_by_twitter_screen_name("NintendojoFR", &masto_config)
);
// should return None if twitter_screen_name is undefined
assert_eq!(
None,
find_mastodon_screen_name_by_twitter_screen_name("tamerelol", &masto_config)
);
assert_eq!(
Some(("lalali".to_string(), "https://mstdn.net".to_string())),
find_mastodon_screen_name_by_twitter_screen_name("tonpere", &masto_config)
);
}
#[test]
fn test_twitter_mentions() {
let mention_entity = MentionEntity {
let mention_entities = vec![
MentionEntity {
id: 12345,
range: (1, 3),
name: "Ta Mere l0l".to_string(),
screen_name: "tamerelol".to_string(),
};
},
MentionEntity {
id: 6789,
range: (1, 3),
name: "TONPERE".to_string(),
screen_name: "tonpere".to_string(),
},
];
let twitter_ums = vec![mention_entity];
let mut toot = ":kikoo: @tamerelol @tonpere !".to_string();
let mut expected_mentions = HashMap::new();
expected_mentions.insert(
"@tamerelol".to_string(),
"@tamerelol@twitter.com".to_string(),
);
let masto_config = HashMap::from([(
"test".to_string(),
(MastodonConfig {
twitter_screen_name: "tonpere".to_string(),
mastodon_screen_name: Some("lalali".to_string()),
twitter_page_size: None,
base: "https://mstdn.net".to_string(),
client_id: "".to_string(),
client_secret: "".to_string(),
redirect: "".to_string(),
token: "".to_string(),
}),
)]);
let decoded_mentions = twitter_mentions(&twitter_ums);
twitter_mentions(&mut toot, &mention_entities, &masto_config);
assert_eq!(expected_mentions, decoded_mentions);
assert_eq!(&toot, ":kikoo: @tamerelol@twitter.com @lalali@mstdn.net !");
}
#[test]
fn test_decode_urls() {
let url_entity1 = UrlEntity {
let urls = HashMap::from([
(
"https://t.co/thisisatest".to_string(),
"https://www.nintendojo.fr/dojobar".to_string(),
),
(
"https://t.co/nopenotinclusive".to_string(),
"invité.es".to_string(),
),
]);
let mut toot =
"Rendez-vous sur https://t.co/thisisatest avec nos https://t.co/nopenotinclusive !"
.to_string();
decode_urls(&mut toot, &urls);
assert_eq!(
&toot,
"Rendez-vous sur https://www.nintendojo.fr/dojobar avec nos invité.es !"
);
}
#[test]
fn test_associate_urls() {
let urls = vec![
UrlEntity {
display_url: "tamerelol".to_string(),
expanded_url: Some("https://www.nintendojo.fr/dojobar".to_string()),
range: (1, 3),
url: "https://t.me/tamerelol".to_string(),
};
let url_entity2 = UrlEntity {
display_url: "tamerelol".to_string(),
},
UrlEntity {
display_url: "sadcat".to_string(),
expanded_url: None,
range: (1, 3),
url: "https://t.me/tamerelol".to_string(),
};
let wrong_url_entity = UrlEntity {
url: "https://t.me/sadcat".to_string(),
},
UrlEntity {
display_url: "invité.es".to_string(),
expanded_url: Some("http://xn--invit-fsa.es".to_string()),
range: (85, 108),
url: "https://t.co/WAUgnpHLmo".to_string(),
};
let rewritten_url_entity = UrlEntity {
display_url: "youtu.be/w5TrSaoYmZ8".to_string(),
expanded_url: Some("https://youtu.be/w5TrSaoYmZ8".to_string()),
range: (0, 23),
url: "https://t.co/fUVYXuF7tg".to_string(),
};
let re = Regex::new("(.+)\\.es$").ok();
let alt: HashMap<String, String> = HashMap::from([
("youtube.com".to_string(), "invidio.us".to_string()),
("youtu.be".to_string(), "invidio.us".to_string()),
("www.youtube.com".to_string(), "invidio.us".to_string()),
]);
let twitter_urls = vec![
url_entity1,
url_entity2,
wrong_url_entity,
rewritten_url_entity,
},
];
let expected_urls = HashMap::from([
@@ -234,83 +408,72 @@ mod tests {
"https://t.co/WAUgnpHLmo".to_string(),
"invité.es".to_string(),
),
(
"https://t.co/fUVYXuF7tg".to_string(),
"https://invidio.us/w5TrSaoYmZ8".to_string(),
),
]);
let decoded_urls = decode_urls(&twitter_urls, &re, &Some(alt));
let re = Regex::new("(.+)\\.es$").ok();
assert_eq!(expected_urls, decoded_urls);
let associated_urls = associate_urls(&urls, &re);
assert_eq!(associated_urls, expected_urls);
}
#[test]
fn test_build_basic_status() {
let t = Tweet {
coordinates: None,
created_at: Utc::now(),
current_user_retweet: None,
display_text_range: None,
entities: TweetEntities {
hashtags: vec![],
symbols: vec![],
urls: vec![
UrlEntity {
display_url: "youtube.com/watch?v=w5TrSa…".to_string(),
expanded_url: Some("https://www.youtube.com/watch?v=w5TrSaoYmZ8".to_string()),
range: (93, 116),
url: "https://t.co/zXw0FfX2Nt".to_string(),
}
],
user_mentions: vec![
MentionEntity {
id: 491500016,
range: (80, 95),
name: "Nintendo France".to_string(),
screen_name: "NintendoFrance".to_string(),
},
MentionEntity {
id: 999999999,
range: (80, 95),
name: "Willy Wonka".to_string(),
screen_name: "WillyWonka".to_string(),
},
],
media: None,
},
extended_entities: None,
favorite_count: 0,
favorited: None,
filter_level: None,
id: 1491541246984306693,
in_reply_to_user_id: None,
in_reply_to_screen_name: None,
in_reply_to_status_id: None,
lang: None,
place: None,
possibly_sensitive: None,
quoted_status: None,
quoted_status_id: None,
retweet_count: 0,
retweeted: None,
retweeted_status: None,
source: None,
text: "Mother 1 &amp; 2 sur le NES/SNES online !\nDispo maintenant. cc @NintendoFrance @WillyWonka https://t.co/zXw0FfX2Nt".to_string(),
truncated: false,
user: None,
withheld_copyright: false,
withheld_in_countries: None,
withheld_scope: None,
};
fn test_replace_alt_services() {
let mut associated_urls = HashMap::from([
(
"https://t.co/youplaboom".to_string(),
"https://www.youtube.com/watch?v=dQw4w9WgXcQ".to_string(),
),
(
"https://t.co/thisisfine".to_string(),
"https://twitter.com/Nintendo/status/1594590628771688448".to_string(),
),
(
"https://t.co/nopenope".to_string(),
"https://www.nintendojo.fr/dojobar".to_string(),
),
(
"https://t.co/broken".to_string(),
"http://youtu.be".to_string(),
),
(
"https://t.co/alsobroken".to_string(),
"https://youtube.com".to_string(),
),
]);
let s: HashMap<String, String> = HashMap::from([(
"@WillyWonka".to_string(),
"@WillyWonka@chocolatefactory.org".to_string(),
)]);
let alt_services = HashMap::from([
("twitter.com".to_string(), "nitter.net".to_string()),
("youtu.be".to_string(), "invidio.us".to_string()),
("www.youtube.com".to_string(), "invidio.us".to_string()),
("youtube.com".to_string(), "invidio.us".to_string()),
]);
let t_out = build_basic_status(&t, &s, &None, &None);
let expected_urls = HashMap::from([
(
"https://t.co/youplaboom".to_string(),
"https://invidio.us/watch?v=dQw4w9WgXcQ".to_string(),
),
(
"https://t.co/thisisfine".to_string(),
"https://nitter.net/Nintendo/status/1594590628771688448".to_string(),
),
(
"https://t.co/nopenope".to_string(),
"https://www.nintendojo.fr/dojobar".to_string(),
),
(
"https://t.co/broken".to_string(),
"http://youtu.be".to_string(),
),
(
"https://t.co/alsobroken".to_string(),
"https://youtube.com".to_string(),
),
]);
assert_eq!(&t_out, "Mother 1 & 2 sur le NES/SNES online !\nDispo maintenant. cc @NintendoFrance@twitter.com @WillyWonka@chocolatefactory.org https://www.youtube.com/watch?v=w5TrSaoYmZ8");
replace_alt_services(&mut associated_urls, &alt_services);
assert_eq!(associated_urls, expected_urls);
}
}

View File

@@ -1,4 +1,33 @@
use scootaloo::parse_toml;
use std::collections::HashMap;
#[test]
fn test_alt_services() {
let toml = parse_toml("tests/no_test_alt_services.toml");
assert_eq!(toml.scootaloo.alternative_services_for, None);
let toml = parse_toml("tests/test_alt_services.toml");
assert_eq!(
toml.scootaloo.alternative_services_for,
Some(HashMap::from([
("tamere.lol".to_string(), "tonpere.mdr".to_string()),
("you.pi".to_string(), "you.pla".to_string())
]))
);
}
#[test]
fn test_re_display() {
let toml = parse_toml("tests/no_show_url_as_display_url_for.toml");
assert_eq!(toml.scootaloo.show_url_as_display_url_for, None);
let toml = parse_toml("tests/show_url_as_display_url_for.toml");
assert_eq!(
toml.scootaloo.show_url_as_display_url_for,
Some("^(.+)\\.es$".to_string())
);
}
#[test]
fn test_page_size() {

View File

@@ -0,0 +1,19 @@
[scootaloo]
db_path="/var/random/scootaloo.sqlite"
cache_path="/tmp/scootaloo"
[twitter]
consumer_key="rand consumer key"
consumer_secret="secret"
access_key="rand access key"
access_secret="super secret"
[mastodon]
[mastodon.tamerelol]
twitter_screen_name="tamerelol"
base = "https://m.nintendojo.fr"
client_id = "rand client id"
client_secret = "secret"
redirect = "urn:ietf:wg:oauth:2.0:oob"
token = "super secret"

View File

@@ -0,0 +1,19 @@
[scootaloo]
db_path="/var/random/scootaloo.sqlite"
cache_path="/tmp/scootaloo"
[twitter]
consumer_key="rand consumer key"
consumer_secret="secret"
access_key="rand access key"
access_secret="super secret"
[mastodon]
[mastodon.tamerelol]
twitter_screen_name="tamerelol"
base = "https://m.nintendojo.fr"
client_id = "rand client id"
client_secret = "secret"
redirect = "urn:ietf:wg:oauth:2.0:oob"
token = "super secret"

View File

@@ -0,0 +1,20 @@
[scootaloo]
db_path="/var/random/scootaloo.sqlite"
cache_path="/tmp/scootaloo"
show_url_as_display_url_for = "^(.+)\\.es$"
[twitter]
consumer_key="rand consumer key"
consumer_secret="secret"
access_key="rand access key"
access_secret="super secret"
[mastodon]
[mastodon.tamerelol]
twitter_screen_name="tamerelol"
base = "https://m.nintendojo.fr"
client_id = "rand client id"
client_secret = "secret"
redirect = "urn:ietf:wg:oauth:2.0:oob"
token = "super secret"

View File

@@ -0,0 +1,22 @@
[scootaloo]
db_path="/var/random/scootaloo.sqlite"
cache_path="/tmp/scootaloo"
[scootaloo.alternative_services_for]
"tamere.lol" = "tonpere.mdr"
"you.pi" = "you.pla"
[twitter]
consumer_key="rand consumer key"
consumer_secret="secret"
access_key="rand access key"
access_secret="super secret"
[mastodon]
[mastodon.tamerelol]
twitter_screen_name="tamerelol"
base = "https://m.nintendojo.fr"
client_id = "rand client id"
client_secret = "secret"
redirect = "urn:ietf:wg:oauth:2.0:oob"
token = "super secret"