feat: add regex filter and alternative-service URL rewriting to decode_urls fn

This commit is contained in:
VC
2022-11-22 23:54:26 +01:00
committed by Clément VERET
parent 64d72ea69d
commit 9227850c99

View File

@@ -6,6 +6,7 @@ use egg_mode::{
};
use elefren::{apps::App, prelude::*, scopes::Read, scopes::Scopes, scopes::Write};
use html_escape::decode_html_entities;
use regex::Regex;
use std::{borrow::Cow, collections::HashMap, io::stdin};
/// Decodes the Twitter mention to something that will make sense once Twitter has joined the
@@ -22,14 +23,34 @@ fn twitter_mentions(ums: &[MentionEntity]) -> HashMap<String, String> {
}
/// Decodes urls from UrlEntities
///
/// Builds a map from each entity's `url` to the text that should replace it.
/// Entities whose `expanded_url` is `None` are skipped.
///
/// For every remaining entity the replacement starts as its `expanded_url`, then:
/// * if `alt_urls` is `Some`, each occurrence of `https://{source}` is replaced by
///   `https://{destination}` for every `(source, destination)` pair in the map;
/// * if `re` is `Some` and matches the original (un-rewritten) `expanded_url`, the
///   replacement becomes the entity's `display_url`, discarding any rewrite above.
fn decode_urls(urls: &[UrlEntity]) -> HashMap<String, String> {
fn decode_urls(
urls: &[UrlEntity],
re: &Option<Regex>,
alt_urls: &Option<HashMap<String, String>>,
) -> HashMap<String, String> {
urls.iter()
// Only entities that carry an expanded URL can be decoded.
.filter(|s| s.expanded_url.is_some())
.map(|s| {
(
s.url.to_owned(),
s.expanded_url.as_deref().unwrap().to_owned(),
)
(s.url.to_owned(), {
// Start from the expanded URL; the filter above guarantees it is `Some`.
let mut def = s.expanded_url.as_deref().unwrap().to_owned();
// Apply every configured substitution; only text prefixed with "https://" is rewritten.
if let Some(a) = &alt_urls {
for (url_source, url_destination) in a {
def = def.replace(
&format!("https://{}", url_source),
&format!("https://{}", url_destination),
);
}
}
// The regex is tested against the original expanded URL, not the rewritten value;
// on a match the display URL is used instead.
if let Some(r) = &re {
if r.is_match(s.expanded_url.as_deref().unwrap()) {
def = s.display_url.clone();
}
}
def
})
})
.collect()
}
@@ -48,10 +69,15 @@ pub fn get_mastodon_token(masto: &MastodonConfig) -> Mastodon {
}
/// Builds toot text from tweet
pub fn build_basic_status(tweet: &Tweet, mentions: &HashMap<String, String>) -> String {
pub fn build_basic_status(
tweet: &Tweet,
mentions: &HashMap<String, String>,
url_regex_filter: &Option<Regex>,
url_alt_services: &Option<HashMap<String, String>>,
) -> String {
let mut toot = tweet.text.to_owned();
for decoded_url in decode_urls(&tweet.entities.urls) {
for decoded_url in decode_urls(&tweet.entities.urls, url_regex_filter, url_alt_services) {
toot = toot.replace(&decoded_url.0, &decoded_url.1);
}
@@ -170,15 +196,51 @@ mod tests {
url: "https://t.me/tamerelol".to_string(),
};
let twitter_urls = vec![url_entity1, url_entity2];
let wrong_url_entity = UrlEntity {
display_url: "invité.es".to_string(),
expanded_url: Some("http://xn--invit-fsa.es".to_string()),
range: (85, 108),
url: "https://t.co/WAUgnpHLmo".to_string(),
};
let mut expected_urls = HashMap::new();
expected_urls.insert(
"https://t.me/tamerelol".to_string(),
"https://www.nintendojo.fr/dojobar".to_string(),
);
let rewritten_url_entity = UrlEntity {
display_url: "youtu.be/w5TrSaoYmZ8".to_string(),
expanded_url: Some("https://youtu.be/w5TrSaoYmZ8".to_string()),
range: (0, 23),
url: "https://t.co/fUVYXuF7tg".to_string(),
};
let decoded_urls = decode_urls(&twitter_urls);
let re = Regex::new("(.+)\\.es$").ok();
let alt: HashMap<String, String> = HashMap::from([
("youtube.com".to_string(), "invidio.us".to_string()),
("youtu.be".to_string(), "invidio.us".to_string()),
("www.youtube.com".to_string(), "invidio.us".to_string()),
]);
let twitter_urls = vec![
url_entity1,
url_entity2,
wrong_url_entity,
rewritten_url_entity,
];
let expected_urls = HashMap::from([
(
"https://t.me/tamerelol".to_string(),
"https://www.nintendojo.fr/dojobar".to_string(),
),
(
"https://t.co/WAUgnpHLmo".to_string(),
"invité.es".to_string(),
),
(
"https://t.co/fUVYXuF7tg".to_string(),
"https://invidio.us/w5TrSaoYmZ8".to_string(),
),
]);
let decoded_urls = decode_urls(&twitter_urls, &re, &Some(alt));
assert_eq!(expected_urls, decoded_urls);
}
@@ -247,7 +309,7 @@ mod tests {
"@WillyWonka@chocolatefactory.org".to_string(),
)]);
let t_out = build_basic_status(&t, &s);
let t_out = build_basic_status(&t, &s, &None, &None);
assert_eq!(&t_out, "Mother 1 & 2 sur le NES/SNES online !\nDispo maintenant. cc @NintendoFrance@twitter.com @WillyWonka@chocolatefactory.org https://www.youtube.com/watch?v=w5TrSaoYmZ8");
}