Merge branch '2-find-a-way-to-remove-dissolve' into 'main'

feat: remove dissolve + add simpler html tag stripper + html entities

Closes #2

See merge request veretcle/oolatoocs!4
This commit is contained in:
VC
2023-11-17 19:30:36 +00:00
4 changed files with 117 additions and 411 deletions

449
Cargo.lock generated
View File

@@ -107,19 +107,10 @@ source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "a66537f1bb974b254c98ed142ff995236e81b9d0fe4db0575f46612cb15eb0f9"
dependencies = [
"proc-macro2",
"quote 1.0.33",
"quote",
"syn 2.0.38",
]
[[package]]
name = "autocfg"
version = "0.1.8"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "0dde43e75fd43e8a1bf86103336bc699aa8d17ad1be60c76c0bdfd4828e19b78"
dependencies = [
"autocfg 1.1.0",
]
[[package]]
name = "autocfg"
version = "1.1.0"
@@ -249,15 +240,6 @@ version = "0.5.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "cd7cc57abe963c6d3b9d8be5b06ba7c8957a930305ca90304f24ef040aa6f961"
[[package]]
name = "cloudabi"
version = "0.0.3"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "ddfc5b9aa5d4507acaf872de71051dfd0e309860e88966e1051e462a077aac4f"
dependencies = [
"bitflags 1.3.2",
]
[[package]]
name = "colorchoice"
version = "1.0.0"
@@ -305,15 +287,6 @@ version = "2.4.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "c2e66c9d817f1720209181c316d28635c050fa304f9c79e47a520882661b7308"
[[package]]
name = "debug_unreachable"
version = "0.1.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "9a032eac705ca39214d169f83e3d3da290af06d8d1d344d1baad2fd002dca4b3"
dependencies = [
"unreachable",
]
[[package]]
name = "digest"
version = "0.10.7"
@@ -325,16 +298,6 @@ dependencies = [
"subtle",
]
[[package]]
name = "dissolve"
version = "0.2.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "898542be4716d992082c8e4fc331b792d626cfa71cb2b4790f828b9a8f921a90"
dependencies = [
"html5ever",
"tendril",
]
[[package]]
name = "encoding_rs"
version = "0.8.33"
@@ -352,7 +315,7 @@ checksum = "85cdab6a89accf66733ad5a1693a4dcced6aeff64602b634530dd73c1f3ee9f0"
dependencies = [
"humantime",
"is-terminal",
"log 0.4.20",
"log",
"regex",
"termcolor",
]
@@ -427,22 +390,6 @@ dependencies = [
"percent-encoding",
]
[[package]]
name = "fuchsia-cprng"
version = "0.1.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "a06f77d526c1a601b7c4cdd98f54b5eaabffc14d5f2f0296febdc7f357c6d3ba"
[[package]]
name = "futf"
version = "0.1.5"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "df420e2e84819663797d1ec6544b13c5be84629e7bb00dc960d6917db2987843"
dependencies = [
"mac",
"new_debug_unreachable",
]
[[package]]
name = "futures"
version = "0.3.29"
@@ -498,7 +445,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "53b153fd91e4b0147f4aced87be237c98248656bb01050b96bf3ee89220a8ddb"
dependencies = [
"proc-macro2",
"quote 1.0.33",
"quote",
"syn 2.0.38",
]
@@ -632,16 +579,12 @@ dependencies = [
]
[[package]]
name = "html5ever"
version = "0.21.0"
name = "html-escape"
version = "0.2.13"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "ba3a1fd1857a714d410c191364c5d7bf8a6487c0ab5575146d37dd7eb17ef523"
checksum = "6d1ad449764d627e22bfd7cd5e8868264fc9236e07c752972b4080cd351cb476"
dependencies = [
"log 0.3.9",
"mac",
"markup5ever",
"quote 0.3.15",
"syn 0.11.11",
"utf8-width",
]
[[package]]
@@ -774,7 +717,7 @@ version = "1.9.3"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "bd070e393353796e801d209ad339e89596eb4c8d430d18ede6a1cced8fafbd99"
dependencies = [
"autocfg 1.1.0",
"autocfg",
"hashbrown 0.12.3",
]
@@ -820,12 +763,6 @@ dependencies = [
"wasm-bindgen",
]
[[package]]
name = "lazy_static"
version = "0.2.11"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "76f033c7ad61445c5b347c7382dd1237847eb1bce590fe50365dcb33d546be73"
[[package]]
name = "lazy_static"
version = "1.4.0"
@@ -860,45 +797,16 @@ version = "0.4.11"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "3c168f8615b12bc01f9c17e2eb0cc07dcae1940121185446edc3744920e8ef45"
dependencies = [
"autocfg 1.1.0",
"autocfg",
"scopeguard",
]
[[package]]
name = "log"
version = "0.3.9"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "e19e8d5c34a3e0e2223db8e060f9e8264aeeb5c5fc64a4ee9965c062211c024b"
dependencies = [
"log 0.4.20",
]
[[package]]
name = "log"
version = "0.4.20"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "b5e6163cb8c49088c2c36f57875e58ccd8c87c7427f7fbd50ea6710b2f3f2e8f"
[[package]]
name = "mac"
version = "0.1.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "c41e0c4fef86961ac6d6f8a82609f55f31b05e4fce149ac5710e439df7619ba4"
[[package]]
name = "markup5ever"
version = "0.6.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "2cf89d3e0486c32c9d99521455ddf9a438910a1ce2bd376936086edc15dff5fc"
dependencies = [
"phf",
"phf_codegen",
"rustc-serialize",
"string_cache",
"string_cache_codegen",
"tendril",
]
[[package]]
name = "megalodon"
version = "0.11.6"
@@ -910,9 +818,9 @@ dependencies = [
"futures-util",
"hex",
"http",
"log 0.4.20",
"log",
"oauth2",
"rand 0.8.5",
"rand",
"regex",
"reqwest",
"serde",
@@ -975,9 +883,9 @@ version = "0.2.11"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "07226173c32f2926027b63cce4bcd8076c3552846cbe7925f3aaffeac0a3b92e"
dependencies = [
"lazy_static 1.4.0",
"lazy_static",
"libc",
"log 0.4.20",
"log",
"openssl",
"openssl-probe",
"openssl-sys",
@@ -987,19 +895,13 @@ dependencies = [
"tempfile",
]
[[package]]
name = "new_debug_unreachable"
version = "1.0.4"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "e4a24736216ec316047a1fc4252e27dabb04218aa4a3f37c6e7ddbf1f9782b54"
[[package]]
name = "num-traits"
version = "0.2.17"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "39e3200413f237f41ab11ad6d161bc7239c84dcb631773ccd7de3dfe4b5c267c"
dependencies = [
"autocfg 1.1.0",
"autocfg",
]
[[package]]
@@ -1032,7 +934,7 @@ dependencies = [
"oauth-credentials",
"oauth1-request-derive",
"percent-encoding",
"rand 0.8.5",
"rand",
"sha-1",
]
@@ -1045,7 +947,7 @@ dependencies = [
"proc-macro-crate",
"proc-macro-error",
"proc-macro2",
"quote 1.0.33",
"quote",
"syn 1.0.109",
]
@@ -1059,7 +961,7 @@ dependencies = [
"chrono",
"getrandom",
"http",
"rand 0.8.5",
"rand",
"reqwest",
"serde",
"serde_json",
@@ -1086,13 +988,13 @@ checksum = "dd8b5dd2ae5ed71462c540258bedcb51965123ad7e7ccf4b9a8cafaa4a63576d"
[[package]]
name = "oolatoocs"
version = "1.1.0"
version = "1.2.0"
dependencies = [
"clap",
"dissolve",
"env_logger",
"futures",
"log 0.4.20",
"html-escape",
"log",
"megalodon",
"oauth1-request",
"regex",
@@ -1125,7 +1027,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "a948666b637a0f465e8564c73e89d4dde00d72d4d473cc972f390fc3dcee7d9c"
dependencies = [
"proc-macro2",
"quote 1.0.33",
"quote",
"syn 2.0.38",
]
@@ -1176,44 +1078,6 @@ version = "2.3.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "9b2a4787296e9989611394c33f193f676704af1686e70b8f8033ab5ba9a35a94"
[[package]]
name = "phf"
version = "0.7.24"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "b3da44b85f8e8dfaec21adae67f95d93244b2ecf6ad2a692320598dcc8e6dd18"
dependencies = [
"phf_shared",
]
[[package]]
name = "phf_codegen"
version = "0.7.24"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "b03e85129e324ad4166b06b2c7491ae27fe3ec353af72e72cd1654c7225d517e"
dependencies = [
"phf_generator",
"phf_shared",
]
[[package]]
name = "phf_generator"
version = "0.7.24"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "09364cc93c159b8b06b1f4dd8a4398984503483891b0c26b867cf431fb132662"
dependencies = [
"phf_shared",
"rand 0.6.5",
]
[[package]]
name = "phf_shared"
version = "0.7.24"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "234f71a15de2288bcb7e3b6515828d22af7ec8598ee6d24c3b526fa0a80b67a0"
dependencies = [
"siphasher",
]
[[package]]
name = "pin-project-lite"
version = "0.2.13"
@@ -1238,12 +1102,6 @@ version = "0.2.17"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "5b40af805b3121feab8a3c29f04d8ad262fa8e0561883e7653e024ae4479e6de"
[[package]]
name = "precomputed-hash"
version = "0.1.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "925383efa346730478fb4838dbe9137d2a47675ad789c546d150a6e1dd4ab31c"
[[package]]
name = "proc-macro-crate"
version = "1.3.1"
@@ -1262,7 +1120,7 @@ checksum = "da25490ff9892aab3fcf7c36f08cfb902dd3e71ca0f9f9517bea02a73a5ce38c"
dependencies = [
"proc-macro-error-attr",
"proc-macro2",
"quote 1.0.33",
"quote",
"syn 1.0.109",
"version_check",
]
@@ -1274,7 +1132,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "a1be40180e52ecc98ad80b184934baf3d0d29f979574e439af5a55274b35f869"
dependencies = [
"proc-macro2",
"quote 1.0.33",
"quote",
"version_check",
]
@@ -1287,12 +1145,6 @@ dependencies = [
"unicode-ident",
]
[[package]]
name = "quote"
version = "0.3.15"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "7a6e920b65c65f10b2ae65c831a81a073a89edd28c7cce89475bff467ab4167a"
[[package]]
name = "quote"
version = "1.0.33"
@@ -1302,25 +1154,6 @@ dependencies = [
"proc-macro2",
]
[[package]]
name = "rand"
version = "0.6.5"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "6d71dacdc3c88c1fde3885a3be3fbab9f35724e6ce99467f7d9c5026132184ca"
dependencies = [
"autocfg 0.1.8",
"libc",
"rand_chacha 0.1.1",
"rand_core 0.4.2",
"rand_hc",
"rand_isaac",
"rand_jitter",
"rand_os",
"rand_pcg",
"rand_xorshift",
"winapi",
]
[[package]]
name = "rand"
version = "0.8.5"
@@ -1328,18 +1161,8 @@ source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "34af8d1a0e25924bc5b7c43c079c942339d8f0a8b57c39049bef581b46327404"
dependencies = [
"libc",
"rand_chacha 0.3.1",
"rand_core 0.6.4",
]
[[package]]
name = "rand_chacha"
version = "0.1.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "556d3a1ca6600bfcbab7c7c91ccb085ac7fbbcd70e008a98742e7847f4f7bcef"
dependencies = [
"autocfg 0.1.8",
"rand_core 0.3.1",
"rand_chacha",
"rand_core",
]
[[package]]
@@ -1349,24 +1172,9 @@ source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "e6c10a63a0fa32252be49d21e7709d4d4baf8d231c2dbce1eaa8141b9b127d88"
dependencies = [
"ppv-lite86",
"rand_core 0.6.4",
"rand_core",
]
[[package]]
name = "rand_core"
version = "0.3.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "7a6fdeb83b075e8266dcc8762c22776f6877a63111121f5f8c7411e5be7eed4b"
dependencies = [
"rand_core 0.4.2",
]
[[package]]
name = "rand_core"
version = "0.4.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "9c33a3c44ca05fa6f1807d8e6743f3824e8509beca625669633be0acbdf509dc"
[[package]]
name = "rand_core"
version = "0.6.4"
@@ -1376,77 +1184,6 @@ dependencies = [
"getrandom",
]
[[package]]
name = "rand_hc"
version = "0.1.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "7b40677c7be09ae76218dc623efbf7b18e34bced3f38883af07bb75630a21bc4"
dependencies = [
"rand_core 0.3.1",
]
[[package]]
name = "rand_isaac"
version = "0.1.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "ded997c9d5f13925be2a6fd7e66bf1872597f759fd9dd93513dd7e92e5a5ee08"
dependencies = [
"rand_core 0.3.1",
]
[[package]]
name = "rand_jitter"
version = "0.1.4"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "1166d5c91dc97b88d1decc3285bb0a99ed84b05cfd0bc2341bdf2d43fc41e39b"
dependencies = [
"libc",
"rand_core 0.4.2",
"winapi",
]
[[package]]
name = "rand_os"
version = "0.1.3"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "7b75f676a1e053fc562eafbb47838d67c84801e38fc1ba459e8f180deabd5071"
dependencies = [
"cloudabi",
"fuchsia-cprng",
"libc",
"rand_core 0.4.2",
"rdrand",
"winapi",
]
[[package]]
name = "rand_pcg"
version = "0.1.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "abf9b09b01790cfe0364f52bf32995ea3c39f4d2dd011eac241d2914146d0b44"
dependencies = [
"autocfg 0.1.8",
"rand_core 0.4.2",
]
[[package]]
name = "rand_xorshift"
version = "0.1.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "cbf7e9e623549b0e21f6e97cf8ecf247c1a8fd2e8a992ae265314300b2455d5c"
dependencies = [
"rand_core 0.3.1",
]
[[package]]
name = "rdrand"
version = "0.4.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "678054eb77286b51581ba43620cc911abf02758c91f93f479767aed0f90458b2"
dependencies = [
"rand_core 0.3.1",
]
[[package]]
name = "redox_syscall"
version = "0.3.5"
@@ -1513,7 +1250,7 @@ dependencies = [
"hyper-tls",
"ipnet",
"js-sys",
"log 0.4.20",
"log",
"mime",
"mime_guess",
"native-tls",
@@ -1575,12 +1312,6 @@ version = "0.1.23"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "d626bb9dae77e28219937af045c257c28bfd3f69333c512553507f5f9798cb76"
[[package]]
name = "rustc-serialize"
version = "0.3.24"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "dcf128d1287d2ea9d80910b5f1120d0b8eede3fbf1abe91c40d39ea7d51e6fda"
[[package]]
name = "rustix"
version = "0.38.20"
@@ -1600,7 +1331,7 @@ version = "0.21.8"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "446e14c5cda4f3f30fe71863c34ec70f5ac79d6087097ad0bb433e1be5edf04c"
dependencies = [
"log 0.4.20",
"log",
"ring",
"rustls-webpki",
"sct",
@@ -1707,7 +1438,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "d6c7207fbec9faa48073f3e3074cbe553af6ea512d7c21ba46e434e70ea9fbc1"
dependencies = [
"proc-macro2",
"quote 1.0.33",
"quote",
"syn 2.0.38",
]
@@ -1795,19 +1526,13 @@ dependencies = [
"libc",
]
[[package]]
name = "siphasher"
version = "0.2.3"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "0b8de496cf83d4ed58b6be86c3a275b8602f6ffe98d3024a869e124147a9a3ac"
[[package]]
name = "slab"
version = "0.4.9"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "8f92a496fb766b417c996b9c5e57daf2f7ad3b0bebe1ccfca4856390e3d3bb67"
dependencies = [
"autocfg 1.1.0",
"autocfg",
]
[[package]]
@@ -1842,40 +1567,6 @@ version = "0.9.8"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "6980e8d7511241f8acf4aebddbb1ff938df5eebe98691418c4468d0b72a96a67"
[[package]]
name = "string_cache"
version = "0.6.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "413fc7852aeeb5472f1986ef755f561ddf0c789d3d796e65f0b6fe293ecd4ef8"
dependencies = [
"debug_unreachable",
"lazy_static 0.2.11",
"phf_shared",
"precomputed-hash",
"serde",
"string_cache_codegen",
"string_cache_shared",
]
[[package]]
name = "string_cache_codegen"
version = "0.4.4"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "f0f45ed1b65bf9a4bf2f7b7dc59212d1926e9eaf00fa998988e420fd124467c6"
dependencies = [
"phf_generator",
"phf_shared",
"proc-macro2",
"quote 1.0.33",
"string_cache_shared",
]
[[package]]
name = "string_cache_shared"
version = "0.3.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "b1884d1bc09741d466d9b14e6d37ac89d6909cbcac41dd9ae982d4d063bbedfc"
[[package]]
name = "strsim"
version = "0.10.0"
@@ -1888,17 +1579,6 @@ version = "2.4.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "6bdef32e8150c2a081110b42772ffe7d7c9032b606bc226c8260fd97e0976601"
[[package]]
name = "syn"
version = "0.11.11"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "d3b891b9015c88c576343b9b3e41c2c11a51c219ef067b264bd9c8aa9b441dad"
dependencies = [
"quote 0.3.15",
"synom",
"unicode-xid",
]
[[package]]
name = "syn"
version = "1.0.109"
@@ -1906,7 +1586,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "72b64191b275b66ffe2469e8af2c1cfe3bafa67b529ead792a6d0160888b4237"
dependencies = [
"proc-macro2",
"quote 1.0.33",
"quote",
"unicode-ident",
]
@@ -1917,19 +1597,10 @@ source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "e96b79aaa137db8f61e26363a0c9b47d8b4ec75da28b7d1d614c2303e232408b"
dependencies = [
"proc-macro2",
"quote 1.0.33",
"quote",
"unicode-ident",
]
[[package]]
name = "synom"
version = "0.11.3"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "a393066ed9010ebaed60b9eafa373d4b1baac186dd7e008555b0f702b51945b6"
dependencies = [
"unicode-xid",
]
[[package]]
name = "system-configuration"
version = "0.5.1"
@@ -1964,17 +1635,6 @@ dependencies = [
"windows-sys",
]
[[package]]
name = "tendril"
version = "0.4.3"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "d24a120c5fc464a3458240ee02c299ebcb9d67b5249c8848b09d639dca8d7bb0"
dependencies = [
"futf",
"mac",
"utf-8",
]
[[package]]
name = "termcolor"
version = "1.3.0"
@@ -2000,7 +1660,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "266b2e40bc00e5a6c09c3584011e08b06f123c00362c92b975ba9843aaaa14b8"
dependencies = [
"proc-macro2",
"quote 1.0.33",
"quote",
"syn 2.0.38",
]
@@ -2045,7 +1705,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "630bdcf245f78637c13ec01ffae6187cca34625e8c63150d424b59e55af2675e"
dependencies = [
"proc-macro2",
"quote 1.0.33",
"quote",
"syn 2.0.38",
]
@@ -2076,7 +1736,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "212d5dcb2a1ce06d81107c3d0ffa3121fe974b73f068c8282cb1c32328113b6c"
dependencies = [
"futures-util",
"log 0.4.20",
"log",
"rustls",
"rustls-native-certs",
"tokio",
@@ -2185,8 +1845,8 @@ dependencies = [
"data-encoding",
"http",
"httparse",
"log 0.4.20",
"rand 0.8.5",
"log",
"rand",
"rustls",
"sha1",
"thiserror",
@@ -2230,21 +1890,6 @@ dependencies = [
"tinyvec",
]
[[package]]
name = "unicode-xid"
version = "0.0.4"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "8c1f860d7d29cf02cb2f3f359fd35991af3d30bac52c57d265a3c461074cb4dc"
[[package]]
name = "unreachable"
version = "0.1.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "1f2ae5ddb18e1c92664717616dd9549dde73f539f01bd7b77c2edb2446bdff91"
dependencies = [
"void",
]
[[package]]
name = "untrusted"
version = "0.9.0"
@@ -2275,6 +1920,12 @@ version = "0.7.6"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "09cc8ee72d2a9becf2f2febe0205bbed8fc6615b7cb429ad062dc7b7ddd036a9"
[[package]]
name = "utf8-width"
version = "0.1.6"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "5190c9442dcdaf0ddd50f37420417d219ae5261bbf5db120d0f9bab996c9cba1"
[[package]]
name = "utf8parse"
version = "0.2.1"
@@ -2302,12 +1953,6 @@ version = "0.9.4"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "49874b5167b65d7193b8aba1567f5c7d93d001cafc34600cee003eda787e483f"
[[package]]
name = "void"
version = "1.0.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "6a02e4885ed3bc0f2de90ea6dd45ebcbb66dacffe03547fadbb0eeae2770887d"
[[package]]
name = "want"
version = "0.3.1"
@@ -2340,10 +1985,10 @@ source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "5ef2b6d3c510e9625e5fe6f509ab07d66a760f0885d858736483c32ed7809abd"
dependencies = [
"bumpalo",
"log 0.4.20",
"log",
"once_cell",
"proc-macro2",
"quote 1.0.33",
"quote",
"syn 2.0.38",
"wasm-bindgen-shared",
]
@@ -2366,7 +2011,7 @@ version = "0.2.87"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "dee495e55982a3bd48105a7b947fd2a9b4a8ae3010041b9e0faab3f9cd028f1d"
dependencies = [
"quote 1.0.33",
"quote",
"wasm-bindgen-macro-support",
]
@@ -2377,7 +2022,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "54681b18a46765f095758388f2d0cf16eb8d4169b639ab575a8f5693af210c7b"
dependencies = [
"proc-macro2",
"quote 1.0.33",
"quote",
"syn 2.0.38",
"wasm-bindgen-backend",
"wasm-bindgen-shared",

View File

@@ -1,20 +1,20 @@
[package]
name = "oolatoocs"
version = "1.1.0"
version = "1.2.0"
edition = "2021"
# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html
[dependencies]
clap = "^4"
dissolve = "0.2.2"
env_logger = "^0.10"
futures = "0.3.29"
futures = "^0.3"
html-escape = "^0.2"
log = "^0.4"
megalodon = "^0.11"
oauth1-request = "^0.6"
regex = "1.10.2"
reqwest = { version = "0.11.22", features = ["json", "stream", "multipart"] }
regex = "^1.10"
reqwest = { version = "^0.11", features = ["json", "stream", "multipart"] }
rusqlite = "^0.27"
serde = { version = "^1.0", features = ["derive"] }
tokio = { version = "^1.33", features = ["rt-multi-thread", "macros", "time"] }

View File

@@ -34,7 +34,6 @@ pub async fn get_mastodon_timeline_since(
.await?
.json()
.iter()
.cloned()
.filter(|t| {
// this excludes the reply to other users
t.in_reply_to_account_id.is_none()
@@ -45,6 +44,7 @@ pub async fn get_mastodon_timeline_since(
.filter(|t| t.visibility == StatusVisibility::Public) // excludes everything that isnt
// public
.filter(|t| t.reblog.is_none()) // excludes reblogs
.cloned()
.collect();
timeline.reverse();

View File

@@ -1,15 +1,15 @@
use dissolve::strip_html_tags;
use html_escape::decode_html_entities;
use megalodon::entities::status::Tag;
use regex::Regex;
use std::error::Error;
/// Turns the HTML `content` of a status into plain text.
///
/// The steps run in this order:
/// 1. every `</p><p>` becomes a blank line (`"\n\n"`) and every `<br />` a newline,
/// 2. the remaining HTML tags are removed with `strip_html_tags`,
/// 3. `strip_mastodon_tags` is applied with `tags` (presumably removing the
///    hashtags they name; confirm against that function),
/// 4. trailing `'\n'` characters, then trailing spaces, are trimmed,
/// 5. HTML entities are decoded with `decode_html_entities`.
///
/// Returns the resulting text wrapped in `Ok`.
///
/// # Panics
///
/// Panics if `strip_mastodon_tags` returns an error, because its result is unwrapped.
pub fn strip_everything(content: &str, tags: &Vec<Tag>) -> Result<String, Box<dyn Error>> {
// NOTE(review): the two `let mut res` statements below look like the removed and the
// added form of the same statement in this change; the one ending in `.join("")`
// matches the previous `dissolve`-based helper. Confirm which one the file should keep.
let mut res =
strip_html_tags(&content.replace("</p><p>", "\n\n").replace("<br />", "\n")).join("");
let mut res = strip_html_tags(&content.replace("</p><p>", "\n\n").replace("<br />", "\n"));
// NOTE(review): this function already returns a `Result`, so `?` would propagate the
// error instead of panicking here.
strip_mastodon_tags(&mut res, tags).unwrap();
// Trailing newlines are trimmed first, then trailing spaces.
res = res.trim_end_matches('\n').trim_end_matches(' ').to_string();
// Entities are decoded only after the tags are gone.
res = decode_html_entities(&res).to_string();
Ok(res)
}
@@ -22,3 +22,64 @@ fn strip_mastodon_tags(content: &mut String, tags: &Vec<Tag>) -> Result<(), Box<
Ok(())
}
/// Removes HTML tags from `input` and returns the text found outside of them.
///
/// A tag starts at a `<` and runs up to and including the next `>`; everything
/// in between is dropped. No entity decoding happens here.
///
/// A `>` that does not close a tag is ordinary text and is kept as is. An
/// unterminated `<` discards the rest of the input, since no closing `>`
/// ever ends the tag.
fn strip_html_tags(input: &str) -> String {
    // The stripped text can never be longer than the input.
    let mut data = String::with_capacity(input.len());
    let mut inside = false;

    for c in input.chars() {
        match c {
            '<' => inside = true,
            // Only a `>` that actually closes an open tag is markup.
            '>' if inside => inside = false,
            _ if !inside => data.push(c),
            // Anything else is tag content: skip it.
            _ => {}
        }
    }

    data
}
#[cfg(test)]
mod tests {
    use super::*;

    /// Hashtags named in `tags` are removed from the text, whatever case they are written in.
    #[test]
    fn test_strip_mastodon_tags() {
        // (name, url) pairs turned into `Tag` values below.
        let tags: Vec<Tag> = [
            ("putaclic", "https://m.nintendojo.fr/tags/putaclic"),
            ("tamerelol", "https://m.nintendojo.fr/tags/tamerelol"),
            (
                "JeFaisNawakEnCamelCase",
                "https://m.nintendojo.fr/tags/jefaisnawakencamelcase",
            ),
        ]
        .iter()
        .map(|&(name, url)| Tag {
            name: name.to_string(),
            url: url.to_string(),
        })
        .collect();

        let mut toot = String::from("Cest super ça! #putaclic #TAMERELOL #JeFaisNawakEnCamelCase");

        strip_mastodon_tags(&mut toot, &tags).unwrap();

        assert_eq!(toot, String::from("Cest super ça! "));
    }

    /// A full status body loses its markup, keeps its line breaks and gets its entities decoded.
    #[test]
    fn test_strip_everything() {
        let html = "<p>Ce soir à 21h, c&#39;est le Dojobar ! Au programme ce soir, une rétrospective sur la série Mario &amp; Luigi.<br />Comme d&#39;hab, le Twitch sera ici : <a href=\"https://twitch.tv/nintendojofr\" target=\"_blank\" rel=\"nofollow noopener noreferrer\" translate=\"no\"><span class=\"invisible\">https://</span><span class=\"\">twitch.tv/nintendojofr</span><span class=\"invisible\"></span></a><br />Ou juste l&#39;audio là : <a href=\"https://nintendojo.fr/dojobar\" target=\"_blank\" rel=\"nofollow noopener noreferrer\" translate=\"no\"><span class=\"invisible\">https://</span><span class=\"\">nintendojo.fr/dojobar</span><span class=\"invisible\"></span></a><br />A toute !</p>";
        let plain_text = String::from("Ce soir à 21h, c'est le Dojobar ! Au programme ce soir, une rétrospective sur la série Mario & Luigi.\nComme d'hab, le Twitch sera ici : https://twitch.tv/nintendojofr\nOu juste l'audio là : https://nintendojo.fr/dojobar\nA toute !");

        // No hashtags to strip for this sample.
        let stripped = strip_everything(html, &Vec::new()).unwrap();

        assert_eq!(stripped, plain_text);
    }
}