From f270f2ed652459cec2d5251b998eef17a88ea49e Mon Sep 17 00:00:00 2001 From: BlackDex Date: Sun, 16 May 2021 15:29:13 +0200 Subject: [PATCH] Updated icon fetching and crates. - Updated some crates - Updated icon fetching code: + Use a cookie jar and set Max-Age to 2 minutes for all cookies + Locate the base href tag to fix some locations + Changed User-Agent (Helps on some sites to get HTML instead of JS) + Reduced HTML code limit from 512KB to 384KB + Allow some large icons higher-up in the sort + Allow GIF images + Ignore cookie_store and hyper::client debug messages --- Cargo.lock | 198 +++++++++++++++++++++++++++++++++-------------- Cargo.toml | 14 +++- src/api/icons.rs | 169 +++++++++++++++++++++++++++------------- src/main.rs | 3 + 4 files changed, 272 insertions(+), 112 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index d5556192..afdc07a0 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -321,6 +321,49 @@ dependencies = [ "version_check 0.9.3", ] +[[package]] +name = "cookie" +version = "0.15.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ffdf8865bac3d9a3bde5bde9088ca431b11f5d37c7a578b8086af77248b76627" +dependencies = [ + "percent-encoding 2.1.0", + "time 0.2.26", + "version_check 0.9.3", +] + +[[package]] +name = "cookie_store" +version = "0.12.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3818dfca4b0cb5211a659bbcbb94225b7127407b2b135e650d717bfb78ab10d3" +dependencies = [ + "cookie 0.14.4", + "idna 0.2.3", + "log 0.4.14", + "publicsuffix 1.5.6", + "serde", + "serde_json", + "time 0.2.26", + "url 2.2.2", +] + +[[package]] +name = "cookie_store" +version = "0.15.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "55b4ac5559dd39f7bdc516f769cb412b151585d8886d216871a8435ed7f862cd" +dependencies = [ + "cookie 0.15.0", + "idna 0.2.3", + "log 0.4.14", + "publicsuffix 2.1.0", + "serde", + "serde_json", + "time 0.2.26", + "url 2.2.2", +] + [[package]] name =
"core-foundation" version = "0.9.1" @@ -339,9 +382,9 @@ checksum = "ea221b5284a47e40033bf9b66f35f984ec0ea2931eb03505246cd27a963f981b" [[package]] name = "cpufeatures" -version = "0.1.1" +version = "0.1.4" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "dec1028182c380cc45a2e2c5ec841134f2dfd0f8f5f0a5bcd68004f81b5efdf4" +checksum = "ed00c67cb5d0a7d64a44f6ad2668db7e7530311dd53ea79bcd4fb022c64911c8" dependencies = [ "libc", ] @@ -395,7 +438,7 @@ checksum = "3ee2393c4a91429dffb4bedf19f4d6abf27d8a732c8ce4980305d782e5426d57" [[package]] name = "data-url" version = "0.1.0" -source = "git+https://github.com/servo/rust-url?rev=540ede02d0771824c0c80ff9f57fe8eff38b1291#540ede02d0771824c0c80ff9f57fe8eff38b1291" +source = "git+https://github.com/servo/rust-url?rev=eb7330b5296c0d43816d1346211b74182bb4ae37#eb7330b5296c0d43816d1346211b74182bb4ae37" dependencies = [ "matches", ] @@ -648,9 +691,9 @@ dependencies = [ [[package]] name = "futures" -version = "0.3.14" +version = "0.3.15" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a9d5813545e459ad3ca1bff9915e9ad7f1a47dc6a91b627ce321d5863b7dd253" +checksum = "0e7e43a803dae2fa37c1f6a8fe121e1f7bf9548b4dfc0522a42f34145dadfc27" dependencies = [ "futures-channel", "futures-core", @@ -663,9 +706,9 @@ dependencies = [ [[package]] name = "futures-channel" -version = "0.3.14" +version = "0.3.15" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ce79c6a52a299137a6013061e0cf0e688fce5d7f1bc60125f520912fdb29ec25" +checksum = "e682a68b29a882df0545c143dc3646daefe80ba479bcdede94d5a703de2871e2" dependencies = [ "futures-core", "futures-sink", @@ -673,15 +716,15 @@ dependencies = [ [[package]] name = "futures-core" -version = "0.3.14" +version = "0.3.15" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "098cd1c6dda6ca01650f1a37a794245eb73181d0d4d4e955e2f3c37db7af1815" +checksum = 
"0402f765d8a89a26043b889b26ce3c4679d268fa6bb22cd7c6aad98340e179d1" [[package]] name = "futures-executor" -version = "0.3.14" +version = "0.3.15" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "10f6cb7042eda00f0049b1d2080aa4b93442997ee507eb3828e8bd7577f94c9d" +checksum = "badaa6a909fac9e7236d0620a2f57f7664640c56575b71a7552fbd68deafab79" dependencies = [ "futures-core", "futures-task", @@ -690,16 +733,17 @@ dependencies = [ [[package]] name = "futures-io" -version = "0.3.14" +version = "0.3.15" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "365a1a1fb30ea1c03a830fdb2158f5236833ac81fa0ad12fe35b29cddc35cb04" +checksum = "acc499defb3b348f8d8f3f66415835a9131856ff7714bf10dadfc4ec4bdb29a1" [[package]] name = "futures-macro" -version = "0.3.14" +version = "0.3.15" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "668c6733a182cd7deb4f1de7ba3bf2120823835b3bcfbeacf7d2c4a773c1bb8b" +checksum = "a4c40298486cdf52cc00cd6d6987892ba502c7656a16a4192a9992b1ccedd121" dependencies = [ + "autocfg", "proc-macro-hack", "proc-macro2 1.0.26", "quote 1.0.9", @@ -708,22 +752,23 @@ dependencies = [ [[package]] name = "futures-sink" -version = "0.3.14" +version = "0.3.15" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5c5629433c555de3d82861a7a4e3794a4c40040390907cfbfd7143a92a426c23" +checksum = "a57bead0ceff0d6dde8f465ecd96c9338121bb7717d3e7b108059531870c4282" [[package]] name = "futures-task" -version = "0.3.14" +version = "0.3.15" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ba7aa51095076f3ba6d9a1f702f74bd05ec65f555d70d2033d55ba8d69f581bc" +checksum = "8a16bef9fc1a4dddb5bee51c989e3fbba26569cbb0e31f5b303c184e3dd33dae" [[package]] name = "futures-util" -version = "0.3.14" +version = "0.3.15" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "3c144ad54d60f23927f0a6b6d816e4271278b64f005ad65e4e35291d2de9c025" +checksum = 
"feb5c238d27e2bf94ffdfd27b2c29e3df4a68c4193bb6427384259e2bf191967" dependencies = [ + "autocfg", "futures-channel", "futures-core", "futures-io", @@ -841,6 +886,12 @@ version = "0.9.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "d7afe4a420e3fe79967a00898cc1f4db7c8a49a9333a29f8a4bd76a253d5cd04" +[[package]] +name = "hashbrown" +version = "0.11.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ab5ef0d4909ef3724cc8cce6ccc8572c5c817592e9285f5464f8e86f8bd3726e" + [[package]] name = "hermit-abi" version = "0.1.18" @@ -920,9 +971,9 @@ dependencies = [ [[package]] name = "httparse" -version = "1.4.0" +version = "1.4.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "4a1ce40d6fc9764887c2fdc7305c3dcc429ba11ff981c1509416afd5697e4437" +checksum = "f3a87b616e37e93c22fb19bcd386f02f3af5ea98a25670ad0fce773de23c5e68" [[package]] name = "httpdate" @@ -1027,7 +1078,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "824845a0bf897a9042383849b02c1bc219c2383772efcd5c6f9766fa4b81aef3" dependencies = [ "autocfg", - "hashbrown", + "hashbrown 0.9.1", ] [[package]] @@ -1072,9 +1123,9 @@ dependencies = [ [[package]] name = "js-sys" -version = "0.3.50" +version = "0.3.51" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "2d99f9e3e84b8f67f846ef5b4cbbc3b1c29f6c759fcbce6f01aa0e73d932a24c" +checksum = "83bdfbace3a0e81a4253f73b49e960b053e396a11012cbd49b9b74d6a2b67062" dependencies = [ "wasm-bindgen", ] @@ -1123,9 +1174,9 @@ checksum = "830d08ce1d1d941e6b30645f1a0eb5643013d835ce3779a5fc208261dbe10f55" [[package]] name = "lettre" -version = "0.10.0-beta.4" +version = "0.10.0-rc.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "2b7fd0c394e97e38d87bd2dfdf91983ab406b044a0bfd4e5b5c82bdfa0324526" +checksum = "4be4ff7e8bcb0e0c6902815554a286889b0e99b4ea6e898afb7b9f53174b1929" dependencies = [ "base64 0.13.0", "fastrand", @@ 
-1572,9 +1623,9 @@ dependencies = [ [[package]] name = "openssl-probe" -version = "0.1.2" +version = "0.1.4" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "77af24da69f9d9341038eba93a073b1fdaaa1b788221b00a69bce9e762cb32de" +checksum = "28988d872ab76095a6e6ac88d99b54fd267702734fd7ffe610ca27f533ddb95a" [[package]] name = "openssl-src" @@ -1909,6 +1960,34 @@ dependencies = [ "unicode-xid 0.2.2", ] +[[package]] +name = "psl-types" +version = "2.0.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "66b398073e7cdd6f05934389a8f5961e3aabfa66675b6f440df4e2c793d51a4f" + +[[package]] +name = "publicsuffix" +version = "1.5.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "95b4ce31ff0a27d93c8de1849cf58162283752f065a90d508f1105fa6c9a213f" +dependencies = [ + "idna 0.2.3", + "url 2.2.2", +] + +[[package]] +name = "publicsuffix" +version = "2.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c3ac055aef7cc7a1caefbc65144be879e862467dcd9b8a8d57b64a13e7dce15d" +dependencies = [ + "byteorder", + "hashbrown 0.11.2", + "idna 0.2.3", + "psl-types", +] + [[package]] name = "quick-error" version = "1.2.3" @@ -2134,6 +2213,8 @@ dependencies = [ "async-compression", "base64 0.13.0", "bytes 1.0.1", + "cookie 0.14.4", + "cookie_store 0.12.0", "encoding_rs", "futures-core", "futures-util", @@ -2152,6 +2233,7 @@ dependencies = [ "serde", "serde_json", "serde_urlencoded", + "time 0.2.26", "tokio", "tokio-native-tls", "tokio-socks", @@ -2248,7 +2330,7 @@ name = "rocket_http" version = "0.5.0-dev" source = "git+https://github.com/SergioBenitez/Rocket?rev=263e39b5b429de1913ce7e3036575a7b4d88b6d7#263e39b5b429de1913ce7e3036575a7b4d88b6d7" dependencies = [ - "cookie", + "cookie 0.14.4", "hyper 0.10.16", "hyper-sync-rustls", "indexmap", @@ -2391,18 +2473,18 @@ checksum = "388a1df253eca08550bef6c72392cfe7c30914bf41df5269b68cbd6ff8f570a3" [[package]] name = "serde" -version = 
"1.0.125" +version = "1.0.126" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "558dc50e1a5a5fa7112ca2ce4effcb321b0300c0d4ccf0776a9f60cd89031171" +checksum = "ec7505abeacaec74ae4778d9d9328fe5a5d04253220a85c4ee022239fc996d03" dependencies = [ "serde_derive", ] [[package]] name = "serde_derive" -version = "1.0.125" +version = "1.0.126" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b093b7a2bb58203b5da3056c05b4ec1fed827dcfdb37347a8841695263b3d06d" +checksum = "963a7dbc9895aeac7ac90e74f34a5d5261828f79df35cbed41e10189d3804d43" dependencies = [ "proc-macro2 1.0.26", "quote 1.0.9", @@ -2459,9 +2541,9 @@ dependencies = [ [[package]] name = "sha-1" -version = "0.9.5" +version = "0.9.6" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b659df5fc3ce22274daac600ffb845300bd2125bcfaec047823075afdab81c00" +checksum = "8c4cfa741c5832d0ef7fab46cabed29c2aae926db0b11bb2069edd8db5e64e16" dependencies = [ "block-buffer 0.9.0", "cfg-if 1.0.0", @@ -2804,9 +2886,9 @@ checksum = "cda74da7e1a664f795bb1f8a87ec406fb89a02522cf6e50620d016add6dbbf5c" [[package]] name = "tokio" -version = "1.5.0" +version = "1.6.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "83f0c8e7c0addab50b663055baf787d0af7f413a46e6e7fb9559a4e4db7137a5" +checksum = "bd3076b5c8cc18138b8f8814895c11eb4de37114a5d127bafdc5e55798ceef37" dependencies = [ "autocfg", "bytes 1.0.1", @@ -2841,9 +2923,9 @@ dependencies = [ [[package]] name = "tokio-util" -version = "0.6.6" +version = "0.6.7" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "940a12c99365c31ea8dd9ba04ec1be183ffe4920102bb7122c2f515437601e8e" +checksum = "1caa0b0c8d94a049db56b5acf8cba99dc0623aab1b26d5b5f5e2d945846b3592" dependencies = [ "bytes 1.0.1", "futures-core", @@ -3054,9 +3136,12 @@ name = "vaultwarden" version = "1.0.0" dependencies = [ "backtrace", + "bytes 1.0.1", "chashmap", "chrono", "chrono-tz", + "cookie 0.15.0", + 
"cookie_store 0.15.0", "data-encoding", "data-url", "diesel", @@ -3095,6 +3180,7 @@ dependencies = [ "time 0.2.26", "tracing", "u2f", + "url 2.2.2", "uuid", "yubico", ] @@ -3152,9 +3238,9 @@ checksum = "1a143597ca7c7793eff794def352d41792a93c481eb1042423ff7ff72ba2c31f" [[package]] name = "wasm-bindgen" -version = "0.2.73" +version = "0.2.74" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "83240549659d187488f91f33c0f8547cbfef0b2088bc470c116d1d260ef623d9" +checksum = "d54ee1d4ed486f78874278e63e4069fc1ab9f6a18ca492076ffb90c5eb2997fd" dependencies = [ "cfg-if 1.0.0", "serde", @@ -3164,9 +3250,9 @@ dependencies = [ [[package]] name = "wasm-bindgen-backend" -version = "0.2.73" +version = "0.2.74" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ae70622411ca953215ca6d06d3ebeb1e915f0f6613e3b495122878d7ebec7dae" +checksum = "3b33f6a0694ccfea53d94db8b2ed1c3a8a4c86dd936b13b9f0a15ec4a451b900" dependencies = [ "bumpalo", "lazy_static", @@ -3179,9 +3265,9 @@ dependencies = [ [[package]] name = "wasm-bindgen-futures" -version = "0.4.23" +version = "0.4.24" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "81b8b767af23de6ac18bf2168b690bed2902743ddf0fb39252e36f9e2bfc63ea" +checksum = "5fba7978c679d53ce2d0ac80c8c175840feb849a161664365d1287b41f2e67f1" dependencies = [ "cfg-if 1.0.0", "js-sys", @@ -3191,9 +3277,9 @@ dependencies = [ [[package]] name = "wasm-bindgen-macro" -version = "0.2.73" +version = "0.2.74" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "3e734d91443f177bfdb41969de821e15c516931c3c3db3d318fa1b68975d0f6f" +checksum = "088169ca61430fe1e58b8096c24975251700e7b1f6fd91cc9d59b04fb9b18bd4" dependencies = [ "quote 1.0.9", "wasm-bindgen-macro-support", @@ -3201,9 +3287,9 @@ dependencies = [ [[package]] name = "wasm-bindgen-macro-support" -version = "0.2.73" +version = "0.2.74" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = 
"d53739ff08c8a68b0fdbcd54c372b8ab800b1449ab3c9d706503bc7dd1621b2c" +checksum = "be2241542ff3d9f241f5e2cb6dd09b37efe786df8851c54957683a49f0987a97" dependencies = [ "proc-macro2 1.0.26", "quote 1.0.9", @@ -3214,15 +3300,15 @@ dependencies = [ [[package]] name = "wasm-bindgen-shared" -version = "0.2.73" +version = "0.2.74" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d9a543ae66aa233d14bb765ed9af4a33e81b8b58d1584cf1b47ff8cd0b9e4489" +checksum = "d7cff876b8f18eed75a66cf49b65e7f967cb354a7aa16003fb55dbfd25b44b4f" [[package]] name = "web-sys" -version = "0.3.50" +version = "0.3.51" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a905d57e488fec8861446d3393670fb50d27a262344013181c2cdf9fff5481be" +checksum = "e828417b379f3df7111d3a2a9e5753706cae29c41f7c4029ee9fd77f3e09e582" dependencies = [ "js-sys", "wasm-bindgen", @@ -3345,7 +3431,7 @@ dependencies = [ "hmac 0.10.1", "rand 0.8.3", "reqwest", - "sha-1 0.9.5", + "sha-1 0.9.6", "threadpool", "url 1.7.2", ] diff --git a/Cargo.toml b/Cargo.toml index 7282f066..36df2db4 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -32,7 +32,13 @@ rocket = { version = "0.5.0-dev", features = ["tls"], default-features = false } rocket_contrib = "0.5.0-dev" # HTTP client -reqwest = { version = "0.11.3", features = ["blocking", "json", "gzip", "brotli", "socks"] } +reqwest = { version = "0.11.3", features = ["blocking", "json", "gzip", "brotli", "socks", "cookies"] } + +# Used for custom short lived cookie jar +cookie = "0.15.0" +cookie_store = "0.15.0" +bytes = "1.0.1" +url = "2.2.2" # multipart/form-data support multipart = { version = "0.17.1", features = ["server"], default-features = false } @@ -47,7 +53,7 @@ rmpv = "0.4.7" chashmap = "2.2.2" # A generic serialization/deserialization framework -serde = { version = "1.0.125", features = ["derive"] } +serde = { version = "1.0.126", features = ["derive"] } serde_json = "1.0.64" # Logging @@ -103,7 +109,7 @@ num-derive = "0.3.3" # Email 
libraries tracing = { version = "0.1.26", features = ["log"] } # Needed to have lettre trace logging used when SMTP_DEBUG is enabled. -lettre = { version = "0.10.0-beta.4", features = ["smtp-transport", "builder", "serde", "native-tls", "hostname", "tracing"], default-features = false } +lettre = { version = "0.10.0-rc.1", features = ["smtp-transport", "builder", "serde", "native-tls", "hostname", "tracing"], default-features = false } # Template library handlebars = { version = "3.5.5", features = ["dir_source"] } @@ -137,7 +143,7 @@ rocket = { git = 'https://github.com/SergioBenitez/Rocket', rev = '263e39b5b429d rocket_contrib = { git = 'https://github.com/SergioBenitez/Rocket', rev = '263e39b5b429de1913ce7e3036575a7b4d88b6d7' } # For favicon extraction from main website -data-url = { git = 'https://github.com/servo/rust-url', package="data-url", rev = '540ede02d0771824c0c80ff9f57fe8eff38b1291' } +data-url = { git = 'https://github.com/servo/rust-url', package="data-url", rev = 'eb7330b5296c0d43816d1346211b74182bb4ae37' } # The maintainer of the `job_scheduler` crate doesn't seem to have responded # to any issues or PRs for almost a year (as of April 2021). 
This hopefully diff --git a/src/api/icons.rs b/src/api/icons.rs index 2b527285..f085678a 100644 --- a/src/api/icons.rs +++ b/src/api/icons.rs @@ -3,14 +3,14 @@ use std::{ fs::{create_dir_all, remove_file, symlink_metadata, File}, io::prelude::*, net::{IpAddr, ToSocketAddrs}, - sync::RwLock, + sync::{Arc, RwLock}, time::{Duration, SystemTime}, }; use once_cell::sync::Lazy; use regex::Regex; -use reqwest::{blocking::Client, blocking::Response, header, Url}; -use rocket::{http::ContentType, http::Cookie, response::Content, Route}; +use reqwest::{blocking::Client, blocking::Response, header}; +use rocket::{http::ContentType, response::Content, Route}; use crate::{ error::Error, @@ -25,19 +25,17 @@ pub fn routes() -> Vec { static CLIENT: Lazy = Lazy::new(|| { // Generate the default headers let mut default_headers = header::HeaderMap::new(); - default_headers.insert(header::USER_AGENT, header::HeaderValue::from_static("Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_4) AppleWebKit/605.1.15 (KHTML, like Gecko) Version/13.1.1 Safari/605.1.15")); - default_headers.insert(header::ACCEPT_LANGUAGE, header::HeaderValue::from_static("en-US,en;q=0.8")); + default_headers + .insert(header::USER_AGENT, header::HeaderValue::from_static("Links (2.22; Linux X86_64; GNU C; text)")); + default_headers + .insert(header::ACCEPT, header::HeaderValue::from_static("text/html, text/*;q=0.5, image/*, */*;q=0.1")); + default_headers.insert(header::ACCEPT_LANGUAGE, header::HeaderValue::from_static("en,*;q=0.1")); default_headers.insert(header::CACHE_CONTROL, header::HeaderValue::from_static("no-cache")); default_headers.insert(header::PRAGMA, header::HeaderValue::from_static("no-cache")); - default_headers.insert( - header::ACCEPT, - header::HeaderValue::from_static( - "text/html,application/xhtml+xml,application/xml; q=0.9,image/webp,image/apng,*/*;q=0.8", - ), - ); // Reuse the client between requests get_reqwest_client_builder() + .cookie_provider(Arc::new(Jar::default())) 
.timeout(Duration::from_secs(CONFIG.icon_download_timeout())) .default_headers(default_headers) .build() @@ -80,7 +78,7 @@ fn is_valid_domain(domain: &str) -> bool { const ALLOWED_CHARS: &str = "_-."; // If parsing the domain fails using Url, it will not work with reqwest. - if let Err(parse_error) = Url::parse(format!("https://{}", domain).as_str()) { + if let Err(parse_error) = url::Url::parse(format!("https://{}", domain).as_str()) { debug!("Domain parse error: '{}' - {:?}", domain, parse_error); return false; } else if domain.is_empty() @@ -360,7 +358,51 @@ impl Icon { } } -fn get_favicons_node(node: &std::rc::Rc, icons: &mut Vec, url: &Url) { +/// Iterates over the HTML document to find the `base` tag and its `href` attribute. +/// When found it will stop the iteration and the found base href will be stored via `base_href`. +/// +/// # Arguments +/// * `node` - A Parsed HTML document via html5ever::parse_document() +/// * `base_href` - a mutable url::Url which will be overwritten when a base href tag has been found. +/// +fn get_base_href(node: &std::rc::Rc, base_href: &mut url::Url) -> bool { + if let markup5ever_rcdom::NodeData::Element { + name, + attrs, + .. + } = &node.data + { + if name.local.as_ref() == "base" { + let attrs = attrs.borrow(); + for attr in attrs.iter() { + let attr_name = attr.name.local.as_ref(); + let attr_value = attr.value.as_ref(); + + if attr_name == "href" { + debug!("Found base href: {}", attr_value); + *base_href = match base_href.join(attr_value) { + Ok(href) => href, + _ => base_href.clone(), + }; + return true; + } + } + return true; + } + } + + // TODO: Might want to limit the recursion depth? + for child in node.children.borrow().iter() { + // Check if we got a true back and stop the iter. + // This means we found a `base` tag and can stop processing the html.
+ if get_base_href(child, base_href) { + return true; + } + } + false +} + +fn get_favicons_node(node: &std::rc::Rc, icons: &mut Vec, url: &url::Url) { if let markup5ever_rcdom::NodeData::Element { name, attrs, @@ -406,12 +448,11 @@ fn get_favicons_node(node: &std::rc::Rc, icons: &mut Ve struct IconUrlResult { iconlist: Vec, - cookies: String, referer: String, } -/// Returns a Result/Tuple which holds a Vector IconList and a string which holds the cookies from the last response. -/// There will always be a result with a string which will contain https://example.com/favicon.ico and an empty string for the cookies. +/// Returns an IconUrlResult which holds a Vector IconList and a string which holds the referer. +/// There will always be two items within the iconlist, which hold http(s)://domain.tld/favicon.ico. /// This does not mean that that location does exists, but it is the default location browser use. /// /// # Argument @@ -419,8 +460,8 @@ struct IconUrlResult { /// /// # Example /// ``` -/// let (mut iconlist, cookie_str) = get_icon_url("github.com")?; -/// let (mut iconlist, cookie_str) = get_icon_url("gitlab.com")?; +/// let icon_result = get_icon_url("github.com")?; +/// let icon_result = get_icon_url("vaultwarden.discourse.group")?; /// ``` fn get_icon_url(domain: &str) -> Result { // Default URL with secure and insecure schemes @@ -468,32 +509,12 @@ fn get_icon_url(domain: &str) -> Result { // Create the iconlist let mut iconlist: Vec = Vec::new(); - - // Create the cookie_str to fill it all the cookies from the response - // These cookies can be used to request/download the favicon image. - // Some sites have extra security in place with for example XSRF Tokens.
- let mut cookie_str = "".to_string(); - let mut referer = "".to_string(); + let mut referer = String::from(""); if let Ok(content) = resp { // Extract the URL from the respose in case redirects occured (like @ gitlab.com) let url = content.url().clone(); - // Get all the cookies and pass it on to the next function. - // Needed for XSRF Cookies for example (like @ mijn.ing.nl) - let raw_cookies = content.headers().get_all("set-cookie"); - cookie_str = raw_cookies - .iter() - .filter_map(|raw_cookie| raw_cookie.to_str().ok()) - .map(|cookie_str| { - if let Ok(cookie) = Cookie::parse(cookie_str) { - format!("{}={}; ", cookie.name(), cookie.value()) - } else { - String::new() - } - }) - .collect::(); - // Set the referer to be used on the final request, some sites check this. // Mostly used to prevent direct linking and other security resons. referer = url.as_str().to_string(); @@ -501,16 +522,17 @@ fn get_icon_url(domain: &str) -> Result { // Add the default favicon.ico to the list with the domain the content responded from. iconlist.push(Icon::new(35, String::from(url.join("/favicon.ico").unwrap()))); - // 512KB should be more than enough for the HTML, though as we only really need - // the HTML header, it could potentially be reduced even further - let mut limited_reader = content.take(512 * 1024); + // 384KB should be more than enough for the HTML, though as we only really need the HTML header. 
+ let mut limited_reader = content.take(384 * 1024); use html5ever::tendril::TendrilSink; let dom = html5ever::parse_document(markup5ever_rcdom::RcDom::default(), Default::default()) .from_utf8() .read_from(&mut limited_reader)?; - get_favicons_node(&dom.document, &mut iconlist, &url); + let mut base_url: url::Url = url; + get_base_href(&dom.document, &mut base_url); + get_favicons_node(&dom.document, &mut iconlist, &base_url); } else { // Add the default favicon.ico to the list with just the given domain iconlist.push(Icon::new(35, format!("{}/favicon.ico", ssldomain))); @@ -523,24 +545,20 @@ fn get_icon_url(domain: &str) -> Result { // There always is an icon in the list, so no need to check if it exists, and just return the first one Ok(IconUrlResult { iconlist, - cookies: cookie_str, referer, }) } fn get_page(url: &str) -> Result { - get_page_with_cookies(url, "", "") + get_page_with_referer(url, "") } -fn get_page_with_cookies(url: &str, cookie_str: &str, referer: &str) -> Result { - if is_domain_blacklisted(Url::parse(url).unwrap().host_str().unwrap_or_default()) { +fn get_page_with_referer(url: &str, referer: &str) -> Result { + if is_domain_blacklisted(url::Url::parse(url).unwrap().host_str().unwrap_or_default()) { err!("Favicon rel linked to a blacklisted domain!"); } let mut client = CLIENT.get(url); - if !cookie_str.is_empty() { - client = client.header("Cookie", cookie_str) - } if !referer.is_empty() { client = client.header("Referer", referer) } @@ -573,7 +591,7 @@ fn get_icon_priority(href: &str, sizes: Option<&str>) -> u8 { 1 } else if width == 64 { 2 - } else if (24..=128).contains(&width) { + } else if (24..=192).contains(&width) { 3 } else if width == 16 { 4 @@ -661,7 +679,7 @@ fn download_icon(domain: &str) -> Result<(Vec, Option<&str>), Error> { _ => warn!("Extracted icon from data:image uri is invalid"), }; } else { - match get_page_with_cookies(&icon.href, &icon_result.cookies, &icon_result.referer) { + match get_page_with_referer(&icon.href, 
&icon_result.referer) { Ok(mut res) => { res.copy_to(&mut buffer)?; // Check if the icon type is allowed, else try an icon from the list. @@ -706,7 +724,54 @@ fn get_icon_type(bytes: &[u8]) -> Option<&'static str> { [0, 0, 1, 0, ..] => Some("x-icon"), [82, 73, 70, 70, ..] => Some("webp"), [255, 216, 255, ..] => Some("jpeg"), + [71, 73, 70, 56, ..] => Some("gif"), [66, 77, ..] => Some("bmp"), _ => None, } } + +/// This is an implementation of the default Cookie Jar from Reqwest and reqwest_cookie_store build by pfernie. +/// The default cookie jar used by Reqwest keeps all the cookies based upon the Max-Age or Expires which could be a long time. +/// That could be used for tracking, to prevent this we force the lifespan of the cookies to always be max two minutes. +/// A Cookie Jar is needed because some sites force a redirect with cookies to verify if a request uses cookies or not. +use cookie_store::CookieStore; +#[derive(Default)] +pub struct Jar(RwLock); + +impl reqwest::cookie::CookieStore for Jar { + fn set_cookies(&self, cookie_headers: &mut dyn Iterator, url: &url::Url) { + use cookie::{Cookie as RawCookie, ParseError as RawCookieParseError}; + use time::Duration; + + let mut cookie_store = self.0.write().unwrap(); + let cookies = cookie_headers.filter_map(|val| { + std::str::from_utf8(val.as_bytes()) + .map_err(RawCookieParseError::from) + .and_then(RawCookie::parse) + .map(|mut c| { + c.set_expires(None); + c.set_max_age(Some(Duration::minutes(2))); + c.into_owned() + }) + .ok() + }); + cookie_store.store_response_cookies(cookies, url); + } + + fn cookies(&self, url: &url::Url) -> Option { + use bytes::Bytes; + + let cookie_store = self.0.read().unwrap(); + let s = cookie_store + .get_request_values(url) + .map(|(name, value)| format!("{}={}", name, value)) + .collect::>() + .join("; "); + + if s.is_empty() { + return None; + } + + header::HeaderValue::from_maybe_shared(Bytes::from(s)).ok() + } +} diff --git a/src/main.rs b/src/main.rs index 
13f60481..80d2c242 100644 --- a/src/main.rs +++ b/src/main.rs @@ -122,6 +122,9 @@ fn init_logging(level: log::LevelFilter) -> Result<(), fern::InitError> { // Never show html5ever and hyper::proto logs, too noisy .level_for("html5ever", log::LevelFilter::Off) .level_for("hyper::proto", log::LevelFilter::Off) + .level_for("hyper::client", log::LevelFilter::Off) + // Prevent cookie_store logs + .level_for("cookie_store", log::LevelFilter::Off) .chain(std::io::stdout()); // Enable smtp debug logging only specifically for smtp when need.