From bbb945e3ce4ca791b6538dad5978515bca2d13ed Mon Sep 17 00:00:00 2001 From: R0flcopt3r Date: Tue, 5 Oct 2021 21:44:33 +0200 Subject: Adds url preview hook --- src/hooks/mod.rs | 1 + src/hooks/url.rs | 190 +++++++++++++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 191 insertions(+) create mode 100644 src/hooks/url.rs (limited to 'src/hooks') diff --git a/src/hooks/mod.rs b/src/hooks/mod.rs index c5d6e1d..924fe2e 100644 --- a/src/hooks/mod.rs +++ b/src/hooks/mod.rs @@ -7,6 +7,7 @@ pub mod intensify; pub mod pet; pub mod sed; pub mod shifty_eyes; +pub mod url; pub use intensify::intensify; pub use shifty_eyes::shifty_eyes; diff --git a/src/hooks/url.rs b/src/hooks/url.rs new file mode 100644 index 0000000..21f4b06 --- /dev/null +++ b/src/hooks/url.rs @@ -0,0 +1,190 @@ +use anyhow::{bail, Context, Error, Result}; +use irc::client::prelude::*; + +use regex::Regex; + +extern crate kuchiki; +use kuchiki::{parse_html, traits::*}; +use reqwest::{get, Url}; +use tracing::trace; + +pub const URL_REGEX: &str = r#"(https?://|www.)\S+"#; + +#[tracing::instrument] +pub fn url_parser(msg: &str) -> Vec { + let url_regex = Regex::new(URL_REGEX).unwrap(); + + url_regex + .find_iter(msg) + .map(|u| u.as_str().to_string().replace("www.", "https://")) + .collect::>() +} + +#[tracing::instrument] +pub async fn url_title(url: &str) -> Result { + let body = get(Url::parse(url).context("Failed to parse url")?) + .await + .context("Failed to make request")? + .text() + .await + .context("failed to get request response text")?; + + let document = parse_html().one(body); + match document.select("title") { + Ok(title) => Ok(title + .into_iter() + .nth(0) + .context("title did not have text")? + .text_contents()), + Err(_) => bail!("could not find title"), + } +} + +#[tracing::instrument(skip(bot))] +pub async fn url_preview(bot: &crate::Bot, msg: Message) -> Result<()> { + if let Command::PRIVMSG(target, text) = msg.command.clone() { + let mut futures: Vec> = Vec::new(); + + for url in url_parser(&text) { + futures.push(tokio::spawn(async move { + trace!("got url: {:?}", url); + match url_title(&url.as_str()).await { + Ok(title) => { + trace!("extracted title from url: {:?}, {:?}", title, url); + Ok(title) + } + Err(err) => bail!("Failed to get urls title: {:?}", err), + } + })) + } + + let titles = futures::future::join_all(futures).await; + + let titles: Vec = titles + .into_iter() + .filter_map(|x| x.ok()) + .filter_map(|x| x.ok()) + .collect(); + + if !titles.is_empty() { + bot.send_privmsg(&target, &msg_builder(&titles))?; + } + } + Ok(()) +} + +#[tracing::instrument] +pub fn msg_builder(titles: &Vec) -> String { + format!( + "Title{}: {}", + if titles.len() > 1 { "s" } else { "" }, + titles.join(" --- ") + ) +} + +#[cfg(test)] +mod tests { + + use super::msg_builder; + use super::url_parser; + use super::url_title; + use mockito; + + #[test] + fn test_url_titel() { + assert!(tokio_test::block_on(url_title(&mockito::server_url())).is_err()); + + let _m = mockito::mock("GET", "/") + .with_body( + r#" + + + This is test site + + +

some heading

+ + +"#, + ) + .create(); + mockito::start(); + + let title: String = tokio_test::block_on(url_title(&mockito::server_url())).unwrap(); + assert_eq!(title.as_str(), "This is test site"); + } + #[test] + fn test_url_parser() { + let url = url_parser("some message https://news.ycombinator.com/ here"); + assert_eq!(url[0], "https://news.ycombinator.com/"); + + let url = url_parser("no url here!"); + assert!(url.is_empty()); + + let url = url_parser( + &[ + "https://new.ycombinator.com/ ", + "http://news.ycombinator.com/ ", + "www.google.com", + ] + .concat(), + ); + assert_eq!(url.len(), 3); + } + + #[test] + fn test_msg_builder() { + let msg = msg_builder(&Vec::from(["hello".to_string(), "world".to_string()])); + assert_eq!("Titles: hello --- world", msg); + + let msg = msg_builder(&Vec::from(["hello".to_string()])); + assert_eq!("Title: hello", msg); + } + + #[test] + /** + Integration test ish. this tries to replicate url_preview, to make sure + everything works together. + */ + fn test_all() { + let _urls = [ + mockito::mock("GET", "/1") + .with_body( + r#" + + + test site 1 + + +"#, + ) + .create(), + mockito::mock("GET", "/2") + .with_body( + r#" + + + test site 2 + +"#, + ) + .create(), + ]; + + let mut titles: Vec = Vec::new(); + let text = format!( + "some text {u}/1 other text {u}/2", + u = &mockito::server_url() + ); + let urls = url_parser(&text); + + assert_eq!(urls.len(), 2); + + for url in &urls { + if let Ok(title) = tokio_test::block_on(url_title(&url.as_str())) { + titles.push(title); + } + } + assert_eq!(msg_builder(&titles), "Titles: test site 1 --- test site 2"); + } +} -- cgit v1.2.3