diff options
Diffstat (limited to 'src/hooks/sed')
| -rw-r--r-- | src/hooks/sed/mod.rs | 252 | ||||
| -rw-r--r-- | src/hooks/sed/parser.rs | 344 |
2 files changed, 596 insertions, 0 deletions
diff --git a/src/hooks/sed/mod.rs b/src/hooks/sed/mod.rs new file mode 100644 index 0000000..3128372 --- /dev/null +++ b/src/hooks/sed/mod.rs @@ -0,0 +1,252 @@ +use anyhow::{anyhow, bail, Context, Result}; +use irc::client::prelude::*; + +use std::collections::HashMap; + +#[allow(dead_code)] +mod parser; + +static LOG_MAX_SIZE: usize = 10000; + +thread_local!(static RE: regex::Regex = regex::Regex::new(r"^s/").unwrap()); + +pub struct Sed(HashMap<String, Vec<(String, String)>>); + +impl Sed { + pub fn new() -> Sed { + Sed(HashMap::new()) + } + + pub fn log(&mut self, _bot: &crate::Bot, msg: Message) -> Result<()> { + self.log_msg(msg).context("failed to log new message") + } + + fn log_msg(&mut self, msg: Message) -> Result<()> { + if let Command::PRIVMSG(target, mut text) = msg.command.clone() { + if text.starts_with("\x01ACTION") { + text = text.replace("\x01ACTION", "\x01\x01"); + } + + match self.0.get_mut(&target) { + Some(log) => { + if log.len() >= LOG_MAX_SIZE { + let _ = log.remove(0); + } + log.push((msg.source_nickname().unwrap().to_string(), text)) + } + None => { + let mut log = Vec::with_capacity(LOG_MAX_SIZE); + log.push((msg.source_nickname().unwrap().to_string(), text)); + self.0.insert(target, log); + } + } + } + Ok(()) + } + + pub fn replace(&mut self, bot: &crate::Bot, msg: Message) -> Result<()> { + match self.find_and_replace(&msg) { + Ok(res) => match bot.send_privmsg(msg.response_target().unwrap(), res.as_str()) { + Ok(_) => Ok(()), + Err(_) => bail!( + "failed to send message: \"{:?}\" to channel: {:?}", + msg.response_target().unwrap(), + res + ), + }, + Err(_) => bail!("did not find match for: {:?}", msg), + } + } + + fn find_and_replace(&mut self, msg: &Message) -> Result<String> { + if let Command::PRIVMSG(target, text) = msg.command.clone() { + let cmd = + parser::Command::from_str(text.as_str()).context("failed to parse sed command")?; + + let log = self + .0 + .get(&target) + .context("did not find log for current channel")?; + + return log + .iter() + .rev() + .find(|(_, text)| cmd.regex().is_match(text) && !RE.with(|re| re.is_match(text))) + .and_then(|(nick, text)| { + if text.starts_with("\x01\x01") { + Some(format!( + "* {}{}", + nick, + cmd.execute(&text.replace("\x01", "")) + )) + } else { + Some(format!("<{}> {}", nick, cmd.execute(text))) + } + }) + .map_or(Err(anyhow!("replace failed")), |v| Ok(v)); + } + + Err(anyhow!("not a privmsg")) + } +} + +#[cfg(test)] +mod tests { + use super::*; + + pub fn populate_log() -> Sed { + let mut sed = Sed::new(); + + sed.log_msg( + Message::new( + Some("user!user@user.com"), + "PRIVMSG", + vec!["user", "this is a long message which will be replaced"], + ) + .unwrap(), + ) + .unwrap(); + + for _ in 0..LOG_MAX_SIZE - 1 { + sed.log_msg( + Message::new( + Some("user!user@user.com"), + "PRIVMSG", + vec!["user", "this is a long message which doesn't matter"], + ) + .unwrap(), + ) + .unwrap(); + } + + return sed; + } + + #[test] + fn test_log_push_max() { + let mut sed = Sed::new(); + + sed.log_msg( + Message::new(Some("user!user@user.com"), "PRIVMSG", vec!["user", "one"]).unwrap(), + ) + .unwrap(); + + for _ in 0..LOG_MAX_SIZE - 2 { + sed.log_msg( + Message::new(Some("user!user@user.com"), "PRIVMSG", vec!["user", "two"]).unwrap(), + ) + .unwrap(); + } + sed.log_msg( + Message::new(Some("user!user@user.com"), "PRIVMSG", vec!["user", "three"]).unwrap(), + ) + .unwrap(); + + { + let log = sed.0.get("user").unwrap(); + assert_eq!( + log[LOG_MAX_SIZE - 1], + ("user".to_string(), "three".to_string()) + ); + assert_eq!(log[0], ("user".to_string(), "one".to_string())); + } + + sed.log_msg( + Message::new(Some("user!user@user.com"), "PRIVMSG", vec!["user", "four"]).unwrap(), + ) + .unwrap(); + + { + let log = sed.0.get("user").unwrap(); + + assert_eq!( + log[LOG_MAX_SIZE - 1], + ("user".to_string(), "four".to_string()) + ); + assert_eq!(log[0], ("user".to_string(), "two".to_string())); + } + } + + #[test] + fn test_log_limit() { + let mut sed = populate_log(); + + { + let log = sed.0.get("user").unwrap(); + assert_eq!(log.len(), LOG_MAX_SIZE); + } + + sed.log_msg( + Message::new( + Some("user!user@user.com"), + "PRIVMSG", + vec!["user", "this is the 10001th message"], + ) + .unwrap(), + ) + .unwrap(); + + { + let log = sed.0.get("user").unwrap(); + assert_eq!(log.len(), LOG_MAX_SIZE); + } + } + + #[test] + fn test_replace() { + let mut sed = populate_log(); + assert_eq!( + sed.find_and_replace(&Message { + tags: None, + prefix: None, + command: Command::PRIVMSG("user".to_string(), "s/will be/has been/".to_string(),), + }) + .unwrap(), + "<user> this is a long message which \x02has been\x02 replaced" + ) + } + + #[test] + fn test_replace_complex() { + let mut sed = populate_log(); + assert_eq!( + sed.find_and_replace(&Message { + tags: None, + prefix: None, + command: Command::PRIVMSG("user".to_string(), "s/(will).*(be)/$2 $1/".to_string(),), + }) + .unwrap(), + "<user> this is a long message which \x02be will\x02 replaced" + ) + } +} + +#[cfg(all(test, feature = "bench"))] +mod bench { + use super::*; + use test::Bencher; + + #[bench] + fn bench_replace(b: &mut Bencher) { + let mut sed = tests::populate_log(); + b.iter(|| { + sed.find_and_replace(&Message { + tags: None, + prefix: None, + command: Command::PRIVMSG("user".to_string(), "s/will be/has been/".to_string()), + }) + }); + } + + #[bench] + fn bench_replace_complex(b: &mut Bencher) { + let mut sed = tests::populate_log(); + b.iter(|| { + sed.find_and_replace(&Message { + tags: None, + prefix: None, + command: Command::PRIVMSG("user".to_string(), "s/(will).*(be)/$2 $1/".to_string()), + }) + }); + } +} diff --git a/src/hooks/sed/parser.rs b/src/hooks/sed/parser.rs new file mode 100644 index 0000000..eb7ef3e --- /dev/null +++ b/src/hooks/sed/parser.rs @@ -0,0 +1,344 @@ +use std::{borrow::Cow, str::Chars}; + +use bitflags::bitflags; +use regex::Regex; + +use crate::util::formatting::Formatting; + +type Commands = Vec<Command>; + +#[derive(Debug, Clone)] +pub struct Command { + left: Regex, + right: String, + flags: Flags, +} + +impl PartialEq for Command { + fn eq(&self, other: &Self) -> bool { + self.left.as_str() == other.left.as_str() + && self.right == other.right + && self.flags == other.flags + } +} + +#[derive(Debug, Clone, PartialEq, thiserror::Error)] +pub enum ParseError { + #[error("not a sed command, does not start with 's/'")] + NotSedCommand, + #[error("unknown flag")] + InvalidFlag, + #[error(transparent)] + InvalidRegex(#[from] regex::Error), +} + +impl Command { + pub fn from_str(input: &str) -> Result<Command, ParseError> { + let mut chars = input.chars(); + + if chars.next().unwrap() == 's' && chars.next().unwrap() == '/' { + let left = Command::parse_segment(&mut chars)?; + let right = Command::parse_segment(&mut chars)?.bold(); + let flags = Flags::from_chars(&mut chars)?; + + let left = Regex::new(&format!("(?{}){}", flags.to_string(), left)) + .map_err(|err| ParseError::InvalidRegex(err))?; + + return Ok(Command { left, right, flags }); + } else { + return Err(ParseError::NotSedCommand); + } + } + + pub fn from_str_multiple(input: &str) -> Result<Commands, ParseError> { + let mut commands = Commands::new(); + + let mut chars = input.chars(); + + loop { + let s = chars.next(); + let slash = chars.next(); + + if s.is_some() && slash.is_some() { + if s.unwrap() == 's' && slash.unwrap() == '/' { + let left = Command::parse_segment(&mut chars)?; + let right = Command::parse_segment(&mut chars)?.bold(); + let flags = Flags::from_chars(&mut chars)?; + + let left = Regex::new(&format!("(?{}){}", flags.to_string(), left)) + .map_err(|err| ParseError::InvalidRegex(err))?; + + commands.push(Command { left, right, flags }); + } else { + return Err(ParseError::NotSedCommand); + } + } else { + break; + } + } + + Ok(commands) + } + + fn parse_segment(chars: &mut Chars) -> Result<String, ParseError> { + let mut last_char = '/'; + let mut output = String::new(); + + while let Some(c) = chars.next() { + if c == '/' && last_char != '\\' { + break; + } else if c == '/' && last_char == '\\' { + output.pop().unwrap(); + } + + output.push(c); + last_char = c; + } + + Ok(output) + } + + pub fn execute(self, target: &str) -> Cow<str> { + let result: Cow<str>; + + if self.flags.contains(Flags::GLOBAL) { + result = self.left.replace_all(target, self.right); + } else { + result = self.left.replace(target, self.right); + } + + return result; + } + + pub fn regex(&self) -> &Regex { + &self.left + } +} + +bitflags! { + /// i case-insensitive: letters match both upper and lower case + /// m multi-line mode: ^ and $ match begin/end of line + /// s allow . to match \n + /// U swap the meaning of x* and x*? + /// x ignore whitespace and allow line comments (starting with `#`) + struct Flags: u32 { + const GLOBAL = 0b00000001; + const CASE_INSENSITIVE = 0b00000010; + const SINGLE_LINE = 0b00001000; + const UNGREEDY = 0b00010000; + const EXTENDED = 0b00100000; + } +} + +impl Flags { + pub fn to_string(&self) -> String { + let mut result = String::new(); + + result.push('m'); + + if self.contains(Flags::CASE_INSENSITIVE) { + result.push('i'); + } + + if self.contains(Flags::SINGLE_LINE) { + result.push('s'); + } + + if self.contains(Flags::UNGREEDY) { + result.push('U'); + } + + if self.contains(Flags::EXTENDED) { + result.push('x'); + } + + return result; + } + + pub fn from_chars(chars: &mut Chars) -> Result<Flags, ParseError> { + let mut flags: Flags = Flags::empty(); + + while let Some(c) = chars.next() { + match c { + 'g' => { + flags = flags | Flags::GLOBAL; + } + 'i' => { + flags = flags | Flags::CASE_INSENSITIVE; + } + 's' => { + flags = flags | Flags::SINGLE_LINE; + } + 'U' => { + flags = flags | Flags::UNGREEDY; + } + 'x' => { + flags = flags | Flags::EXTENDED; + } + ';' => return Ok(flags), + _ => return Err(ParseError::InvalidFlag), + }; + } + + Ok(flags) + } +} + +#[cfg(test)] +mod tests { + use super::*; + + const COMMAND_SIMPLE: &str = "s/replace/replacee/ig"; + const COMMAND_MULTIPLE: &str = "s/replace/replacee/ig;s/two/tworeplace/i"; + + #[test] + fn test_parse_segment() -> Result<(), ParseError> { + let mut chars = "replace/replacee/ig".chars(); + + let left = "replace"; + let right = Command::parse_segment(&mut chars)?; + + assert_eq!(left, right); + + let left = "replacee"; + let right = Command::parse_segment(&mut chars)?; + + assert_eq!(left, right); + + Ok(()) + } + + #[test] + fn test_flags_from_chars() -> Result<(), ParseError> { + let mut chars = "ig".chars(); + + let left = Flags::CASE_INSENSITIVE | Flags::GLOBAL; + let right = Flags::from_chars(&mut chars)?; + + assert_eq!(left, right); + + let mut chars = "igf".chars(); + let right = Flags::from_chars(&mut chars); + + assert_eq!(Err(ParseError::InvalidFlag), right); + + Ok(()) + } + + #[test] + fn test_flags_from_chars_with_terminator() -> Result<(), ParseError> { + let mut chars = "ig;bla".chars(); + + let left = Flags::CASE_INSENSITIVE | Flags::GLOBAL; + let right = Flags::from_chars(&mut chars)?; + + assert_eq!(left, right); + assert_eq!("bla", chars.as_str()); + + Ok(()) + } + + #[test] + fn test_new_command_simple() -> Result<(), ParseError> { + let left = Command::from_str(COMMAND_SIMPLE)?; + let right = Command { + left: Regex::new("(?mi)replace").unwrap(), + right: "\x02replacee\x02".to_string(), + flags: Flags::CASE_INSENSITIVE | Flags::GLOBAL, + }; + + assert_eq!(left, right); + + Ok(()) + } + + #[test] + fn test_new_command_simple_escaped_slash() -> Result<(), ParseError> { + let left = Command::from_str(r#"s/repl\/ace/replacee"#)?; + let right = Command { + left: Regex::new("(?m)repl/ace").unwrap(), + right: "\x02replacee\x02".to_string(), + flags: Flags::empty(), + }; + + assert_eq!(left, right); + + Ok(()) + } + + #[test] + fn test_new_command_simple_no_terminating_slash() -> Result<(), ParseError> { + let left = Command::from_str("s/replace/replacee")?; + let right = Command { + left: Regex::new("(?m)replace").unwrap(), + right: "\x02replacee\x02".to_string(), + flags: Flags::empty(), + }; + + assert_eq!(left, right); + + Ok(()) + } + + #[test] + fn test_new_command_complex_regex() -> Result<(), ParseError> { + let left = + Command::from_str(r#"s/http(?:s?):\/\/regex101\.com\/r\/([a-zA-Z0-9]{1,6})?$/$1/g"#)?; + let right = Command { + left: Regex::new(r#"(?m)http(?:s?)://regex101\.com/r/([a-zA-Z0-9]{1,6})?$"#).unwrap(), + right: "\x02$1\x02".to_string(), + flags: Flags::GLOBAL, + }; + + assert_eq!(left, right); + + Ok(()) + } + + #[test] + fn test_new_command_multiple_fail() -> Result<(), ParseError> { + let left = Command::from_str_multiple(COMMAND_SIMPLE)?; + let right = vec![Command { + left: Regex::new("(?mi)replace").unwrap(), + right: "\x02replacee\x02".to_string(), + flags: Flags::CASE_INSENSITIVE | Flags::GLOBAL, + }]; + + assert_eq!(left, right); + + Ok(()) + } + + #[test] + fn test_new_command_multiple() -> Result<(), ParseError> { + let left = Command::from_str_multiple(COMMAND_MULTIPLE)?; + let right = vec![ + Command { + left: Regex::new("(?mi)replace").unwrap(), + right: "\x02replacee\x02".to_string(), + flags: Flags::CASE_INSENSITIVE | Flags::GLOBAL, + }, + Command { + left: Regex::new("(?mi)two").unwrap(), + right: "\x02tworeplace\x02".to_string(), + flags: Flags::CASE_INSENSITIVE, + }, + ]; + + assert_eq!(left, right); + + Ok(()) + } + + #[test] + fn test_run_regex() -> Result<(), ParseError> { + let cmd = Command::from_str(COMMAND_SIMPLE)?; + + let left = "this is a sentence to \x02replacee\x02 text in"; + let right = cmd.execute("this is a sentence to replace text in"); + + assert_eq!(left, right); + + Ok(()) + } +} |
