aboutsummaryrefslogtreecommitdiff
path: root/src/hooks/sed
diff options
context:
space:
mode:
Diffstat (limited to 'src/hooks/sed')
-rw-r--r-- src/hooks/sed/mod.rs 252
-rw-r--r-- src/hooks/sed/parser.rs 344
2 files changed, 596 insertions, 0 deletions
diff --git a/src/hooks/sed/mod.rs b/src/hooks/sed/mod.rs
new file mode 100644
index 0000000..3128372
--- /dev/null
+++ b/src/hooks/sed/mod.rs
@@ -0,0 +1,252 @@
+use anyhow::{anyhow, bail, Context, Result};
+use irc::client::prelude::*;
+
+use std::collections::HashMap;
+
+#[allow(dead_code)] // NOTE(review): presumably covers parser items this module never calls (e.g. `from_str_multiple`) - confirm
+mod parser;
+
+static LOG_MAX_SIZE: usize = 10000; // maximum number of entries kept per target; the oldest entry is dropped once this is reached
+
+thread_local!(static RE: regex::Regex = regex::Regex::new(r"^s/").unwrap()); // matches text starting with `s/`; used to skip logged sed commands when searching the log
+
+pub struct Sed(HashMap<String, Vec<(String, String)>>); // PRIVMSG target -> log of (sender nickname, message text), oldest entry first
+
+impl Sed {
+    /// Creates a hook with an empty message log.
+    pub fn new() -> Sed {
+        Sed(HashMap::new())
+    }
+
+    /// Hook entry point that records `msg` in the log of its target.
+    ///
+    /// # Errors
+    /// Returns an error (with the context "failed to log new message") when
+    /// [`Sed::log_msg`] fails.
+    pub fn log(&mut self, _bot: &crate::Bot, msg: Message) -> Result<()> {
+        self.log_msg(msg).context("failed to log new message")
+    }
+
+    /// Appends a PRIVMSG to the log of its target; any other command is ignored.
+    ///
+    /// CTCP ACTION messages are stored with the `\x01ACTION` marker rewritten
+    /// to `\x01\x01` so that `find_and_replace` can recognise them later.
+    /// Once a log holds `LOG_MAX_SIZE` entries the oldest one is dropped
+    /// before the new one is pushed.
+    ///
+    /// # Errors
+    /// Returns an error when the PRIVMSG carries no source nickname. This
+    /// used to be an `unwrap()` and therefore a panic.
+    fn log_msg(&mut self, msg: Message) -> Result<()> {
+        // Match by reference: only the two strings that end up in the log are
+        // cloned, not the whole command.
+        if let Command::PRIVMSG(target, text) = &msg.command {
+            let nick = msg
+                .source_nickname()
+                .context("PRIVMSG has no source nickname")?
+                .to_string();
+
+            let text = if text.starts_with("\x01ACTION") {
+                text.replace("\x01ACTION", "\x01\x01")
+            } else {
+                text.clone()
+            };
+
+            let log = self
+                .0
+                .entry(target.clone())
+                .or_insert_with(|| Vec::with_capacity(LOG_MAX_SIZE));
+
+            // NOTE: `remove(0)` shifts every remaining entry. A `VecDeque`
+            // would avoid that, but the storage type is declared outside of
+            // this impl block.
+            if log.len() >= LOG_MAX_SIZE {
+                log.remove(0);
+            }
+            log.push((nick, text));
+        }
+
+        Ok(())
+    }
+
+    /// Hook entry point that applies the sed expression in `msg` to the most
+    /// recent matching log entry and sends the result to the response target.
+    ///
+    /// # Errors
+    /// Returns an error when no replacement could be produced (the cause from
+    /// `find_and_replace` is kept in the error chain), when `msg` has no
+    /// response target, or when sending the reply fails.
+    pub fn replace(&mut self, bot: &crate::Bot, msg: Message) -> Result<()> {
+        let res = self
+            .find_and_replace(&msg)
+            .with_context(|| format!("did not find match for: {:?}", msg))?;
+
+        let target = msg
+            .response_target()
+            .context("message has no response target")?;
+
+        // The arguments follow the order of the placeholders: message first,
+        // channel second (they were swapped before).
+        bot.send_privmsg(target, res.as_str()).map_err(|_| {
+            anyhow!(
+                "failed to send message: \"{:?}\" to channel: {:?}",
+                res,
+                target
+            )
+        })?;
+
+        Ok(())
+    }
+
+    /// Parses the sed expression carried by `msg` and applies it to the newest
+    /// entry in the target's log that matches the expression and is not itself
+    /// a sed command (text starting with `s/`).
+    ///
+    /// Regular messages are rendered as `<nick> text`. Logged ACTION messages
+    /// (stored with a `\x01\x01` prefix) have every `\x01` stripped and are
+    /// rendered as `* nick` directly followed by the text.
+    ///
+    /// # Errors
+    /// Returns an error when `msg` is not a PRIVMSG, the expression does not
+    /// parse, the target has no log, or no log entry matches.
+    fn find_and_replace(&mut self, msg: &Message) -> Result<String> {
+        let (target, text) = match &msg.command {
+            Command::PRIVMSG(target, text) => (target, text),
+            _ => bail!("not a privmsg"),
+        };
+
+        let cmd = parser::Command::from_str(text).context("failed to parse sed command")?;
+
+        let log = self
+            .0
+            .get(target)
+            .context("did not find log for current channel")?;
+
+        // Search newest-first so the most recent matching line wins.
+        let (nick, line) = log
+            .iter()
+            .rev()
+            .find(|(_, line)| cmd.regex().is_match(line) && !RE.with(|re| re.is_match(line)))
+            .context("replace failed")?;
+
+        let reply = if line.starts_with("\x01\x01") {
+            format!("* {}{}", nick, cmd.execute(&line.replace("\x01", "")))
+        } else {
+            format!("<{}> {}", nick, cmd.execute(line))
+        };
+
+        Ok(reply)
+    }
+}
+
+#[cfg(test)]
+mod tests {
+    use super::*;
+
+    /// Builds a PRIVMSG sent by `user` to the target `user` carrying `text`.
+    fn privmsg(text: &str) -> Message {
+        Message::new(Some("user!user@user.com"), "PRIVMSG", vec!["user", text]).unwrap()
+    }
+
+    /// Builds the prefix-less PRIVMSG that carries a sed expression.
+    fn sed_request(expr: &str) -> Message {
+        Message {
+            tags: None,
+            prefix: None,
+            command: Command::PRIVMSG("user".to_string(), expr.to_string()),
+        }
+    }
+
+    /// The `(nick, text)` tuple the log is expected to hold for `text`.
+    fn entry(text: &str) -> (String, String) {
+        ("user".to_string(), text.to_string())
+    }
+
+    /// Returns a `Sed` whose "user" log holds exactly `LOG_MAX_SIZE` entries:
+    /// the oldest one contains "will be", all later ones are filler.
+    pub fn populate_log() -> Sed {
+        let oldest = "this is a long message which will be replaced";
+        let filler = "this is a long message which doesn't matter";
+
+        let mut sed = Sed::new();
+        let texts = std::iter::once(oldest).chain(std::iter::repeat(filler).take(LOG_MAX_SIZE - 1));
+        for text in texts {
+            sed.log_msg(privmsg(text)).unwrap();
+        }
+
+        sed
+    }
+
+    /// Filling the log to the limit keeps the first entry; one more message
+    /// evicts it.
+    #[test]
+    fn test_log_push_max() {
+        let mut sed = Sed::new();
+
+        sed.log_msg(privmsg("one")).unwrap();
+        (0..LOG_MAX_SIZE - 2).for_each(|_| sed.log_msg(privmsg("two")).unwrap());
+        sed.log_msg(privmsg("three")).unwrap();
+
+        {
+            let log = &sed.0["user"];
+            assert_eq!(log[LOG_MAX_SIZE - 1], entry("three"));
+            assert_eq!(log[0], entry("one"));
+        }
+
+        sed.log_msg(privmsg("four")).unwrap();
+
+        {
+            let log = &sed.0["user"];
+            assert_eq!(log[LOG_MAX_SIZE - 1], entry("four"));
+            assert_eq!(log[0], entry("two"));
+        }
+    }
+
+    /// The log never grows past `LOG_MAX_SIZE` entries.
+    #[test]
+    fn test_log_limit() {
+        let mut sed = populate_log();
+        assert_eq!(sed.0["user"].len(), LOG_MAX_SIZE);
+
+        sed.log_msg(privmsg("this is the 10001th message")).unwrap();
+        assert_eq!(sed.0["user"].len(), LOG_MAX_SIZE);
+    }
+
+    /// A plain substitution reaches the oldest entry and bolds the replacement.
+    #[test]
+    fn test_replace() {
+        let mut sed = populate_log();
+        let reply = sed.find_and_replace(&sed_request("s/will be/has been/"));
+
+        assert_eq!(
+            reply.unwrap(),
+            "<user> this is a long message which \x02has been\x02 replaced"
+        )
+    }
+
+    /// Capture groups can be referenced from the replacement.
+    #[test]
+    fn test_replace_complex() {
+        let mut sed = populate_log();
+        let reply = sed.find_and_replace(&sed_request("s/(will).*(be)/$2 $1/"));
+
+        assert_eq!(
+            reply.unwrap(),
+            "<user> this is a long message which \x02be will\x02 replaced"
+        )
+    }
+}
+
+#[cfg(all(test, feature = "bench"))]
+mod bench {
+    use super::*;
+    use test::Bencher;
+
+    /// Builds the prefix-less PRIVMSG that carries the sed expression `expr`.
+    fn request(expr: &str) -> Message {
+        Message {
+            tags: None,
+            prefix: None,
+            command: Command::PRIVMSG("user".to_string(), expr.to_string()),
+        }
+    }
+
+    /// Measures a plain substitution against a completely filled log.
+    #[bench]
+    fn bench_replace(b: &mut Bencher) {
+        let mut sed = tests::populate_log();
+        // The request is rebuilt on every iteration, so its construction is
+        // part of the measured work.
+        b.iter(|| sed.find_and_replace(&request("s/will be/has been/")));
+    }
+
+    /// Measures a substitution with capture groups against a completely
+    /// filled log.
+    #[bench]
+    fn bench_replace_complex(b: &mut Bencher) {
+        let mut sed = tests::populate_log();
+        b.iter(|| sed.find_and_replace(&request("s/(will).*(be)/$2 $1/")));
+    }
+}
diff --git a/src/hooks/sed/parser.rs b/src/hooks/sed/parser.rs
new file mode 100644
index 0000000..eb7ef3e
--- /dev/null
+++ b/src/hooks/sed/parser.rs
@@ -0,0 +1,344 @@
+use std::{borrow::Cow, str::Chars};
+
+use bitflags::bitflags;
+use regex::Regex;
+
+use crate::util::formatting::Formatting;
+
+type Commands = Vec<Command>; // list type returned by `Command::from_str_multiple`
+
+#[derive(Debug, Clone)]
+pub struct Command { // one parsed `s/left/right/flags` substitution
+ left: Regex, // compiled search pattern, built with an inline `(?flags)` prefix
+ right: String, // replacement text after `.bold()` has been applied to it
+ flags: Flags, // flags parsed from the text following the replacement
+}
+
+impl PartialEq for Command {
+    /// Two commands are equal when their pattern source text, replacement
+    /// string and flags are all equal; the patterns are compared through
+    /// `Regex::as_str`.
+    fn eq(&self, other: &Self) -> bool {
+        let lhs = (self.left.as_str(), &self.right, self.flags);
+        let rhs = (other.left.as_str(), &other.right, other.flags);
+
+        lhs == rhs
+    }
+}
+
+#[derive(Debug, Clone, PartialEq, thiserror::Error)]
+pub enum ParseError { // errors returned while parsing a sed command
+ #[error("not a sed command, does not start with 's/'")]
+ NotSedCommand, // the input does not begin with `s/`
+ #[error("unknown flag")]
+ InvalidFlag, // the flag text contains a character other than g, i, s, U, x or `;`
+ #[error(transparent)]
+ InvalidRegex(#[from] regex::Error), // `Regex::new` rejected the search pattern
+}
+
+impl Command {
+    /// Parses a single `s/pattern/replacement/flags` command.
+    ///
+    /// The closing `/` and the flags are optional. Parsing stops at the first
+    /// `;` in the flags; anything after it is ignored.
+    ///
+    /// # Errors
+    /// * [`ParseError::NotSedCommand`] when `input` does not start with `s/`.
+    ///   This includes empty and one-character input, which used to panic.
+    /// * [`ParseError::InvalidFlag`] for an unknown flag character.
+    /// * [`ParseError::InvalidRegex`] when the pattern does not compile.
+    pub fn from_str(input: &str) -> Result<Command, ParseError> {
+        let mut chars = input.chars();
+
+        // Comparing against `Some(..)` handles exhausted input without an
+        // `unwrap()`. `||` short-circuits exactly like the old `&&` did.
+        if chars.next() != Some('s') || chars.next() != Some('/') {
+            return Err(ParseError::NotSedCommand);
+        }
+
+        Command::parse_body(&mut chars)
+    }
+
+    /// Parses a `;`-separated sequence of commands, e.g. `s/a/b/g;s/c/d/`.
+    ///
+    /// Empty input yields an empty list.
+    ///
+    /// # Errors
+    /// Same as [`Command::from_str`], applied to every command. A dangling
+    /// single character after the last command is reported as
+    /// [`ParseError::NotSedCommand`] instead of being silently dropped.
+    pub fn from_str_multiple(input: &str) -> Result<Commands, ParseError> {
+        let mut commands = Commands::new();
+        let mut chars = input.chars();
+
+        loop {
+            match (chars.next(), chars.next()) {
+                // Input exhausted: every command has been read.
+                (None, _) => break,
+                (Some('s'), Some('/')) => commands.push(Command::parse_body(&mut chars)?),
+                _ => return Err(ParseError::NotSedCommand),
+            }
+        }
+
+        Ok(commands)
+    }
+
+    /// Parses `pattern/replacement/flags` once the leading `s/` has been
+    /// consumed from `chars`.
+    fn parse_body(chars: &mut Chars) -> Result<Command, ParseError> {
+        let left = Command::parse_segment(chars)?;
+        let right = Command::parse_segment(chars)?.bold();
+        let flags = Flags::from_chars(chars)?;
+
+        // The flags are compiled into the pattern as an inline `(?flags)`
+        // group. `?` converts `regex::Error` through the `#[from]` impl.
+        let left = Regex::new(&format!("(?{}){}", flags.to_string(), left))?;
+
+        Ok(Command { left, right, flags })
+    }
+
+    /// Reads characters up to the next unescaped `/` or the end of input.
+    ///
+    /// The terminating `/` is consumed but not returned. An escaped slash
+    /// (`\/`) is stored as a plain `/`. Only the directly preceding character
+    /// is inspected, so `\\/` is treated as an escaped slash as well.
+    fn parse_segment(chars: &mut Chars) -> Result<String, ParseError> {
+        let mut last_char = '/';
+        let mut output = String::new();
+
+        for c in chars.by_ref() {
+            if c == '/' {
+                if last_char != '\\' {
+                    break;
+                }
+                // Drop the backslash that escaped this slash.
+                output.pop();
+            }
+
+            output.push(c);
+            last_char = c;
+        }
+
+        Ok(output)
+    }
+
+    /// Applies the substitution to `target`, consuming the command.
+    ///
+    /// Replaces every match when the `g` flag is set, otherwise only the
+    /// first one.
+    pub fn execute(self, target: &str) -> Cow<str> {
+        if self.flags.contains(Flags::GLOBAL) {
+            self.left.replace_all(target, self.right)
+        } else {
+            self.left.replace(target, self.right)
+        }
+    }
+
+    /// Returns the compiled search pattern.
+    pub fn regex(&self) -> &Regex {
+        &self.left
+    }
+}
+
+bitflags! {
+ /// i case-insensitive: letters match both upper and lower case
+ /// m multi-line mode: ^ and $ match begin/end of line
+ /// s allow . to match \n
+ /// U swap the meaning of x* and x*?
+ /// x ignore whitespace and allow line comments (starting with `#`)
+ struct Flags: u32 {
+ const GLOBAL = 0b00000001; // parsed from `g`; makes `Command::execute` replace every match
+ const CASE_INSENSITIVE = 0b00000010; // parsed from `i`
+ const SINGLE_LINE = 0b00001000; // parsed from `s`; bit 0b00000100 is not assigned to any flag
+ const UNGREEDY = 0b00010000; // parsed from `U`
+ const EXTENDED = 0b00100000; // parsed from `x`
+ }
+}
+
+impl Flags {
+    /// Renders the flags as the letters of an inline regex flag group.
+    ///
+    /// `m` is always emitted first. `i`, `s`, `U` and `x` follow in that order
+    /// when the matching flag is set. `GLOBAL` has no letter here.
+    pub fn to_string(&self) -> String {
+        let optional = [
+            (Flags::CASE_INSENSITIVE, 'i'),
+            (Flags::SINGLE_LINE, 's'),
+            (Flags::UNGREEDY, 'U'),
+            (Flags::EXTENDED, 'x'),
+        ];
+
+        let mut rendered = String::from("m");
+        rendered.extend(
+            optional
+                .iter()
+                .filter(|(flag, _)| self.contains(*flag))
+                .map(|(_, letter)| *letter),
+        );
+
+        rendered
+    }
+
+    /// Reads flag characters from `chars` until the input ends or a `;` is
+    /// consumed.
+    ///
+    /// # Errors
+    /// Returns [`ParseError::InvalidFlag`] for any character other than
+    /// `g`, `i`, `s`, `U`, `x` or `;`.
+    pub fn from_chars(chars: &mut Chars) -> Result<Flags, ParseError> {
+        let mut parsed = Flags::empty();
+
+        for c in chars.by_ref() {
+            let flag = match c {
+                'g' => Flags::GLOBAL,
+                'i' => Flags::CASE_INSENSITIVE,
+                's' => Flags::SINGLE_LINE,
+                'U' => Flags::UNGREEDY,
+                'x' => Flags::EXTENDED,
+                // Command separator: stop here, the rest belongs to the caller.
+                ';' => break,
+                _ => return Err(ParseError::InvalidFlag),
+            };
+            parsed.insert(flag);
+        }
+
+        Ok(parsed)
+    }
+}
+
+#[cfg(test)]
+mod tests {
+    use super::*;
+
+    const COMMAND_SIMPLE: &str = "s/replace/replacee/ig";
+    const COMMAND_MULTIPLE: &str = "s/replace/replacee/ig;s/two/tworeplace/i";
+
+    /// Builds the command a parse is expected to produce.
+    fn expected(pattern: &str, replacement: &str, flags: Flags) -> Command {
+        Command {
+            left: Regex::new(pattern).unwrap(),
+            right: replacement.to_string(),
+            flags,
+        }
+    }
+
+    /// Consecutive segments are split on `/`.
+    #[test]
+    fn test_parse_segment() -> Result<(), ParseError> {
+        let mut chars = "replace/replacee/ig".chars();
+
+        let first = Command::parse_segment(&mut chars)?;
+        assert_eq!("replace", first);
+
+        let second = Command::parse_segment(&mut chars)?;
+        assert_eq!("replacee", second);
+
+        Ok(())
+    }
+
+    /// Known flags accumulate; an unknown one is an error.
+    #[test]
+    fn test_flags_from_chars() -> Result<(), ParseError> {
+        let parsed = Flags::from_chars(&mut "ig".chars())?;
+        assert_eq!(Flags::CASE_INSENSITIVE | Flags::GLOBAL, parsed);
+
+        let rejected = Flags::from_chars(&mut "igf".chars());
+        assert_eq!(Err(ParseError::InvalidFlag), rejected);
+
+        Ok(())
+    }
+
+    /// `;` ends the flags and leaves the remainder unread.
+    #[test]
+    fn test_flags_from_chars_with_terminator() -> Result<(), ParseError> {
+        let mut chars = "ig;bla".chars();
+        let parsed = Flags::from_chars(&mut chars)?;
+
+        assert_eq!(Flags::CASE_INSENSITIVE | Flags::GLOBAL, parsed);
+        assert_eq!("bla", chars.as_str());
+
+        Ok(())
+    }
+
+    #[test]
+    fn test_new_command_simple() -> Result<(), ParseError> {
+        let parsed = Command::from_str(COMMAND_SIMPLE)?;
+        let wanted = expected(
+            "(?mi)replace",
+            "\x02replacee\x02",
+            Flags::CASE_INSENSITIVE | Flags::GLOBAL,
+        );
+
+        assert_eq!(parsed, wanted);
+
+        Ok(())
+    }
+
+    #[test]
+    fn test_new_command_simple_escaped_slash() -> Result<(), ParseError> {
+        let parsed = Command::from_str(r#"s/repl\/ace/replacee"#)?;
+        let wanted = expected("(?m)repl/ace", "\x02replacee\x02", Flags::empty());
+
+        assert_eq!(parsed, wanted);
+
+        Ok(())
+    }
+
+    #[test]
+    fn test_new_command_simple_no_terminating_slash() -> Result<(), ParseError> {
+        let parsed = Command::from_str("s/replace/replacee")?;
+        let wanted = expected("(?m)replace", "\x02replacee\x02", Flags::empty());
+
+        assert_eq!(parsed, wanted);
+
+        Ok(())
+    }
+
+    #[test]
+    fn test_new_command_complex_regex() -> Result<(), ParseError> {
+        let parsed =
+            Command::from_str(r#"s/http(?:s?):\/\/regex101\.com\/r\/([a-zA-Z0-9]{1,6})?$/$1/g"#)?;
+        let wanted = expected(
+            r#"(?m)http(?:s?)://regex101\.com/r/([a-zA-Z0-9]{1,6})?$"#,
+            "\x02$1\x02",
+            Flags::GLOBAL,
+        );
+
+        assert_eq!(parsed, wanted);
+
+        Ok(())
+    }
+
+    /// A single command fed to the multi-parser yields a one-element list.
+    #[test]
+    fn test_new_command_multiple_fail() -> Result<(), ParseError> {
+        let parsed = Command::from_str_multiple(COMMAND_SIMPLE)?;
+        let wanted = vec![expected(
+            "(?mi)replace",
+            "\x02replacee\x02",
+            Flags::CASE_INSENSITIVE | Flags::GLOBAL,
+        )];
+
+        assert_eq!(parsed, wanted);
+
+        Ok(())
+    }
+
+    #[test]
+    fn test_new_command_multiple() -> Result<(), ParseError> {
+        let parsed = Command::from_str_multiple(COMMAND_MULTIPLE)?;
+        let wanted = vec![
+            expected(
+                "(?mi)replace",
+                "\x02replacee\x02",
+                Flags::CASE_INSENSITIVE | Flags::GLOBAL,
+            ),
+            expected("(?mi)two", "\x02tworeplace\x02", Flags::CASE_INSENSITIVE),
+        ];
+
+        assert_eq!(parsed, wanted);
+
+        Ok(())
+    }
+
+    /// Executing a command inserts the bolded replacement.
+    #[test]
+    fn test_run_regex() -> Result<(), ParseError> {
+        let cmd = Command::from_str(COMMAND_SIMPLE)?;
+        let output = cmd.execute("this is a sentence to replace text in");
+
+        assert_eq!("this is a sentence to \x02replacee\x02 text in", output);
+
+        Ok(())
+    }
+}