about summary refs log tree commit diff
diff options
context:
space:
mode:
-rw-r--r-- Cargo.lock 11
-rw-r--r-- Cargo.toml 4
-rw-r--r-- src/hooks/sed/mod.rs (renamed from src/hooks/sed.rs) 19
-rw-r--r-- src/hooks/sed/parser.rs 344
4 files changed, 358 insertions, 20 deletions
diff --git a/Cargo.lock b/Cargo.lock
index 70e4c42..cd25dc2 100644
--- a/Cargo.lock
+++ b/Cargo.lock
@@ -138,6 +138,7 @@ dependencies = [
"anyhow",
"async-trait",
"base64",
+ "bitflags",
"catinator_macros",
"figment",
"futures",
@@ -148,9 +149,9 @@ dependencies = [
"regex",
"reqwest",
"sasl",
- "sedregex",
"serde",
"serde_json",
+ "thiserror",
"tokio",
"toml",
"tracing",
@@ -1173,14 +1174,6 @@ dependencies = [
]
[[package]]
-name = "sedregex"
-version = "0.2.4"
-source = "git+https://gitlab.com/audron/sedregex#2ac3be5f56f53122cd89a160bc2ff9f7387a9467"
-dependencies = [
- "regex",
-]
-
-[[package]]
name = "serde"
version = "1.0.125"
source = "registry+https://github.com/rust-lang/crates.io-index"
diff --git a/Cargo.toml b/Cargo.toml
index e0f9d2a..41fe6a0 100644
--- a/Cargo.toml
+++ b/Cargo.toml
@@ -18,6 +18,7 @@ serde_json = "1"
toml = "0.5"
anyhow = "1"
+thiserror = "1"
futures = "0.3"
tokio = { version = "1", features = ["full", "rt-multi-thread"] }
@@ -27,7 +28,6 @@ tracing-subscriber = "0.2"
tracing-futures = "0.2"
regex = "1"
-sedregex = { version = "0.2", git = "https://gitlab.com/audron/sedregex" }
rand = "0.8.3"
@@ -35,6 +35,8 @@ reqwest = { version = "0.11", default-features = false, features = ["rustls-tls"
urlparse = "0.7"
async-trait = "0.1"
+bitflags = "1"
+
[dev-dependencies]
mockito = "0.30.0"
diff --git a/src/hooks/sed.rs b/src/hooks/sed/mod.rs
index 120ac7d..3128372 100644
--- a/src/hooks/sed.rs
+++ b/src/hooks/sed/mod.rs
@@ -1,10 +1,11 @@
use anyhow::{anyhow, bail, Context, Result};
use irc::client::prelude::*;
-use sedregex::ReplaceCommand;
-
use std::collections::HashMap;
+#[allow(dead_code)]
+mod parser;
+
static LOG_MAX_SIZE: usize = 10000;
thread_local!(static RE: regex::Regex = regex::Regex::new(r"^s/").unwrap());
@@ -59,10 +60,8 @@ impl Sed {
fn find_and_replace(&mut self, msg: &Message) -> Result<String> {
if let Command::PRIVMSG(target, text) = msg.command.clone() {
- let cmd = match ReplaceCommand::new(text.as_str()) {
- Ok(cmd) => cmd,
- Err(_) => return Err(anyhow!("building replace command failed")),
- };
+ let cmd =
+ parser::Command::from_str(text.as_str()).context("failed to parse sed command")?;
let log = self
.0
@@ -72,13 +71,13 @@ impl Sed {
return log
.iter()
.rev()
- .find(|(_, text)| cmd.expr.is_match(text) && !RE.with(|re| re.is_match(text)))
+ .find(|(_, text)| cmd.regex().is_match(text) && !RE.with(|re| re.is_match(text)))
.and_then(|(nick, text)| {
if text.starts_with("\x01\x01") {
Some(format!(
"* {}{}",
nick,
- cmd.execute(text.replace("\x01", ""))
+ cmd.execute(&text.replace("\x01", ""))
))
} else {
Some(format!("<{}> {}", nick, cmd.execute(text)))
@@ -203,7 +202,7 @@ mod tests {
command: Command::PRIVMSG("user".to_string(), "s/will be/has been/".to_string(),),
})
.unwrap(),
- "<user> this is a long message which has been replaced"
+ "<user> this is a long message which \x02has been\x02 replaced"
)
}
@@ -217,7 +216,7 @@ mod tests {
command: Command::PRIVMSG("user".to_string(), "s/(will).*(be)/$2 $1/".to_string(),),
})
.unwrap(),
- "<user> this is a long message which be will replaced"
+ "<user> this is a long message which \x02be will\x02 replaced"
)
}
}
diff --git a/src/hooks/sed/parser.rs b/src/hooks/sed/parser.rs
new file mode 100644
index 0000000..eb7ef3e
--- /dev/null
+++ b/src/hooks/sed/parser.rs
@@ -0,0 +1,344 @@
+use std::{borrow::Cow, str::Chars};
+
+use bitflags::bitflags;
+use regex::Regex;
+
+use crate::util::formatting::Formatting;
+
+/// List of parsed commands, as returned by `Command::from_str_multiple`.
+type Commands = Vec<Command>;
+
+/// A parsed sed-style substitution of the form `s/left/right/flags`.
+#[derive(Debug, Clone)]
+pub struct Command {
+ /// Compiled search pattern; it is built with the parsed flags prepended
+ /// as an inline `(?…)` group.
+ left: Regex,
+ /// Replacement text, stored after `.bold()` has been applied to it
+ /// (the tests in this file expect it wrapped in `\x02`).
+ right: String,
+ /// Flags parsed from the part of the command that follows the replacement.
+ flags: Flags,
+}
+
+impl PartialEq for Command {
+    /// Two commands are equal when the source text of their patterns, their
+    /// replacement strings and their flags all match.
+    ///
+    /// The pattern is compared through `as_str()`, i.e. by its textual form
+    /// rather than by the compiled regex itself.
+    fn eq(&self, other: &Self) -> bool {
+        let mine = (self.left.as_str(), &self.right, &self.flags);
+        let theirs = (other.left.as_str(), &other.right, &other.flags);
+
+        mine == theirs
+    }
+}
+
+/// Errors produced while parsing a sed-style substitution command.
+///
+/// `PartialEq` is derived so that the tests in this file can compare
+/// results with `assert_eq!`.
+#[derive(Debug, Clone, PartialEq, thiserror::Error)]
+pub enum ParseError {
+ /// The input does not begin with the characters `s` and `/`.
+ #[error("not a sed command, does not start with 's/'")]
+ NotSedCommand,
+ /// A character in the flag section is not one of the recognised flags
+ /// (and is not the `;` terminator).
+ #[error("unknown flag")]
+ InvalidFlag,
+ /// The search pattern was rejected by the `regex` crate; the underlying
+ /// error is converted via `#[from]` and displayed transparently.
+ #[error(transparent)]
+ InvalidRegex(#[from] regex::Error),
+}
+
+impl Command {
+    /// Parses a single sed-style substitution such as `s/pattern/replacement/flags`.
+    ///
+    /// The closing `/` and the flags are optional. A `;` ends the flag list and
+    /// anything after it is ignored by this function.
+    ///
+    /// # Errors
+    ///
+    /// * [`ParseError::NotSedCommand`] if `input` does not start with `s/`.
+    ///   This includes inputs shorter than two characters, which used to panic.
+    /// * [`ParseError::InvalidFlag`] if the flag section contains an unknown character.
+    /// * [`ParseError::InvalidRegex`] if the pattern is rejected by the regex engine.
+    pub fn from_str(input: &str) -> Result<Command, ParseError> {
+        let mut chars = input.chars();
+
+        match (chars.next(), chars.next()) {
+            (Some('s'), Some('/')) => Command::parse_body(&mut chars),
+            _ => Err(ParseError::NotSedCommand),
+        }
+    }
+
+    /// Parses a `;`-separated sequence of substitutions, e.g. `s/a/b/g;s/c/d/i`.
+    ///
+    /// Parsing stops once fewer than two characters remain, so an empty input
+    /// yields an empty list and a single trailing character is ignored.
+    ///
+    /// # Errors
+    ///
+    /// Same as [`Command::from_str`]; the first failing command aborts the parse.
+    pub fn from_str_multiple(input: &str) -> Result<Commands, ParseError> {
+        let mut commands = Commands::new();
+        let mut chars = input.chars();
+
+        loop {
+            match (chars.next(), chars.next()) {
+                (Some('s'), Some('/')) => commands.push(Command::parse_body(&mut chars)?),
+                (Some(_), Some(_)) => return Err(ParseError::NotSedCommand),
+                // Not enough input left for another `s/` prefix.
+                _ => break,
+            }
+        }
+
+        Ok(commands)
+    }
+
+    /// Parses everything that follows the leading `s/`: the pattern, the
+    /// replacement and the flags, then compiles the pattern.
+    fn parse_body(chars: &mut Chars) -> Result<Command, ParseError> {
+        let left = Command::parse_segment(chars)?;
+        let right = Command::parse_segment(chars)?.bold();
+        let flags = Flags::from_chars(chars)?;
+
+        // The flags are handed to the regex engine as an inline `(?flags)` group.
+        let left = Regex::new(&format!("(?{}){}", flags.to_string(), left))?;
+
+        Ok(Command { left, right, flags })
+    }
+
+    /// Reads characters up to (and consuming) the next unescaped `/`, or to the
+    /// end of the input when no delimiter is left.
+    ///
+    /// `\/` is turned into a plain `/`; every other backslash sequence is kept
+    /// verbatim so that regex escapes such as `\.` survive. Escape state is
+    /// tracked explicitly so that an escaped backslash followed by the
+    /// delimiter (`\\/`) ends the segment instead of being read as `\/`.
+    fn parse_segment(chars: &mut Chars) -> Result<String, ParseError> {
+        let mut output = String::new();
+        let mut escaped = false;
+
+        for c in chars.by_ref() {
+            if escaped {
+                if c == '/' {
+                    // Drop the backslash that was pushed on the previous turn.
+                    output.pop();
+                }
+                escaped = false;
+            } else if c == '/' {
+                break;
+            } else if c == '\\' {
+                escaped = true;
+            }
+
+            output.push(c);
+        }
+
+        Ok(output)
+    }
+
+    /// Applies the substitution to `target`.
+    ///
+    /// With the `g` flag every match is replaced, otherwise only the first one.
+    /// Consumes the command because the replacement string is moved into the
+    /// regex call.
+    pub fn execute(self, target: &str) -> Cow<str> {
+        if self.flags.contains(Flags::GLOBAL) {
+            self.left.replace_all(target, self.right)
+        } else {
+            self.left.replace(target, self.right)
+        }
+    }
+
+    /// Returns the compiled search pattern.
+    pub fn regex(&self) -> &Regex {
+        &self.left
+    }
+}
+
+bitflags! {
+ /// Flags that may follow the replacement segment of a sed command.
+ ///
+ /// g global: replace every match instead of only the first one
+ /// i case-insensitive: letters match both upper and lower case
+ /// m multi-line mode: ^ and $ match begin/end of line
+ /// s allow . to match \n
+ /// U swap the meaning of x* and x*?
+ /// x ignore whitespace and allow line comments (starting with `#`)
+ ///
+ /// There is no constant for `m`: `Flags::to_string` always emits it, and
+ /// `Flags::from_chars` does not accept it as an input character.
+ struct Flags: u32 {
+ const GLOBAL = 0b00000001;
+ const CASE_INSENSITIVE = 0b00000010;
+ // NOTE(review): bit 0b00000100 is unused — presumably reserved for a
+ // multi-line flag; confirm before assigning it.
+ const SINGLE_LINE = 0b00001000;
+ const UNGREEDY = 0b00010000;
+ const EXTENDED = 0b00100000;
+ }
+}
+
+impl Flags {
+    /// Renders the flags as the letters of an inline regex flag group.
+    ///
+    /// The result always starts with `m`; `i`, `s`, `U` and `x` follow, in that
+    /// order, for each corresponding flag that is set. `GLOBAL` has no letter
+    /// here because it is not a regex flag.
+    pub fn to_string(&self) -> String {
+        const LETTERS: [(Flags, char); 4] = [
+            (Flags::CASE_INSENSITIVE, 'i'),
+            (Flags::SINGLE_LINE, 's'),
+            (Flags::UNGREEDY, 'U'),
+            (Flags::EXTENDED, 'x'),
+        ];
+
+        let mut rendered = String::from("m");
+
+        for (flag, letter) in LETTERS.iter() {
+            if self.contains(*flag) {
+                rendered.push(*letter);
+            }
+        }
+
+        rendered
+    }
+
+    /// Reads flag characters from `chars` until the input ends or a `;` is
+    /// consumed, and returns the union of the flags seen.
+    ///
+    /// # Errors
+    ///
+    /// Returns [`ParseError::InvalidFlag`] on the first character that is
+    /// neither a known flag (`g`, `i`, `s`, `U`, `x`) nor `;`.
+    pub fn from_chars(chars: &mut Chars) -> Result<Flags, ParseError> {
+        let mut collected = Flags::empty();
+
+        for symbol in chars.by_ref() {
+            collected |= match symbol {
+                'g' => Flags::GLOBAL,
+                'i' => Flags::CASE_INSENSITIVE,
+                's' => Flags::SINGLE_LINE,
+                'U' => Flags::UNGREEDY,
+                'x' => Flags::EXTENDED,
+                // The terminator has been consumed; leave the rest untouched.
+                ';' => break,
+                _ => return Err(ParseError::InvalidFlag),
+            };
+        }
+
+        Ok(collected)
+    }
+}
+
+// Unit tests for the sed command parser: segment splitting, flag parsing,
+// command construction and execution.
+#[cfg(test)]
+mod tests {
+ use super::*;
+
+ // A single command with the case-insensitive and global flags.
+ const COMMAND_SIMPLE: &str = "s/replace/replacee/ig";
+ // Two commands separated by `;`.
+ const COMMAND_MULTIPLE: &str = "s/replace/replacee/ig;s/two/tworeplace/i";
+
+ // Two consecutive calls on the same iterator yield the two segments in order.
+ #[test]
+ fn test_parse_segment() -> Result<(), ParseError> {
+ let mut chars = "replace/replacee/ig".chars();
+
+ let left = "replace";
+ let right = Command::parse_segment(&mut chars)?;
+
+ assert_eq!(left, right);
+
+ let left = "replacee";
+ let right = Command::parse_segment(&mut chars)?;
+
+ assert_eq!(left, right);
+
+ Ok(())
+ }
+
+ // Known flag letters are combined; an unknown letter (`f`) is an error.
+ #[test]
+ fn test_flags_from_chars() -> Result<(), ParseError> {
+ let mut chars = "ig".chars();
+
+ let left = Flags::CASE_INSENSITIVE | Flags::GLOBAL;
+ let right = Flags::from_chars(&mut chars)?;
+
+ assert_eq!(left, right);
+
+ let mut chars = "igf".chars();
+ let right = Flags::from_chars(&mut chars);
+
+ assert_eq!(Err(ParseError::InvalidFlag), right);
+
+ Ok(())
+ }
+
+ // `;` ends flag parsing and is consumed; the remaining input is left intact.
+ #[test]
+ fn test_flags_from_chars_with_terminator() -> Result<(), ParseError> {
+ let mut chars = "ig;bla".chars();
+
+ let left = Flags::CASE_INSENSITIVE | Flags::GLOBAL;
+ let right = Flags::from_chars(&mut chars)?;
+
+ assert_eq!(left, right);
+ assert_eq!("bla", chars.as_str());
+
+ Ok(())
+ }
+
+ // The regex gets an inline `(?mi)` prefix and the replacement is wrapped in `\x02`.
+ #[test]
+ fn test_new_command_simple() -> Result<(), ParseError> {
+ let left = Command::from_str(COMMAND_SIMPLE)?;
+ let right = Command {
+ left: Regex::new("(?mi)replace").unwrap(),
+ right: "\x02replacee\x02".to_string(),
+ flags: Flags::CASE_INSENSITIVE | Flags::GLOBAL,
+ };
+
+ assert_eq!(left, right);
+
+ Ok(())
+ }
+
+ // `\/` inside a segment becomes a plain `/` in the resulting pattern.
+ #[test]
+ fn test_new_command_simple_escaped_slash() -> Result<(), ParseError> {
+ let left = Command::from_str(r#"s/repl\/ace/replacee"#)?;
+ let right = Command {
+ left: Regex::new("(?m)repl/ace").unwrap(),
+ right: "\x02replacee\x02".to_string(),
+ flags: Flags::empty(),
+ };
+
+ assert_eq!(left, right);
+
+ Ok(())
+ }
+
+ // The closing `/` after the replacement is optional.
+ #[test]
+ fn test_new_command_simple_no_terminating_slash() -> Result<(), ParseError> {
+ let left = Command::from_str("s/replace/replacee")?;
+ let right = Command {
+ left: Regex::new("(?m)replace").unwrap(),
+ right: "\x02replacee\x02".to_string(),
+ flags: Flags::empty(),
+ };
+
+ assert_eq!(left, right);
+
+ Ok(())
+ }
+
+ // Other backslash escapes (`\.`) are kept while every `\/` is unescaped.
+ #[test]
+ fn test_new_command_complex_regex() -> Result<(), ParseError> {
+ let left =
+ Command::from_str(r#"s/http(?:s?):\/\/regex101\.com\/r\/([a-zA-Z0-9]{1,6})?$/$1/g"#)?;
+ let right = Command {
+ left: Regex::new(r#"(?m)http(?:s?)://regex101\.com/r/([a-zA-Z0-9]{1,6})?$"#).unwrap(),
+ right: "\x02$1\x02".to_string(),
+ flags: Flags::GLOBAL,
+ };
+
+ assert_eq!(left, right);
+
+ Ok(())
+ }
+
+ // NOTE(review): despite the `_fail` suffix this test expects success — it
+ // feeds a single command to `from_str_multiple` and checks for a one-element list.
+ #[test]
+ fn test_new_command_multiple_fail() -> Result<(), ParseError> {
+ let left = Command::from_str_multiple(COMMAND_SIMPLE)?;
+ let right = vec![Command {
+ left: Regex::new("(?mi)replace").unwrap(),
+ right: "\x02replacee\x02".to_string(),
+ flags: Flags::CASE_INSENSITIVE | Flags::GLOBAL,
+ }];
+
+ assert_eq!(left, right);
+
+ Ok(())
+ }
+
+ // Two `;`-separated commands are parsed into two entries, each with its own flags.
+ #[test]
+ fn test_new_command_multiple() -> Result<(), ParseError> {
+ let left = Command::from_str_multiple(COMMAND_MULTIPLE)?;
+ let right = vec![
+ Command {
+ left: Regex::new("(?mi)replace").unwrap(),
+ right: "\x02replacee\x02".to_string(),
+ flags: Flags::CASE_INSENSITIVE | Flags::GLOBAL,
+ },
+ Command {
+ left: Regex::new("(?mi)two").unwrap(),
+ right: "\x02tworeplace\x02".to_string(),
+ flags: Flags::CASE_INSENSITIVE,
+ },
+ ];
+
+ assert_eq!(left, right);
+
+ Ok(())
+ }
+
+ // End-to-end: executing the parsed command substitutes the bold-wrapped replacement.
+ #[test]
+ fn test_run_regex() -> Result<(), ParseError> {
+ let cmd = Command::from_str(COMMAND_SIMPLE)?;
+
+ let left = "this is a sentence to \x02replacee\x02 text in";
+ let right = cmd.execute("this is a sentence to replace text in");
+
+ assert_eq!(left, right);
+
+ Ok(())
+ }
+}