aboutsummaryrefslogtreecommitdiff
path: root/src/hooks/sed/parser.rs
diff options
context:
space:
mode:
authorMax Audron <audron@cocaine.farm>2021-10-17 17:07:09 +0200
committerMax Audron <audron@cocaine.farm>2021-10-19 14:37:14 +0200
commit4754b420ced2503eb2641d6ddf678736e1aa7369 (patch)
tree511e69d8753cdea469a57152a483d209d16a5fa8 /src/hooks/sed/parser.rs
parentadd formatting trait for irc codes (diff)
replace sedregex crate8-rework-sed
This replaces the sedregex crate with our own implementation for multiple reasons: 1. We needed access to the parsed regex; this required a patch to the sedregex crate which did not get merged due to an inactive dev, blocking us from publishing on crates.io. 2. We wanted to highlight the changes done in bold. 3. We want to add execution of multiple chained sed commands in the future, which would require more modification.
Diffstat (limited to '')
-rw-r--r--src/hooks/sed/parser.rs344
1 files changed, 344 insertions, 0 deletions
diff --git a/src/hooks/sed/parser.rs b/src/hooks/sed/parser.rs
new file mode 100644
index 0000000..eb7ef3e
--- /dev/null
+++ b/src/hooks/sed/parser.rs
@@ -0,0 +1,344 @@
+use std::{borrow::Cow, str::Chars};
+
+use bitflags::bitflags;
+use regex::Regex;
+
+use crate::util::formatting::Formatting;
+
+type Commands = Vec<Command>;
+
+#[derive(Debug, Clone)]
+pub struct Command {
+ left: Regex,
+ right: String,
+ flags: Flags,
+}
+
+impl PartialEq for Command {
+ fn eq(&self, other: &Self) -> bool {
+ self.left.as_str() == other.left.as_str()
+ && self.right == other.right
+ && self.flags == other.flags
+ }
+}
+
+#[derive(Debug, Clone, PartialEq, thiserror::Error)]
+pub enum ParseError {
+ #[error("not a sed command, does not start with 's/'")]
+ NotSedCommand,
+ #[error("unknown flag")]
+ InvalidFlag,
+ #[error(transparent)]
+ InvalidRegex(#[from] regex::Error),
+}
+
+impl Command {
+ pub fn from_str(input: &str) -> Result<Command, ParseError> {
+ let mut chars = input.chars();
+
+ if chars.next().unwrap() == 's' && chars.next().unwrap() == '/' {
+ let left = Command::parse_segment(&mut chars)?;
+ let right = Command::parse_segment(&mut chars)?.bold();
+ let flags = Flags::from_chars(&mut chars)?;
+
+ let left = Regex::new(&format!("(?{}){}", flags.to_string(), left))
+ .map_err(|err| ParseError::InvalidRegex(err))?;
+
+ return Ok(Command { left, right, flags });
+ } else {
+ return Err(ParseError::NotSedCommand);
+ }
+ }
+
+ pub fn from_str_multiple(input: &str) -> Result<Commands, ParseError> {
+ let mut commands = Commands::new();
+
+ let mut chars = input.chars();
+
+ loop {
+ let s = chars.next();
+ let slash = chars.next();
+
+ if s.is_some() && slash.is_some() {
+ if s.unwrap() == 's' && slash.unwrap() == '/' {
+ let left = Command::parse_segment(&mut chars)?;
+ let right = Command::parse_segment(&mut chars)?.bold();
+ let flags = Flags::from_chars(&mut chars)?;
+
+ let left = Regex::new(&format!("(?{}){}", flags.to_string(), left))
+ .map_err(|err| ParseError::InvalidRegex(err))?;
+
+ commands.push(Command { left, right, flags });
+ } else {
+ return Err(ParseError::NotSedCommand);
+ }
+ } else {
+ break;
+ }
+ }
+
+ Ok(commands)
+ }
+
+ fn parse_segment(chars: &mut Chars) -> Result<String, ParseError> {
+ let mut last_char = '/';
+ let mut output = String::new();
+
+ while let Some(c) = chars.next() {
+ if c == '/' && last_char != '\\' {
+ break;
+ } else if c == '/' && last_char == '\\' {
+ output.pop().unwrap();
+ }
+
+ output.push(c);
+ last_char = c;
+ }
+
+ Ok(output)
+ }
+
+ pub fn execute(self, target: &str) -> Cow<str> {
+ let result: Cow<str>;
+
+ if self.flags.contains(Flags::GLOBAL) {
+ result = self.left.replace_all(target, self.right);
+ } else {
+ result = self.left.replace(target, self.right);
+ }
+
+ return result;
+ }
+
+ pub fn regex(&self) -> &Regex {
+ &self.left
+ }
+}
+
+bitflags! {
+ /// i case-insensitive: letters match both upper and lower case
+ /// m multi-line mode: ^ and $ match begin/end of line
+ /// s allow . to match \n
+ /// U swap the meaning of x* and x*?
+ /// x ignore whitespace and allow line comments (starting with `#`)
+ struct Flags: u32 {
+ const GLOBAL = 0b00000001;
+ const CASE_INSENSITIVE = 0b00000010;
+ const SINGLE_LINE = 0b00001000;
+ const UNGREEDY = 0b00010000;
+ const EXTENDED = 0b00100000;
+ }
+}
+
+impl Flags {
+ pub fn to_string(&self) -> String {
+ let mut result = String::new();
+
+ result.push('m');
+
+ if self.contains(Flags::CASE_INSENSITIVE) {
+ result.push('i');
+ }
+
+ if self.contains(Flags::SINGLE_LINE) {
+ result.push('s');
+ }
+
+ if self.contains(Flags::UNGREEDY) {
+ result.push('U');
+ }
+
+ if self.contains(Flags::EXTENDED) {
+ result.push('x');
+ }
+
+ return result;
+ }
+
+ pub fn from_chars(chars: &mut Chars) -> Result<Flags, ParseError> {
+ let mut flags: Flags = Flags::empty();
+
+ while let Some(c) = chars.next() {
+ match c {
+ 'g' => {
+ flags = flags | Flags::GLOBAL;
+ }
+ 'i' => {
+ flags = flags | Flags::CASE_INSENSITIVE;
+ }
+ 's' => {
+ flags = flags | Flags::SINGLE_LINE;
+ }
+ 'U' => {
+ flags = flags | Flags::UNGREEDY;
+ }
+ 'x' => {
+ flags = flags | Flags::EXTENDED;
+ }
+ ';' => return Ok(flags),
+ _ => return Err(ParseError::InvalidFlag),
+ };
+ }
+
+ Ok(flags)
+ }
+}
+
#[cfg(test)]
mod tests {
    use super::*;

    const COMMAND_SIMPLE: &str = "s/replace/replacee/ig";
    const COMMAND_MULTIPLE: &str = "s/replace/replacee/ig;s/two/tworeplace/i";

    /// Builds the `Command` a parse is expected to produce. `pattern` is the
    /// full regex source, including the inline flag group.
    fn expected(pattern: &str, replacement: &str, flags: Flags) -> Command {
        Command {
            left: Regex::new(pattern).unwrap(),
            right: replacement.to_string(),
            flags,
        }
    }

    // Two consecutive segments are read from the same iterator.
    #[test]
    fn test_parse_segment() -> Result<(), ParseError> {
        let mut chars = "replace/replacee/ig".chars();

        assert_eq!("replace", Command::parse_segment(&mut chars)?);
        assert_eq!("replacee", Command::parse_segment(&mut chars)?);

        Ok(())
    }

    // Known flag letters are accepted; an unknown one is rejected.
    #[test]
    fn test_flags_from_chars() -> Result<(), ParseError> {
        let parsed = Flags::from_chars(&mut "ig".chars())?;
        assert_eq!(Flags::CASE_INSENSITIVE | Flags::GLOBAL, parsed);

        let rejected = Flags::from_chars(&mut "igf".chars());
        assert_eq!(Err(ParseError::InvalidFlag), rejected);

        Ok(())
    }

    // `;` ends the flag section and leaves the rest of the input unread.
    #[test]
    fn test_flags_from_chars_with_terminator() -> Result<(), ParseError> {
        let mut chars = "ig;bla".chars();

        let parsed = Flags::from_chars(&mut chars)?;

        assert_eq!(Flags::CASE_INSENSITIVE | Flags::GLOBAL, parsed);
        assert_eq!("bla", chars.as_str());

        Ok(())
    }

    #[test]
    fn test_new_command_simple() -> Result<(), ParseError> {
        let parsed = Command::from_str(COMMAND_SIMPLE)?;
        let wanted = expected(
            "(?mi)replace",
            "\x02replacee\x02",
            Flags::CASE_INSENSITIVE | Flags::GLOBAL,
        );

        assert_eq!(parsed, wanted);

        Ok(())
    }

    // `\/` inside the pattern becomes a literal `/`.
    #[test]
    fn test_new_command_simple_escaped_slash() -> Result<(), ParseError> {
        let parsed = Command::from_str(r#"s/repl\/ace/replacee"#)?;
        let wanted = expected("(?m)repl/ace", "\x02replacee\x02", Flags::empty());

        assert_eq!(parsed, wanted);

        Ok(())
    }

    // The closing `/` after the replacement is optional.
    #[test]
    fn test_new_command_simple_no_terminating_slash() -> Result<(), ParseError> {
        let parsed = Command::from_str("s/replace/replacee")?;
        let wanted = expected("(?m)replace", "\x02replacee\x02", Flags::empty());

        assert_eq!(parsed, wanted);

        Ok(())
    }

    #[test]
    fn test_new_command_complex_regex() -> Result<(), ParseError> {
        let parsed =
            Command::from_str(r#"s/http(?:s?):\/\/regex101\.com\/r\/([a-zA-Z0-9]{1,6})?$/$1/g"#)?;
        let wanted = expected(
            r#"(?m)http(?:s?)://regex101\.com/r/([a-zA-Z0-9]{1,6})?$"#,
            "\x02$1\x02",
            Flags::GLOBAL,
        );

        assert_eq!(parsed, wanted);

        Ok(())
    }

    // A single command fed through the multi-command parser yields a
    // one-element list.
    #[test]
    fn test_new_command_multiple_fail() -> Result<(), ParseError> {
        let parsed = Command::from_str_multiple(COMMAND_SIMPLE)?;
        let wanted = vec![expected(
            "(?mi)replace",
            "\x02replacee\x02",
            Flags::CASE_INSENSITIVE | Flags::GLOBAL,
        )];

        assert_eq!(parsed, wanted);

        Ok(())
    }

    #[test]
    fn test_new_command_multiple() -> Result<(), ParseError> {
        let parsed = Command::from_str_multiple(COMMAND_MULTIPLE)?;
        let wanted = vec![
            expected(
                "(?mi)replace",
                "\x02replacee\x02",
                Flags::CASE_INSENSITIVE | Flags::GLOBAL,
            ),
            expected("(?mi)two", "\x02tworeplace\x02", Flags::CASE_INSENSITIVE),
        ];

        assert_eq!(parsed, wanted);

        Ok(())
    }

    // End-to-end: parse a command and apply it to a sentence.
    #[test]
    fn test_run_regex() -> Result<(), ParseError> {
        let cmd = Command::from_str(COMMAND_SIMPLE)?;

        let wanted = "this is a sentence to \x02replacee\x02 text in";
        let actual = cmd.execute("this is a sentence to replace text in");

        assert_eq!(wanted, actual);

        Ok(())
    }
}