aboutsummaryrefslogtreecommitdiff
path: root/src/regex.rs
diff options
context:
space:
mode:
Diffstat (limited to 'src/regex.rs')
-rw-r--r-- src/regex.rs 158
1 files changed, 158 insertions, 0 deletions
diff --git a/src/regex.rs b/src/regex.rs
new file mode 100644
index 0000000..66214c0
--- /dev/null
+++ b/src/regex.rs
@@ -0,0 +1,158 @@
+#![allow(dead_code, unused_variables, unused_mut)]
+
/// Shorthand for `RegexToken::Symbol(<char expression>)`.
macro_rules! Sym {
    ($ch:expr) => {{
        RegexToken::Symbol($ch)
    }};
}
+
/// Shorthand for `RegexToken::Star`; the operand expression is boxed for the caller.
macro_rules! Star {
    ($operand:expr) => {{
        let boxed = Box::new($operand);
        RegexToken::Star(boxed)
    }};
}
+
/// Shorthand for `RegexToken::Plus`; the operand expression is boxed for the caller.
macro_rules! Plus {
    ($operand:expr) => {{
        let boxed = Box::new($operand);
        RegexToken::Plus(boxed)
    }};
}
+
/// Shorthand for `RegexToken::Concat`; both operands are boxed, left one first.
macro_rules! Concat {
    ($lhs:expr, $rhs:expr) => {{
        let pair = (Box::new($lhs), Box::new($rhs));
        RegexToken::Concat(pair)
    }};
}
+
/// Shorthand for `RegexToken::Union`; both operands are boxed, left one first.
macro_rules! Union {
    ($lhs:expr, $rhs:expr) => {{
        let pair = (Box::new($lhs), Box::new($rhs));
        RegexToken::Union(pair)
    }};
}
+
#[cfg(test)]
mod tests {
    use super::*;

    /// Runs the parser on a string slice so each test only states the pattern.
    fn parse(pattern: &str) -> RegexToken {
        Regex::new(pattern.to_owned())
    }

    /// Two adjacent symbols become a single concatenation node.
    #[test]
    fn test_concat() {
        let expected = Concat!(Sym!('a'), Sym!('b'));
        assert_eq!(parse("ab"), expected);
    }

    /// A parenthesised union followed by `+` and a trailing symbol.
    #[test]
    fn test_plus() {
        let repeated = Plus!(Union!(Sym!('a'), Sym!('b')));
        let expected = Concat!(repeated, Sym!('c'));
        assert_eq!(parse("(a|b)+c"), expected);
    }

    /// A parenthesised union on its own.
    #[test]
    fn test_union() {
        let expected = Union!(Sym!('a'), Sym!('b'));
        assert_eq!(parse("(a|b)"), expected);
    }

    /// The empty pattern parses to `RegexToken::None`.
    #[test]
    fn test_none() {
        let expected = RegexToken::None;
        assert_eq!(parse(""), expected);
    }

    /// `*` applied to a symbol, followed by another symbol.
    #[test]
    fn test_star() {
        let expected = Concat!(Star!(Sym!('a')), Sym!('b'));
        assert_eq!(parse("a*b"), expected);
    }
}
+
/// Heap-allocated child node; boxing keeps the recursive `RegexToken` enum sized.
type ReToken = Box<RegexToken>;

/// Node of the syntax tree returned by `Regex::new`.
///
/// `Eq` and `Hash` are derived alongside `PartialEq` (every payload is a `char`
/// or a boxed node), so trees can be used as keys in hashed collections.
#[derive(Debug, PartialEq, Eq, Hash, Clone)]
pub enum RegexToken {
    /// A single literal character (the parser emits this for ASCII alphanumerics).
    Symbol(char),
    /// Two sub-expressions in sequence, stored as `(left, right)`.
    Concat((ReToken, ReToken)),
    /// Two alternatives separated by `|`, stored as `(left, right)`.
    Union((ReToken, ReToken)),
    /// Operand of a postfix `+` (conventionally: one or more repetitions).
    Plus(ReToken),
    /// Operand of a postfix `*` (conventionally: zero or more repetitions).
    Star(ReToken),
    /// The `.` character of the pattern.
    Dot,
    /// The empty expression; this is what an empty pattern parses to.
    None,
}
+
+#[derive(Debug, PartialEq)]
+pub struct Regex {}
+
+impl Regex {
+ pub fn new(input: String) -> RegexToken {
+ Regex::parse(input)
+ }
+
+ fn parse(input: String) -> RegexToken {
+ if input.is_empty() {
+ return RegexToken::None;
+ }
+
+ let mut chars = input.chars().peekable();
+ let mut parsed_token = Self::parse_token(&mut chars);
+
+ Self::parse_expression(&mut parsed_token, &mut chars)
+ }
+
+ fn parse_expression(
+ left: &mut RegexToken,
+ chars: &mut std::iter::Peekable<std::str::Chars>,
+ ) -> RegexToken {
+ while let Some(&next) = chars.peek() {
+ match next {
+ '|' => {
+ chars.next(); // Consume '|'
+ let right = Self::parse_token(chars);
+ *left = RegexToken::Union((Box::new(left.clone()), Box::new(right)));
+ }
+ '*' => {
+ chars.next(); // Consume '*'
+ let right = Self::parse_token(chars);
+ *left = RegexToken::Concat((
+ Box::new(RegexToken::Star(Box::new(left.clone()))),
+ Box::new(right),
+ ));
+ }
+ '+' => {
+ chars.next(); // Consume '+'
+ let right = Self::parse_token(chars);
+ *left = RegexToken::Concat((
+ Box::new(RegexToken::Plus(Box::new(left.clone()))),
+ Box::new(right),
+ ));
+ }
+ _ => {
+ let right = Self::parse_token(chars);
+ if let RegexToken::None = right {
+ } else {
+ *left = RegexToken::Concat((Box::new(left.clone()), Box::new(right)));
+ }
+ }
+ }
+ }
+ left.clone()
+ }
+
+ fn parse_token(chars: &mut std::iter::Peekable<std::str::Chars>) -> RegexToken {
+ match chars.next() {
+ Some('(') => {
+ let token = Self::parse(chars.collect());
+ chars.next(); // Skip ')'
+ token
+ }
+ Some('.') => RegexToken::Dot,
+ Some(c) if c.is_ascii_alphanumeric() => Sym!(c),
+ _ => RegexToken::None,
+ }
+ }
+}