about summary refs log tree commit diff
path: root/src
diff options
context:
space:
mode:
author omagdy7 <omar.professional8777@gmail.com> 2023-11-29 23:32:42 +0200
committer omagdy7 <omar.professional8777@gmail.com> 2023-11-29 23:32:42 +0200
commit 8ef74fd2db43e7f25f65ff2ad8c86e5f5dec3f79 (patch)
tree 5bf2bffec1eb2923acffb5d389dea48b7bdcba93 /src
parent 939271a529167c2653528cdd201e562d784e2864 (diff)
download rex-8ef74fd2db43e7f25f65ff2ad8c86e5f5dec3f79.tar.xz
rex-8ef74fd2db43e7f25f65ff2ad8c86e5f5dec3f79.zip
Added a regex parser which takes a string and parses the regex into tokens
Diffstat (limited to 'src')
-rwxr-xr-x src/main.rs 86
1 files changed, 54 insertions, 32 deletions
diff --git a/src/main.rs b/src/main.rs
index 01a4105..2d0f9c9 100755
--- a/src/main.rs
+++ b/src/main.rs
@@ -1,19 +1,5 @@
#![allow(dead_code, unused_variables, unused_mut)]
-type ReToken = Box<RegexToken>;
-
-#[derive(Debug, PartialEq, Clone)]
-enum RegexToken {
- Token(ReToken),
- Symbol(char),
- Number(usize),
- Concat((ReToken, ReToken)),
- Union((ReToken, ReToken)),
- Star(ReToken),
- Dot,
- None,
-}
-
macro_rules! Sym {
($c:expr) => {
RegexToken::Symbol($c)
@@ -26,6 +12,12 @@ macro_rules! Star {
};
}
+macro_rules! Plus {
+ ($c:expr) => {
+ RegexToken::Plus(Box::new($c))
+ };
+}
+
macro_rules! Concat {
($a:expr, $b:expr) => {
RegexToken::Concat((Box::new($a), Box::new($b)))
@@ -51,10 +43,18 @@ mod tests {
}
#[test]
+ fn test_plus() {
+ assert_eq!(
+ Regex::new(String::from("(a|b)+c")),
+ Concat!(Plus!(Union!(Sym!('a'), Sym!('b'))), Sym!('c'))
+ )
+ }
+
+ #[test]
fn test_union() {
assert_eq!(
Regex::new(String::from("(a|b)")),
- Concat!(Union!(Sym!('a'), Sym!('b')), RegexToken::None)
+ Union!(Sym!('a'), Sym!('b'))
)
}
@@ -72,6 +72,21 @@ mod tests {
}
}
+type ReToken = Box<RegexToken>;
+
+#[derive(Debug, PartialEq, Clone)]
+enum RegexToken {
+ Token(ReToken),
+ Symbol(char),
+ Number(usize),
+ Concat((ReToken, ReToken)),
+ Union((ReToken, ReToken)),
+ Plus(ReToken),
+ Star(ReToken),
+ Dot,
+ None,
+}
+
#[derive(Debug, PartialEq)]
struct Regex {}
@@ -86,7 +101,7 @@ impl Regex {
}
let mut chars = input.chars().peekable();
- let mut parsed_token = Self::parse_token(&mut RegexToken::None, &mut chars);
+ let mut parsed_token = Self::parse_token(&mut chars);
Self::parse_expression(&mut parsed_token, &mut chars)
}
@@ -99,27 +114,38 @@ impl Regex {
match next {
'|' => {
chars.next(); // Consume '|'
- let right = Self::parse_token(left, chars);
+ let right = Self::parse_token(chars);
*left = RegexToken::Union((Box::new(left.clone()), Box::new(right)));
}
'*' => {
- chars.next(); // Consume '|'
- let right = Self::parse_token(left, chars);
- *left = RegexToken::Star(Box::new(left.clone()));
+ chars.next(); // Consume '*'
+ let right = Self::parse_token(chars);
+ *left = RegexToken::Concat((
+ Box::new(RegexToken::Star(Box::new(left.clone()))),
+ Box::new(right),
+ ));
+ }
+ '+' => {
+ chars.next(); // Consume '+'
+ let right = Self::parse_token(chars);
+ *left = RegexToken::Concat((
+ Box::new(RegexToken::Plus(Box::new(left.clone()))),
+ Box::new(right),
+ ));
}
_ => {
- let right = Self::parse_token(left, chars);
- *left = RegexToken::Concat((Box::new(left.clone()), Box::new(right)));
+ let right = Self::parse_token(chars);
+ if let RegexToken::None = right {
+ } else {
+ *left = RegexToken::Concat((Box::new(left.clone()), Box::new(right)));
+ }
}
}
}
left.clone()
}
- fn parse_token(
- left: &mut RegexToken,
- chars: &mut std::iter::Peekable<std::str::Chars>,
- ) -> RegexToken {
+ fn parse_token(chars: &mut std::iter::Peekable<std::str::Chars>) -> RegexToken {
match chars.next() {
Some('(') => {
let token = Self::parse(chars.collect());
@@ -128,17 +154,13 @@ impl Regex {
}
Some('.') => RegexToken::Dot,
Some(c) if c.is_ascii_alphanumeric() => Sym!(c),
- Some('*') => {
- let token = Self::parse_token(left, chars);
- Star!(left.clone())
- }
- _ => RegexToken::None, // Handle other cases accordingly
+ _ => RegexToken::None,
}
}
}
fn main() {
- let input = "a*b";
+ let input = "((aa)|(bb))";
let token = Regex::new(String::from(input));
println!("{input}\n{:#?}", token)
}