diff options
| author | omagdy7 <omar.professional8777@gmail.com> | 2023-11-29 23:32:42 +0200 |
|---|---|---|
| committer | omagdy7 <omar.professional8777@gmail.com> | 2023-11-29 23:32:42 +0200 |
| commit | 8ef74fd2db43e7f25f65ff2ad8c86e5f5dec3f79 (patch) | |
| tree | 5bf2bffec1eb2923acffb5d389dea48b7bdcba93 /src/main.rs | |
| parent | 939271a529167c2653528cdd201e562d784e2864 (diff) | |
| download | rex-8ef74fd2db43e7f25f65ff2ad8c86e5f5dec3f79.tar.xz rex-8ef74fd2db43e7f25f65ff2ad8c86e5f5dec3f79.zip | |
Added a regex parser which takes a string and parses the regex into tokens
Diffstat (limited to 'src/main.rs')
| -rwxr-xr-x | src/main.rs | 86 |
1 file changed, 54 insertions, 32 deletions
diff --git a/src/main.rs b/src/main.rs index 01a4105..2d0f9c9 100755 --- a/src/main.rs +++ b/src/main.rs @@ -1,19 +1,5 @@ #![allow(dead_code, unused_variables, unused_mut)] -type ReToken = Box<RegexToken>; - -#[derive(Debug, PartialEq, Clone)] -enum RegexToken { - Token(ReToken), - Symbol(char), - Number(usize), - Concat((ReToken, ReToken)), - Union((ReToken, ReToken)), - Star(ReToken), - Dot, - None, -} - macro_rules! Sym { ($c:expr) => { RegexToken::Symbol($c) @@ -26,6 +12,12 @@ macro_rules! Star { }; } +macro_rules! Plus { + ($c:expr) => { + RegexToken::Plus(Box::new($c)) + }; +} + macro_rules! Concat { ($a:expr, $b:expr) => { RegexToken::Concat((Box::new($a), Box::new($b))) @@ -51,10 +43,18 @@ mod tests { } #[test] + fn test_plus() { + assert_eq!( + Regex::new(String::from("(a|b)+c")), + Concat!(Plus!(Union!(Sym!('a'), Sym!('b'))), Sym!('c')) + ) + } + + #[test] fn test_union() { assert_eq!( Regex::new(String::from("(a|b)")), - Concat!(Union!(Sym!('a'), Sym!('b')), RegexToken::None) + Union!(Sym!('a'), Sym!('b')) ) } @@ -72,6 +72,21 @@ mod tests { } } +type ReToken = Box<RegexToken>; + +#[derive(Debug, PartialEq, Clone)] +enum RegexToken { + Token(ReToken), + Symbol(char), + Number(usize), + Concat((ReToken, ReToken)), + Union((ReToken, ReToken)), + Plus(ReToken), + Star(ReToken), + Dot, + None, +} + #[derive(Debug, PartialEq)] struct Regex {} @@ -86,7 +101,7 @@ impl Regex { } let mut chars = input.chars().peekable(); - let mut parsed_token = Self::parse_token(&mut RegexToken::None, &mut chars); + let mut parsed_token = Self::parse_token(&mut chars); Self::parse_expression(&mut parsed_token, &mut chars) } @@ -99,27 +114,38 @@ impl Regex { match next { '|' => { chars.next(); // Consume '|' - let right = Self::parse_token(left, chars); + let right = Self::parse_token(chars); *left = RegexToken::Union((Box::new(left.clone()), Box::new(right))); } '*' => { - chars.next(); // Consume '|' - let right = Self::parse_token(left, chars); - *left = 
RegexToken::Star(Box::new(left.clone())); + chars.next(); // Consume '*' + let right = Self::parse_token(chars); + *left = RegexToken::Concat(( + Box::new(RegexToken::Star(Box::new(left.clone()))), + Box::new(right), + )); + } + '+' => { + chars.next(); // Consume '+' + let right = Self::parse_token(chars); + *left = RegexToken::Concat(( + Box::new(RegexToken::Plus(Box::new(left.clone()))), + Box::new(right), + )); } _ => { - let right = Self::parse_token(left, chars); - *left = RegexToken::Concat((Box::new(left.clone()), Box::new(right))); + let right = Self::parse_token(chars); + if let RegexToken::None = right { + } else { + *left = RegexToken::Concat((Box::new(left.clone()), Box::new(right))); + } } } } left.clone() } - fn parse_token( - left: &mut RegexToken, - chars: &mut std::iter::Peekable<std::str::Chars>, - ) -> RegexToken { + fn parse_token(chars: &mut std::iter::Peekable<std::str::Chars>) -> RegexToken { match chars.next() { Some('(') => { let token = Self::parse(chars.collect()); @@ -128,17 +154,13 @@ impl Regex { } Some('.') => RegexToken::Dot, Some(c) if c.is_ascii_alphanumeric() => Sym!(c), - Some('*') => { - let token = Self::parse_token(left, chars); - Star!(left.clone()) - } - _ => RegexToken::None, // Handle other cases accordingly + _ => RegexToken::None, } } } fn main() { - let input = "a*b"; + let input = "((aa)|(bb))"; let token = Regex::new(String::from(input)); println!("{input}\n{:#?}", token) } |
