aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authoromagdy7 <omar.professional8777@gmail.com>2023-11-30 01:32:41 +0200
committeromagdy7 <omar.professional8777@gmail.com>2023-11-30 01:32:41 +0200
commit1e3ce70c5c0d757cd2abaef809a211a3430cdd26 (patch)
treecb4f671c144bb0e12e2d66dc55a397df6ee86ec8
parent8ef74fd2db43e7f25f65ff2ad8c86e5f5dec3f79 (diff)
downloadrex-1e3ce70c5c0d757cd2abaef809a211a3430cdd26.tar.xz
rex-1e3ce70c5c0d757cd2abaef809a211a3430cdd26.zip
Restructures the project into separate files and added basic NFA functionality
-rwxr-xr-xsrc/main.rs177
-rw-r--r--src/nfa.rs136
-rw-r--r--src/regex.rs158
3 files changed, 309 insertions, 162 deletions
diff --git a/src/main.rs b/src/main.rs
index 2d0f9c9..bfe2050 100755
--- a/src/main.rs
+++ b/src/main.rs
@@ -1,166 +1,19 @@
-#![allow(dead_code, unused_variables, unused_mut)]
-
-macro_rules! Sym {
- ($c:expr) => {
- RegexToken::Symbol($c)
- };
-}
-
-macro_rules! Star {
- ($c:expr) => {
- RegexToken::Star(Box::new($c))
- };
-}
-
-macro_rules! Plus {
- ($c:expr) => {
- RegexToken::Plus(Box::new($c))
- };
-}
-
-macro_rules! Concat {
- ($a:expr, $b:expr) => {
- RegexToken::Concat((Box::new($a), Box::new($b)))
- };
-}
-
-macro_rules! Union {
- ($a:expr, $b:expr) => {
- RegexToken::Union((Box::new($a), Box::new($b)))
- };
-}
-
-#[cfg(test)]
-mod tests {
- use super::*;
-
- #[test]
- fn test_concat() {
- assert_eq!(
- Regex::new(String::from("ab")),
- Concat!(Sym!('a'), Sym!('b'))
- )
- }
-
- #[test]
- fn test_plus() {
- assert_eq!(
- Regex::new(String::from("(a|b)+c")),
- Concat!(Plus!(Union!(Sym!('a'), Sym!('b'))), Sym!('c'))
- )
- }
-
- #[test]
- fn test_union() {
- assert_eq!(
- Regex::new(String::from("(a|b)")),
- Union!(Sym!('a'), Sym!('b'))
- )
- }
-
- #[test]
- fn test_none() {
- assert_eq!(Regex::new(String::from("")), RegexToken::None)
- }
-
- #[test]
- fn test_star() {
- assert_eq!(
- Regex::new(String::from("a*b")),
- Concat!(Star!(Sym!('a')), Sym!('b'))
- )
- }
-}
-
-type ReToken = Box<RegexToken>;
-
-#[derive(Debug, PartialEq, Clone)]
-enum RegexToken {
- Token(ReToken),
- Symbol(char),
- Number(usize),
- Concat((ReToken, ReToken)),
- Union((ReToken, ReToken)),
- Plus(ReToken),
- Star(ReToken),
- Dot,
- None,
-}
-
-#[derive(Debug, PartialEq)]
-struct Regex {}
-
-impl Regex {
- fn new(input: String) -> RegexToken {
- Regex::parse(input)
- }
-
- fn parse(input: String) -> RegexToken {
- if input.is_empty() {
- return RegexToken::None;
- }
-
- let mut chars = input.chars().peekable();
- let mut parsed_token = Self::parse_token(&mut chars);
-
- Self::parse_expression(&mut parsed_token, &mut chars)
- }
-
- fn parse_expression(
- left: &mut RegexToken,
- chars: &mut std::iter::Peekable<std::str::Chars>,
- ) -> RegexToken {
- while let Some(&next) = chars.peek() {
- match next {
- '|' => {
- chars.next(); // Consume '|'
- let right = Self::parse_token(chars);
- *left = RegexToken::Union((Box::new(left.clone()), Box::new(right)));
- }
- '*' => {
- chars.next(); // Consume '*'
- let right = Self::parse_token(chars);
- *left = RegexToken::Concat((
- Box::new(RegexToken::Star(Box::new(left.clone()))),
- Box::new(right),
- ));
- }
- '+' => {
- chars.next(); // Consume '+'
- let right = Self::parse_token(chars);
- *left = RegexToken::Concat((
- Box::new(RegexToken::Plus(Box::new(left.clone()))),
- Box::new(right),
- ));
- }
- _ => {
- let right = Self::parse_token(chars);
- if let RegexToken::None = right {
- } else {
- *left = RegexToken::Concat((Box::new(left.clone()), Box::new(right)));
- }
- }
- }
- }
- left.clone()
- }
-
- fn parse_token(chars: &mut std::iter::Peekable<std::str::Chars>) -> RegexToken {
- match chars.next() {
- Some('(') => {
- let token = Self::parse(chars.collect());
- chars.next(); // Skip ')'
- token
- }
- Some('.') => RegexToken::Dot,
- Some(c) if c.is_ascii_alphanumeric() => Sym!(c),
- _ => RegexToken::None,
- }
- }
-}
+mod nfa;
+mod regex;
+use nfa::*;
+use regex::*;
fn main() {
- let input = "((aa)|(bb))";
+ let input = "abcdefglmno";
let token = Regex::new(String::from(input));
- println!("{input}\n{:#?}", token)
+ println!("{input}\n{:#?}", token);
+
+ let mut nfa = NFA::new();
+ nfa.regex_to_nfa(token);
+ nfa.add_state();
+
+ println!("NFA: {:#?}", nfa);
+ let inp = "abcdefglmno";
+ let output = nfa.simulate(String::from(inp));
+ println!("{inp} was = {output}")
}
diff --git a/src/nfa.rs b/src/nfa.rs
new file mode 100644
index 0000000..6c134fe
--- /dev/null
+++ b/src/nfa.rs
@@ -0,0 +1,136 @@
+use std::collections::HashMap;
+
+use crate::regex::RegexToken;
+
+#[derive(Debug, Clone, PartialEq)]
+struct State {
+ id: usize,
+ transitions: Vec<(usize, char)>,
+}
+
+impl State {
+ fn new(id: usize) -> Self {
+ Self {
+ id,
+ transitions: Vec::new(),
+ }
+ }
+}
+
+#[derive(Debug, Clone, PartialEq)]
+pub struct NFA {
+ states: HashMap<usize, State>,
+ transitions: HashMap<usize, Vec<usize>>,
+ initial_state: usize,
+ accepting_states: Vec<usize>,
+}
+
+impl NFA {
+ pub fn new() -> Self {
+ Self {
+ states: HashMap::new(),
+ transitions: HashMap::new(),
+ initial_state: 0,
+ accepting_states: Vec::new(),
+ }
+ }
+
+ pub fn add_state(&mut self) -> usize {
+ let new_state = State::new(self.states.len());
+ let id = new_state.id;
+ self.states.insert(id, new_state);
+ id
+ }
+
+ pub fn regex_to_nfa(&mut self, input: RegexToken) {
+ match input {
+ RegexToken::Symbol(ch) => {
+ let state_1 = self.add_state();
+ self.add_transition(state_1, state_1 + 1, ch);
+ }
+ RegexToken::Concat((left, right)) => {
+ self.regex_to_nfa(*left);
+ self.regex_to_nfa(*right);
+ }
+ RegexToken::Union(_) => todo!(),
+ RegexToken::Plus(_) => todo!(),
+ RegexToken::Star(_) => todo!(),
+ RegexToken::Dot => {
+ self.add_state();
+ }
+ RegexToken::None => {
+ self.add_state();
+ }
+ }
+ }
+
+ pub fn add_transition(&mut self, from: usize, to: usize, trans: char) {
+ self.transitions
+ .entry(from)
+ .and_modify(|val| val.push(to))
+ .or_insert_with(|| vec![to]);
+ self.states
+ .get_mut(&from)
+ .unwrap()
+ .transitions
+ .push((to, trans));
+ }
+
+ fn deduce_accepting_states(&mut self) {
+ for (id, state) in &self.states {
+ if state.transitions.is_empty() {
+ self.accepting_states.push(*id)
+ }
+ }
+ }
+
+ pub fn simulate(&mut self, input: String) -> bool {
+ let mut current = self.states.get(&self.initial_state).unwrap();
+ let mut chars = input.chars();
+ 'simu: while let Some(nxt) = chars.next() {
+ println!("current: {current:?}");
+ for (id, ch) in &current.transitions {
+ println!("nxt: {nxt} ch: {ch}");
+ if *ch == nxt {
+ current = self.states.get(id).unwrap();
+ } else {
+ break 'simu;
+ }
+ }
+ }
+ println!("current node: {current:?}");
+ let current_id = current.id;
+ self.deduce_accepting_states();
+ self.accepting_states.iter().any(|&id| id == current_id)
+ }
+}
+
+#[cfg(test)]
+mod tests {
+ use super::*;
+ use crate::regex::Regex;
+
+ #[test]
+ fn test_simple_succ() {
+ let input = "abc";
+ let token = Regex::new(String::from(input));
+ let mut nfa = NFA::new();
+ nfa.regex_to_nfa(token);
+ nfa.add_state();
+ let inp = "abc";
+ let output = nfa.simulate(String::from(inp));
+ assert!(output)
+ }
+
+ #[test]
+ fn test_simple_fail() {
+ let input = "abc";
+ let token = Regex::new(String::from(input));
+ let mut nfa = NFA::new();
+ nfa.regex_to_nfa(token);
+ nfa.add_state();
+ let inp = "abd";
+ let output = nfa.simulate(String::from(inp));
+ assert!(!output)
+ }
+}
diff --git a/src/regex.rs b/src/regex.rs
new file mode 100644
index 0000000..66214c0
--- /dev/null
+++ b/src/regex.rs
@@ -0,0 +1,158 @@
+#![allow(dead_code, unused_variables, unused_mut)]
+
+macro_rules! Sym {
+ ($c:expr) => {
+ RegexToken::Symbol($c)
+ };
+}
+
+macro_rules! Star {
+ ($c:expr) => {
+ RegexToken::Star(Box::new($c))
+ };
+}
+
+macro_rules! Plus {
+ ($c:expr) => {
+ RegexToken::Plus(Box::new($c))
+ };
+}
+
+macro_rules! Concat {
+ ($a:expr, $b:expr) => {
+ RegexToken::Concat((Box::new($a), Box::new($b)))
+ };
+}
+
+macro_rules! Union {
+ ($a:expr, $b:expr) => {
+ RegexToken::Union((Box::new($a), Box::new($b)))
+ };
+}
+
+#[cfg(test)]
+mod tests {
+ use super::*;
+
+ #[test]
+ fn test_concat() {
+ assert_eq!(
+ Regex::new(String::from("ab")),
+ Concat!(Sym!('a'), Sym!('b'))
+ )
+ }
+
+ #[test]
+ fn test_plus() {
+ assert_eq!(
+ Regex::new(String::from("(a|b)+c")),
+ Concat!(Plus!(Union!(Sym!('a'), Sym!('b'))), Sym!('c'))
+ )
+ }
+
+ #[test]
+ fn test_union() {
+ assert_eq!(
+ Regex::new(String::from("(a|b)")),
+ Union!(Sym!('a'), Sym!('b'))
+ )
+ }
+
+ #[test]
+ fn test_none() {
+ assert_eq!(Regex::new(String::from("")), RegexToken::None)
+ }
+
+ #[test]
+ fn test_star() {
+ assert_eq!(
+ Regex::new(String::from("a*b")),
+ Concat!(Star!(Sym!('a')), Sym!('b'))
+ )
+ }
+}
+
+type ReToken = Box<RegexToken>;
+
+#[derive(Debug, PartialEq, Clone)]
+pub enum RegexToken {
+ Symbol(char),
+ Concat((ReToken, ReToken)),
+ Union((ReToken, ReToken)),
+ Plus(ReToken),
+ Star(ReToken),
+ Dot,
+ None,
+}
+
+#[derive(Debug, PartialEq)]
+pub struct Regex {}
+
+impl Regex {
+ pub fn new(input: String) -> RegexToken {
+ Regex::parse(input)
+ }
+
+ fn parse(input: String) -> RegexToken {
+ if input.is_empty() {
+ return RegexToken::None;
+ }
+
+ let mut chars = input.chars().peekable();
+ let mut parsed_token = Self::parse_token(&mut chars);
+
+ Self::parse_expression(&mut parsed_token, &mut chars)
+ }
+
+ fn parse_expression(
+ left: &mut RegexToken,
+ chars: &mut std::iter::Peekable<std::str::Chars>,
+ ) -> RegexToken {
+ while let Some(&next) = chars.peek() {
+ match next {
+ '|' => {
+ chars.next(); // Consume '|'
+ let right = Self::parse_token(chars);
+ *left = RegexToken::Union((Box::new(left.clone()), Box::new(right)));
+ }
+ '*' => {
+ chars.next(); // Consume '*'
+ let right = Self::parse_token(chars);
+ *left = RegexToken::Concat((
+ Box::new(RegexToken::Star(Box::new(left.clone()))),
+ Box::new(right),
+ ));
+ }
+ '+' => {
+ chars.next(); // Consume '+'
+ let right = Self::parse_token(chars);
+ *left = RegexToken::Concat((
+ Box::new(RegexToken::Plus(Box::new(left.clone()))),
+ Box::new(right),
+ ));
+ }
+ _ => {
+ let right = Self::parse_token(chars);
+ if let RegexToken::None = right {
+ } else {
+ *left = RegexToken::Concat((Box::new(left.clone()), Box::new(right)));
+ }
+ }
+ }
+ }
+ left.clone()
+ }
+
+ fn parse_token(chars: &mut std::iter::Peekable<std::str::Chars>) -> RegexToken {
+ match chars.next() {
+ Some('(') => {
+ let token = Self::parse(chars.collect());
+ chars.next(); // Skip ')'
+ token
+ }
+ Some('.') => RegexToken::Dot,
+ Some(c) if c.is_ascii_alphanumeric() => Sym!(c),
+ _ => RegexToken::None,
+ }
+ }
+}