From 1e3ce70c5c0d757cd2abaef809a211a3430cdd26 Mon Sep 17 00:00:00 2001 From: omagdy7 Date: Thu, 30 Nov 2023 01:32:41 +0200 Subject: Restructures the project into separate files and added basic NFA functionality --- src/main.rs | 177 +++++------------------------------------------------------ src/nfa.rs | 136 +++++++++++++++++++++++++++++++++++++++++++++ src/regex.rs | 158 ++++++++++++++++++++++++++++++++++++++++++++++++++++ 3 files changed, 309 insertions(+), 162 deletions(-) create mode 100644 src/nfa.rs create mode 100644 src/regex.rs diff --git a/src/main.rs b/src/main.rs index 2d0f9c9..bfe2050 100755 --- a/src/main.rs +++ b/src/main.rs @@ -1,166 +1,19 @@ -#![allow(dead_code, unused_variables, unused_mut)] - -macro_rules! Sym { - ($c:expr) => { - RegexToken::Symbol($c) - }; -} - -macro_rules! Star { - ($c:expr) => { - RegexToken::Star(Box::new($c)) - }; -} - -macro_rules! Plus { - ($c:expr) => { - RegexToken::Plus(Box::new($c)) - }; -} - -macro_rules! Concat { - ($a:expr, $b:expr) => { - RegexToken::Concat((Box::new($a), Box::new($b))) - }; -} - -macro_rules! Union { - ($a:expr, $b:expr) => { - RegexToken::Union((Box::new($a), Box::new($b))) - }; -} - -#[cfg(test)] -mod tests { - use super::*; - - #[test] - fn test_concat() { - assert_eq!( - Regex::new(String::from("ab")), - Concat!(Sym!('a'), Sym!('b')) - ) - } - - #[test] - fn test_plus() { - assert_eq!( - Regex::new(String::from("(a|b)+c")), - Concat!(Plus!(Union!(Sym!('a'), Sym!('b'))), Sym!('c')) - ) - } - - #[test] - fn test_union() { - assert_eq!( - Regex::new(String::from("(a|b)")), - Union!(Sym!('a'), Sym!('b')) - ) - } - - #[test] - fn test_none() { - assert_eq!(Regex::new(String::from("")), RegexToken::None) - } - - #[test] - fn test_star() { - assert_eq!( - Regex::new(String::from("a*b")), - Concat!(Star!(Sym!('a')), Sym!('b')) - ) - } -} - -type ReToken = Box; - -#[derive(Debug, PartialEq, Clone)] -enum RegexToken { - Token(ReToken), - Symbol(char), - Number(usize), - Concat((ReToken, ReToken)), - Union((ReToken, ReToken)), - Plus(ReToken), - Star(ReToken), - Dot, - None, -} - -#[derive(Debug, PartialEq)] -struct Regex {} - -impl Regex { - fn new(input: String) -> RegexToken { - Regex::parse(input) - } - - fn parse(input: String) -> RegexToken { - if input.is_empty() { - return RegexToken::None; - } - - let mut chars = input.chars().peekable(); - let mut parsed_token = Self::parse_token(&mut chars); - - Self::parse_expression(&mut parsed_token, &mut chars) - } - - fn parse_expression( - left: &mut RegexToken, - chars: &mut std::iter::Peekable, - ) -> RegexToken { - while let Some(&next) = chars.peek() { - match next { - '|' => { - chars.next(); // Consume '|' - let right = Self::parse_token(chars); - *left = RegexToken::Union((Box::new(left.clone()), Box::new(right))); - } - '*' => { - chars.next(); // Consume '*' - let right = Self::parse_token(chars); - *left = RegexToken::Concat(( - Box::new(RegexToken::Star(Box::new(left.clone()))), - Box::new(right), - )); - } - '+' => { - chars.next(); // Consume '+' - let right = Self::parse_token(chars); - *left = RegexToken::Concat(( - Box::new(RegexToken::Plus(Box::new(left.clone()))), - Box::new(right), - )); - } - _ => { - let right = Self::parse_token(chars); - if let RegexToken::None = right { - } else { - *left = RegexToken::Concat((Box::new(left.clone()), Box::new(right))); - } - } - } - } - left.clone() - } - - fn parse_token(chars: &mut std::iter::Peekable) -> RegexToken { - match chars.next() { - Some('(') => { - let token = Self::parse(chars.collect()); - chars.next(); // Skip ')' - token - } - Some('.') => RegexToken::Dot, - Some(c) if c.is_ascii_alphanumeric() => Sym!(c), - _ => RegexToken::None, - } - } -} +mod nfa; +mod regex; +use nfa::*; +use regex::*; fn main() { - let input = "((aa)|(bb))"; + let input = "abcdefglmno"; let token = Regex::new(String::from(input)); - println!("{input}\n{:#?}", token) + println!("{input}\n{:#?}", token); + + let mut nfa = NFA::new(); + nfa.regex_to_nfa(token); + nfa.add_state(); + + println!("NFA: {:#?}", nfa); + let inp = "abcdefglmno"; + let output = nfa.simulate(String::from(inp)); + println!("{inp} was = {output}") } diff --git a/src/nfa.rs b/src/nfa.rs new file mode 100644 index 0000000..6c134fe --- /dev/null +++ b/src/nfa.rs @@ -0,0 +1,136 @@ +use std::collections::HashMap; + +use crate::regex::RegexToken; + +#[derive(Debug, Clone, PartialEq)] +struct State { + id: usize, + transitions: Vec<(usize, char)>, +} + +impl State { + fn new(id: usize) -> Self { + Self { + id, + transitions: Vec::new(), + } + } +} + +#[derive(Debug, Clone, PartialEq)] +pub struct NFA { + states: HashMap, + transitions: HashMap>, + initial_state: usize, + accepting_states: Vec, +} + +impl NFA { + pub fn new() -> Self { + Self { + states: HashMap::new(), + transitions: HashMap::new(), + initial_state: 0, + accepting_states: Vec::new(), + } + } + + pub fn add_state(&mut self) -> usize { + let new_state = State::new(self.states.len()); + let id = new_state.id; + self.states.insert(id, new_state); + id + } + + pub fn regex_to_nfa(&mut self, input: RegexToken) { + match input { + RegexToken::Symbol(ch) => { + let state_1 = self.add_state(); + self.add_transition(state_1, state_1 + 1, ch); + } + RegexToken::Concat((left, right)) => { + self.regex_to_nfa(*left); + self.regex_to_nfa(*right); + } + RegexToken::Union(_) => todo!(), + RegexToken::Plus(_) => todo!(), + RegexToken::Star(_) => todo!(), + RegexToken::Dot => { + self.add_state(); + } + RegexToken::None => { + self.add_state(); + } + } + } + + pub fn add_transition(&mut self, from: usize, to: usize, trans: char) { + self.transitions + .entry(from) + .and_modify(|val| val.push(to)) + .or_insert_with(|| vec![to]); + self.states + .get_mut(&from) + .unwrap() + .transitions + .push((to, trans)); + } + + fn deduce_accepting_states(&mut self) { + for (id, state) in &self.states { + if state.transitions.is_empty() { + self.accepting_states.push(*id) + } + } + } + + pub fn simulate(&mut self, input: String) -> bool { + let mut current = self.states.get(&self.initial_state).unwrap(); + let mut chars = input.chars(); + 'simu: while let Some(nxt) = chars.next() { + println!("current: {current:?}"); + for (id, ch) in ¤t.transitions { + println!("nxt: {nxt} ch: {ch}"); + if *ch == nxt { + current = self.states.get(id).unwrap(); + } else { + break 'simu; + } + } + } + println!("current node: {current:?}"); + let current_id = current.id; + self.deduce_accepting_states(); + self.accepting_states.iter().any(|&id| id == current_id) + } +} + +#[cfg(test)] +mod tests { + use super::*; + use crate::regex::Regex; + + #[test] + fn test_simple_succ() { + let input = "abc"; + let token = Regex::new(String::from(input)); + let mut nfa = NFA::new(); + nfa.regex_to_nfa(token); + nfa.add_state(); + let inp = "abc"; + let output = nfa.simulate(String::from(inp)); + assert!(output) + } + + #[test] + fn test_simple_fail() { + let input = "abc"; + let token = Regex::new(String::from(input)); + let mut nfa = NFA::new(); + nfa.regex_to_nfa(token); + nfa.add_state(); + let inp = "abd"; + let output = nfa.simulate(String::from(inp)); + assert!(!output) + } +} diff --git a/src/regex.rs b/src/regex.rs new file mode 100644 index 0000000..66214c0 --- /dev/null +++ b/src/regex.rs @@ -0,0 +1,158 @@ +#![allow(dead_code, unused_variables, unused_mut)] + +macro_rules! Sym { + ($c:expr) => { + RegexToken::Symbol($c) + }; +} + +macro_rules! Star { + ($c:expr) => { + RegexToken::Star(Box::new($c)) + }; +} + +macro_rules! Plus { + ($c:expr) => { + RegexToken::Plus(Box::new($c)) + }; +} + +macro_rules! Concat { + ($a:expr, $b:expr) => { + RegexToken::Concat((Box::new($a), Box::new($b))) + }; +} + +macro_rules! Union { + ($a:expr, $b:expr) => { + RegexToken::Union((Box::new($a), Box::new($b))) + }; +} + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn test_concat() { + assert_eq!( + Regex::new(String::from("ab")), + Concat!(Sym!('a'), Sym!('b')) + ) + } + + #[test] + fn test_plus() { + assert_eq!( + Regex::new(String::from("(a|b)+c")), + Concat!(Plus!(Union!(Sym!('a'), Sym!('b'))), Sym!('c')) + ) + } + + #[test] + fn test_union() { + assert_eq!( + Regex::new(String::from("(a|b)")), + Union!(Sym!('a'), Sym!('b')) + ) + } + + #[test] + fn test_none() { + assert_eq!(Regex::new(String::from("")), RegexToken::None) + } + + #[test] + fn test_star() { + assert_eq!( + Regex::new(String::from("a*b")), + Concat!(Star!(Sym!('a')), Sym!('b')) + ) + } +} + +type ReToken = Box; + +#[derive(Debug, PartialEq, Clone)] +pub enum RegexToken { + Symbol(char), + Concat((ReToken, ReToken)), + Union((ReToken, ReToken)), + Plus(ReToken), + Star(ReToken), + Dot, + None, +} + +#[derive(Debug, PartialEq)] +pub struct Regex {} + +impl Regex { + pub fn new(input: String) -> RegexToken { + Regex::parse(input) + } + + fn parse(input: String) -> RegexToken { + if input.is_empty() { + return RegexToken::None; + } + + let mut chars = input.chars().peekable(); + let mut parsed_token = Self::parse_token(&mut chars); + + Self::parse_expression(&mut parsed_token, &mut chars) + } + + fn parse_expression( + left: &mut RegexToken, + chars: &mut std::iter::Peekable, + ) -> RegexToken { + while let Some(&next) = chars.peek() { + match next { + '|' => { + chars.next(); // Consume '|' + let right = Self::parse_token(chars); + *left = RegexToken::Union((Box::new(left.clone()), Box::new(right))); + } + '*' => { + chars.next(); // Consume '*' + let right = Self::parse_token(chars); + *left = RegexToken::Concat(( + Box::new(RegexToken::Star(Box::new(left.clone()))), + Box::new(right), + )); + } + '+' => { + chars.next(); // Consume '+' + let right = Self::parse_token(chars); + *left = RegexToken::Concat(( + Box::new(RegexToken::Plus(Box::new(left.clone()))), + Box::new(right), + )); + } + _ => { + let right = Self::parse_token(chars); + if let RegexToken::None = right { + } else { + *left = RegexToken::Concat((Box::new(left.clone()), Box::new(right))); + } + } + } + } + left.clone() + } + + fn parse_token(chars: &mut std::iter::Peekable) -> RegexToken { + match chars.next() { + Some('(') => { + let token = Self::parse(chars.collect()); + chars.next(); // Skip ')' + token + } + Some('.') => RegexToken::Dot, + Some(c) if c.is_ascii_alphanumeric() => Sym!(c), + _ => RegexToken::None, + } + } +} -- cgit v1.2.3