-- Type of regular expressions over some alphabet: data RE a = Lit a -- a | Conc (RE a) (RE a) -- (re1 . re2) | Alt (RE a) (RE a) -- (re1 | re2) | Star (RE a) -- re* -- "abc" abc = Conc (Lit 'a') (Conc (Lit 'b') (Lit 'c')) -- "a|b|c" abcAlt = Alt (Lit 'a') (Alt (Lit 'b') (Lit 'c')) -- "a(b*)" abstar = Conc (Lit 'a') (Star (Lit 'b')) -- Further abstraction: "plus" operator plus :: RE a -> RE a plus re = Conc re (Star re) -- "ab+" abplus = Conc (Lit 'a') (plus (Lit 'b')) -- Define semantics of regular expression: -- non-deterministic operation returning a possible string -- associated to the regular expression sem :: RE a -> [a] sem (Lit c) = [c] sem (Conc a b) = sem a ++ sem b sem (Alt a b) = sem a ? sem b sem (Star a) = [] ? sem (Conc a (Star a)) -- match a string against a regular expression: match :: RE Char -> String -> Bool match re s | sem re == s = True -- Unix `grep`: search in a string for a substring matching a regular expression grep :: RE Char -> String -> Bool grep re s | _ ++ sem re ++ _ == s = True -- search in a string for a substring matching a regular expression -- and return the start and stop position of this match: grepPos :: RE Char -> String -> (Int,Int) grepPos re s | xs ++ sem re ++ ys == s = (length xs, length s - length ys) where xs, ys free