Implement regular expression matching with support for '.'
and '*'
.
'.' Matches any single character. '*' Matches zero or more of the preceding element. The matching should cover the entire input string (not partial). The function prototype should be: bool isMatch(const char *s, const char *p) Some examples: isMatch("aa","a") → false isMatch("aa","aa") → true isMatch("aaa","aa") → false isMatch("aa", "a*") → true isMatch("aa", ".*") → true isMatch("ab", ".*") → true isMatch("aab", "c*a*b") → true
We can use recursive calls to process the matching.
C++ Code:
/* * func: regular_expression_match * goal: to check if the string is matched with the current expression * @param s: a pointer to the string * @param p: a pointer to the pattern * return: true or false */ /* * Idea: use recursion to check every case * Code reference: http://leetcode.com/2011/09/regular-expression-matching.html */ bool regular_expression_match(const char *s, const char *p){ //If pattern string reaches the end if(*p == '\0'){ return *s == '\0'; } //next char is not '*': if(*(p+1) != '*'){ //If the current char of two string matches, then go to the next one //Else return false return ((*p == *s) || (*p == '.' && *s != '\0')) && regular_expression_match(s+1, p+1); } //If current char matches and next patter char is * while((*p == *s) || (*p == '.' && *s != '\0')){ //Consider the current char occurs 0 times, like "ab" by "a*ab" if( regular_expression_match(s, p+2)){ return true; } //Succeed match one char ++s; } //Case 1: current char doesn't match, which means the above while loop won't run //Case 2: After matching '*', matching the remaining part return regular_expression_match(s, p+2); }
For python case, it took me a very long time since when the above code was converted to python it cannot pass some case like "aaaaaaaaaaaaab", "a*a*a*a*a*a*a*a*a*a*c". Below is a method I found online, the trick is to simplify the pattern before matching. The method is very smart. For the previous case, it will be reduced to "a*c", which eliminates many recursions. But its matching part is a little more complicated than the best way I found.(200ms vs 174 ms)
Python Code 1(referenced online):
#link: http://codesays.com/2014/solution-to-regular-expression-matching-by-leetcode/ class Solution: # @return a pre-processed pattern string # Make the pattern compact. For example, we will shorten "a*.*" and "a*a*" into # ".*" and "a*" respectively. def _compile(self, p): result = "" while len(p) != 0: if len(p) >= 2 and p[1] == "*": # Meet with a new *, without * neighbors if len(result) < 2 or result[-1] != "*": result += p[0:2] # Meet with a same * segment as previous one elif result[-2] == p[0] or result[-2] == ".": pass # Reduce all * segments around .* elif p[0] == ".": while len(result) >= 2 and result[-1] == "*": result = result[:-2] result += p[:2] # Meet with a different * segment else: result += p[0:2] p = p[2:] else: result += p[0] p = p[1:] return result # @return True if s matches p completely # False else. def _isMatch(self, s, p): # Both string and pattern reach the end. Nothing left to deal with. if len(s) == 0 and len(p) == 0: return True # Pattern ends. But string left some substring unmatched elif len(p) == 0: return False # Meet with a * segment in pattern elif len(p) >= 2 and p[1] == "*": return self._isMatchStar(s, p[2:], p[0]) # String ends. But pattern remains some part. And the remaining part is not # a * segment. In other words, the remaining part DO need some subtring to # match. But we do not have any more. elif len(s) == 0: return False # Single character matches. Recursively check the next position. elif s[0] == p[0] or p[0] == ".": return self._isMatch(s[1:], p[1:]) # Single character fails to match. else: return False # @return True if s matches (ch)*p completely # False else. def _isMatchStar(self, s, p, ch): # Works like a DFS algorithm while True: # Simply try to skip the * segment. That is,the * segment does not consume # any thing in this round. if self._isMatch(s, p): return True # The * segment would like to, but has noting to consume. elif len(s) == 0: return False # This * segment consume one quanlified character in the string elif s[0] == ch or ch == ".": s = s[1:] # This * segment would like to, but has no quanlified content to consume. else: return False def isMatch(self, s, p): assert p == "" or p[0] != "*" return self._isMatch(s, self._compile(p))
Python Code 2(based on solution referenced online):
# @return a pre-processed pattern string # Make the pattern compact. For example, we will shorten "a*.*" and "a*a*" into # ".*" and "a*" respectively. def pattern_simplify(p): result = "" while len(p) != 0: if len(p) >= 2 and p[1] == "*": # Meet with a new *, without * neighbors if len(result) < 2 or result[-1] != "*": result += p[0:2] # Meet with a same * segment as previous one elif result[-2] == p[0] or result[-2] == ".": pass # Reduce all * segments around .* elif p[0] == ".": while len(result) >= 2 and result[-1] == "*": result = result[:-2] result += p[:2] # Meet with a different * segment else: result += p[0:2] p = p[2:] else: result += p[0] p = p[1:] return result # func: regular expression matching # @param s: the input string # @param p: the input pattern # @return: True of False def is_match(s, p): #when p goes to the end if len(p) == 0: return len(s) == 0 #If p's length is 1 if len(p) == 1 or p[1] != '*': #If current pattern char doesn't match, return false if len(s) < 1 or (p[0] != '.' and s[0] != p[0]): return False #Continue to match the next one return is_match(s[1:], p[1:]) else: i = -1 while i < len(s) and (i < 0 or p[0] == '.' or p[0] == s[i]): if is_match(s[i+1:], p[2:]): return True i += 1 return False def isMatch(s, p): return is_match(s, pattern_simplify(p))
Sometimes the test system is like a jerk. However it can help us know more about how to deal with jerk.
Reference Link:
1.Regular Expression Matching: http://leetcode.com/2011/09/regular-expression-matching.html
2.Solution to Regular Expression Matching by LeetCode: http://codesays.com/2014/solution-to-regular-expression-matching-by-leetcode/
3.LeetCode – Regular Expression Matching in Java: http://www.programcreek.com/2012/12/leetcode-regular-expression-matching-in-java/
No comments:
Post a Comment