今天有点空闲,想想用Ruby写个NFA试试。从正则表达式构造NFA采用经典的Thompson算法:正则表达式 -> 后缀表达式 -> 构造NFA。构造了NFA后,用之匹配字符串。一句话,写了个玩具的正则表达式引擎,支持concatenation、alternation以及*、?、+量词,不支持反向引用和转义符。测试了下与Ruby自带的正则表达式引擎的性能对比,慢了3倍
。构造NFA没什么问题,主要是匹配运行写的烂,有空再改改。
nfa.rb
module
NFA
class
NFA
def
initialize(state) @state
=
state end
def
step(clist,c)
return
clist
if
clist.size
==
0; nlist
=
[] allNull
=
true matched
=
false clist.each do
|
t
|
if
!t.nil? allNull
=
false
if
t.c
!=-
1
if
t.c
==
c
&&
t.end.type
==
1
then matched
=
true nlist.push(t.end.out1)
if
!t.end.out1.end.nil? nlist.push(t.end.out2)
if
!t.end.out2.end.nil? elsif (t.c
==
c
&&
t.end.type
==
0) then matched
=
true;
return
ListUitls.new_list(t); elsif (t.c
==
-
1
&&
!t.end.nil?) then nlist.push(t.end.out1); nlist.push(t.end.out2); end end end
return
step(nlist, c)
if
(allNull)
return
step(nlist, c)
if
(!matched) nlist end
def
test?(s) match(@state,s) end
def
match(state,s) clist
=
[] clist.push(state.out1); clist.push(state.out2); s.each_byte do
|
c
|
c
=
c
&
0xFF
; clist
=
step(clist, c);
return
false
if
clist.size
==
0 end
return
is_match?(clist) end
def
is_match?(clist) clist.each do
|
t
|
return
true
if
!t.nil?
and
t.c
==-
1
and
t.end
and
t.end.is_matched? end false end end
class
Paren attr_accessor:n_alt,:n_atom end
class
State attr_accessor :out1,:out2,:type
def
initialize(out1,out2) @out1
=
out1 @out2
=
out2 @type
=
1
end
def
is_matched?
return
@type
==
0 end end
class
Transition attr_accessor :c,:end
def
initialize(c) @c
=
c end end
class
Frame attr_accessor :start,:outs
def
initialize(start,outs) @start
=
start @outs
=
outs end end
class
ListUitls
def
self.link(list,state) list.each{
|
t
|
t.end
=
state} end
def
self.append(list1,list2) list1
+
list2 end
def
self.new_list(out) result
=
[] result.push(out) result end end
def
self.compile(re) post
=
re2post(re)
raise
ArgumentError.new,
"
bad regexp!
"
if
post.nil? state
=
post2nfa(post);
raise
RuntimeError.new,
"
construct nfa from postfix fail!
"
if
state.nil?
return
NFA.new(state); end
def
self.post2nfa(postfix) stack
=
[] s
=
nil t
=
t1
=
t2
=
nil e1
=
e2
=
e
=
nil
return
nil
if
postfix.nil? postfix.each_byte do
|
p
|
case p.chr when
'
.
'
: e2
=
stack.pop() e1
=
stack.pop() ListUitls.link(e1.outs, e2.start) stack.push(Frame.new(e1.start, e2.outs)) when
'
|
'
: e2
=
stack.pop() e1
=
stack.pop() t1
=
Transition.new(
-
1
) t2
=
Transition.new(
-
1
) t1.end
=
e1.start t2.end
=
e2.start s
=
State.new(t1, t2) stack.push(Frame.new(s, ListUitls.append(e1.outs, e2.outs))) when
'
?
'
: e
=
stack.pop() t1
=
Transition.new(
-
1
) t2
=
Transition.new(
-
1
) t1.end
=
e.start s
=
State.new(t1, t2) stack.push(Frame.new(s, ListUitls.append(e.outs, ListUitls.new_list(t2)))) when
'
*
'
: e
=
stack.pop() t1
=
Transition.new(
-
1
) t2
=
Transition.new(
-
1
) t1.end
=
e.start s
=
State.new(t1, t2) ListUitls.link(e.outs, s) stack.push(Frame.new(s, ListUitls.new_list(s.out2))) when
'
+
'
: e
=
stack.pop() t1
=
Transition.new(
-
1
) t2
=
Transition.new(
-
1
) t1.end
=
e.start s
=
State.new(t1, t2) ListUitls.link(e.outs, s) stack.push(Frame.new(e.start, ListUitls.new_list(t2)))
else
t
=
Transition.new(p) s
=
State.new(t, Transition.new(
-
1
)) stack.push(Frame.new(s, ListUitls.new_list(s.out1))) end end e
=
stack.pop()
return
nil
if
stack.size()
>
0 end_state
=
State.new(nil, nil) end_state.type
=
0 e.outs.each do
|
tran
|
if
tran.c
!=-
1
t1
=
Transition.new(
-
1
) t2
=
Transition.new(
-
1
) s
=
State.new(t1,t2) tran.end
=
s s.out1.end
=
end_state s.out2.end
=
end_state
else
tran.end
=
end_state end end start
=
e.start
return
start end
def
self.re2post(re) n_alt
=
n_atom
=
0 result
=
""
paren
=
[] re.each_byte do
|
c
|
case c.chr when
'
(
'
then
if
(n_atom
>
1
) then n_atom
-=
1
result
<<
"
.
"
end p
=
Paren.new p.n_alt
=
n_alt p.n_atom
=
n_atom paren.push(p) n_alt
=
n_atom
=
0 when
'
|
'
then
if
(n_atom
==
0)
return
nil end
while
(n_atom
-=
1
)
>
0 result
<<
"
.
"
end n_alt
+=
1
when
'
)
'
then
if
(paren.size()
==
0)
return
nil end
if
(n_atom
==
0)
return
nil end
while
(n_atom
-=
1
)
>
0 result
<<
"
.
"
end
while
(n_alt
>
0) result
<<
"
|
"
n_alt
-=
1
end p
=
paren.pop() n_alt
=
p.n_alt n_atom
=
p.n_atom n_atom
+=
1
when
'
*
'
,
'
+
'
,
'
?
'
:
if
(n_atom
==
0)
return
nil end result
<<
c
else
if
(n_atom
>
1
) n_atom
-=
1
result
<<
"
.
"
end result
<<
c n_atom
+=
1
end end
return
nil
if
paren.size()
>
0
while
( (n_atom
-=
1
)
>
0) result
<<
"
.
"
end
while
(n_alt
>
0) n_alt
-=
1
result
<<
"
|
"
end result end end
使用:
nfa
=
NFA::compile(
"
a(bb)+a(cdf)*
"
)
assert
nfa.test?(
"
abba
"
)
assert
nfa.test?(
"
abbbba
"
)
assert
!nfa.test?(
"
a
"
)
assert
!nfa.test?(
"
aa
"
)
assert
nfa.test?(
"
abbacdf
"
)
assert
nfa.test?(
"
abbbbacdfcdf
"
)
assert
!nfa.test?(
"
bbbbacdfcdf
"
)
assert
!nfa.test?(
"
abbbacdfcdf
"
)
assert
!nfa.test?(
"
abbbbacdfdf
"
)
assert
!nfa.test?(
"
abbbbacdfdfg
"
)
文章转自庄周梦蝶 ,原文发布时间2008-02-25
相关资源:敏捷开发V1.0.pptx
转载请注明原文地址: https://yun.8miu.com/read-128322.html