def extract_features(words, tags, n0, n, stack, parse):
    """Build the sparse feature dict for one parser state.

    Args:
        words: Sequence of word strings, indexed by token position.
        tags: Sequence of tag strings, parallel to ``words``.
        n0: Index of the first token of the buffer. ``words[n0]`` and
            ``tags[n0]`` are read unconditionally, so it must be a valid
            index.
        n: Buffer limit; tokens at index ``>= n`` are never read as buffer
            look-ahead.
        stack: Sequence of token indices; ``stack[-1]`` is the top.
        parse: Object exposing ``lefts`` and ``rights``. Each is indexed by a
            token index and yields a sequence of child token indices.

    Returns:
        A dict mapping feature-name strings to ``1``. Absent context is
        represented by ``''`` inside the feature names, and templates whose
        slots are all empty/zero are skipped where the code checks for that.
    """

    def get_stack_context(depth, stack, data):
        """Return ``data`` for the top three stack items, padded with ``''``."""
        if depth >= 3:
            return data[stack[-1]], data[stack[-2]], data[stack[-3]]
        elif depth >= 2:
            return data[stack[-1]], data[stack[-2]], ''
        elif depth == 1:
            return data[stack[-1]], '', ''
        else:
            return '', '', ''

    def get_buffer_context(i, n, data):
        """Return ``data`` for tokens ``i``, ``i+1``, ``i+2``, padded with ``''``.

        Look-ahead positions at or beyond ``n`` are replaced by ``''``;
        ``data[i]`` itself is always read.
        """
        if i + 1 >= n:
            return data[i], '', ''
        elif i + 2 >= n:
            return data[i], data[i + 1], ''
        else:
            return data[i], data[i + 1], data[i + 2]

    def get_parse_context(word, deps, data):
        """Return ``(valency, data[last child], data[second-to-last child])``.

        ``word == -1`` is the "no such token" sentinel (empty stack) and
        yields ``(0, '', '')``, as does a token with no children.
        """
        if word == -1:
            return 0, '', ''
        children = deps[word]
        valency = len(children)
        if not valency:
            return 0, '', ''
        elif valency == 1:
            return 1, data[children[-1]], ''
        else:
            return valency, data[children[-1]], data[children[-2]]

    features = {}

    # Naming scheme for the context pieces below:
    #   W* = word form, T* = tag, V* = valency (number of children),
    #   s0/s1/s2 = top three stack items, n0/n1/n2 = first three buffer items,
    #   *b1/*b2 = last two entries of parse.lefts[x],
    #   *f1/*f2 = last two entries of parse.rights[x].
    depth = len(stack)
    s0 = stack[-1] if depth else -1  # -1 marks "stack is empty"

    Ws0, Ws1, Ws2 = get_stack_context(depth, stack, words)
    Ts0, Ts1, Ts2 = get_stack_context(depth, stack, tags)

    Wn0, Wn1, Wn2 = get_buffer_context(n0, n, words)
    Tn0, Tn1, Tn2 = get_buffer_context(n0, n, tags)

    # The parse.rights context of n0 was computed by the previous revision but
    # never used by any template, so it is no longer fetched.
    Vn0b, Wn0b1, Wn0b2 = get_parse_context(n0, parse.lefts, words)
    _, Tn0b1, Tn0b2 = get_parse_context(n0, parse.lefts, tags)

    Vs0b, Ws0b1, Ws0b2 = get_parse_context(s0, parse.lefts, words)
    _, Ts0b1, Ts0b2 = get_parse_context(s0, parse.lefts, tags)

    Vs0f, Ws0f1, Ws0f2 = get_parse_context(s0, parse.rights, words)
    _, Ts0f1, Ts0f2 = get_parse_context(s0, parse.rights, tags)

    # Distance between s0 and n0, capped at 5.
    # NOTE(review): the guard tests s0 != 0, but an empty stack is encoded as
    # s0 == -1, which therefore yields min(n0 + 1, 5). Kept as-is so feature
    # values stay compatible with existing weights -- confirm intent.
    Ds0n0 = min(n0 - s0, 5) if s0 != 0 else 0

    features['bias'] = 1

    # Unigram word / tag features (position-independent by design of the key).
    for w in (Wn0, Wn1, Wn2, Ws0, Ws1, Ws2, Wn0b1, Wn0b2,
              Ws0b1, Ws0b2, Ws0f1, Ws0f2):
        if w:
            features['w=%s' % w] = 1
    for t in (Tn0, Tn1, Tn2, Ts0, Ts1, Ts2, Tn0b1, Tn0b2,
              Ts0b1, Ts0b2, Ts0f1, Ts0f2):
        if t:
            features['t=%s' % t] = 1

    # Word/tag pairs, keyed by slot index.
    for i, (w, t) in enumerate(((Wn0, Tn0), (Wn1, Tn1), (Wn2, Tn2),
                                (Ws0, Ts0))):
        if w or t:
            features['%d w=%s, t=%s' % (i, w, t)] = 1

    # Bigram features over s0 / n0 (emitted unconditionally).
    features['s0w=%s, n0w=%s' % (Ws0, Wn0)] = 1
    features['wn0tn0-ws0 %s/%s %s' % (Wn0, Tn0, Ws0)] = 1
    features['wn0tn0-ts0 %s/%s %s' % (Wn0, Tn0, Ts0)] = 1
    features['ws0ts0-wn0 %s/%s %s' % (Ws0, Ts0, Wn0)] = 1
    features['ws0-ts0 tn0 %s/%s %s' % (Ws0, Ts0, Tn0)] = 1
    features['wt-wt %s/%s %s/%s' % (Ws0, Ts0, Wn0, Tn0)] = 1
    features['tt s0=%s n0=%s' % (Ts0, Tn0)] = 1
    features['tt n0=%s n1=%s' % (Tn0, Tn1)] = 1

    # Tag trigram features, keyed by template index.
    # NOTE(review): templates 3 and 4 are identical, and the last template
    # repeats Ts1 (possibly meant Ts2). Left untouched because the index is
    # part of the feature key -- confirm before changing.
    trigrams = ((Tn0, Tn1, Tn2), (Ts0, Tn0, Tn1), (Ts0, Ts1, Tn0),
                (Ts0, Ts0f1, Tn0), (Ts0, Ts0f1, Tn0), (Ts0, Tn0, Tn0b1),
                (Ts0, Ts0b1, Ts0b2), (Ts0, Ts0f1, Ts0f2), (Tn0, Tn0b1, Tn0b2),
                (Ts0, Ts1, Ts1))
    for i, (t1, t2, t3) in enumerate(trigrams):
        if t1 or t2 or t3:
            features['ttt-%d %s %s %s' % (i, t1, t2, t3)] = 1

    # Valency and distance features: (word-or-tag, integer) pairs.
    vw = ((Ws0, Vs0f), (Ws0, Vs0b), (Wn0, Vn0b))
    vt = ((Ts0, Vs0f), (Ts0, Vs0b), (Tn0, Vn0b))
    d = ((Ws0, Ds0n0), (Wn0, Ds0n0), (Ts0, Ds0n0), (Tn0, Ds0n0),
         ('t' + Tn0 + Ts0, Ds0n0), ('w' + Wn0 + Ws0, Ds0n0))
    for i, (w_t, v_d) in enumerate(vw + vt + d):
        if w_t or v_d:
            features['val/d-%d %s %d' % (i, w_t, v_d)] = 1

    return features