字符串相似度算法 lua 实现
--字符串相似度算法 lua 实现function EditDistance( s, t, lim )
local s_len, t_len = #s, #t -- Calculate the sizes of the strings or arrays
if lim and math.abs( s_len - t_len ) >= lim then -- If sizes differ by lim, we can stop here
return lim
end
-- Convert string arguments to arrays of ints (ASCII values)
if type( s ) == "string" then
s = { string.byte( s, 1, s_len ) }
end
if type( t ) == "string" then
t = { string.byte( t, 1, t_len ) }
end
local min = math.min -- Localize for performance
local num_columns = t_len + 1 -- We use this a lot
local d = {} -- (s_len+1) * (t_len+1) is going to be the size of this array
-- This is technically a 2D array, but we're treating it as 1D. Remember that 2D access in the
-- form my_2d_array[ i, j ] can be converted to my_1d_array[ i * num_columns + j ], where
-- num_columns is the number of columns you had in the 2D array assuming row-major order and
-- that row and column indices start at 0 (we're starting at 0).
for i=0, s_len do
d[ i * num_columns ] = i -- Initialize cost of deletion
end
for j=0, t_len do
d[ j ] = j -- Initialize cost of insertion
end
for i=1, s_len do
local i_pos = i * num_columns
local best = lim -- Check to make sure something in this row will be below the limit
for j=1, t_len do
local add_cost = (s[ i ] ~= t[ j ] and 1 or 0)
local val = min(
d[ i_pos - num_columns + j ] + 1, -- Cost of deletion
d[ i_pos + j - 1 ] + 1, -- Cost of insertion
d[ i_pos - num_columns + j - 1 ] + add_cost -- Cost of substitution, it might not cost anything if it's the same
)
d[ i_pos + j ] = val
-- Is this eligible for tranposition?
if i > 1 and j > 1 and s[ i ] == t[ j - 1 ] and s[ i - 1 ] == t[ j ] then
d[ i_pos + j ] = min(
val, -- Current cost
d[ i_pos - num_columns - num_columns + j - 2 ] + add_cost -- Cost of transposition
)
end
if lim and val < best then
best = val
end
end
if lim and best >= lim then
return lim
end
end
return d[ #d ]
end
北大侠客行MUD,中国最好的MUD 有什么用呢?这个算法? 可以针对房间描述的掉字问题进行处理啊 很好,收了yct7 例如比较两个中文字符串
function Descompare(ss, tt)
local st_len, tt_len = #ss, #tt
if ss == tt then
return 1
elseif st_len == tt_len and EditDistance( ss, tt, 5) == 4 then
return 1
elseifmath.abs(st_len - tt_len) == 2 and EditDistance( ss, tt, 3) < 3 then
return 1
else
return 0
end
end
还不全面 可以自己修改啦
[ 本帖最后由 mygame 于 2011-8-7 10:39 PM 编辑 ] 介过太专业了ttk_00 对我的理解,就是挖金可以用!呵呵呵 ttk_25 好东西! 果断精华 学习了。。。
页:
[1]
2