shokai / Collective Intelligence study
Translated from Python to Ruby
Clone this repository (size: 10.9 MB): HTTPS / SSH
$ hg clone http://bitbucket.org/shokai/collective-intelligence-study/
| commit 51: | aea87de2da8d |
| parent 50: | 772a026b96b6 |
| branch: | default |
p.48 K平均法まで
Sho Hashimoto /
shokai
16 months ago
16 months ago
Changed (Δ2.1 KB):
raw changeset »
03/clusters.rb (65 lines added, 0 lines removed)
03/generatefeedvector-jp.rb (6 lines added, 1 line removed)
Up to file-list 03/clusters.rb:
| … | … | @@ -8,6 +8,69 @@ include Magick |
8 |
8 |
|
9 |
9 |
class Clusters |
10 |
10 |
|
11 |
# Groups rows into k clusters with k-means clustering.
#
# rows           - Array of equal-length numeric Arrays, one vector per item.
# distance       - Symbol naming a method of this object that is called as
#                  (centroid, row) and returns a number, smaller meaning
#                  closer. An object responding to #call is accepted as well.
# k              - number of clusters to build.
# max_iterations - upper bound on the number of refinement passes.
#
# Prints 'Iteration N' at the start of every pass.
#
# Returns an Array of k Arrays; each inner Array holds the indices (into
# rows) of the members of one cluster. The result is returned as soon as two
# consecutive passes produce the same assignment, or after max_iterations
# passes, whichever comes first. An empty rows yields k empty clusters.
def kcluster(rows, distance=:pearson, k=4, max_iterations=100)
  return Array.new(k) { [] } if rows.empty?

  dims = rows[0].length
  # Resolve the distance function once instead of on every comparison.
  measure = distance.respond_to?(:call) ? distance : method(distance)

  # Determine the minimum and maximum value of every column.
  ranges = (0...dims).map do |i|
    column = rows.map { |row| row[i] }
    [column.min, column.max]
  end

  # Place k centroids at random positions inside those ranges.
  clusters = Array.new(k) do
    ranges.map { |lo, hi| rand * (hi - lo) + lo }
  end

  # Recompute the centroids until nothing changes or the pass limit is hit.
  lastmatches = nil
  bestmatches = Array.new(k) { [] }
  max_iterations.times do |t|
    puts 'Iteration ' + t.to_s
    bestmatches = Array.new(k) { [] }

    # Find the closest centroid for every row. On a tie the centroid with
    # the lowest index wins because the comparison is strict.
    rows.each_with_index do |row, j|
      bestmatch = 0
      best_d = measure.call(clusters[0], row)
      (1...k).each do |i|
        d = measure.call(clusters[i], row)
        if d < best_d
          bestmatch = i
          best_d = d
        end
      end
      bestmatches[bestmatch].push(j)
    end

    # Finished when the assignment is the same as in the previous pass.
    return bestmatches if bestmatches == lastmatches
    lastmatches = bestmatches

    # Move every centroid to the mean of its members. A cluster without
    # members keeps its current centroid.
    bestmatches.each_with_index do |members, i|
      next if members.empty?
      sums = Array.new(dims, 0.0)
      members.each do |rowid|
        rows[rowid].each_with_index { |value, m| sums[m] += value }
      end
      # Divide by the size of this cluster to turn the sums into means.
      clusters[i] = sums.map { |sum| sum / members.length }
    end
  end

  # The pass limit was reached without converging: return the last assignment.
  bestmatches
end
|
73 |
||
11 |
74 |
# 行列の入れ替え |
12 |
75 |
def rotatematrix(data) |
13 |
76 |
newdata = Array.new |
| … | … | @@ -174,6 +237,8 @@ class Clusters |
174 |
237 |
|
175 |
238 |
# ピアソン相関距離を計算 |
176 |
239 |
def pearson(v1,v2) |
240 |
v1 = [v1] if v1.class != Array |
|
241 |
v2 = [v2] if v2.class != Array |
|
177 |
242 |
# 単純な合計 |
178 |
243 |
sum1 = 0 |
179 |
244 |
v1.each{ |n| |
Up to file-list 03/generatefeedvector-jp.rb:
| … | … | @@ -35,7 +35,7 @@ class FeedVectorGeneratorJp |
35 |
35 |
def getWordsByKind(node, kind) |
36 |
36 |
list = Array.new |
37 |
37 |
while node do |
38 |
f = node.feature.split(/,/) |
|
38 |
f = node.feature.split(/,/) |
|
39 |
39 |
if /#{kind}/ =~ f[0] |
40 |
40 |
list.push(node.surface) |
41 |
41 |
end |
| … | … | @@ -56,6 +56,11 @@ class FeedVectorGeneratorJp |
56 |
56 |
n = mecab.parseToNode( CGI.unescapeHTML(txt.toutf8) ) |
57 |
57 |
# verbs = getWordsByKind(n, '動詞') |
58 |
58 |
nouns = getWordsByKind(n, '名詞') # 名詞のみ |
59 |
||
60 |
words = Array.new |
|
61 |
nouns.each{ |w| |
|
62 |
words.push(w) if w =~ /\w{2,}/ # 2文字以上 |
|
63 |
} |
|
59 |
64 |
return nouns |
60 |
65 |
end |
61 |
66 |
