shokai / bot-ahokai (http://twitter.com/ahokai)
twitter bot "ahokai". The license is public domain.
Clone this repository (size: 94.1 KB): HTTPS / SSH
$ hg clone http://bitbucket.org/shokai/bot-ahokai/
| commit 129: | 1343225231ec |
| parent 128: | 0fcc0d7dc269 |
| branch: | default |
| tags: | tip |
マルコフ連鎖作成を別ファイルに
Sho Hashimoto /
shokai
2 months ago
2 months ago
Changed (Δ8.8 KB):
raw changeset »
Post.rb (186 lines added, 0 lines removed)
Twitterers.rb (0 lines added, 1 lines removed)
post3gram.rb (5 lines added, 178 lines removed)
refollow.rb (0 lines added, 1 lines removed)
1 |
# -*- coding: utf-8 -*- |
|
2 |
require 'rubygems' |
|
3 |
require 'active_record' |
|
4 |
require 'kconv' |
|
5 |
require 'yaml' |
|
6 |
gem 'twitter' |
|
7 |
require 'twitter' |
|
8 |
require File.dirname(__FILE__) + "/model_ngram.rb" |
|
9 |
require File.dirname(__FILE__) + '/Twitterers.rb' |
|
10 |
require File.dirname(__FILE__) + '/Buzzwords.rb' |
|
11 |
$KCODE = 'UTF8' |
|
12 |
||
13 |
# Helpers for composing a tweet: Markov-chain text generation from stored
# 3-grams plus the sanity checks applied to the generated text.
#
# The methods call `config` with an implicit receiver; it is not defined in
# this module and is expected to be provided by the including script
# (presumably a Hash-like settings object -- confirm against the caller).
module Post
  # Builds a sentence by picking a random 3-gram and extending it to the left
  # and to the right with neighbouring 3-grams.
  #
  # search - optional pattern (interpolated into a case-insensitive regexp);
  #          when given and at least one 3-gram matches it, the starting
  #          3-gram is chosen among the matches.
  #
  # Returns the generated String, or '' when there are no 3-grams at all.
  #
  # NOTE(review): relies on Ngram records exposing a, b, c, head and tail, and
  # on Twitterers#followers returning a mutable list of screen names -- both
  # are defined outside this block.
  def Post.markovStr(search=nil)
    ngs = Ngram.find(:all)

    if search != nil
      puts "make post with '#{search}'"
      matched = ngs.select { |ng| ng.a + ng.b + ng.c =~ /#{search}/i }
      if matched.empty?
        puts "no match '#{search}'"
      else
        ngs = matched
      end
    end

    # Nothing to build a chain from.
    return '' if ngs.empty?

    start = ngs[rand(ngs.size)]
    words = [start.a, start.b, start.c]
    puts start.to_s

    # A failed lookup is treated like "no continuation" so that a partial
    # sentence is still produced, but the failure is reported.
    find_ngrams = lambda do |conditions|
      begin
        Ngram.find(:all, :conditions => conditions)
      rescue StandardError => e
        $stderr.puts "ngram lookup failed: #{e.message}"
        []
      end
    end

    # Extend to the left until a 3-gram flagged as head is reached.
    puts '---search left 3-grams---'
    left = start
    while left.head != true
      candidates = find_ngrams.call(["b=? and c=?", left.a, left.b])
      break if candidates.empty?
      left = candidates[rand(candidates.size)]
      puts left.to_s
      words.unshift(left.a)
    end

    # Extend to the right; on a 3-gram flagged as tail, stop with
    # probability 0.7.
    puts '---search right 3-grams---'
    right = start
    loop do
      candidates = find_ngrams.call(["a=? and b=?", right.b, right.c])
      break if candidates.empty?
      right = candidates[rand(candidates.size)]
      puts right.to_s
      words.push(right.c)
      break if right.tail && 0.7 > rand
    end

    # The follower list is fetched lazily (at most once) and every follower
    # is used at most once per generated post.
    followers = nil
    pick_follower = lambda do
      if followers == nil
        followers = Twitterers.new(config["user"], config["pass"]).followers
      end
      if followers.size > 0
        chosen = followers[rand(followers.size)]
        followers.delete(chosen)
        chosen
      end
    end

    # Almost always (p = 0.9) redirect an @mention to a random follower.
    words.each_index do |i|
      next unless words[i] =~ /(@[a-zA-Z0-9_]+)/
      next unless 0.9 > rand
      reply = pick_follower.call
      words[i] = "@#{reply}" if reply
    end

    # Same, for mentions that were split into '@' and the user name. This
    # pass runs once over the words; it used to be nested inside the loop
    # above, which re-replaced the same mention once per word.
    (0...words.size - 1).each do |i|
      next unless words[i] == '@' && words[i + 1] =~ /[a-zA-Z0-9_]+/
      next unless 0.9 > rand
      reply = pick_follower.call
      words[i + 1] = reply if reply
    end

    # Very rarely (p = 0.02) address a random follower.
    if 0.02 > rand
      reply = pick_follower.call
      words.unshift("@#{reply}") if reply
    end

    # Join the words into one string.
    post = ''
    words.each do |s|
      # Adjacent alphabetic words are separated by a space.
      post += ' ' if post =~ /[a-zA-Z]\Z/ && s =~ /\A[a-zA-Z]/
      # An @reply or #hashtag that was split into its sigil and its name.
      if post =~ /[@\#]\Z/ && s =~ /[a-zA-Z0-9_]/
        post += s + ' '
        next
      end
      if s =~ /([@#][a-zA-Z0-9_]+)/ # @reply or #hashtag
        post += ' ' unless post.end_with?(' ')
        post += s + ' '
      else
        post += s
      end
    end

    post.strip.to_s
  end

  # Tells whether +post+ matches any entry of the "ngwords" file located next
  # to this source file. Each non-blank line of that file is used as a regexp.
  #
  # Blank lines are skipped: an empty pattern would match every post.
  # Returns true or false; a nil post never matches.
  def Post.has_ngWord(post)
    path = File.join(File.dirname(__FILE__), 'ngwords')
    File.readlines(path).any? do |line|
      pattern = line.chomp
      !pattern.empty? && post =~ /#{pattern}/
    end
  end

  # Tells whether the character count of +post+ lies within
  # config["tweet_len_min"] (default 0) and config["tweet_len_max"]
  # (default 200), both inclusive. A nil post counts as empty.
  def Post.tweet_length_check(post)
    min = config["tweet_len_min"] || 0
    max = config["tweet_len_max"] || 200
    len = post.to_s.split(//u).size
    len.between?(min, max)
  end

  # Removes brackets and quotes that have no partner.
  #
  # An opening bracket is paired with the nearest matching closing bracket to
  # its right. Brackets located inside an already accepted pair are not
  # paired (nesting is not supported) and are therefore removed as well.
  #
  # Returns the corrected String.
  def Post.fix_brackets(str)
    # NOTE(review): the previous table listed ['<','>'] and ['(',')'] twice;
    # the duplicates may originally have been full-width variants -- confirm.
    brackets = [['<','>'],['(',')'],['「','」'],['『','』'],['[',']'],['"','"']]
    closer_for = {}
    brackets.each { |left, right| closer_for[left] = right }
    delimiter = Regexp.union(*brackets.flatten.uniq)

    # The capture group keeps the brackets as separate elements.
    words = str.split(/(#{delimiter})/)

    kept = {}
    last_close = -1
    words.each_with_index do |word, i|
      next if i <= last_close # inside (or the end of) an accepted pair
      closer = closer_for[word]
      next unless closer
      # Search strictly to the right so that a quote cannot pair with itself.
      j = ((i + 1)...words.size).find { |k| words[k] == closer }
      next unless j
      kept[i] = true
      kept[j] = true
      last_close = j
    end

    words.each_index do |i|
      words[i] = "" if words[i] =~ delimiter && !kept[i]
    end
    words.join
  end
end
Up to file-list Twitterers.rb:
3 |
3 |
require 'rubygems' |
4 |
4 |
require 'open-uri' |
5 |
5 |
require 'rexml/document' |
6 |
require 'kconv' |
|
7 |
6 |
|
8 |
7 |
class Twitterers |
9 |
8 |
| … | … | @@ -9,6 +9,7 @@ require 'twitter' |
9 |
9 |
require File.dirname(__FILE__) + "/model_ngram.rb" |
10 |
10 |
require File.dirname(__FILE__) + '/Twitterers.rb' |
11 |
11 |
require File.dirname(__FILE__) + '/Buzzwords.rb' |
12 |
require File.dirname(__FILE__) + '/Post.rb' |
|
12 |
13 |
$KCODE = 'UTF8' |
13 |
14 |
|
14 |
15 |
|
| … | … | @@ -33,180 +34,6 @@ ActiveRecord::Base.establish_connection( |
33 |
34 |
:timeout => 30000 |
34 |
35 |
) |
35 |
36 |
|
36 |
||
37 |
# Builds a sentence by picking a random 3-gram and extending it to the left
# and to the right with neighbouring 3-grams.
#
# search - optional pattern (interpolated into a case-insensitive regexp);
#          when given and at least one 3-gram matches it, the starting 3-gram
#          is chosen among the matches.
#
# Returns the generated String, or '' when there are no 3-grams at all.
#
# NOTE(review): relies on Ngram records exposing a, b, c, head and tail, on
# Twitterers#followers returning a mutable list of screen names and on a
# `config` method -- all defined outside this block.
def markovStr(search=nil)
  ngs = Ngram.find(:all)

  if search != nil
    puts "make post with '#{search}'"
    matched = ngs.select { |ng| ng.a + ng.b + ng.c =~ /#{search}/i }
    if matched.empty?
      puts "no match '#{search}'"
    else
      ngs = matched
    end
  end

  # Nothing to build a chain from.
  return '' if ngs.empty?

  start = ngs[rand(ngs.size)]
  words = [start.a, start.b, start.c]
  puts start.to_s

  # A failed lookup is treated like "no continuation" so that a partial
  # sentence is still produced, but the failure is reported.
  find_ngrams = lambda do |conditions|
    begin
      Ngram.find(:all, :conditions => conditions)
    rescue StandardError => e
      $stderr.puts "ngram lookup failed: #{e.message}"
      []
    end
  end

  # Extend to the left until a 3-gram flagged as head is reached.
  puts '---search left 3-grams---'
  left = start
  while left.head != true
    candidates = find_ngrams.call(["b=? and c=?", left.a, left.b])
    break if candidates.empty?
    left = candidates[rand(candidates.size)]
    puts left.to_s
    words.unshift(left.a)
  end

  # Extend to the right; on a 3-gram flagged as tail, stop with
  # probability 0.7.
  puts '---search right 3-grams---'
  right = start
  loop do
    candidates = find_ngrams.call(["a=? and b=?", right.b, right.c])
    break if candidates.empty?
    right = candidates[rand(candidates.size)]
    puts right.to_s
    words.push(right.c)
    break if right.tail && 0.7 > rand
  end

  # The follower list is fetched lazily (at most once) and every follower is
  # used at most once per generated post.
  followers = nil
  pick_follower = lambda do
    if followers == nil
      followers = Twitterers.new(config["user"], config["pass"]).followers
    end
    if followers.size > 0
      chosen = followers[rand(followers.size)]
      followers.delete(chosen)
      chosen
    end
  end

  # Almost always (p = 0.9) redirect an @mention to a random follower.
  words.each_index do |i|
    next unless words[i] =~ /(@[a-zA-Z0-9_]+)/
    next unless 0.9 > rand
    reply = pick_follower.call
    words[i] = "@#{reply}" if reply
  end

  # Same, for mentions that were split into '@' and the user name. This pass
  # runs once over the words; it used to be nested inside the loop above,
  # which re-replaced the same mention once per word.
  (0...words.size - 1).each do |i|
    next unless words[i] == '@' && words[i + 1] =~ /[a-zA-Z0-9_]+/
    next unless 0.9 > rand
    reply = pick_follower.call
    words[i + 1] = reply if reply
  end

  # Very rarely (p = 0.02) address a random follower.
  if 0.02 > rand
    reply = pick_follower.call
    words.unshift("@#{reply}") if reply
  end

  # Join the words into one string.
  post = ''
  words.each do |s|
    # Adjacent alphabetic words are separated by a space.
    post += ' ' if post =~ /[a-zA-Z]\Z/ && s =~ /\A[a-zA-Z]/
    # An @reply or #hashtag that was split into its sigil and its name.
    if post =~ /[@\#]\Z/ && s =~ /[a-zA-Z0-9_]/
      post += s + ' '
      next
    end
    if s =~ /([@#][a-zA-Z0-9_]+)/ # @reply or #hashtag
      post += ' ' unless post.end_with?(' ')
      post += s + ' '
    else
      post += s
    end
  end

  post.strip.to_s
end
|
155 |
||
156 |
# Tells whether +post+ matches any entry of the "ngwords" file located next to
# this source file. Each non-blank line of that file is used as a regexp.
#
# Blank lines are skipped: an empty pattern would match every post.
# Returns true or false; a nil post never matches.
def has_ngWord(post)
  path = File.join(File.dirname(__FILE__), 'ngwords')
  File.readlines(path).any? do |line|
    pattern = line.chomp
    !pattern.empty? && post =~ /#{pattern}/
  end
end
|
168 |
||
169 |
# Tells whether the character count of +post+ lies within
# config["tweet_len_min"] (default 0) and config["tweet_len_max"]
# (default 200), both inclusive. A nil post counts as empty.
#
# `config` is not defined in this block; it is expected to be provided by the
# surrounding script.
def tweet_length_check(post)
  min = config["tweet_len_min"] || 0
  max = config["tweet_len_max"] || 200
  len = post.to_s.split(//u).size
  len.between?(min, max)
end
|
175 |
||
176 |
# 括弧が対応していなかったら修正する |
|
177 |
# Removes brackets and quotes that have no partner.
#
# An opening bracket is paired with the nearest matching closing bracket to
# its right. Brackets located inside an already accepted pair are not paired
# (nesting is not supported) and are therefore removed as well.
#
# Returns the corrected String.
def fix_brackets(str)
  # NOTE(review): the previous table listed ['<','>'] and ['(',')'] twice; the
  # duplicates may originally have been full-width variants -- confirm.
  brackets = [['<','>'],['(',')'],['「','」'],['『','』'],['[',']'],['"','"']]
  closer_for = {}
  brackets.each { |left, right| closer_for[left] = right }
  delimiter = Regexp.union(*brackets.flatten.uniq)

  # The capture group keeps the brackets as separate elements.
  words = str.split(/(#{delimiter})/)

  kept = {}
  last_close = -1
  words.each_with_index do |word, i|
    next if i <= last_close # inside (or the end of) an accepted pair
    closer = closer_for[word]
    next unless closer
    # Search strictly to the right so that a quote cannot pair with itself.
    j = ((i + 1)...words.size).find { |k| words[k] == closer }
    next unless j
    kept[i] = true
    kept[j] = true
    last_close = j
  end

  words.each_index do |i|
    words[i] = "" if words[i] =~ delimiter && !kept[i]
  end
  words.join
end
|
208 |
||
209 |
||
210 |
37 |
# 検索語 |
211 |
38 |
search = ARGV.shift |
212 |
39 |
if config["blockngwords"] && has_ngWord(search) |
| … | … | @@ -231,14 +58,14 @@ if search == nil && rand < buzzratio |
231 |
58 |
search = nouns2[rand(nouns2.size)] |
232 |
59 |
end |
233 |
60 |
|
234 |
post = |
|
61 |
post = Post.markovStr(search).toutf8 |
|
235 |
62 |
if config["blockngwords"] |
236 |
63 |
for i in 1..30 do |
237 |
break if |
|
64 |
break if Post.tweet_length_check(post) && !Post.has_ngWord(post) |
|
238 |
65 |
puts post.split(//u).size |
239 |
66 |
puts "NG! #{post}" |
240 |
67 |
puts "remake markovString (#{i})" |
241 |
post = |
|
68 |
post = Post.markovStr(search).toutf8 |
|
242 |
69 |
if i >= 10 # remakeあきらめる |
243 |
70 |
puts 'couldn\'t make markov string.' |
244 |
71 |
exit(1) |
| … | … | @@ -246,6 +73,6 @@ if config["blockngwords"] |
246 |
73 |
end |
247 |
74 |
end |
248 |
75 |
puts post |
249 |
post = |
|
76 |
post = Post.fix_brackets(post) |
|
250 |
77 |
twit.update(post) if config["nopost"] == nil |
251 |
78 |
puts post |
