Ruby Mechanize クローラのサンプル
# Sample crawler: reads the theme list page at URL2, follows every link found
# under "#themeList", and for each link found under "#relativeStocks" on the
# theme page prints one tab-separated row to stdout:
#
#   theme number, row number within the theme, text inside the parentheses of
#   the stock link, theme link text, stock link text before the parentheses
#
# Request failures are reported on stderr so stdout carries only data rows.

require "rubygems"
require "mechanize"
require "hpricot"
require "kconv"
require "uri"
require "timeout"

# $KCODE is only meaningful on Ruby 1.8; later interpreters ignore it (and may
# warn), so set it only where it has an effect.
$KCODE = "u" if RUBY_VERSION < "1.9"

URL2 = "http://finance.nifty.com/cs/theme/lst/1.htm"

# Fetches +url+ with +agent+ and returns the resulting page, or nil when the
# request fails with an HTTP error status or times out.
#
# +referer+ (optional) is handed to Mechanize#get; Mechanize also uses it as
# the base when +url+ is relative.
def fetch_page(agent, url, referer = nil)
  agent.get(url, [], referer)
rescue Mechanize::ResponseCodeError => e
  warn "HTTP #{e.response_code}: #{url}"
  nil
rescue Timeout::Error
  # Also covers Net::OpenTimeout / Net::ReadTimeout, which subclass it.
  warn "Time out! #{url}"
  nil
end

# Splits a link label into [name, code]:
#   name - the label with everything from the first "(" onward removed
#   code - the text between the last "(" and the following ")"
# A label without parentheses yields the whole label for both values.
# NOTE(review): labels are presumably "company name(stock code)" - confirm
# against the live page markup.
def split_stock_label(label)
  name = label.gsub(/\(.*/, "")
  code = label.gsub(/.*\(/, "").gsub(/\).*/, "")
  [name, code]
end

agent = Mechanize.new
agent.read_timeout = 10
agent.open_timeout = 10
agent.max_history = 1

list_page = fetch_page(agent, URL2)
# Without the list page there is nothing to crawl; fail with a non-zero status
# instead of crashing later on a nil page.
exit(1) unless list_page

theme_links = Hpricot(list_page.body) / "#themeList" / "a"

theme_links.each_with_index do |theme_link, theme_index|
  theme_name = theme_link.inner_html.toutf8
  href = theme_link[:href]
  # Anchors without a usable href cannot be followed.
  next if href.nil? || href.empty?

  # Pass the list page as referer so a relative href resolves against the page
  # it was found on rather than against whichever page was fetched last.
  theme_page = fetch_page(agent, href, list_page)
  # A failed theme page is skipped; numbering stays tied to list position.
  next unless theme_page

  stock_links = Hpricot(theme_page.body) / "#relativeStocks" / "a"
  stock_links.each_with_index do |stock_link, stock_index|
    name, code = split_stock_label(stock_link.inner_html.toutf8)
    puts [theme_index + 1, stock_index + 1, code, theme_name, name].join("\t")
    # Brief pause after every printed row; this is what paces the crawl.
    sleep 0.1
  end
end

exit