# ruby mechanize クローラのサンプル

# ruby mechanize クローラのサンプル

require "rubygems"
require "mechanize"
require "hpricot"
require "kconv"
require "uri"
#require "timeout"
#require "resolv"
#require "resolv-replace"
# Crawler script: fetches the theme list page at URL2, follows every link
# found under "#themeList", and for each linked page prints one tab-separated
# row per link found under "#relativeStocks":
#
#   theme index, row index within the theme, text inside "(...)",
#   theme link text, text before "("
#
# Both indexes start at 1; the row index restarts for every theme.

require "timeout" # Timeout::Error is rescued below

# $KCODE only has an effect on Ruby 1.8; newer interpreters merely warn.
$KCODE = "u" if RUBY_VERSION < "1.9"

URL2 = "http://finance.nifty.com/cs/theme/lst/1.htm"

agent = Mechanize.new

# Timeouts are in seconds; only the most recent page is kept in history.
agent.read_timeout = 10
agent.open_timeout = 10
agent.max_history = 1

# Without the list page there is nothing to crawl, so stop with a message on
# stderr instead of continuing with a nil page.
begin
  page = agent.get(URL2)
rescue Mechanize::ResponseCodeError => ex
  abort "HTTP error #{ex.response_code} while fetching #{URL2}"
rescue Timeout::Error
  # NOTE(review): assumes open/read timeouts surface as Timeout::Error (or a
  # subclass) from Mechanize -- confirm against the installed version.
  abort "Time out!"
end

theme_links = (Hpricot(page.body) / "#themeList") / "a"

theme_links.each_with_index do |theme_link, theme_index|
  theme_no = theme_index + 1
  theme_title = theme_link.inner_html.toutf8
  href = theme_link[:href]

  if href.nil?
    warn "skipping theme #{theme_no}: link has no href"
    next
  end

  begin
    # Pass the list page as referer so that a relative href is resolved
    # against the page it was found on, not against whichever theme page the
    # agent visited last.
    theme_page = agent.get(href, [], page)
  rescue Mechanize::ResponseCodeError, Timeout::Error => ex
    # One unreachable theme page should not end the whole crawl.
    warn "skipping theme #{theme_no} (#{href}): #{ex.class}: #{ex.message}"
    next
  end

  stock_links = (Hpricot(theme_page.body) / "#relativeStocks") / "a"

  stock_links.each_with_index do |stock_link, stock_index|
    label = stock_link.inner_html.toutf8

    # Link text is presumably of the form "name(code)" -- verify against the
    # live markup. Everything before the first "(" is kept as the name; the
    # text between the last "(" on a line and the following ")" as the code.
    name = label.gsub(/\(.*/, "")
    code = label.gsub(/.*\(/, "").gsub(/\).*/, "")

    puts [theme_no, stock_index + 1, code, theme_title, name].join("\t")

    # NOTE(review): the pause is taken per printed row rather than per HTTP
    # request; kept as in the original script.
    sleep 0.1
  end
end

exit