# 特定企業ホームページのアンカーリンクを表示
# (Displays the anchor links that point to a specific company's homepage.)

require "rubygems"
require "mechanize"
require "hpricot"
require "kconv"
require "uri"
# Ruby 1.8 switch selecting UTF-8 string handling; Ruby 1.9 and later ignore it
# (with a warning), so it is kept only for old interpreters.
$KCODE="u"


# Earlier query experiments and alternative target sites, kept for reference.
# Exactly one `tmp123=` line should be active at a time.
#text="\"http://ntt.co.jp\""
#text="\"a href=\"http://www.toyota.co.jp/\""
#text="link: www.ntt.co.jp -site: ntt.co.jp site: jp"
#text="link:www.ntt.co.jp href=\"www.ntt.co.jp/\""
#text="link:http://toyota.jp"
#tmp123="http://www.inpex.co.jp/"
#tmp123="http://www.dnp.co.jp/"
#tmp123="http://toyota.jp/"
#tmp123="http://www.honda.co.jp/"
#tmp123="http://www.tepco.co.jp/"
# Target URL: it is both the subject of the "link:" search query and the suffix
# that an anchor's href must end with when result pages are scanned later on.
tmp123="http://www.kddi.com/"
#tmp123="http://www.ntt.co.jp"
#tmp123="http://www.softbank.co.jp/"
#tmp123="http://www.yoshinoya.com/"
#tmp123="http://www.mcdonalds.co.jp/"
#tmp123="http://www.bk.mufg.jp/"
#tmp123="http://ja.wikipedia.org/wiki/%E4%B8%89%E8%8F%B1%E6%9D%B1%E4%BA%ACUFJ%E9%8A%80%E8%A1%8C/"
#text="link:http://www.dnp.co.jp"a

# Search query sent to the search engine (presumably its "link:" operator,
# i.e. "pages linking to this URL" -- confirm against the engine's syntax).
text = "link:#{tmp123}"

# Encode the query as a single form component. URI.encode was removed in
# Ruby 3.0, and where it exists it leaves reserved characters (":", "/", "&",
# "#", "?") untouched, so a target URL containing "&" or "#" would corrupt the
# query string. URI.encode_www_form_component escapes all of them.
search = URI.encode_www_form_component(text.toutf8)
puts text

# Base URL of the search request; later code appends "&b=<offset>" to it.
# NOTE(review): "n=100" presumably asks for 100 results per page -- confirm.
URL = "http://search.yahoo.co.jp/search?p=#{search}&n=100"

# Follows every link found on +page+ and prints the anchors of each fetched
# document whose href ends with +target+.
#
# agent  - Mechanize agent used to fetch the linked documents.
# page   - already fetched page (here: a search result page) whose links are
#          followed one by one.
# target - string an anchor's href has to end with in order to be printed.
#
# For every link the inner HTML of the matching anchors is printed (an empty
# line when nothing matches), followed by the marker line "test". A failure
# while fetching or parsing one link is reported on stdout and does not abort
# the scan of the remaining links.
def report_anchor_links(agent, page, target)
  # The selector does not depend on the link, so build it once per page.
  selector = "a[@href$='#{target}']"

  page.links.each do |link|
    begin
      document = Hpricot(agent.get(link.href).body)
      puts((document / selector).inner_html.toutf8)
      puts "test"
    rescue => e
      # Best effort: dead links, links without href, non-HTML responses etc.
      # are reported with their cause instead of being silently swallowed.
      puts "Exception: #{e.class}: #{e.message}"
    end
  end
end

agent = Mechanize.new
page = agent.get(URL)
# Debug aid: uncomment to dump the raw first result page.
#puts page.body

# Scan the first result page.
report_anchor_links(agent, page, tmp123)

# Number of additional result pages to request after the first one.
maxlen = 3
(1..maxlen).each do |i|
  # NOTE(review): "b" is presumably the 1-based index of the first result on
  # the requested page (101, 201, 301 with 100 results per page) -- confirm.
  b = i * 100 + 1
  puts "bbbbbbbb#{b}"
  page = agent.get("#{URL}&b=#{b}")
  puts page.body
  report_anchor_links(agent, page, tmp123)
end