# Backlink checker.
#
# Runs a Yahoo! Japan "link:<target>" search, follows every link found on the
# result pages, and prints the inner HTML of anchors on the followed pages
# whose href ends with the target URL.

require "rubygems"
require "mechanize"
require "hpricot"
require "kconv"
require "uri"

# $KCODE is only honoured by Ruby 1.8; on runtimes whose strings carry their
# own encoding it is ignored, so only set it where it still has an effect.
$KCODE = "u" unless "".respond_to?(:encoding)

# Percent-encodes +value+ so it can be used as a single query-string value.
#
# URI.encode is avoided where possible: it leaves reserved characters such as
# "&", "=", "?" unescaped (a target URL with its own query string would break
# the search URL) and it no longer exists on Ruby 3.0+.
def encode_query_value(value)
  if URI.respond_to?(:encode_www_form_component)
    URI.encode_www_form_component(value)
  else
    # Ruby 1.8 fallback: escape everything outside the unreserved set.
    URI.encode(value, /[^A-Za-z0-9_.~-]/)
  end
end

# Follows every link on +page+ (using +agent+ to fetch it) and prints the
# inner HTML of the anchors on the fetched document whose href ends with
# +target+, followed by a "test" marker line.
#
# This is best-effort: any StandardError raised while fetching or parsing a
# single link is reported and the loop moves on to the next link.
def print_backlink_anchors(agent, page, target)
  selector = "a[@href$='" + target + "']"
  page.links.each do |link|
    begin
      doc = Hpricot(agent.get(link.href).body)
      puts((doc / selector).inner_html.toutf8)
      puts "test"
    rescue StandardError => e
      puts "Expection"
      # Extra diagnostics go to stderr so stdout stays as it always was.
      warn "  #{link.href}: #{e.class}: #{e.message}"
    end
  end
end

# Earlier query experiments, kept for reference:
#text="\"http://ntt.co.jp\""
#text="\"a href=\"http://www.toyota.co.jp/\""
#text="link: www.ntt.co.jp -site: ntt.co.jp site: jp"
#text="link:www.ntt.co.jp href=\"www.ntt.co.jp/\""
#text="link:http://toyota.jp"
#text="link:http://www.dnp.co.jp"

# Target site whose backlinks are searched for. Alternatives:
#tmp123="http://www.inpex.co.jp/"
#tmp123="http://www.dnp.co.jp/"
#tmp123="http://toyota.jp/"
#tmp123="http://www.honda.co.jp/"
#tmp123="http://www.tepco.co.jp/"
#tmp123="http://www.ntt.co.jp"
#tmp123="http://www.softbank.co.jp/"
#tmp123="http://www.yoshinoya.com/"
#tmp123="http://www.mcdonalds.co.jp/"
#tmp123="http://www.bk.mufg.jp/"
#tmp123="http://ja.wikipedia.org/wiki/%E4%B8%89%E8%8F%B1%E6%9D%B1%E4%BA%ACUFJ%E9%8A%80%E8%A1%8C/"
tmp123 = "http://www.kddi.com/"

text = "link:" + tmp123
search = encode_query_value(text.toutf8)
puts text

# Base search URL; n=100 asks for 100 results per page.
URL = "http://search.yahoo.co.jp/search?p=" + search + "&n=100"

agent = Mechanize.new

# First result page.
page = agent.get(URL)
print_backlink_anchors(agent, page, tmp123)

# Further result pages: the "b" parameter is the 1-based offset of the first
# result, so with 100 results per page the next pages start at 101, 201, 301.
maxlen = 3
(1..maxlen).each do |i|
  b = i * 100 + 1
  puts "bbbbbbbb" + b.to_s
  page = agent.get(URL + "&b=" + b.to_s)
  puts page.body
  print_backlink_anchors(agent, page, tmp123)
end