wikipediaから元号と西暦の換算表を作る
元号一覧(日本)のページをスプレイピング
http://ja.wikipedia.org/wiki/%E5%85%83%E5%8F%B7%E4%B8%80%E8%A6%A7_(%E6%97%A5%E6%9C%AC)
出力例
大化1,大化一,大化元,645
大化2,大化二,大化二,646
大化3,大化三,大化三,647
大化4,大化四,大化四,648
大化5,大化五,大化五,649
大化6,大化六,大化六,650
白雉1,白雉一,白雉元,650
白雉2,白雉二,白雉二,651
白雉3,白雉三,白雉三,652
.
.
.
平成21,平成二十一,平成二十一,2009
平成22,平成二十二,平成二十二,2010
平成23,平成二十三,平成二十三,2011
平成24,平成二十四,平成二十四,2012
平成25,平成二十五,平成二十五,2013
ソース:
# encoding: utf-8
#
# Scrapes the Japanese Wikipedia page "元号一覧 (日本)" (list of Japanese era
# names) and prints one CSV row per era year to stdout:
#
#   <era><n>,<era><n in kanji>,<era><n in kanji, "元" for year 1>,<western year>
#
# NOTE(review): the row/column positions used below are taken over unchanged
# from the original script and depend on the layout of the Wikipedia table at
# the time it was written -- confirm against the current page before relying
# on the output.
require 'rubygems'
require 'hpricot'
require 'open-uri'

# Converts an integer to kanji numerals (e.g. 21 -> "二十一", 2013 -> "二千十三").
#
# n - Integer in 0..9999. 0 yields "" (there is no digit for zero), and a
#     leading "一" is omitted before 千/百/十 (100 -> "百").
#
# Returns the kanji String.
# Raises ArgumentError when n is not an Integer in 0..9999; without this check
# such input fails with an opaque nil error inside the loop.
def num_to_k(n)
  unless n.is_a?(Integer) && n >= 0 && n <= 9999
    raise ArgumentError, "num_to_k supports integers 0..9999, got #{n.inspect}"
  end

  digits = ["", "一", "二", "三", "四", "五", "六", "七", "八", "九"]
  units = [[1000, "千"], [100, "百"], [10, "十"]]

  rest = n
  str = ""
  units.each do |value, unit|
    count = rest / value
    # "一" is implicit in front of a unit character, so only 2..9 get a digit.
    str << ((count == 1 ? "" : digits[count]) + unit) if count > 0
    rest %= value
  end
  str << digits[rest]
  str
end

# Extracts the Western start year from a table cell.
#
# The year is read from the last parenthesised part of the cell text, cutting
# everything from the first "年" on. Both ASCII and full-width parentheses are
# accepted. When the cell has no parentheses the whole text is used, which
# normally yields 0.
#
# The original pattern /.*((.*)).*/ had no literal parentheses, so its greedy
# leading ".*" swallowed the whole cell and the capture was always empty; every
# start year came out as 0.
#
# cell_text - String, the inner text of the cell.
#
# Returns the year as an Integer (0 when no number is found).
def western_year_from(cell_text)
  flat = cell_text.chomp.gsub("\n", "")
  inside = flat[/.*[(（](.*)[)）]/, 1]
  (inside || flat).sub(/年.*/, "").to_i
end

# Prints the CSV rows for one era.
#
# gengou             - String era name.
# years              - Integer number of years the era lasted.
# first_western_year - Integer Western year of era year 1.
def print_era_rows(gengou, years, first_western_year)
  years.times do |j|
    n = j + 1
    kanji = num_to_k(n)
    # Year 1 of an era is conventionally written "元年", hence the third column.
    gannen = (j == 0) ? "元" : kanji
    puts [gengou.to_s + n.to_s,
          gengou.to_s + kanji,
          gengou.to_s + gannen,
          (first_western_year + j).to_s].join(",")
  end
end

url = "http://ja.wikipedia.org/wiki/%E5%85%83%E5%8F%B7%E4%B8%80%E8%A6%A7_(%E6%97%A5%E6%9C%AC)"
# URI.open exists from Ruby 2.5 on and is the only form that still accepts a
# URL on Ruby 3.0+; older rubies only have the open-uri Kernel#open hook.
# The block form closes the handle once the body has been read.
html = if URI.respond_to?(:open)
         URI.open(url) { |f| f.read }
       else
         open(url) { |f| f.read }
       end
doc = Hpricot(html)

# Era length in years. Kept outside the loop on purpose: a later row without
# a fourth cell reuses the value of the previous row, as the original did.
# NOTE(review): confirm that this carry-over is really wanted.
year = 0

(doc / "tr").each_with_index do |row, row_index|
  # Rows 0 and 1 are skipped -- presumably table headers; TODO confirm.
  next if row_index < 2

  gengou = ((row / "th") / "a").inner_text
  next if gengou.empty? # row carries no era name

  cells = row / "td"

  if row_index == 2
    # The first data row is read differently from the rest: the start year
    # comes from the third link inside the row's cells.
    length_cell = cells[3]
    start_link = (cells / "a")[2]
    if length_cell.nil? || start_link.nil?
      warn "skipping row #{row_index} (#{gengou}): unexpected table layout"
      next
    end
    year = length_cell.inner_text.gsub("年", "").to_i
    w_f_year = start_link.inner_text.gsub("年", "").to_i
  else
    if cells[1].nil?
      # Without a start-date cell there is no Western year to count from.
      warn "skipping row #{row_index} (#{gengou}): no start-date cell"
      next
    end
    year = cells[3].inner_text.gsub("年", "").to_i unless cells[3].nil?
    w_f_year = western_year_from(cells[1].inner_text)
  end

  print_era_rows(gengou, year, w_f_year)
end
数字から漢数字は
http://d.hatena.ne.jp/redcat_prog/20121118/1353217490
を利用させていただきました。
元号と西暦の換算は、
専門家ではないので微妙にいろいろ間違っているかもしれません。
未来と元年フラグをつけたもの
例
大化1,大化一,大化元,645,1
大化2,大化二,大化二,646,0
大化3,大化三,大化三,647,0
大化4,大化四,大化四,648,0
大化5,大化五,大化五,649,0
大化6,大化六,大化六,650,0
白雉1,白雉一,白雉元,650,1
白雉2,白雉二,白雉二,651,0
白雉3,白雉三,白雉三,652,0
…
平成2016,平成二千十六,平成二千十六,4004,0
平成2017,平成二千十七,平成二千十七,4005,0
平成2018,平成二千十八,平成二千十八,4006,0
平成2019,平成二千十九,平成二千十九,4007,0
平成2020,平成二千二十,平成二千二十,4008,0
平成2021,平成二千二十一,平成二千二十一,4009,0
平成2022,平成二千二十二,平成二千二十二,4010,0
平成2023,平成二千二十三,平成二千二十三,4011,0
平成2024,平成二千二十四,平成二千二十四,4012,0
平成2025,平成二千二十五,平成二千二十五,4013,0
# encoding: utf-8
#
# Scrapes the Japanese Wikipedia page "元号一覧 (日本)" (list of Japanese era
# names) and prints one CSV row per era year to stdout:
#
#   <era><n>,<era><n in kanji>,<era><n in kanji, "元" for year 1>,<western year>,<flag>
#
# <flag> is 1 for the first year of an era and 0 otherwise. After the scraped
# eras, the last era printed is projected a further 2000 years into the
# future so that dates beyond the page's coverage still convert.
#
# NOTE(review): the row/column positions used below are taken over unchanged
# from the original script and depend on the layout of the Wikipedia table at
# the time it was written -- confirm against the current page before relying
# on the output.
require 'rubygems'
require 'hpricot'
require 'open-uri'

# Converts an integer to kanji numerals (e.g. 21 -> "二十一", 2013 -> "二千十三").
#
# n - Integer in 0..9999. 0 yields "" (there is no digit for zero), and a
#     leading "一" is omitted before 千/百/十 (100 -> "百").
#
# Returns the kanji String.
# Raises ArgumentError when n is not an Integer in 0..9999; without this check
# such input fails with an opaque nil error inside the loop.
def num_to_k(n)
  unless n.is_a?(Integer) && n >= 0 && n <= 9999
    raise ArgumentError, "num_to_k supports integers 0..9999, got #{n.inspect}"
  end

  digits = ["", "一", "二", "三", "四", "五", "六", "七", "八", "九"]
  units = [[1000, "千"], [100, "百"], [10, "十"]]

  rest = n
  str = ""
  units.each do |value, unit|
    count = rest / value
    # "一" is implicit in front of a unit character, so only 2..9 get a digit.
    str << ((count == 1 ? "" : digits[count]) + unit) if count > 0
    rest %= value
  end
  str << digits[rest]
  str
end

# Extracts the Western start year from a table cell.
#
# The year is read from the last parenthesised part of the cell text, cutting
# everything from the first "年" on. Both ASCII and full-width parentheses are
# accepted. When the cell has no parentheses the whole text is used, which
# normally yields 0.
#
# The original pattern /.*((.*)).*/ had no literal parentheses, so its greedy
# leading ".*" swallowed the whole cell and the capture was always empty; every
# start year came out as 0.
#
# cell_text - String, the inner text of the cell.
#
# Returns the year as an Integer (0 when no number is found).
def western_year_from(cell_text)
  flat = cell_text.chomp.gsub("\n", "")
  inside = flat[/.*[(（](.*)[)）]/, 1]
  (inside || flat).sub(/年.*/, "").to_i
end

# Prints the CSV rows of one era for the given era-year numbers.
#
# gengou             - String era name.
# first_western_year - Integer Western year of era year 1.
# year_numbers       - Enumerable of Integer era-year numbers (1-based).
def print_era_rows(gengou, first_western_year, year_numbers)
  year_numbers.each do |n|
    kanji = num_to_k(n)
    first_year = (n == 1)
    # Year 1 of an era is conventionally written "元年", hence the third column.
    gannen = first_year ? "元" : kanji
    flg = first_year ? 1 : 0
    puts [gengou.to_s + n.to_s,
          gengou.to_s + kanji,
          gengou.to_s + gannen,
          (first_western_year + n - 1).to_s,
          flg.to_s].join(",")
  end
end

url = "http://ja.wikipedia.org/wiki/%E5%85%83%E5%8F%B7%E4%B8%80%E8%A6%A7_(%E6%97%A5%E6%9C%AC)"
# URI.open exists from Ruby 2.5 on and is the only form that still accepts a
# URL on Ruby 3.0+; older rubies only have the open-uri Kernel#open hook.
# The block form closes the handle once the body has been read.
html = if URI.respond_to?(:open)
         URI.open(url) { |f| f.read }
       else
         open(url) { |f| f.read }
       end
doc = Hpricot(html)

# Era length in years. Kept outside the loop on purpose: a later row without
# a fourth cell reuses the value of the previous row, as the original did.
# NOTE(review): confirm that this carry-over is really wanted.
year = 0

# [era name, length in years, western start year] of the last era for which
# rows were printed; nil until that happens.
last_era = nil

(doc / "tr").each_with_index do |row, row_index|
  # Rows 0 and 1 are skipped -- presumably table headers; TODO confirm.
  next if row_index < 2

  gengou = ((row / "th") / "a").inner_text
  next if gengou.empty? # row carries no era name

  cells = row / "td"

  if row_index == 2
    # The first data row is read differently from the rest: the start year
    # comes from the third link inside the row's cells.
    length_cell = cells[3]
    start_link = (cells / "a")[2]
    if length_cell.nil? || start_link.nil?
      warn "skipping row #{row_index} (#{gengou}): unexpected table layout"
      next
    end
    year = length_cell.inner_text.gsub("年", "").to_i
    w_f_year = start_link.inner_text.gsub("年", "").to_i
  else
    if cells[1].nil?
      # Without a start-date cell there is no Western year to count from.
      warn "skipping row #{row_index} (#{gengou}): no start-date cell"
      next
    end
    year = cells[3].inner_text.gsub("年", "").to_i unless cells[3].nil?
    w_f_year = western_year_from(cells[1].inner_text)
  end

  next unless year > 0
  print_era_rows(gengou, w_f_year, 1..year)
  last_era = [gengou, year, w_f_year]
end

# Project the last era into the future. Skipped when nothing was scraped, so
# no rows with a bogus era name are emitted.
future_years = 2000
if last_era
  gengou, year, w_f_year = last_era
  print_era_rows(gengou, w_f_year, (year + 1)..(year + future_years))
end