wikipediaから元号と西暦の換算表を作る

wikipediaから元号と西暦の換算表を作る
元号一覧(日本)のページをスクレイピング
http://ja.wikipedia.org/wiki/%E5%85%83%E5%8F%B7%E4%B8%80%E8%A6%A7_(%E6%97%A5%E6%9C%AC)
出力例

大化1,大化一,大化元,645
大化2,大化二,大化二,646
大化3,大化三,大化三,647
大化4,大化四,大化四,648
大化5,大化五,大化五,649
大化6,大化六,大化六,650
白雉1,白雉一,白雉元,650
白雉2,白雉二,白雉二,651
白雉3,白雉三,白雉三,652
.
.
.
平成21,平成二十一,平成二十一,2009
平成22,平成二十二,平成二十二,2010
平成23,平成二十三,平成二十三,2011
平成24,平成二十四,平成二十四,2012
平成25,平成二十五,平成二十五,2013


ソース:

require 'rubygems'
require 'hpricot'
require 'open-uri'

# Convert a non-negative integer into Japanese kanji numerals.
#
# The leading "一" is omitted in front of 千/百/十 (10 => "十", 2016 => "二千十六"),
# and 0 yields an empty string.
#
# @param n [Integer] value to convert, 0..9999
# @return [String] kanji representation of n
# @raise [ArgumentError] if n is not an Integer in 0..9999
def num_to_k(n)
  unless n.is_a?(Integer) && n.between?(0, 9999)
    raise ArgumentError, "num_to_k supports integers from 0 to 9999, got #{n.inspect}"
  end

  # Index i holds the kanji for digit i; zero is never written out.
  digit_kanji = ["", "一", "二", "三", "四", "五", "六", "七", "八", "九"]
  # Place values from largest to smallest, each with its kanji unit.
  units = [[1000, "千"], [100, "百"], [10, "十"]]

  rest = n
  upper = units.map do |unit, unit_kanji|
    count, rest = rest.divmod(unit)
    if count.zero?
      ""
    else
      # A count of one is implied by the unit itself: "十", not "一十".
      (count == 1 ? "" : digit_kanji[count]) + unit_kanji
    end
  end

  upper.join + digit_kanji[rest]
end




# Scrape the Wikipedia list of Japanese era names and print, for every year of
# every era, one CSV line:
#   <era><n>,<era><n in kanji>,<era><n in kanji, "元" for the first year>,<Western year>
#
# NOTE: handing a URL to Kernel#open relies on open-uri and no longer works on
# Ruby 3.0+ (URI.open is the replacement there).
# To work offline, parse a saved copy of the page instead:
#   text = Hpricot(open("gengou3.txt", "r").read)
text = Hpricot(open("http://ja.wikipedia.org/wiki/%E5%85%83%E5%8F%B7%E4%B8%80%E8%A6%A7_(%E6%97%A5%E6%9C%AC)", "r").read)

# Number of years in the era being processed. Kept outside the loop on purpose:
# a row without a fourth cell reuses the value of the previous row.
year = 0

(text/"tr").each_with_index do |row, row_index|
  # The first two <tr> elements are never processed (presumably header rows).
  next if row_index < 2

  # The era name is the link text inside the row's <th>; rows without one are ignored.
  gengou = ((row/"th")/"a").inner_text
  next if gengou.empty?

  cells = row/"td"
  if row_index == 2
    # The first data row is read differently from the rest: the first Western
    # year comes from the third link found inside the row's cells.
    year = cells[3].inner_text.gsub("年", "").to_i
    w_f_year = (cells/"a")[2].inner_text.gsub("年", "").to_i
  else
    start_cell = cells[1]
    # Without the second cell the first Western year cannot be determined.
    next unless start_cell

    year = cells[3].inner_text.gsub("年", "").to_i if cells[3]
    # Take the text inside the full-width parentheses, then cut it at "年" so
    # only the Western year number remains.
    # NOTE(review): the non-ASCII characters of this pattern were lost from the
    # published source; the full-width parentheses are a reconstruction that
    # assumes cells look like "...（645年7月17日）" - confirm against the page.
    start_text = start_cell.inner_text.chomp.gsub("\n", "")
    w_f_year = start_text.gsub(/.*（(.*)）.*/, '\1').gsub(/年.*/, "").to_i
  end

  year.times do |offset|
    nth = offset + 1
    kanji_nth = num_to_k(nth)
    # The first year of an era is written "元" instead of "一".
    gannen = offset.zero? ? "元" : kanji_nth
    puts "#{gengou}#{nth},#{gengou}#{kanji_nth},#{gengou}#{gannen},#{w_f_year + offset}"
  end
end

数字から漢数字は
http://d.hatena.ne.jp/redcat_prog/20121118/1353217490
を利用させていただきました。

元号と西暦の換算は、
専門家ではないので微妙にいろいろ間違っているかもしれません。


未来の年(最後の元号を2000年分延長したもの)と、元年フラグ(最終列。元年なら1、それ以外は0)をつけたもの

大化1,大化一,大化元,645,1
大化2,大化二,大化二,646,0
大化3,大化三,大化三,647,0
大化4,大化四,大化四,648,0
大化5,大化五,大化五,649,0
大化6,大化六,大化六,650,0
白雉1,白雉一,白雉元,650,1
白雉2,白雉二,白雉二,651,0
白雉3,白雉三,白雉三,652,0

平成2016,平成二千十六,平成二千十六,4004,0
平成2017,平成二千十七,平成二千十七,4005,0
平成2018,平成二千十八,平成二千十八,4006,0
平成2019,平成二千十九,平成二千十九,4007,0
平成2020,平成二千二十,平成二千二十,4008,0
平成2021,平成二千二十一,平成二千二十一,4009,0
平成2022,平成二千二十二,平成二千二十二,4010,0
平成2023,平成二千二十三,平成二千二十三,4011,0
平成2024,平成二千二十四,平成二千二十四,4012,0
平成2025,平成二千二十五,平成二千二十五,4013,0

require 'rubygems'
require 'hpricot'
require 'open-uri'

# Convert a non-negative integer into Japanese kanji numerals.
#
# The leading "一" is omitted in front of 千/百/十 (10 => "十", 2016 => "二千十六"),
# and 0 yields an empty string.
#
# @param n [Integer] value to convert, 0..9999
# @return [String] kanji representation of n
# @raise [ArgumentError] if n is not an Integer in 0..9999
def num_to_k(n)
  unless n.is_a?(Integer) && n.between?(0, 9999)
    raise ArgumentError, "num_to_k supports integers from 0 to 9999, got #{n.inspect}"
  end

  # Index i holds the kanji for digit i; zero is never written out.
  digit_kanji = ["", "一", "二", "三", "四", "五", "六", "七", "八", "九"]
  # Place values from largest to smallest, each with its kanji unit.
  units = [[1000, "千"], [100, "百"], [10, "十"]]

  rest = n
  upper = units.map do |unit, unit_kanji|
    count, rest = rest.divmod(unit)
    if count.zero?
      ""
    else
      # A count of one is implied by the unit itself: "十", not "一十".
      (count == 1 ? "" : digit_kanji[count]) + unit_kanji
    end
  end

  upper.join + digit_kanji[rest]
end




# Scrape the Wikipedia list of Japanese era names and print, for every year of
# every era, one CSV line:
#   <era><n>,<era><n in kanji>,<era><n in kanji, "元" for the first year>,<Western year>,<flag>
# where <flag> is 1 for the first year of an era and 0 otherwise. Afterwards the
# last era that produced output is extended by 2000 further years.
#
# NOTE: handing a URL to Kernel#open relies on open-uri and no longer works on
# Ruby 3.0+ (URI.open is the replacement there).
# To work offline, parse a saved copy of the page instead:
#   text = Hpricot(open("gengou3.txt", "r").read)
text = Hpricot(open("http://ja.wikipedia.org/wiki/%E5%85%83%E5%8F%B7%E4%B8%80%E8%A6%A7_(%E6%97%A5%E6%9C%AC)", "r").read)

# Number of years in the era being processed. Kept outside the loop on purpose:
# a row without a fourth cell reuses the value of the previous row.
year = 0

# Details of the last era that produced output, used for the extension below.
last_gengou = nil
last_year_count = 0
last_w_f_year = 0

(text/"tr").each_with_index do |row, row_index|
  # The first two <tr> elements are never processed (presumably header rows).
  next if row_index < 2

  # The era name is the link text inside the row's <th>; rows without one are ignored.
  gengou = ((row/"th")/"a").inner_text
  next if gengou.empty?

  cells = row/"td"
  if row_index == 2
    # The first data row is read differently from the rest: the first Western
    # year comes from the third link found inside the row's cells.
    year = cells[3].inner_text.gsub("年", "").to_i
    w_f_year = (cells/"a")[2].inner_text.gsub("年", "").to_i
  else
    start_cell = cells[1]
    # Without the second cell the first Western year cannot be determined.
    next unless start_cell

    year = cells[3].inner_text.gsub("年", "").to_i if cells[3]
    # Take the text inside the full-width parentheses, then cut it at "年" so
    # only the Western year number remains.
    # NOTE(review): the non-ASCII characters of this pattern were lost from the
    # published source; the full-width parentheses are a reconstruction that
    # assumes cells look like "...（645年7月17日）" - confirm against the page.
    start_text = start_cell.inner_text.chomp.gsub("\n", "")
    w_f_year = start_text.gsub(/.*（(.*)）.*/, '\1').gsub(/年.*/, "").to_i
  end

  year.times do |offset|
    nth = offset + 1
    kanji_nth = num_to_k(nth)
    # The first year of an era is written "元" instead of "一" and flagged with 1.
    first_year = offset.zero?
    gannen = first_year ? "元" : kanji_nth
    flg = first_year ? 1 : 0
    puts "#{gengou}#{nth},#{gengou}#{kanji_nth},#{gengou}#{gannen},#{w_f_year + offset},#{flg}"
  end

  # Remember this era only if it actually printed something.
  if year > 0
    last_gengou = gengou
    last_year_count = year
    last_w_f_year = w_f_year
  end
end

# Continue the last era into the future, numbering on from its last printed
# year. Skipped when nothing was parsed, so no lines with an empty era appear.
future_years = 2000
if last_gengou
  future_years.times do |offset|
    nth = last_year_count + offset + 1
    kanji_nth = num_to_k(nth)
    western_year = last_w_f_year + last_year_count + offset
    # These are never first years, so the flag is always 0.
    puts "#{last_gengou}#{nth},#{last_gengou}#{kanji_nth},#{last_gengou}#{kanji_nth},#{western_year},0"
  end
end