修正版:
次のルールでカタカナをアルファベットに変換
カタカナの「ニ」と漢字の「二」の
違いに注意。
また、アルファベット3文字に変換される文字(例: ャ→lya)を
優先して先に置換する。
katakana.kana
ャ,lya
ュ,lyu
ョ,lyo
ヮ,lwa
ッ,ltu
ヴ,va
ヵ,lka
ヶ,lke
ア,_a
イ,_i
ウ,_u
エ,_e
オ,_o
カ,ka
キ,ki
ク,ku
ケ,ke
コ,ko
サ,sa
シ,si
ス,su
セ,se
ソ,so
タ,ta
チ,ti
ツ,tu
テ,te
ト,to
ナ,na
ニ,ni
ヌ,nu
ネ,ne
ノ,no
ハ,ha
ヒ,hi
フ,hu
ヘ,he
ホ,ho
マ,ma
ミ,mi
ム,mu
メ,me
モ,mo
ヤ,ya
ユ,yu
ヨ,yo
ラ,ra
リ,ri
ル,ru
レ,re
ロ,ro
ワ,wa
ヲ,wo
ン,nn
ァ,la
ィ,li
ゥ,lu
ェ,le
ォ,lo
ガ,ga
ギ,gi
グ,gu
ゲ,ge
ゴ,go
ザ,za
ジ,zi
ズ,zu
ゼ,ze
ゾ,zo
ダ,da
ヂ,di
ヅ,du
デ,de
ド,do
バ,ba
ビ,bi
ブ,bu
ベ,be
ボ,bo
パ,pa
ピ,pi
プ,pu
ペ,pe
ポ,po
sedの場合、sjisダメ文字の影響でうまくいかない場合は、以下のrubyをつかう。
# Generate kana.sed from the comma-separated table katakana.kana.
# Output: a line `sed "`, then one `s/<katakana>/<alphabet>/g;` line per
# table row (field 1 -> field 2), then a closing `"` line.
# The input file name must stay on one line; `/` needs no escaping inside
# awk string constants.
awk -F, 'BEGIN { print "sed \"" } { print "s/" $1 "/" $2 "/g;" } END { print "\"" }' katakana.kana > kana.sed
sedでsjisダメ文字の影響でうまくいかない場合は、以下のrubyをつかう。
ruby -KSとして実行するのがポイント。
例えば
ruby -KS katakanatoalpha.rb
KSで実行する。
#!/usr/bin/env ruby
# Katakana -> alphabet encoder.
#
# Usage: ruby -KS katakanatoalpha.rb INPUT_FILE
#
# Prints every line of INPUT_FILE with each katakana character listed in
# KATAKANA_TO_ALPHA replaced by its alphabet code. Pass -KS on the command
# line when this script and its input are saved as Shift_JIS; the option is
# deliberately not part of the shebang so a UTF-8 copy also runs unchanged.

# [katakana, alphabet code] pairs. Every key is a single katakana character,
# so the replacement order does not matter in this direction.
# NOTE: "ヘ" and "ペ" are the KATAKANA characters (not the look-alike
# hiragana), and "ニ" is katakana NI (not the kanji for "two").
KATAKANA_TO_ALPHA = [
  %w[ア _a], %w[イ _i], %w[ウ _u], %w[エ _e], %w[オ _o],
  %w[カ ka], %w[キ ki], %w[ク ku], %w[ケ ke], %w[コ ko],
  %w[サ sa], %w[シ si], %w[ス su], %w[セ se], %w[ソ so],
  %w[タ ta], %w[チ ti], %w[ツ tu], %w[テ te], %w[ト to],
  %w[ナ na], %w[ニ ni], %w[ヌ nu], %w[ネ ne], %w[ノ no],
  %w[ハ ha], %w[ヒ hi], %w[フ hu], %w[ヘ he], %w[ホ ho],
  %w[マ ma], %w[ミ mi], %w[ム mu], %w[メ me], %w[モ mo],
  %w[ヤ ya], %w[ユ yu], %w[ヨ yo],
  %w[ラ ra], %w[リ ri], %w[ル ru], %w[レ re], %w[ロ ro],
  %w[ワ wa], %w[ヲ wo], %w[ン nn],
  %w[ァ la], %w[ィ li], %w[ゥ lu], %w[ェ le], %w[ォ lo],
  %w[ガ ga], %w[ギ gi], %w[グ gu], %w[ゲ ge], %w[ゴ go],
  %w[ザ za], %w[ジ zi], %w[ズ zu], %w[ゼ ze], %w[ゾ zo],
  %w[ダ da], %w[ヂ di], %w[ヅ du], %w[デ de], %w[ド do],
  %w[バ ba], %w[ビ bi], %w[ブ bu], %w[ベ be], %w[ボ bo],
  %w[パ pa], %w[ピ pi], %w[プ pu], %w[ペ pe], %w[ポ po],
  %w[ャ lya], %w[ュ lyu], %w[ョ lyo], %w[ヮ lwa], %w[ッ ltu],
  %w[ヴ va], %w[ヵ lka], %w[ヶ lke]
].freeze

# Returns a copy of +str+ in which every katakana character from
# KATAKANA_TO_ALPHA is replaced by its alphabet code; all other characters
# are left untouched.
def katakana_to_alpha(str)
  KATAKANA_TO_ALPHA.inject(str) { |text, (kana, alpha)| text.gsub(kana, alpha) }
end

if ARGV.empty?
  warn "usage: ruby -KS #{$PROGRAM_NAME} INPUT_FILE"
else
  # Block form of File.open closes the handle once all lines are printed.
  File.open(ARGV[0]) do |file|
    file.each { |line| puts katakana_to_alpha(line) }
  end
end
# Generate the katakana -> alphabet Ruby script katakana.rb from katakana.kana.
# Output: `a=File.open(ARGV[0]);a.each{|str|`, then one
# `str=str.gsub(/<katakana>/,"<alphabet>")` line per table row, then `puts str}`.
# The awk program must stay on one line: a line break inside an awk string
# constant is a syntax error.
awk -F, 'BEGIN { print "a=File.open(ARGV[0]);a.each{|str|" } { print "str=str.gsub(/" $1 "/,\"" $2 "\")" } END { print "puts str}" }' katakana.kana > katakana.rb
# Generate kana.sed from the comma-separated table katakana.kana.
# Output: a line `sed "`, then one `s/<katakana>/<alphabet>/g;` line per
# table row (field 1 -> field 2), then a closing `"` line.
# The input file name must stay on one line; `/` needs no escaping inside
# awk string constants.
awk -F, 'BEGIN { print "sed \"" } { print "s/" $1 "/" $2 "/g;" } END { print "\"" }' katakana.kana > kana.sed
# Generate the katakana -> alphabet Ruby script katakana.rb from katakana.kana.
# Output: `a=File.open(ARGV[0]);a.each{|str|`, then one
# `str=str.gsub(/<katakana>/,"<alphabet>")` line per table row, then `puts str}`.
# The awk program must stay on one line: a line break inside an awk string
# constant is a syntax error.
awk -F, 'BEGIN { print "a=File.open(ARGV[0]);a.each{|str|" } { print "str=str.gsub(/" $1 "/,\"" $2 "\")" } END { print "puts str}" }' katakana.kana > katakana.rb
デコード
アルファベット→カタカナ
rubyを作るスクリプト
# henkan.sh -- generate the decoder (alphabet -> katakana) Ruby script
# katakana.rb from katakana.kana.
# Fields are swapped relative to the encoder generator: field 2 (alphabet)
# becomes the pattern and field 1 (katakana) the replacement. Rows are emitted
# in table order, so the three-letter codes listed first are replaced first.
awk -F, 'BEGIN { print "a=File.open(ARGV[0]);a.each{|str|" } { print "str=str.gsub(/" $2 "/,\"" $1 "\")" } END { print "puts str}" }' katakana.kana > katakana.rb
#!/usr/bin/env ruby
# katakana.rb -- alphabet -> katakana decoder.
#
# Usage: ruby -KS katakana.rb INPUT_FILE
#
# Prints every line of INPUT_FILE with each alphabet code listed in
# ALPHA_TO_KATAKANA replaced by its katakana character. Pass -KS on the
# command line when this script and its input are saved as Shift_JIS.

# [alphabet code, katakana] pairs, applied in this order. The three-letter
# codes (lya, ltu, lka, ...) must come before the two-letter codes they
# contain (ya, tu, ka, ...), otherwise e.g. "lya" would be consumed as "ya".
# NOTE: "ヘ" and "ペ" are the KATAKANA characters, not the look-alike hiragana.
ALPHA_TO_KATAKANA = [
  %w[lya ャ], %w[lyu ュ], %w[lyo ョ], %w[lwa ヮ], %w[ltu ッ],
  %w[va ヴ], %w[lka ヵ], %w[lke ヶ],
  %w[_a ア], %w[_i イ], %w[_u ウ], %w[_e エ], %w[_o オ],
  %w[ka カ], %w[ki キ], %w[ku ク], %w[ke ケ], %w[ko コ],
  %w[sa サ], %w[si シ], %w[su ス], %w[se セ], %w[so ソ],
  %w[ta タ], %w[ti チ], %w[tu ツ], %w[te テ], %w[to ト],
  %w[na ナ], %w[ni ニ], %w[nu ヌ], %w[ne ネ], %w[no ノ],
  %w[ha ハ], %w[hi ヒ], %w[hu フ], %w[he ヘ], %w[ho ホ],
  %w[ma マ], %w[mi ミ], %w[mu ム], %w[me メ], %w[mo モ],
  %w[ya ヤ], %w[yu ユ], %w[yo ヨ],
  %w[ra ラ], %w[ri リ], %w[ru ル], %w[re レ], %w[ro ロ],
  %w[wa ワ], %w[wo ヲ], %w[nn ン],
  %w[la ァ], %w[li ィ], %w[lu ゥ], %w[le ェ], %w[lo ォ],
  %w[ga ガ], %w[gi ギ], %w[gu グ], %w[ge ゲ], %w[go ゴ],
  %w[za ザ], %w[zi ジ], %w[zu ズ], %w[ze ゼ], %w[zo ゾ],
  %w[da ダ], %w[di ヂ], %w[du ヅ], %w[de デ], %w[do ド],
  %w[ba バ], %w[bi ビ], %w[bu ブ], %w[be ベ], %w[bo ボ],
  %w[pa パ], %w[pi ピ], %w[pu プ], %w[pe ペ], %w[po ポ]
].freeze

# Returns a copy of +str+ in which every alphabet code from ALPHA_TO_KATAKANA
# is replaced by its katakana character, applying the pairs in table order.
# Any matching letter sequence is replaced, including ones in ordinary
# alphabetic text.
def alpha_to_katakana(str)
  ALPHA_TO_KATAKANA.inject(str) { |text, (alpha, kana)| text.gsub(alpha, kana) }
end

if ARGV.empty?
  warn "usage: ruby -KS #{$PROGRAM_NAME} INPUT_FILE"
else
  # Block form of File.open closes the handle once all lines are printed.
  File.open(ARGV[0]) do |file|
    file.each { |line| puts alpha_to_katakana(line) }
  end
end