#!/usr/bin/perl --
#
# Anthy: Converter, from udict to ucdic.
# Tue,20 Oct,2009
# Sat,31 Oct,2009 - Sun,01 Nov,2009
# Copyright(C)2009 G-HAL (fenix.ne.jp)
#
# Reads lines from the files named on the command line (or from standard
# input when none are given) and writes the converted lines to standard
# output.  Input lines are handled as follows:
#
#   - Lines starting with '#' are copied through unchanged.
#   - Lines starting with '-' start a new group; they are emitted with a
#     leading '#' (i.e. commented out).
#   - Any other line must look like "YOMI (DEP) WT CAND".  The first such
#     line of a group becomes the "base" entry and produces no output;
#     every later line of the group produces one "- 3 ... \make_reverse"
#     record that pairs the entry with the base entry.
#   - Anything else is reported as "Illigal Line: N" on standard output.
#
# DO NOT set an encoding: length() must return a BYTE length here, because
# the tab alignment below is computed from it.
#use encoding "EUC-JP";

use strict;
use warnings;

# Width of one tab stop, and the column widths (in characters) that the
# three padded fields of an output record are aligned to.
use constant TAB_WIDTH      => 8;
use constant YOMI_FIELD_LEN => 40;
use constant CAND_FIELD_LEN => 40;
use constant WT_FIELD_LEN   => 24;

# Return the run of extra tabs needed after $text so that the field ends
# at $field_width columns.  Yields an empty string when $text already
# reaches or exceeds the field width.
sub tab_padding {
    my ($text, $field_width) = @_;

    # Column reached after $text plus one tab, rounded to a tab stop.
    my $used_columns =
        int((TAB_WIDTH + length($text)) / TAB_WIDTH) * TAB_WIDTH;

    my $tab_count = ($field_width - $used_columns) / TAB_WIDTH;
    if ($tab_count < 0) {
        $tab_count = 0;
    }

    return "\t" x $tab_count;
}

{
    my $line_number = 0;

    # Number of entries seen so far in the current group.
    my $off = 0;

    # Fields of the first entry in the current group.
    my $base_yomi_indep;
    my $base_yomi_dep;      # only needed by the commented-out variant below
    my $base_cand_indep;
    my $base_wt_indep;

    # The original condition was empty ("while ()"), which never reads any
    # input and loops forever; read line by line from ARGV/STDIN instead.
    while (defined(my $input = <>)) {
        chomp($input);
        ++$line_number;

        if ($input =~ /^#/i) {
            # Comment line: pass through untouched.
            print $input . "\n";
        }
        elsif ($input =~ /^-/i) {
            # Group separator: restart entry counting, emit commented out.
            $off = 0;
            print "#" . $input . "\n";
        }
        elsif (
            my ($yomi_indep, $yomi_dep, $wt, $cand_indep) =
                $input =~ /^([^ ]+)[\s]+\(([^ ]*)\)+[\s]+([^ ]+)[\s]+([^ ]+)/
            )
        {
            ++$off;

            # An empty "()" part is represented as "*".
            if (length($yomi_dep) < 1) {
                $yomi_dep = "*";
            }

            if ($off < 2) {
                # First entry of the group: remember it as the base.
                $base_yomi_indep = $yomi_indep;
                $base_yomi_dep   = $yomi_dep;
                $base_cand_indep = $cand_indep;
                $base_wt_indep   = $wt;
            }
            else {
                my $head_str = "- 3";
#               my $lefthalf_str  = "$yomi_indep $yomi_dep $base_yomi_indep $base_yomi_dep";
#               my $righthalf_str = "$cand_indep $yomi_dep $base_cand_indep $base_yomi_dep";
                my $lefthalf_str  = "$yomi_indep * $base_yomi_indep *";
                my $righthalf_str = "$cand_indep * $base_cand_indep *";
                my $wt_str        = "$wt * $base_wt_indep *";
                my $tail_str      = "\\make_reverse";

                # Built as a single string so the output does not depend
                # on the print list separator.
                print $head_str . "\t"
                    . $lefthalf_str
                    . tab_padding($lefthalf_str, YOMI_FIELD_LEN) . "\t"
                    . $righthalf_str
                    . tab_padding($righthalf_str, CAND_FIELD_LEN) . "\t"
                    . $wt_str
                    . tab_padding($wt_str, WT_FIELD_LEN) . "\t"
                    . $tail_str . "\n";
            }
        }
        else {
            # Message text and destination (STDOUT) kept exactly as before.
            print "Illigal Line: $line_number\n";
        }
    }

    exit 0;
}

__END__
# [ EOF ]