#!/usr/bin/perl --
#
# Anthy: Converter, from OCHAIRE to corpus.txt.
# Sat,16 May,2009
# Copyright(C)2009 G-HAL (fenix.ne.jp)
#
# Reads EUC-JP text from standard input (or from the files named on the
# command line).  A line of the form "--- NAME" selects the current section;
# while the section is "OCHAIRE", every record line that describes exactly
# two segments is rewritten as one corpus line on standard output (EUC-JP).
#

use strict;
use warnings;
use Encode qw(decode encode);

# Character set of both the input and the output.  The script used to get
# this from the "encoding" pragma, which modern perls no longer support, so
# the conversion is now done explicitly on every line.
use constant CHARSET => 'EUC-JP';

# ochaire_line_to_corpus($line)
#
# Converts one OCHAIRE record into a corpus line.
#
#   $line    a decoded (character) string without its trailing newline
#
# Returns the corpus line, including the trailing newline, or nothing
# (undef in scalar context) when the line is not a record, does not declare
# exactly two segments, has a zero 5th or 8th numeric field (called "wI" and
# "wD" by the original author), or has a core part that is not of the form
#   LEN "CANDIDATE" LEN "CANDIDATE"
sub ochaire_line_to_corpus {
    my ($line) = @_;

    # Fields: key, segment count, core, ka, wI, woI, woD, wD, "Tn Fn".
    # Only key, count, core, wI and wD are used below.
    my ($key, $segment_count, $core, undef, $w_i, undef, undef, $w_d) =
        $line =~ /^\+([^ ]+) ([0-9]+) (.+?) ([-]*[0-9]+) ([-]*[0-9]+) ([-]*[0-9]+) ([-]*[0-9]+) ([-]*[0-9]+) (T[0-9]+ F[0-9]+)$/
        or return;

    return unless (2 == $segment_count) && (0 != $w_i) && (0 != $w_d);

    my ($p1_len, $p1_cand, $p2_len, $p2_cand) =
        $core =~ /^([0-9]+) "([^"]+)" ([0-9]+) "([^"]+)"$/
        or return;

    # The two lengths are applied to the key as character counts, which is
    # why the caller must pass a decoded string.
    my $p1_key = substr($key, 0, $p1_len);
    my $p2_key = substr($key, $p1_len, $p2_len);

    return "|" . $p1_key . "|" . $p2_key . "| |" . $p1_cand . "|" . $p2_cand . "|\n";
}

# main()
#
# Drives the conversion: tracks the current section and prints the corpus
# line of every convertible record found in the OCHAIRE section.
# Returns the process exit status (always 0).
sub main {
    my $section = "";

    while (my $raw = <>) {
        chomp($raw);
        my $line = decode(CHARSET, $raw);

        # A section header switches the mode and is never a record itself.
        if ($line =~ /^--- (\w*)$/i) {
            $section = $1;
            next;
        }
        next unless $section eq "OCHAIRE";

        my $corpus_line = ochaire_line_to_corpus($line);
        print encode(CHARSET, $corpus_line) if defined $corpus_line;
    }

    return 0;
}

# Run only when executed as a script, so the helper above can be loaded
# (e.g. by a test) without consuming standard input.
exit main() unless caller;

__END__
# [ EOF ]