#!/usr/bin/perl --
#
# Anthy user dictionary splitter Script
#        Thu,06 Sep,2007
#        Sun,24 Aug,2008
#        Sun,25 Jan,2009
#        Tue,03 Feb,2009
#        Sun,08 Feb,2009 - Mon,09 Feb,2009
#        Wed,22 Apr,2009
#        Sat,25 Apr,2009
#        Wed,06 Jan,2010
#        Wed,18 Aug,2010
# Copyright(C)2007-2010  G-HAL (fenix.ne.jp)
#
# Usage:  <this script> [PREFIX]  < dictionary-dump
#
# Input is read line by line from STDIN.  A line of the form "--- NAME"
# starts a new section.  Every following line is
#   * appended verbatim to   [PREFIX.]anthy.userdic.RAW.NAME
#   * appended in a reduced form (section dependent, see the split_* subs)
#     to                     [PREFIX.]anthy.userdic.NAME
# Sections whose name contains "OCHAIRE" or "DATA_SAMPLING" additionally get
# a set of side files (see @ochaire_outputs / @sampling_outputs).
# All files are opened in append mode.  Diagnostics are printed on STDOUT.
#
# NOTE(review): the loop condition in the received source was empty
# ("while ()"), which never reads anything.  It has been restored to read
# STDIN; "<>" was ruled out because $ARGV[0] is used as the output prefix.
# Please confirm against the upstream script.
#

use strict;
use warnings;
use IO::File;

my $argc  = @ARGV;
my $debug = 0;

# Side files opened for sections whose name contains "OCHAIRE":
# [ handle tag, suffix appended to the section's main output file name ].
# The numbered suffixes intentionally carry no leading dot; the names are
# kept exactly as the script has always produced them.
my @ochaire_outputs = (
    [ 'O1wIwD',   '1.wIwD'   ],
    [ 'O1wIwoD',  '1.wIwoD'  ],
    [ 'O2wIwD',   '2.wIwD'   ],
    [ 'O2wIwoD',  '2.wIwoD'  ],
    [ 'O2woIwD',  '2.woIwD'  ],
    [ 'O2woIwoD', '2.woIwoD' ],
    [ 'O3wIwD',   '3.wIwD'   ],
    [ 'O3wIwoD',  '3.wIwoD'  ],
    [ 'O3woIwD',  '3.woIwD'  ],
    [ 'O3woIwoD', '3.woIwoD' ],
    [ 'Oe',       '.etc'     ],
);

# Side files opened for sections whose name contains "DATA_SAMPLING".
my @sampling_outputs = (
    [ '2I',  '.2I'   ],
    [ '2D',  '.2D'   ],
    [ '3I',  '.3I'   ],
    [ '3D',  '.3D'   ],
    [ '2ID', '._2ID' ],
);

# Sections that share the "key + quoted candidate" reduction.
my %is_history_mode = map { $_ => 1 }
    qw(CAND_HISTORY PREFIX_HISTORY INDEP_HISTORY SUFFIX_HISTORY);

# Currently open output handles, keyed by tag ('OUT', 'RAW', 'O1wIwD', ...).
# One table for opening, writing and closing guarantees that every handle
# that gets opened is also the one that gets closed.
my %handle_for;

# Close every currently open output file and forget its handle.
# A failing close on a write handle is reported on STDERR (buffered write
# errors surface there) but does not abort the run.
sub close_all_files {
    for my $tag (sort keys %handle_for) {
        my $fh = delete $handle_for{$tag};
        close($fh)
            or warn sprintf("%s:%d: close failed for output '%s': %s\n",
                            __FILE__, __LINE__, $tag, $!);
    }
    return;
}

# Open $path for appending and register the handle under $tag.
# On failure a message is printed on STDOUT and the script exits with -1.
sub open_output {
    my ($tag, $path) = @_;

    my $fh;
    if (!open($fh, '>>', $path)) {
        printf "%s:%d: Can't open resultfile '%s': %s\n",
            __FILE__, __LINE__, $path, $!;
        exit -1;
    }
    $handle_for{$tag} = $fh;
    return;
}

# Write $text to the output registered under $tag.
# If no such output is open (e.g. data lines before the first "--- NAME"
# header) the text is silently discarded.
sub emit {
    my ($tag, $text) = @_;

    my $fh = $handle_for{$tag};
    return if !defined $fh;
    print {$fh} $text;
    return;
}

# Report a line that does not match the expected syntax of section $label.
# print (not printf) is used so that '%' in the data is output literally.
sub report_mismatch {
    my ($label, $input) = @_;

    print 'Syntax mismatch. ' . $label . ':' . $input . "\n";
    return;
}

# Start section $mode: close the previous section's files and open the
# main, RAW and (depending on the section name) side files, using $prefix
# as the leading part of every file name.
sub open_section {
    my ($prefix, $mode) = @_;

    close_all_files();

    my $output_file     = $prefix . "anthy.userdic." . $mode;
    my $output_raw_file = $prefix . "anthy.userdic.RAW." . $mode;
    if ($debug) { printf "%s:%d: DEBUG\n", __FILE__, __LINE__; }

    open_output('OUT', $output_file);
    open_output('RAW', $output_raw_file);

    if ($mode =~ /OCHAIRE/) {
        for my $spec (@ochaire_outputs) {
            my ($tag, $suffix) = @{$spec};
            open_output($tag, $output_file . $suffix);
        }
    }
    if ($mode =~ /DATA_SAMPLING/) {
        for my $spec (@sampling_outputs) {
            my ($tag, $suffix) = @{$spec};
            open_output($tag, $output_file . $suffix);
        }
    }
    return;
}

# PREDICTION section: write the key, then repeatedly strip
# "<word> [O]<digits> " groups from the quoted part, keeping only the words.
sub split_prediction_line {
    my ($input) = @_;

    if ($input =~ /^([^ ]+) +[O]*([0-9]+) +("[^"]+")(.*?)(| T[0-9]+ F[0-9]+)$/) {
        my $outbuf = $1 . " ";
        my $check  = $3;
        while ($check =~ /^([^ ]+) +[O]*([0-9]+) +(.+)$/) {
            $outbuf .= $1 . " ";
            $check = $3;
        }
        emit('OUT', $outbuf . $check . "\n");
    }
    else {
        report_mismatch('PREDICTION', $input);
    }
    return;
}

# *_HISTORY sections: write "<key> <first quoted string>".
# $mode is only used to label the mismatch message.
sub split_history_line {
    my ($mode, $input) = @_;

    if ($input =~ /^([^ ]+)([ O0-9]+)("[^"]+")(.*?)(| T[0-9]+ F[0-9]+)$/) {
        emit('OUT', $1 . " " . $3 . "\n");
    }
    else {
        report_mismatch($mode, $input);
    }
    return;
}

# Pick the OCHAIRE side-file tag for a new-style entry.
#   $len   - second field of the line (entry length)
#   $w_i   - second of the five trailing numeric fields
#   $wo_d  - fourth of the five trailing numeric fields
# Returns the handle tag, or nothing when $len is 0 (no side file).
sub ochaire_bucket {
    my ($len, $w_i, $wo_d) = @_;

    # The fields match /[-]*[0-9]+/, so odd spellings such as "--1" are
    # possible; compare them numerically without a 'numeric' warning.
    no warnings 'numeric';

    my $size_class
        = 1 == $len ? '1'
        : 2 == $len ? '2'
        : 3 <= $len ? '3'
        :             undef;
    return if !defined $size_class;

    # Length-1 entries are only split on the D field and always land in a
    # "wI" file.
    my $i_part = ($size_class eq '1' || 0 != $w_i) ? 'wI' : 'woI';
    my $d_part = (0 == $wo_d) ? 'wD' : 'woD';
    return 'O' . $size_class . $i_part . $d_part;
}

# OCHAIRE section: write the reduced line (numeric segment lengths and the
# trailing counters/timestamp removed) to the main file, then copy the
# untouched input line to the side file selected by its trailing fields.
sub split_ochaire_line {
    my ($input) = @_;

    if ($input !~ /^([^ ]+ [0-9]+) +(.+?)(|( [-]*[0-9]+)* T[0-9]+ F[0-9]+)$/) {
        report_mismatch('OCHAIRE', $input);
        return;
    }
    my $outbuf = $1 . " ";
    my $check  = $2;
    while ($check =~ /^ *([0-9]+) +("[^"]+")(.*)$/) {
        $outbuf .= $2 . " ";
        $check = $3;
    }
    emit('OUT', $outbuf . $check . "\n");

    if ($input =~ /^([^ ]+ [0-9]+) +(.+?) (0|-1) (0|-1) (0|-1) (0|-1) (0|-1) (T[0-9]+ F[0-9]+)$/) {
        # Old-Style: all five counters are 0 or -1; no side file.
    }
    elsif ($input =~ /^([^ ]+) ([0-9]+) +(.+?) ([-]*[0-9]+) ([-]*[0-9]+) ([-]*[0-9]+) ([-]*[0-9]+) ([-]*[0-9]+) (T[0-9]+ F[0-9]+)$/) {
        # $2 = length, $5 = "wI" counter, $7 = "woD" counter.
        my $tag = ochaire_bucket($2, $5, $7);
        emit($tag, $input . "\n") if defined $tag;
    }
    else {
        emit('Oe', $input . "\n");
    }
    return;
}

# DATA_SAMPLING section: write the line without its timestamp to the main
# file; lines starting with +2I_, +2D_, +3I_, +3D_ or +2ID_ are also copied
# (with timestamp) to the matching side file.
sub split_sampling_line {
    my ($input) = @_;

    if ($input =~ /^(.+?)(| T[0-9]+ F[0-9]+)$/) {
        my $core_data = $1;
        my $timestamp = $2;
        emit('OUT', $core_data . "\n");
        if ($core_data =~ /^\+(2I|2D|3I|3D|2ID)_/) {
            emit($1, $core_data . $timestamp . "\n");
        }
    }
    else {
        report_mismatch('DATA_SAMPLING', $input);
    }
    return;
}

# Any other section: write the line without its trailing timestamp.
sub split_other_line {
    my ($input) = @_;

    if ($input =~ /^(.+?)(| T[0-9]+ F[0-9]+)$/) {
        emit('OUT', $1 . "\n");
    }
    else {
        report_mismatch('others', $input);
    }
    return;
}

{
    if ($debug) { printf "%s:%d: DEBUG\n", __FILE__, __LINE__; }

    my $output_mode         = "";
    my $output_file_prepend = "";
    if (1 <= $argc) {
        $output_file_prepend = $ARGV[0] . ".";
    }

    while (defined(my $input = <STDIN>)) {
        # Echo with print, not printf: the data must not act as a format.
        if ($debug) { print STDERR $input; }
        chomp($input);

        if ($input =~ /^--- (\w*)$/i) {
            $output_mode = $1;
            open_section($output_file_prepend, $output_mode);
            next;
        }

        emit('RAW', $input . "\n");

        if ($output_mode eq "PREDICTION") {
            split_prediction_line($input);
        }
        elsif ($is_history_mode{$output_mode}) {
            split_history_line($output_mode, $input);
        }
        elsif ($output_mode eq "OCHAIRE") {
            split_ochaire_line($input);
        }
        elsif ($output_mode eq "DATA_SAMPLING") {
            split_sampling_line($input);
        }
        else {
            split_other_line($input);
        }
    }

    close_all_files();
    exit 0;
}

__END__
# [ EOF ]