CJK-aware word selection for mlterm 2.9.4 (mlterm/ml_screen.c).

is_word_separator() keeps returning 1 for the configured separator bytes and
additionally returns a negative script class for multi-byte characters.
ml_screen_get_word_region() ends the selected word where that class changes.

Notes:
 * Character codes are assembled byte by byte, so no byte-order helpers or
   fixed-width integer types (and no additional #include) are needed.
 * ISO10646_UCS2_1 characters are read as two bytes, ISO10646_UCS4_1 as four.
 * The indentation of the original source file is not reproduced here;
   apply with "patch -p1 -l".

diff -upr mlterm-2.9.4.org/mlterm/ml_screen.c mlterm-2.9.4.cjk_word_separate_patch/mlterm/ml_screen.c
--- mlterm-2.9.4.org/mlterm/ml_screen.c	2006-10-24 19:10:42.000000000 +0900
+++ mlterm-2.9.4.cjk_word_separate_patch/mlterm/ml_screen.c	2009-12-25 00:00:00.000000000 +0900
@@ -57,28 +57,160 @@ is_word_separator(
 	ml_char_t * ch
 	)
 {
-	char * p ;
-	char c ;
-
-	if( ml_char_cs(ch) != US_ASCII)
-	{
-		return 0 ;
-	}
-
-	p = word_separators ;
-	c = ml_char_bytes(ch)[0] ;
-
-	while( *p)
-	{
-		if( c == *p)
-		{
-			return 1 ;
+	/*
+	 * Classify ch for word selection.
+	 *   1   : ch is one of the bytes in word_separators.
+	 *   0   : any other single-byte character, or a character set
+	 *         that is not handled below.
+	 *   < 0 : script class of a multi-byte character.  Callers
+	 *         compare the classes of neighbouring characters and
+	 *         end the word where they differ.
+	 */
+	switch (ml_char_cs(ch)) {
+	case ISO646_IRV:
+	case ISO646_EN:
+	case US_ASCII:
+	case JISX0201_ROMAN:
+		{
+			const char c = ml_char_bytes(ch)[0];
+			char *p = word_separators;
+			for (; *p; ++ p) {
+				if (c == *p) {
+					return 1;
+				}
+			}
+			return 0;
 		}
+		break;
 
-		p ++ ;
+	case JISX0208_1983:
+	case JISX0208_1983_MAC_EXT:
+	case JISX0208_1990:
+	case JISX0212_1990:
+	case JISX0213_2000_1:
+	case JISX0213_2000_2:
+		{
+			/*
+			 * Assemble the big-endian code byte by byte.  Reading it
+			 * through a casted 16-bit pointer would assume an alignment
+			 * that a byte buffer need not have.
+			 */
+			const unsigned char *b = (const unsigned char *)ml_char_bytes(ch);
+			const unsigned int c = (((unsigned int)b[0] << 8) | b[1]) & 0x7F7FU;
+			if (c <= 0x227F) {
+				switch (c) {
+				default:
+					return -2; /* Sign */
+				case 0x212B:
+				case 0x212C:
+				case 0x2133:
+				case 0x2134:
+				case 0x2135:
+				case 0x2136:
+				case 0x2137:
+				case 0x2138:
+				case 0x2139:
+				case 0x213A:
+				case 0x213C:
+					return -1; /* JP: marks that belong to kana/kanji words */
+				}
+			} else if (c <= 0x237F) {
+				return -3; /* English (full-width digits and Latin letters) */
+			} else if (c <= 0x257F) {
+				return -1; /* JP (hiragana, katakana) */
+			} else if (c <= 0x267F) {
+				return -4; /* Greek */
+			} else if (c <= 0x277F) {
+				return -5; /* Cyrillic */
+			} else if (c <= 0x2F7F) {
+				return -6; /* Sign, Extended */
+			}
+			return -1; /* JP (kanji rows) */
+		}
+		break;
+
+	case ISO10646_UCS2_1:
+	case ISO10646_UCS4_1:
+		{
+			/*
+			 * UCS2 characters occupy two bytes and UCS4 characters four,
+			 * so read only as many bytes as the character set provides.
+			 */
+			const unsigned char *b = (const unsigned char *)ml_char_bytes(ch);
+			unsigned long c;
+			if (ml_char_cs(ch) == ISO10646_UCS2_1) {
+				c = ((unsigned long)b[0] << 8) | b[1];
+			} else {
+				c = ((unsigned long)b[0] << 24) | ((unsigned long)b[1] << 16) |
+					((unsigned long)b[2] << 8) | b[3];
+			}
+			if (c <= 0x0033FF) {
+				if (c <= 0x00037F) {
+					return -16; /* Latin, IPA, modifier letters, combining marks */
+				} else if (c <= 0x0003FF) {
+					return -15; /* Greek and Coptic */
+				} else if (c <= 0x00052F) {
+					return -14; /* Cyrillic, Cyrillic Supplement */
+				} else if (c <= 0x0006FF) {
+					return -13; /* Armenian, Hebrew, Arabic */
+				} else if (c <= 0x0008FF) {
+					return -12; /* Syriac, Thaana, NKo, Arabic extensions */
+				} else if (c <= 0x000FFF) {
+					return -11; /* Indic scripts, Thai, Lao, Tibetan */
+				} else if (c <= 0x0010FF) {
+					return -10; /* Myanmar, Georgian */
+				} else if (c <= 0x0011FF) {
+					return -1; /* Korea (Hangul Jamo) */
+				} else if (c <= 0x0016FF) {
+					return -9; /* Ethiopic, Cherokee, Canadian Syllabics, Ogham, Runic */
+				} else if (c <= 0x001CFF) {
+					return -6; /* Tagalog .. Vedic Extensions (shares the Sign class) */
+				} else if (c <= 0x001DFF) {
+					return -8; /* Phonetic Extensions, combining marks supplement */
+				} else if (c <= 0x001FFF) {
+					return -7; /* Latin Extended Additional, Greek Extended */
+				} else if (c <= 0x00303F) {
+					return -6; /* Sign */
+				} else if (c <= 0x0030FF) {
+					return -1; /* JP (Hiragana, Katakana) */
+				} else if (c <= 0x00318F) {
+					return -1; /* Bopomofo, Korea (Hangul Compatibility Jamo) */
+				} else if (c <= 0x0031EF) {
+					return -6; /* Sign (Kanbun, Bopomofo Extended, CJK Strokes) */
+				} else if (c <= 0x0031FF) {
+					return -1; /* JP (Katakana Phonetic Extensions) */
+				}
+				return -6; /* Sign (Enclosed CJK, CJK Compatibility) */
+			} else if (c <= 0x004DFF) {
+				return -1; /* CJK KANJI Extended-A */
+			} else if (c <= 0x009FFF) {
+				return -1; /* CJK KANJI */
+			} else if (c <= 0x00ABFF) {
+				return -17; /* Yi and other scripts (catch-all class) */
+			} else if (c <= 0x00D7AF) {
+				return -1; /* Korea, Precomposed Character */
+			} else if (c <= 0x00F8FF) {
+				return -17; /* surrogates, Private Use Area (catch-all class) */
+			} else if (c <= 0x00FAFF) {
+				return -1; /* CJK KANJI Compatible */
+			} else if (c <= 0x00FEFF) {
+				return -17; /* presentation and compatibility forms (catch-all class) */
+			} else if (c <= 0x00FFFF) {
+				return -3; /* Full Width */
+			} else if (c <= 0x01FFFF) {
+				return -17; /* supplementary plane 1 (catch-all class) */
+			} else if (c <= 0x02A6FF) {
+				return -1; /* CJK KANJI Extended-B */
+			} else if (c <= 0x02F7FF) {
+				return -17; /* beyond Extended-B (catch-all class) */
+			} else if (c <= 0x02FA1F) {
+				return -1; /* CJK KANJI Additional */
+			}
+			return -17; /* everything else (catch-all class) */
+		}
+		break;
 	}
-
-	return 0 ;
+	return 0;
 }
 
 /*
@@ -1442,6 +1574,8 @@ ml_screen_get_word_region(
 	ml_line_t * base_line ;
 	ml_char_t * ch ;
 	int flag ;
+	int flag_detail ;
+	int flag_detail_tmp ;
 
 	if( ( base_line = ml_screen_get_line( screen , base_row)) == NULL ||
 		ml_line_is_empty( base_line))
@@ -1449,7 +1583,8 @@ ml_screen_get_word_region(
 		return 0 ;
 	}
 
-	if( is_word_separator( ml_char_at( base_line , base_char_index)))
+	flag_detail = is_word_separator( ml_char_at( base_line , base_char_index));
+	if( 0 < flag_detail)
 	{
 		*beg_char_index = base_char_index ;
 		*end_char_index = base_char_index ;
@@ -1490,7 +1625,8 @@ ml_screen_get_word_region(
 
 		ch = ml_char_at( line , char_index) ;
 
-		if( is_word_separator(ch) || flag != ml_char_is_biwidth( ch))
+		flag_detail_tmp = is_word_separator(ch);
+		if( 0 < flag_detail_tmp || flag != ml_char_is_biwidth( ch) || flag_detail != flag_detail_tmp)
 		{
 			*beg_char_index = char_index + 1 ;
 
@@ -1530,7 +1666,8 @@ ml_screen_get_word_region(
 
 		ch = ml_char_at( line , char_index) ;
 
-		if( is_word_separator(ch) || flag != ml_char_is_biwidth( ch))
+		flag_detail_tmp = is_word_separator(ch);
+		if( 0 < flag_detail_tmp || flag != ml_char_is_biwidth( ch) || flag_detail != flag_detail_tmp)
 		{
 			*end_char_index = char_index - 1 ;
 