diff -upr mlterm-3.6.1.org/mlterm/ml_screen.c mlterm-3.6.1.cjk_word_separate_patch/mlterm/ml_screen.c --- mlterm-3.6.1.org/mlterm/ml_screen.c 2015-12-15 21:24:59.000000000 +0900 +++ mlterm-3.6.1.cjk_word_separate_patch/mlterm/ml_screen.c 2016-02-20 15:40:29.000000000 +0900 @@ -6,6 +6,7 @@ #include /* abs */ #include +#include #include #include /* malloc/free */ #include /* strdup */ @@ -60,28 +61,135 @@ is_word_separator( ml_char_t * ch ) { - char * p ; - char c ; - - if( ml_char_cs(ch) != US_ASCII) - { - return 0 ; - } - - p = word_separators ; - c = ml_char_code(ch) ; - - while( *p) - { - if( c == *p) - { - return 1 ; + switch (ml_char_cs(ch)) { + case ISO646_IRV: + case ISO646_EN: + case US_ASCII: + case JISX0201_ROMAN: + { + const char c = ml_char_code(ch); + char* p = word_separators; + for (; *p; ++ p) { + if (c == *p) { + return 1; + } + } + return 0; } + break; - p ++ ; + case JISX0208_1983: + case JISX0208_1983_MAC_EXT: + case JISX0208_1990: + case JISX0212_1990: + case JISX0213_2000_1: + case JISX0213_2000_2: + { + const uint16_t c = ml_char_code(ch) & 0x7F7FUL; + if (c <= 0x227F) { + switch (c) { + default: + return -2; /* Sign */ + case 0x212B: + case 0x212C: + case 0x2133: + case 0x2134: + case 0x2135: + case 0x2136: + case 0x2137: + case 0x2138: + case 0x2139: + case 0x213A: + case 0x213C: + return -1; /* JP */ + } + } else if (c <= 0x237F) { + return -3; /* English */ + } else if (c <= 0x257F) { + return -1; /* JP */ + } else if (c <= 0x267F) { + return -4; /* Greek */ + } else if (c <= 0x277F) { + return -5; /* Cyrillic */ + } else if (c <= 0x2F7F) { + return -6; /* Sign, Extended */ + } + return -1; /* JP */ + } + break; + + case ISO10646_UCS2_1: + case ISO10646_UCS4_1: + { + const uint32_t c = ml_char_code(ch); + if (c <= 0x0033FF) { + if (c <= 0x00037F) { + return -16; /* ? */ + } else if (c <= 0x0003FF) { + return -15; /* ? */ + } else if (c <= 0x00052F) { + return -14; /* ? */ + } else if (c <= 0x0006FF) { + return -13; /* ? */ + } else if (c <= 0x0008FF) { + return -12; /* ? */ + } else if (c <= 0x000FFF) { + return -11; /* Thai */ + } else if (c <= 0x0010FF) { + return -10; /* ? */ + } else if (c <= 0x0011FF) { + return -1; /* Korea */ + } else if (c <= 0x0016FF) { + return -9; /* ? */ + } else if (c <= 0x001CFF) { + return -6; /* ? */ + } else if (c <= 0x001DFF) { + return -8; /* ? */ + } else if (c <= 0x001FFF) { + return -7; /* German? */ + } else if (c <= 0x00303F) { + return -6; /* Sign */ + } else if (c <= 0x0030FF) { + return -1; /* JP */ + } else if (c <= 0x00318F) { + return -1; /* Korea */ + } else if (c <= 0x0031EF) { + return -6; /* Sign */ + } else if (c <= 0x0031FF) { + return -1; /* JP */ + } + return -6; /* Sign */ + } else if (c <= 0x004DFF) { + return -1; /* CJK KANJI Extended-A */ + } else if (c <= 0x009FFF) { + return -1; /* CJK KANJI */ + } else if (c <= 0x00ABFF) { + return -17; /* ? */ + } else if (c <= 0x00D7AF) { + return -1; /* Korea, Precomposed Character */ + } else if (c <= 0x00F8FF) { + return -17; /* ? */ + } else if (c <= 0x00FAFF) { + return -1; /* CJK KANJI Compatible */ + } else if (c <= 0x00FEFF) { + return -17; /* ? */ + } else if (c <= 0x00FFFF) { + return -3; /* Full Width */ + } else if (c <= 0x01FFFF) { + return -17; /* ? */ + } else if (c <= 0x02A6FF) { + return -1; /* CJK KANJI Extended-B */ + } else if (c <= 0x02F7FF) { + return -17; /* ? */ + } else if (c <= 0x02FA1F) { + return -1; /* CJK KANJI Additional */ + } + return -17; /* ? */ + } + break; } - return 0 ; + return 0; } @@ -1624,6 +1732,8 @@ ml_screen_get_word_region( ml_line_t * base_line ; ml_char_t * ch ; int flag ; + int flag_detail ; + int flag_detail_tmp ; if( ( base_line = ml_screen_get_line( screen , base_row)) == NULL || ml_line_is_empty( base_line)) @@ -1631,7 +1741,8 @@ ml_screen_get_word_region( return 0 ; } - if( is_word_separator( ml_char_at( base_line , base_char_index))) + flag_detail = is_word_separator( ml_char_at( base_line , base_char_index)); + if( 0 < flag_detail) { *beg_char_index = base_char_index ; *end_char_index = base_char_index ; @@ -1672,7 +1783,8 @@ ml_screen_get_word_region( ch = ml_char_at( line , char_index) ; - if( is_word_separator(ch) || flag != ml_char_is_fullwidth( ch)) + flag_detail_tmp = is_word_separator(ch); + if( 0 < flag_detail_tmp || flag != ml_char_is_fullwidth( ch) || flag_detail != flag_detail_tmp) { *beg_char_index = char_index + 1 ; @@ -1712,7 +1824,8 @@ ml_screen_get_word_region( ch = ml_char_at( line , char_index) ; - if( is_word_separator(ch) || flag != ml_char_is_fullwidth( ch)) + flag_detail_tmp = is_word_separator(ch); + if( 0 < flag_detail_tmp || flag != ml_char_is_fullwidth( ch) || flag_detail != flag_detail_tmp) { *end_char_index = char_index - 1 ;