Ticket #1882: checkcurlybrace-widechars-tests.diff
File checkcurlybrace-widechars-tests.diff, 19.0 KB (added by vitalif, 13 years ago) |
---|
-
lib/search.h
diff --git a/lib/search.h b/lib/search.h index ae38518..f59ee9d 100644
a b typedef struct mc_search_struct 91 91 off_t start_buffer; 92 92 /* some data for regexp */ 93 93 int num_results; 94 gboolean is_utf8; 94 95 mc_search_matchinfo_t *regex_match_info; 95 96 GString *regex_buffer; 96 97 #ifdef SEARCH_TYPE_PCRE -
lib/search/regex.c
diff --git a/lib/search/regex.c b/lib/search/regex.c index 97f5fa9..2d4e5fd 100644
a b mc_search_regex__get_token_by_num (const mc_search_t * lc_mc_search, gsize lc_in 384 384 385 385 static gboolean 386 386 mc_search_regex__replace_handle_esc_seq (const GString * replace_str, const gsize current_pos, 387 gsize * skip_len, int *ret , char *next_char)387 gsize * skip_len, int *ret) 388 388 { 389 389 char *curr_str = &(replace_str->str[current_pos]); 390 391 *next_char = *(curr_str + 1); 390 char c = *(curr_str + 1); 392 391 393 392 if (replace_str->len > current_pos + 2) 394 393 { 395 if ( *next_char== '{')394 if (c == '{') 396 395 { 397 396 for (*skip_len = 2; /* \{ */ 398 397 current_pos + *skip_len < replace_str->len 399 && (*(curr_str + *skip_len)) != '}'; (*skip_len)++); 400 if (current_pos + *skip_len < replace_str->len) /* } */ 398 && *(curr_str + *skip_len) >= '0' 399 && *(curr_str + *skip_len) <= '7'; (*skip_len)++); 400 if (current_pos + *skip_len < replace_str->len && 401 *(curr_str + *skip_len) == '}') 402 { 401 403 (*skip_len)++; 402 *ret = REPLACE_PREPARE_T_ESCAPE_SEQ; 403 return FALSE; 404 *ret = REPLACE_PREPARE_T_ESCAPE_SEQ; 405 return FALSE; 406 } 407 else 408 { 409 *ret = REPLACE_PREPARE_T_NOTHING_SPECIAL; 410 return TRUE; 411 } 404 412 } 405 413 406 if ( *next_char== 'x')414 if (c == 'x') 407 415 { 408 416 *skip_len = 2; /* \x */ 409 *next_char= *(curr_str + 2);410 if ( *next_char== '{')417 c = *(curr_str + 2); 418 if (c == '{') 411 419 { 412 420 for (*skip_len = 3; /* \x{ */ 413 421 current_pos + *skip_len < replace_str->len 414 && (*(curr_str + *skip_len)) != '}'; (*skip_len)++); 415 if (current_pos + *skip_len < replace_str->len) 422 && g_ascii_isxdigit ((guchar) *(curr_str + *skip_len)); (*skip_len)++); 423 if (current_pos + *skip_len < replace_str->len && 424 *(curr_str + *skip_len) == '}') 425 { 416 426 (*skip_len)++; 417 *ret = REPLACE_PREPARE_T_ESCAPE_SEQ; 418 return FALSE; 427 *ret = REPLACE_PREPARE_T_ESCAPE_SEQ; 428 return FALSE; 429 } 430 else 431 { 432 *ret = REPLACE_PREPARE_T_NOTHING_SPECIAL; 433 return TRUE; 434 } 419 435 } 420 else if (!g_ascii_isxdigit ((guchar) * next_char))436 else if (!g_ascii_isxdigit ((guchar) c)) 421 437 { 422 438 *skip_len = 2; /* \x without number behind */ 423 439 *ret = REPLACE_PREPARE_T_NOTHING_SPECIAL; … … mc_search_regex__replace_handle_esc_seq (const GString * replace_str, const gsiz 425 441 } 426 442 else 427 443 { 428 *next_char= *(curr_str + 3);429 if (!g_ascii_isxdigit ((guchar) * next_char))444 c = *(curr_str + 3); 445 if (!g_ascii_isxdigit ((guchar) c)) 430 446 *skip_len = 3; /* \xH */ 431 447 else 432 448 *skip_len = 4; /* \xHH */ … … mc_search_regex__replace_handle_esc_seq (const GString * replace_str, const gsiz 436 452 } 437 453 } 438 454 439 if (strchr ("ntvbrfa", *next_char) != NULL)455 if (strchr ("ntvbrfa", c) != NULL) 440 456 { 441 457 *skip_len = 2; 442 458 *ret = REPLACE_PREPARE_T_ESCAPE_SEQ; … … mc_search_regex__process_replace_str (const GString * replace_str, const gsize c 489 505 490 506 if ((*curr_str == '\\') && (replace_str->len > current_pos + 1)) 491 507 { 492 char next_char;493 494 508 if (strutils_is_char_escaped (replace_str->str, curr_str)) 495 509 { 496 510 *skip_len = 1; … … mc_search_regex__process_replace_str (const GString * replace_str, const gsize c 505 519 } 506 520 507 521 if (!mc_search_regex__replace_handle_esc_seq 508 (replace_str, current_pos, skip_len, &ret , &next_char))522 (replace_str, current_pos, skip_len, &ret)) 509 523 return ret; 510 524 511 525 ret = REPLACE_PREPARE_T_REPLACE_FLAG; 512 526 *skip_len += 2; 513 switch ( next_char)527 switch (*(curr_str + 1)) 514 528 { 515 529 case 'U': 516 530 *replace_flags |= REPLACE_T_UPP_TRANSFORM; … … mc_search_regex__process_append_str (GString * dest_str, const char *from, gsize 604 618 605 619 static void 606 620 mc_search_regex__process_escape_sequence (GString * dest_str, const char *from, gsize len, 607 replace_transform_type_t * replace_flags )621 replace_transform_type_t * replace_flags, gboolean is_utf8) 608 622 { 609 623 gsize i = 0; 610 char c = 0; 624 unsigned int c = 0; 625 char b; 611 626 612 627 if (len == (gsize) (-1)) 613 628 len = strlen (from); … … mc_search_regex__process_escape_sequence (GString * dest_str, const char *from, 634 649 break; 635 650 } 636 651 } 637 else if (from[i] >= '0' && from[i] <= ' 9')652 else if (from[i] >= '0' && from[i] <= '7') 638 653 for (; i < len && from[i] >= '0' && from[i] <= '7'; i++) 639 654 c = c * 8 + from[i] - '0'; 640 655 else … … mc_search_regex__process_escape_sequence (GString * dest_str, const char *from, 667 682 return; 668 683 } 669 684 } 670 g_string_append_len (dest_str, &c, 1); 685 if (c < 0x80 || !is_utf8) 686 { 687 b = (char)c; 688 g_string_append_len (dest_str, &b, 1); 689 } 690 else if (c < 0x800) 691 { 692 b = 0xC0|(c>>6); 693 g_string_append_len (dest_str, &b, 1); 694 b = 0x80|(c&0x3F); 695 g_string_append_len (dest_str, &b, 1); 696 } 697 else if (c < 0x10000) 698 { 699 b = 0xE0|(c>>12); 700 g_string_append_len (dest_str, &b, 1); 701 b = 0x80|((c>>6)&0x3F); 702 g_string_append_len (dest_str, &b, 1); 703 b = 0x80|(c&0x3F); 704 g_string_append_len (dest_str, &b, 1); 705 } 706 else if (c < 0x10FFFF) 707 { 708 b = 0xF0|(c>>16); 709 g_string_append_len (dest_str, &b, 1); 710 b = 0x80|((c>>12)&0x3F); 711 g_string_append_len (dest_str, &b, 1); 712 b = 0x80|((c>>6)&0x3F); 713 g_string_append_len (dest_str, &b, 1); 714 b = 0x80|(c&0x3F); 715 g_string_append_len (dest_str, &b, 1); 716 } 671 717 } 672 718 673 719 /* --------------------------------------------------------------------------------------------- */ … … mc_search__cond_struct_new_init_regex (const char *charset, mc_search_t * lc_mc_ 744 790 } 745 791 } 746 792 #endif /* SEARCH_TYPE_GLIB */ 793 lc_mc_search->is_utf8 = str_isutf8(charset) ? TRUE : FALSE; 747 794 } 748 795 749 796 /* --------------------------------------------------------------------------------------------- */ … … mc_search_regex_prepare_replace_str (mc_search_t * lc_mc_search, GString * repla 914 961 &replace_flags); 915 962 /* call process_escape_sequence without starting '\\' */ 916 963 mc_search_regex__process_escape_sequence (ret, replace_str->str + loop + 1, len - 1, 917 &replace_flags );964 &replace_flags, lc_mc_search->is_utf8); 918 965 prev_str = replace_str->str + loop + len; 919 966 loop += len - 1; 920 967 continue; -
lib/tests/search/regex_process_escape_sequence.c
diff --git a/lib/tests/search/regex_process_escape_sequence.c b/lib/tests/search/regex_process_escape_sequence.c index cc74d04..126cd9b 100644
a b 27 27 #include "regex.c" /* for testing static functions*/ 28 28 29 29 /* --------------------------------------------------------------------------------------------- */ 30 #define test_helper_valid_data(from, etalon, dest_str, replace_flags ) { \30 #define test_helper_valid_data(from, etalon, dest_str, replace_flags, utf) { \ 31 31 dest_str = g_string_new(""); \ 32 mc_search_regex__process_escape_sequence (dest_str, from, -1, &replace_flags ); \32 mc_search_regex__process_escape_sequence (dest_str, from, -1, &replace_flags, utf); \ 33 33 fail_if (strcmp(dest_str->str, etalon), "dest_str(%s) != %s", dest_str->str, etalon); \ 34 34 g_string_free(dest_str, TRUE); \ 35 35 } … … START_TEST (test_regex_process_escape_sequence_valid) 41 41 GString *dest_str; 42 42 replace_transform_type_t replace_flags = REPLACE_T_NO_TRANSFORM; 43 43 44 test_helper_valid_data("{101}", "A", dest_str, replace_flags); 45 test_helper_valid_data("x42", "B", dest_str, replace_flags); 46 test_helper_valid_data("x{4344}", "CD", dest_str, replace_flags); 47 48 test_helper_valid_data("n", "\n", dest_str, replace_flags); 49 test_helper_valid_data("t", "\t", dest_str, replace_flags); 50 test_helper_valid_data("v", "\v", dest_str, replace_flags); 51 test_helper_valid_data("b", "\b", dest_str, replace_flags); 52 test_helper_valid_data("r", "\r", dest_str, replace_flags); 53 test_helper_valid_data("f", "\f", dest_str, replace_flags); 54 test_helper_valid_data("a", "\a", dest_str, replace_flags); 55 } 56 END_TEST 57 58 /* --------------------------------------------------------------------------------------------- */ 59 60 START_TEST (test_regex_process_escape_sequence_invalid) 61 { 62 GString *dest_str; 63 replace_transform_type_t replace_flags = REPLACE_T_NO_TRANSFORM; 64 65 test_helper_valid_data("{101", "{101", dest_str, replace_flags); 66 test_helper_valid_data("101}", "101}", dest_str, replace_flags); 67 test_helper_valid_data("{ab}", "{ab}", dest_str, replace_flags); 68 test_helper_valid_data("xqw", "xqw", dest_str, replace_flags); 69 test_helper_valid_data("x{41", "x{41", dest_str, replace_flags); 70 test_helper_valid_data("x{qwer}", "x{qwer}", dest_str, replace_flags); 71 test_helper_valid_data("s", "s", dest_str, replace_flags); 72 test_helper_valid_data("Q", "Q", dest_str, replace_flags); 73 test_helper_valid_data("1", "1", dest_str, replace_flags); 44 test_helper_valid_data("{101}", "A", dest_str, replace_flags, FALSE); 45 test_helper_valid_data("x42", "B", dest_str, replace_flags, FALSE); 46 test_helper_valid_data("x{444}", "D", dest_str, replace_flags, FALSE); 47 test_helper_valid_data("x{444}", "ф", dest_str, replace_flags, TRUE); 48 49 test_helper_valid_data("n", "\n", dest_str, replace_flags, FALSE); 50 test_helper_valid_data("t", "\t", dest_str, replace_flags, FALSE); 51 test_helper_valid_data("v", "\v", dest_str, replace_flags, FALSE); 52 test_helper_valid_data("b", "\b", dest_str, replace_flags, FALSE); 53 test_helper_valid_data("r", "\r", dest_str, replace_flags, FALSE); 54 test_helper_valid_data("f", "\f", dest_str, replace_flags, FALSE); 55 test_helper_valid_data("a", "\a", dest_str, replace_flags, FALSE); 74 56 } 75 57 END_TEST 76 58 … … main (void) 87 69 88 70 /* Add new tests here: *************** */ 89 71 tcase_add_test (tc_core, test_regex_process_escape_sequence_valid); 90 tcase_add_test (tc_core, test_regex_process_escape_sequence_invalid);91 72 /* *********************************** */ 92 73 93 74 suite_add_tcase (s, tc_core); -
lib/tests/search/regex_replace_esc_seq.c
diff --git a/lib/tests/search/regex_replace_esc_seq.c b/lib/tests/search/regex_replace_esc_seq.c index 4fc965a..bdd15ab 100644
a b 27 27 #include "regex.c" /* for testing static functions*/ 28 28 29 29 /* --------------------------------------------------------------------------------------------- */ 30 #define test_helper_check_valid_data( a, b, c, d, e, f , g, h) \30 #define test_helper_check_valid_data( a, b, c, d, e, f ) \ 31 31 { \ 32 32 fail_unless( a == b, "ret_value != %s", (b) ? "TRUE": "FALSE" ); \ 33 33 fail_unless( c == d, "skip_len(%d) != %d", c, d ); \ 34 34 if (f!=0) fail_unless( e == f, "ret(%d) != %d", e, f ); \ 35 fail_unless( g == h, "next_char('%c':%d) != %d", g, g, h ); \ 36 } \ 35 } 36 37 #define test_helper_handle_esc_seq( pos, r, skip, flag ) \ 38 { \ 39 skip_len = 0;\ 40 test_helper_check_valid_data(\ 41 mc_search_regex__replace_handle_esc_seq( replace_str, pos, &skip_len, &ret ), r,\ 42 skip_len, skip,\ 43 ret, flag\ 44 ); \ 45 } 37 46 38 47 /* --------------------------------------------------------------------------------------------- */ 39 48 … … START_TEST (test_regex_replace_esc_seq_prepare_valid) 42 51 GString *replace_str; 43 52 gsize skip_len; 44 53 int ret; 45 char next_char;46 54 47 55 replace_str = g_string_new("bla-bla\\{123}bla-bla\\xabc234 bla-bla\\x{456abcd}bla-bla\\xtre\\n\\t\\v\\b\\r\\f\\a"); 48 56 49 /* \\{123} */ 50 skip_len=0; 51 test_helper_check_valid_data( 52 mc_search_regex__replace_handle_esc_seq ( replace_str, 7, &skip_len, &ret, &next_char ), FALSE, 53 skip_len, 6, 54 ret, REPLACE_PREPARE_T_ESCAPE_SEQ, 55 next_char, '{' 56 ); 57 58 /* \\xab */ 59 skip_len=0; 60 test_helper_check_valid_data( 61 mc_search_regex__replace_handle_esc_seq ( replace_str, 20, &skip_len, &ret, &next_char ), FALSE, 62 skip_len, 4, 63 ret, REPLACE_PREPARE_T_ESCAPE_SEQ, 64 next_char, 'b' 65 ); 66 67 /* \\x{456abcd} */ 68 skip_len=0; 69 test_helper_check_valid_data( 70 mc_search_regex__replace_handle_esc_seq ( replace_str, 36, &skip_len, &ret, &next_char ), FALSE, 71 skip_len, 11, 72 ret, REPLACE_PREPARE_T_ESCAPE_SEQ, 73 next_char, '{' 74 ); 75 76 /* \\xtre */ 77 skip_len=0; 78 test_helper_check_valid_data( 79 mc_search_regex__replace_handle_esc_seq ( replace_str, 54, &skip_len, &ret, &next_char ), FALSE, 80 skip_len, 2, 81 ret, REPLACE_PREPARE_T_NOTHING_SPECIAL, 82 next_char, 't' 83 ); 84 85 /* \\n */ 86 skip_len=0; 87 test_helper_check_valid_data( 88 mc_search_regex__replace_handle_esc_seq ( replace_str, 59, &skip_len, &ret, &next_char ), FALSE, 89 skip_len, 2, 90 ret, REPLACE_PREPARE_T_ESCAPE_SEQ, 91 next_char, 'n' 92 ); 93 94 /* \\t */ 95 skip_len=0; 96 test_helper_check_valid_data( 97 mc_search_regex__replace_handle_esc_seq ( replace_str, 61, &skip_len, &ret, &next_char ), FALSE, 98 skip_len, 2, 99 ret, REPLACE_PREPARE_T_ESCAPE_SEQ, 100 next_char, 't' 101 ); 102 103 /* \\v */ 104 skip_len=0; 105 test_helper_check_valid_data( 106 mc_search_regex__replace_handle_esc_seq ( replace_str, 63, &skip_len, &ret, &next_char ), FALSE, 107 skip_len, 2, 108 ret, REPLACE_PREPARE_T_ESCAPE_SEQ, 109 next_char, 'v' 110 ); 111 112 /* \\b */ 113 skip_len=0; 114 test_helper_check_valid_data( 115 mc_search_regex__replace_handle_esc_seq ( replace_str, 65, &skip_len, &ret, &next_char ), FALSE, 116 skip_len, 2, 117 ret, REPLACE_PREPARE_T_ESCAPE_SEQ, 118 next_char, 'b' 119 ); 120 121 /* \\r */ 122 skip_len=0; 123 test_helper_check_valid_data( 124 mc_search_regex__replace_handle_esc_seq ( replace_str, 67, &skip_len, &ret, &next_char ), FALSE, 125 skip_len, 2, 126 ret, REPLACE_PREPARE_T_ESCAPE_SEQ, 127 next_char, 'r' 128 ); 129 130 /* \\f */ 131 skip_len=0; 132 test_helper_check_valid_data( 133 mc_search_regex__replace_handle_esc_seq ( replace_str, 69, &skip_len, &ret, &next_char ), FALSE, 134 skip_len, 2, 135 ret, REPLACE_PREPARE_T_ESCAPE_SEQ, 136 next_char, 'f' 137 ); 138 139 /* \\a */ 140 skip_len=0; 141 test_helper_check_valid_data( 142 mc_search_regex__replace_handle_esc_seq ( replace_str, 71, &skip_len, &ret, &next_char ), FALSE, 143 skip_len, 2, 144 ret, REPLACE_PREPARE_T_ESCAPE_SEQ, 145 next_char, 'a' 146 ); 57 test_helper_handle_esc_seq( 7, FALSE, 6, REPLACE_PREPARE_T_ESCAPE_SEQ ); /* \\{123} */ 58 test_helper_handle_esc_seq( 20, FALSE, 4, REPLACE_PREPARE_T_ESCAPE_SEQ ); /* \\xab */ 59 test_helper_handle_esc_seq( 36, FALSE, 11, REPLACE_PREPARE_T_ESCAPE_SEQ ); /* \\x{456abcd} */ 60 test_helper_handle_esc_seq( 54, FALSE, 2, REPLACE_PREPARE_T_NOTHING_SPECIAL ); /* \\xtre */ 61 test_helper_handle_esc_seq( 59, FALSE, 2, REPLACE_PREPARE_T_ESCAPE_SEQ ); /* \\n */ 62 test_helper_handle_esc_seq( 61, FALSE, 2, REPLACE_PREPARE_T_ESCAPE_SEQ ); /* \\t */ 63 test_helper_handle_esc_seq( 63, FALSE, 2, REPLACE_PREPARE_T_ESCAPE_SEQ ); /* \\v */ 64 test_helper_handle_esc_seq( 65, FALSE, 2, REPLACE_PREPARE_T_ESCAPE_SEQ ); /* \\b */ 65 test_helper_handle_esc_seq( 67, FALSE, 2, REPLACE_PREPARE_T_ESCAPE_SEQ ); /* \\r */ 66 test_helper_handle_esc_seq( 69, FALSE, 2, REPLACE_PREPARE_T_ESCAPE_SEQ ); /* \\f */ 67 test_helper_handle_esc_seq( 71, FALSE, 2, REPLACE_PREPARE_T_ESCAPE_SEQ ); /* \\a */ 147 68 148 69 g_string_free(replace_str, TRUE); 149 70 } … … START_TEST (test_regex_replace_esc_seq_prepare_invalid) 157 78 GString *replace_str; 158 79 gsize skip_len; 159 80 int ret; 160 char next_char;161 81 162 82 replace_str = g_string_new("\\{123 \\x{qwerty} \\12} \\x{456a-bcd}bla-bla\\satre"); 163 83 164 /* \\{123 */ 165 skip_len=0; 166 test_helper_check_valid_data( 167 mc_search_regex__replace_handle_esc_seq ( replace_str, 0, &skip_len, &ret, &next_char ), TRUE, 168 skip_len, 0, 169 0, 0, 170 next_char, '{' 171 ); 172 173 /* \\x{qwerty} */ 174 skip_len=0; 175 test_helper_check_valid_data( 176 mc_search_regex__replace_handle_esc_seq ( replace_str, 6, &skip_len, &ret, &next_char ), TRUE, 177 skip_len, 0, 178 0, 0, 179 next_char, 'x' 180 ); 181 /* \\12} */ 182 skip_len=0; 183 test_helper_check_valid_data( 184 mc_search_regex__replace_handle_esc_seq ( replace_str, 17, &skip_len, &ret, &next_char ), TRUE, 185 skip_len, 0, 186 0, 0, 187 next_char, '1' 188 ); 189 190 /* \\x{456a-bcd} */ 191 skip_len=0; 192 test_helper_check_valid_data( 193 mc_search_regex__replace_handle_esc_seq ( replace_str, 22, &skip_len, &ret, &next_char ), TRUE, 194 skip_len, 0, 195 0, 0, 196 next_char, 'x' 197 ); 198 199 /* \\satre */ 200 skip_len=0; 201 test_helper_check_valid_data( 202 mc_search_regex__replace_handle_esc_seq ( replace_str, 41, &skip_len, &ret, &next_char ), TRUE, 203 skip_len, 0, 204 0, 0, 205 next_char, 's' 206 ); 84 test_helper_handle_esc_seq( 0, TRUE, 5, REPLACE_PREPARE_T_NOTHING_SPECIAL ); /* \\{123 */ 85 test_helper_handle_esc_seq( 6, TRUE, 3, REPLACE_PREPARE_T_NOTHING_SPECIAL ); /* \\x{qwerty} */ 86 test_helper_handle_esc_seq( 17, TRUE, 0, REPLACE_PREPARE_T_NOTHING_SPECIAL ); /* \\12} */ 87 test_helper_handle_esc_seq( 22, TRUE, 7, REPLACE_PREPARE_T_NOTHING_SPECIAL ); /* \\x{456a-bcd} */ 88 test_helper_handle_esc_seq( 41, TRUE, 0, REPLACE_PREPARE_T_NOTHING_SPECIAL ); /* \\satre */ 207 89 208 90 g_string_free(replace_str, TRUE); 209 91 }