Ticket #1882: mc-4.7.0pre-searchPCRE-escape-sequences-and-UTF8.patch
File mc-4.7.0pre-searchPCRE-escape-sequences-and-UTF8.patch, 9.6 KB (added by vitalif, 15 years ago)

- src/search/regex.c
diff -NaurpBb mc-4.7.0-pre1/src/search/regex.c mc-4.7.0-pre1-orig/src/search/regex.c
old new mc_search__regex_str_append_if_special ( 63 63 const char *special_chars[] = { 64 64 "\\s", "\\S", 65 65 "\\d", "\\D", 66 "\\ b", "\\B",66 "\\B", "\\B", 67 67 "\\w", "\\W", 68 68 "\\t", "\\n", 69 69 "\\r", "\\f", … … static int 348 348 mc_search_regex__process_replace_str (const GString * replace_str, const gsize current_pos, 349 349 gsize * skip_len, replace_transform_type_t * replace_flags) 350 350 { 351 int ret = -1; /* nothing special */351 int ret = -1; 352 352 char *tmp_str; 353 353 const char *curr_str = &(replace_str->str[current_pos]); 354 354 … … mc_search_regex__process_replace_str (co 360 360 if (*curr_str == '$' && *(curr_str + 1) == '{' && (*(curr_str + 2) & (char) 0xf0) == 0x30) { 361 361 if (strutils_is_char_escaped (replace_str->str, curr_str)) { 362 362 *skip_len = 1; 363 return -1; /* nothing special */363 return -1; 364 364 } 365 365 366 366 for (*skip_len = 0; … … mc_search_regex__process_replace_str (co 368 368 && (*(curr_str + 2 + *skip_len) & (char) 0xf0) == 0x30; (*skip_len)++); 369 369 370 370 if (*(curr_str + 2 + *skip_len) != '}') 371 return -1; /* nothing special */371 return -1; 372 372 373 373 tmp_str = g_strndup (curr_str + 2, *skip_len); 374 374 if (tmp_str == NULL) 375 return -1; /* nothing special */375 return -1; 376 376 377 377 ret = atoi (tmp_str); 378 378 g_free (tmp_str); 379 379 380 380 *skip_len += 3; /* ${} */ 381 return ret; /* capture buffer index >= 0 */381 return ret; 382 382 } 383 383 384 384 if (*curr_str == '\\') { 385 char next_char;386 385 if (strutils_is_char_escaped (replace_str->str, curr_str)) { 387 386 *skip_len = 1; 388 return -1; /* nothing special */387 return -1; 389 388 } 390 389 391 next_char = *(curr_str + 1); 392 if ((next_char & (char) 0xf0) == 0x30) { 393 ret = next_char - '0'; /* capture buffer index >= 0 */ 390 if ((*(curr_str + 1) & (char) 0xf0) == 0x30) { 391 ret = *(curr_str + 1) - '0'; 394 392 *skip_len = 2; /* \\ and one digit */ 395 393 return ret; 396 394 } 397 398 if (next_char == 
'{') 399 { 400 for (*skip_len = 2; /* \{ */ 401 current_pos + *skip_len < replace_str->len 402 && (*(curr_str + *skip_len)) != '}'; (*skip_len)++); 403 if (current_pos + *skip_len < replace_str->len) 404 (*skip_len)++; 405 return -3; /* escape sequence */ 406 } 407 408 if (next_char == 'x') 409 { 410 *skip_len = 2; /* \x */ 411 next_char = *(curr_str + 2); 412 if (next_char == '{') 413 { 414 for (*skip_len = 3; /* \x{ */ 415 current_pos + *skip_len < replace_str->len 416 && (*(curr_str + *skip_len)) != '}'; (*skip_len)++); 417 if (current_pos + *skip_len < replace_str->len) 418 (*skip_len)++; 419 return -3; /* escape sequence */ 420 } 421 else if ((next_char < '0' || next_char > '9') && 422 (next_char < 'a' || next_char > 'f') && 423 (next_char < 'A' || next_char > 'F')) 424 { 425 *skip_len = 2; /* \x without number behind */ 426 return -1; /* nothing special */ 427 } 428 else 429 { 430 next_char = *(curr_str + 3); 431 if ((next_char < '0' || next_char > '9') && 432 (next_char < 'a' || next_char > 'f') && 433 (next_char < 'A' || next_char > 'F')) 434 *skip_len = 3; /* \xH */ 435 else 436 *skip_len = 4; /* \xHH */ 437 return -3; /* escape sequence */ 438 } 439 } 440 441 if (next_char == 'n' || next_char == 't' || 442 next_char == 'v' || next_char == 'b' || 443 next_char == 'r' || next_char == 'f' || 444 next_char == 'a') 445 { 446 *skip_len = 2; 447 return -3; /* escape sequence */ 448 } 449 450 ret = -2; /* replace flag */ 395 ret = -2; 451 396 *skip_len += 2; 452 switch ( next_char) {397 switch (*(curr_str + 1)) { 453 398 case 'U': 454 399 *replace_flags |= REPLACE_T_UPP_TRANSFORM; 455 400 *replace_flags &= ~REPLACE_T_LOW_TRANSFORM; … … mc_search_regex__process_replace_str (co 468 413 *replace_flags = REPLACE_T_NO_TRANSFORM; 469 414 break; 470 415 default: 471 ret = -1; /* nothing special */416 ret = -1; 472 417 break; 473 418 } 474 419 } … … mc_search_regex__process_append_str (GSt 524 469 525 470 } 526 471 527 static void528 
mc_search_regex__process_escape_sequence (GString * dest_str, const char *from, gsize len,529 replace_transform_type_t * replace_flags)530 {531 gsize i = 0;532 char c = 0;533 if (len == (gsize) -1)534 len = strlen (from);535 if (len <= 0)536 return;537 if (from[i] == '{')538 i++;539 if (i >= len)540 return;541 if (from[i] == 'x')542 {543 i++;544 if (i < len && from[i] == '{')545 i++;546 for (; i < len; i++)547 {548 if (from[i] >= '0' && from[i] <= '9')549 c = c*16 + from[i] - '0';550 else if (from[i] >= 'a' && from[i] <= 'f')551 c = c*16 + 10 + from[i] - 'a';552 else if (from[i] >= 'A' && from[i] <= 'F')553 c = c*16 + 10 + from[i] - 'A';554 else555 break;556 }557 }558 else if (from[i] >= '0' && from[i] <= '9')559 for (; i < len && from[i] >= '0' && from[i] <= '7'; i++)560 c = c*8 + from[i] - '0';561 else562 {563 switch (from[i])564 {565 case 'n': c = '\n'; break;566 case 't': c = '\t'; break;567 case 'v': c = '\v'; break;568 case 'b': c = '\b'; break;569 case 'r': c = '\r'; break;570 case 'f': c = '\f'; break;571 case 'a': c = '\a'; break;572 default:573 mc_search_regex__process_append_str(dest_str, from, len, replace_flags);574 return;575 }576 }577 g_string_append_len (dest_str, &c, 1);578 }579 580 472 /*** public functions ****************************************************************************/ 581 473 582 474 void 583 475 mc_search__cond_struct_new_init_regex (const char *charset, mc_search_t * mc_search, 584 476 mc_search_cond_t * mc_search_cond) 585 477 { 586 int utf8 = 0;478 GString *tmp = NULL; 587 479 #ifdef SEARCH_TYPE_GLIB 588 480 GError *error = NULL; 589 481 #else /* SEARCH_TYPE_GLIB */ … … mc_search__cond_struct_new_init_regex (c 591 483 int erroffset; 592 484 #endif /* SEARCH_TYPE_GLIB */ 593 485 594 #ifdef SEARCH_TYPE_GLIB595 486 if (!mc_search->is_case_sentitive) { 596 GString *tmp = g_string_new_len (mc_search_cond->str->str, mc_search_cond->str->len);487 tmp = g_string_new_len (mc_search_cond->str->str, mc_search_cond->str->len); 597 488 
g_string_free (mc_search_cond->str, TRUE); 598 489 mc_search_cond->str = mc_search__cond_struct_new_regex_ci_str (charset, tmp->str, tmp->len); 599 490 g_string_free (tmp, TRUE); 600 491 } 492 #ifdef SEARCH_TYPE_GLIB 601 493 mc_search_cond->regex_handle = 602 494 g_regex_new (mc_search_cond->str->str, G_REGEX_OPTIMIZE | G_REGEX_RAW | G_REGEX_DOTALL, 0, 603 495 &error); … … mc_search__cond_struct_new_init_regex (c 609 501 return; 610 502 } 611 503 #else /* SEARCH_TYPE_GLIB */ 612 if (charset && !strncasecmp(charset, "utf-8", 6))613 utf8 = 1;614 504 mc_search_cond->regex_handle = 615 pcre_compile (mc_search_cond->str->str, (!mc_search->is_case_sentitive ? PCRE_CASELESS : 0) | PCRE_MULTILINE | (utf8 ? PCRE_UTF8 : 0) |PCRE_EXTRA, &error, &erroffset, NULL);505 pcre_compile (mc_search_cond->str->str, PCRE_EXTRA, &error, &erroffset, NULL); 616 506 if (mc_search_cond->regex_handle == NULL) { 617 507 mc_search->error = MC_SEARCH_E_REGEX_COMPILE; 618 508 mc_search->error_str = g_strdup (error); … … mc_search_regex_prepare_replace_str (mc_ 745 635 for (loop = 0; loop < replace_str->len - 1; loop++) { 746 636 index = mc_search_regex__process_replace_str (replace_str, loop, &len, &replace_flags); 747 637 748 /* nothing special */749 638 if (index == -1) { 750 639 if (len != 0) { 751 640 mc_search_regex__process_append_str (ret, prev_str, … … mc_search_regex_prepare_replace_str (mc_ 759 648 continue; 760 649 } 761 650 762 /* replace flag (transform) */763 651 if (index == -2) { 764 652 if (loop) 765 653 mc_search_regex__process_append_str (ret, prev_str, … … mc_search_regex_prepare_replace_str (mc_ 770 658 continue; 771 659 } 772 660 773 /* escape sequence */774 if (index == -3) {775 mc_search_regex__process_append_str (ret, prev_str,776 replace_str->str - prev_str + loop,777 &replace_flags);778 mc_search_regex__process_escape_sequence (ret, replace_str->str - loop + 1, len - 1,779 &replace_flags);780 prev_str = replace_str->str + loop + len;781 loop += len - 1;782 continue;783 
}784 785 661 if (index > mc_search->num_rezults) { 786 662 g_string_free (ret, TRUE); 787 663 mc_search->error = MC_SEARCH_E_REGEX_REPLACE;