Ticket #1882: mc-4.7.0.1-regex-replace-escape-sequences.diff
File: mc-4.7.0.1-regex-replace-escape-sequences.diff, 9.3 KB (added by vitalif, 15 years ago)
-
src/search/regex.c
old new static int 348 348 mc_search_regex__process_replace_str (const GString * replace_str, const gsize current_pos, 349 349 gsize * skip_len, replace_transform_type_t * replace_flags) 350 350 { 351 int ret = -1; 351 int ret = -1; /* nothing special */ 352 352 char *tmp_str; 353 353 const char *curr_str = &(replace_str->str[current_pos]); 354 354 … … mc_search_regex__process_replace_str (co 360 360 if (*curr_str == '$' && *(curr_str + 1) == '{' && (*(curr_str + 2) & (char) 0xf0) == 0x30) { 361 361 if (strutils_is_char_escaped (replace_str->str, curr_str)) { 362 362 *skip_len = 1; 363 return -1; 363 return -1; /* nothing special */ 364 364 } 365 365 366 366 for (*skip_len = 0; … … mc_search_regex__process_replace_str (co 368 368 && (*(curr_str + 2 + *skip_len) & (char) 0xf0) == 0x30; (*skip_len)++); 369 369 370 370 if (*(curr_str + 2 + *skip_len) != '}') 371 return -1; 371 return -1; /* nothing special */ 372 372 373 373 tmp_str = g_strndup (curr_str + 2, *skip_len); 374 374 if (tmp_str == NULL) 375 return -1; 375 return -1; /* nothing special */ 376 376 377 377 ret = atoi (tmp_str); 378 378 g_free (tmp_str); 379 379 380 380 *skip_len += 3; /* ${} */ 381 return ret; 381 return ret; /* capture buffer index >= 0 */ 382 382 } 383 383 384 384 if (*curr_str == '\\') { 385 char next_char; 385 386 if (strutils_is_char_escaped (replace_str->str, curr_str)) { 386 387 *skip_len = 1; 387 return -1; 388 return -1; /* nothing special */ 388 389 } 389 390 390 if ((*(curr_str + 1) & (char) 0xf0) == 0x30) { 391 ret = *(curr_str + 1) - '0'; 391 next_char = *(curr_str + 1); 392 if ((next_char & (char) 0xf0) == 0x30) { 393 ret = next_char - '0'; /* capture buffer index >= 0 */ 392 394 *skip_len = 2; /* \\ and one digit */ 393 395 return ret; 394 396 } 395 ret = -2; 397 398 if (next_char == '{') 399 { 400 for (*skip_len = 2; /* \{ */ 401 current_pos + *skip_len < replace_str->len 402 && (*(curr_str + *skip_len)) != '}'; (*skip_len)++); 403 if (current_pos + *skip_len < 
replace_str->len) 404 (*skip_len)++; 405 return -3; /* escape sequence */ 406 } 407 408 if (next_char == 'x') 409 { 410 *skip_len = 2; /* \x */ 411 next_char = *(curr_str + 2); 412 if (next_char == '{') 413 { 414 for (*skip_len = 3; /* \x{ */ 415 current_pos + *skip_len < replace_str->len 416 && (*(curr_str + *skip_len)) != '}'; (*skip_len)++); 417 if (current_pos + *skip_len < replace_str->len) 418 (*skip_len)++; 419 return -3; /* escape sequence */ 420 } 421 else if ((next_char < '0' || next_char > '9') && 422 (next_char < 'a' || next_char > 'f') && 423 (next_char < 'A' || next_char > 'F')) 424 { 425 *skip_len = 2; /* \x without number behind */ 426 return -1; /* nothing special */ 427 } 428 else 429 { 430 next_char = *(curr_str + 3); 431 if ((next_char < '0' || next_char > '9') && 432 (next_char < 'a' || next_char > 'f') && 433 (next_char < 'A' || next_char > 'F')) 434 *skip_len = 3; /* \xH */ 435 else 436 *skip_len = 4; /* \xHH */ 437 return -3; /* escape sequence */ 438 } 439 } 440 441 if (next_char == 'n' || next_char == 't' || 442 next_char == 'v' || next_char == 'b' || 443 next_char == 'r' || next_char == 'f' || 444 next_char == 'a') 445 { 446 *skip_len = 2; 447 return -3; /* escape sequence */ 448 } 449 450 ret = -2; /* replace flag */ 396 451 *skip_len += 2; 397 switch ( *(curr_str + 1)) {452 switch (next_char) { 398 453 case 'U': 399 454 *replace_flags |= REPLACE_T_UPP_TRANSFORM; 400 455 *replace_flags &= ~REPLACE_T_LOW_TRANSFORM; … … mc_search_regex__process_replace_str (co 413 468 *replace_flags = REPLACE_T_NO_TRANSFORM; 414 469 break; 415 470 default: 416 ret = -1; 471 ret = -1; /* nothing special */ 417 472 break; 418 473 } 419 474 } … … mc_search_regex__process_append_str (GSt 469 524 470 525 } 471 526

/**
 * Append the character denoted by one escape sequence to dest_str.
 *
 * @param dest_str       string the resulting byte (or literal text) is appended to
 * @param from           text of the sequence, starting just past the backslash
 * @param len            number of bytes of `from` that belong to the sequence;
 *                       (gsize) -1 means "use strlen (from)"
 * @param replace_flags  forwarded unchanged to
 *                       mc_search_regex__process_append_str() for the fallback case
 *
 * A leading '{' is skipped, then one of the following forms is decoded:
 *   - 'x' followed by hex digits, optionally after a '{': hexadecimal value
 *   - a run of octal digits ('0'..'7'): octal value
 *   - n, t, v, b, r, f, a: the corresponding C control character
 * Any other input is appended literally through
 * mc_search_regex__process_append_str().
 *
 * Exactly one byte is appended for a decoded sequence; only the low 8 bits
 * of a numeric value are kept.
 */
static void
mc_search_regex__process_escape_sequence (GString * dest_str, const char *from, gsize len,
                                          replace_transform_type_t * replace_flags)
{
    gsize i = 0;
    /* Unsigned accumulator: values >= 0x80 must not depend on the signedness
       of plain char, and unsigned wrap-around on overlong input is defined. */
    unsigned int value = 0;
    unsigned char byte;

    if (len == (gsize) -1)
        len = strlen (from);
    /* gsize is unsigned, so "empty" is the only degenerate length. */
    if (len == 0)
        return;

    if (from[i] == '{')
        i++;
    if (i >= len)
        return;

    if (from[i] == 'x')
    {
        i++;
        if (i < len && from[i] == '{')
            i++;
        /* Consume hex digits; stops at the closing '}' or any other byte. */
        for (; i < len; i++)
        {
            if (from[i] >= '0' && from[i] <= '9')
                value = value * 16 + (unsigned int) (from[i] - '0');
            else if (from[i] >= 'a' && from[i] <= 'f')
                value = value * 16 + 10 + (unsigned int) (from[i] - 'a');
            else if (from[i] >= 'A' && from[i] <= 'F')
                value = value * 16 + 10 + (unsigned int) (from[i] - 'A');
            else
                break;
        }
    }
    else if (from[i] >= '0' && from[i] <= '7')
    {
        /* Only '0'..'7' start an octal value.  '8' and '9' are not octal
           digits and fall through to the literal fallback below instead of
           producing a spurious NUL byte. */
        for (; i < len && from[i] >= '0' && from[i] <= '7'; i++)
            value = value * 8 + (unsigned int) (from[i] - '0');
    }
    else
    {
        switch (from[i])
        {
        case 'n':
            value = '\n';
            break;
        case 't':
            value = '\t';
            break;
        case 'v':
            value = '\v';
            break;
        case 'b':
            value = '\b';
            break;
        case 'r':
            value = '\r';
            break;
        case 'f':
            value = '\f';
            break;
        case 'a':
            value = '\a';
            break;
        default:
            /* Not an escape we understand: keep the text as it was written. */
            mc_search_regex__process_append_str (dest_str, from, len, replace_flags);
            return;
        }
    }

    byte = (unsigned char) (value & 0xFF);
    g_string_append_len (dest_str, (const char *) &byte, 1);
}

472 580 /*** public functions ****************************************************************************/ 473 581 474 582 void 475 583 mc_search__cond_struct_new_init_regex (const char *charset, mc_search_t * lc_mc_search, 476 584 mc_search_cond_t * mc_search_cond) 477 585 { 478 GString *tmp = NULL;586 int utf8 = 0; 479 587 #ifdef SEARCH_TYPE_GLIB 480 588 GError *error = NULL; 481 589 #else /* SEARCH_TYPE_GLIB */ … … mc_search__cond_struct_new_init_regex (c 483 591 int erroffset; 484 592 #endif /* SEARCH_TYPE_GLIB */ 485 593 594 #ifdef SEARCH_TYPE_GLIB 486 595 if (!lc_mc_search->is_case_sentitive) { 487 tmp = g_string_new_len (mc_search_cond->str->str, mc_search_cond->str->len);596 GString *tmp = g_string_new_len (mc_search_cond->str->str, mc_search_cond->str->len); 488 597 g_string_free (mc_search_cond->str, TRUE); 489 598 mc_search_cond->str = mc_search__cond_struct_new_regex_ci_str (charset, tmp->str, tmp->len); 490 599 g_string_free (tmp, TRUE); 
491 600 } 492 #ifdef SEARCH_TYPE_GLIB493 601 mc_search_cond->regex_handle = 494 602 g_regex_new (mc_search_cond->str->str, G_REGEX_OPTIMIZE | G_REGEX_RAW | G_REGEX_DOTALL, 0, 495 603 &error); … … mc_search__cond_struct_new_init_regex (c 501 609 return; 502 610 } 503 611 #else /* SEARCH_TYPE_GLIB */ 612 if (charset && !strncasecmp(charset, "utf-8", 6)) 613 utf8 = 1; 504 614 mc_search_cond->regex_handle = 505 pcre_compile (mc_search_cond->str->str, PCRE_EXTRA, &error, &erroffset, NULL);615 pcre_compile (mc_search_cond->str->str, (!mc_search->is_case_sentitive ? PCRE_CASELESS : 0) | PCRE_MULTILINE | (utf8 ? PCRE_UTF8 : 0) | PCRE_EXTRA, &error, &erroffset, NULL); 506 616 if (mc_search_cond->regex_handle == NULL) { 507 617 lc_mc_search->error = MC_SEARCH_E_REGEX_COMPILE; 508 618 lc_mc_search->error_str = g_strdup (error); … … mc_search_regex_prepare_replace_str (mc_ 635 745 for (loop = 0; loop < replace_str->len - 1; loop++) { 636 746 lc_index = mc_search_regex__process_replace_str (replace_str, loop, &len, &replace_flags); 637 747 748 /* nothing special */ 638 749 if (lc_index == -1) { 639 750 if (len != 0) { 640 751 mc_search_regex__process_append_str (ret, prev_str, … … mc_search_regex_prepare_replace_str (mc_ 648 759 continue; 649 760 } 650 761 762 /* replace flag (transform) */ 651 763 if (lc_index == -2) { 652 764 if (loop) 653 765 mc_search_regex__process_append_str (ret, prev_str, … … mc_search_regex_prepare_replace_str (mc_ 658 770 continue; 659 771 } 660 772 773 /* escape sequence */ 774 if (lc_index == -3) { 775 mc_search_regex__process_append_str (ret, prev_str, 776 replace_str->str - prev_str + loop, 777 &replace_flags); 778 mc_search_regex__process_escape_sequence (ret, replace_str->str - loop + 1, len - 1, 779 &replace_flags); 780 prev_str = replace_str->str + loop + len; 781 loop += len - 1; 782 continue; 783 } 784 661 785 if (lc_index > lc_mc_search->num_rezults) { 662 786 g_string_free (ret, TRUE); 663 787 lc_mc_search->error = 
MC_SEARCH_E_REGEX_REPLACE;