Context Navigation

Back to Ticket #1882

Ticket #1882: mc-git20110403-regex-replace-escape-sequences.diff

File mc-git20110403-regex-replace-escape-sequences.diff, 10.0 KB (added by vitalif, 14 years ago)
This same patch, ported to git version 12eb8b62...

lib/search/regex.c

Patch for mc git version (4.8.0, 2011-04-03):
Enables use of escape sequences inside regex replace strings,
Enables UTF-8 caseless search in PCRE.
Supported escape sequences: \DEC, \xHEX, \0OCT, \n, \t, \v,
\b, \r, \f, \a. Any of them could be enclosed into \{}.

diff --git a/lib/search/regex.c b/lib/search/regex.c
index 6917972..e63f3cc 100644

  static int
 mc_search_regex__process_replace_str (const GString * replace_str, const gsize current_pos,
                                       gsize * skip_len, replace_transform_type_t * replace_flags)
+{
     int ret = -1;
+    int ret = -1; /* nothing special */
     char *tmp_str;
     const char *curr_str = &(replace_str->str[current_pos]);
-…
+ mc_search_regex__process_replace_str (const GString * replace_str, const gsize c
     *skip_len = 0;
+    if (*curr_str == '$' && *(curr_str + 1) == '{' && (*(curr_str + 2) & (char) 0xf0) == 0x30)
+    if (replace_str->len > current_pos + 2 &&
+        *curr_str == '$' &&
+        *(curr_str + 1) == '{' &&
+        (*(curr_str + 2) & (char) 0xf0) == 0x30)
+    {
         if (strutils_is_char_escaped (replace_str->str, curr_str))
+        {
             *skip_len = 1;
             return -1;
+            return -1; /* nothing special */
+        }
         for (*skip_len = 0;
-…
+ mc_search_regex__process_replace_str (const GString * replace_str, const gsize c
              && (*(curr_str + 2 + *skip_len) & (char) 0xf0) == 0x30; (*skip_len)++);
         if (*(curr_str + 2 + *skip_len) != '}')
             return -1;
+            return -1; /* nothing special */
         tmp_str = g_strndup (curr_str + 2, *skip_len);
         if (tmp_str == NULL)
             return -1;
+            return -1; /* nothing special */
         ret = atoi (tmp_str);
         g_free (tmp_str);
         *skip_len += 3;         /* ${} */
         return ret;
+        return ret; /* capture buffer index >= 0 */
+    }
+    if (*curr_str == '\\')
+    if (replace_str->len > current_pos + 1 &&
+        *curr_str == '\\')
+    {
+        char next_char;
         if (strutils_is_char_escaped (replace_str->str, curr_str))
+        {
             *skip_len = 1;
             return -1;
+            return -1; /* nothing special */
+        }
+        next_char = *(curr_str + 1);
         if (g_ascii_isdigit (*(curr_str + 1)))
+        {
             ret = g_ascii_digit_value (*(curr_str + 1));
+            ret = g_ascii_digit_value (*(curr_str + 1)); /* capture buffer index >= 0 */
             *skip_len = 2;      /* \\ and one digit */
             return ret;
+        }
+        ret = -2;
+        if (replace_str->len > current_pos+2)
+        {
+            if (next_char == '{')
+            {
+                for (*skip_len = 2; /* \{ */
+                     current_pos + *skip_len < replace_str->len
+                     && (*(curr_str + *skip_len)) != '}'; (*skip_len)++);
+                if (current_pos + *skip_len < replace_str->len) /* } */
+                    (*skip_len)++;
+                return -3; /* escape sequence */
+            }
+            if (next_char == 'x')
+            {
+                *skip_len = 2; /* \x */
+                next_char = *(curr_str + 2);
+                if (next_char == '{')
+                {
+                    for (*skip_len = 3; /* \x{ */
+                         current_pos + *skip_len < replace_str->len
+                         && (*(curr_str + *skip_len)) != '}'; (*skip_len)++);
+                    if (current_pos + *skip_len < replace_str->len)
+                        (*skip_len)++;
+                    return -3; /* escape sequence */
+                }
+                else if ((next_char < '0' || next_char > '9') &&
+                    (next_char < 'a' || next_char > 'f') &&
+                    (next_char < 'A' || next_char > 'F'))
+                {
+                    *skip_len = 2; /* \x without number behind */
+                    return -1; /* nothing special */
+                }
+                else
+                {
+                    next_char = *(curr_str + 3);
+                    if ((next_char < '0' || next_char > '9') &&
+                        (next_char < 'a' || next_char > 'f') &&
+                        (next_char < 'A' || next_char > 'F'))
+                        *skip_len = 3; /* \xH */
+                    else
+                        *skip_len = 4; /* \xHH */
+                    return -3; /* escape sequence */
+                }
+            }
+        }
+        if (next_char == 'n' || next_char == 't' ||
+            next_char == 'v' || next_char == 'b' ||
+            next_char == 'r' || next_char == 'f' ||
+            next_char == 'a')
+        {
+            *skip_len = 2;
+            return -3; /* escape sequence */
+        }
+        ret = -2; /* replace flag */
         *skip_len += 2;
         switch (*(curr_str + 1))
+        switch (next_char)
+        {
         case 'U':
             *replace_flags |= REPLACE_T_UPP_TRANSFORM;
-…
+ mc_search_regex__process_replace_str (const GString * replace_str, const gsize c
             *replace_flags = REPLACE_T_NO_TRANSFORM;
             break;
         default:
             ret = -1;
+            ret = -1; /* nothing special */
             break;
+        }
+    }
-…
+ mc_search_regex__process_append_str (GString * dest_str, const char *from, gsize
+}
+static void
+mc_search_regex__process_escape_sequence (GString * dest_str, const char *from, gsize len,
+                                          replace_transform_type_t * replace_flags)
+{
+    gsize i = 0;
+    char c = 0;
+    if (len == (gsize) -1)
+        len = strlen (from);
+    if (len <= 0)
+        return;
+    if (from[i] == '{')
+        i++;
+    if (i >= len)
+        return;
+    if (from[i] == 'x')
+    {
+        i++;
+        if (i < len && from[i] == '{')
+            i++;
+        for (; i < len; i++)
+        {
+            if (from[i] >= '0' && from[i] <= '9')
+                c = c*16 + from[i] - '0';
+            else if (from[i] >= 'a' && from[i] <= 'f')
+                c = c*16 + 10 + from[i] - 'a';
+            else if (from[i] >= 'A' && from[i] <= 'F')
+                c = c*16 + 10 + from[i] - 'A';
+            else
+                break;
+        }
+    }
+    else if (from[i] >= '0' && from[i] <= '9')
+        for (; i < len && from[i] >= '0' && from[i] <= '7'; i++)
+            c = c*8 + from[i] - '0';
+    else
+    {
+        switch (from[i])
+        {
+            case 'n': c = '\n'; break;
+            case 't': c = '\t'; break;
+            case 'v': c = '\v'; break;
+            case 'b': c = '\b'; break;
+            case 'r': c = '\r'; break;
+            case 'f': c = '\f'; break;
+            case 'a': c = '\a'; break;
+            default:
+                mc_search_regex__process_append_str(dest_str, from, len, replace_flags);
+                return;
+        }
+    }
+    g_string_append_len (dest_str, &c, 1);
+}
 /*** public functions ****************************************************************************/
 void
-…
+ mc_search__cond_struct_new_init_regex (const char *charset, mc_search_t * lc_mc_
 #ifdef SEARCH_TYPE_GLIB
     GError *error = NULL;
 #else /* SEARCH_TYPE_GLIB */
+    int utf8 = 0;
     const char *error;
     int erroffset;
 #endif /* SEARCH_TYPE_GLIB */
+#ifdef SEARCH_TYPE_GLIB
     if (!lc_mc_search->is_case_sensitive)
+    {
         GString *tmp;
-…
+ mc_search__cond_struct_new_init_regex (const char *charset, mc_search_t * lc_mc_
         mc_search_cond->str = mc_search__cond_struct_new_regex_ci_str (charset, tmp);
         g_string_free (tmp, TRUE);
+    }
-#ifdef SEARCH_TYPE_GLIB
     mc_search_cond->regex_handle =
         g_regex_new (mc_search_cond->str->str, G_REGEX_OPTIMIZE | G_REGEX_RAW | G_REGEX_DOTALL, 0,
                      &error);
-…
+ mc_search__cond_struct_new_init_regex (const char *charset, mc_search_t * lc_mc_
         return;
+    }
 #else /* SEARCH_TYPE_GLIB */
+    mc_search_cond->regex_handle =
+        pcre_compile (mc_search_cond->str->str, PCRE_EXTRA, &error, &erroffset, NULL);
+    if (charset && !strncasecmp(charset, "utf-8", 6))
+        utf8 = 1;
+    mc_search_cond->regex_handle = pcre_compile (mc_search_cond->str->str,
+        (!mc_search->is_case_sentitive ? PCRE_CASELESS : 0) |
+        PCRE_MULTILINE |
+        (utf8 ? PCRE_UTF8 : 0) |
+        PCRE_EXTRA, &error, &erroffset, NULL);
     if (mc_search_cond->regex_handle == NULL)
+    {
         lc_mc_search->error = MC_SEARCH_E_REGEX_COMPILE;
-…
+ mc_search_regex_prepare_replace_str (mc_search_t * lc_mc_search, GString * repla
+    {
         lc_index = mc_search_regex__process_replace_str (replace_str, loop, &len, &replace_flags);
+        /* nothing special */
         if (lc_index == -1)
+        {
             if (len != 0)
-…
+ mc_search_regex_prepare_replace_str (mc_search_t * lc_mc_search, GString * repla
             continue;
+        }
+        /* replace flag (transform) */
         if (lc_index == -2)
+        {
             if (loop)
-…
+ mc_search_regex_prepare_replace_str (mc_search_t * lc_mc_search, GString * repla
             continue;
+        }
+        /* escape sequence */
+        if (lc_index == -3) {
+            mc_search_regex__process_append_str (ret, prev_str,
+                                                 replace_str->str + loop - prev_str,
+                                                 &replace_flags);
+            /* call process_escape_sequence without starting '\\' */
+            mc_search_regex__process_escape_sequence (ret, replace_str->str + loop + 1, len - 1,
+                                                      &replace_flags);
+            prev_str = replace_str->str + loop + len;
+            loop += len - 1;
+            continue;
+        }
+        /* invalid capture buffer number */
         if (lc_index > lc_mc_search->num_results)
+        {
             g_string_free (ret, TRUE);

Download in other formats:

Original Format