1 | /* |
---|
2 | Search text engine. |
---|
3 | Regex search |
---|
4 | |
---|
5 | Copyright (C) 2009-2016 |
---|
6 | Free Software Foundation, Inc. |
---|
7 | |
---|
8 | Written by: |
---|
9 | Slava Zanko <slavazanko@gmail.com>, 2009, 2010, 2011, 2013 |
---|
10 | Vitaliy Filippov <vitalif@yourcmc.ru>, 2011 |
---|
11 | Andrew Borodin <aborodin@vmail.ru>, 2013-2015 |
---|
12 | |
---|
13 | This file is part of the Midnight Commander. |
---|
14 | |
---|
15 | The Midnight Commander is free software: you can redistribute it |
---|
16 | and/or modify it under the terms of the GNU General Public License as |
---|
17 | published by the Free Software Foundation, either version 3 of the License, |
---|
18 | or (at your option) any later version. |
---|
19 | |
---|
20 | The Midnight Commander is distributed in the hope that it will be useful, |
---|
21 | but WITHOUT ANY WARRANTY; without even the implied warranty of |
---|
22 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the |
---|
23 | GNU General Public License for more details. |
---|
24 | |
---|
25 | You should have received a copy of the GNU General Public License |
---|
26 | along with this program. If not, see <http://www.gnu.org/licenses/>. |
---|
27 | */ |
---|
28 | |
---|
29 | #include <config.h> |
---|
30 | |
---|
31 | #include <stdlib.h> |
---|
32 | |
---|
33 | #include "lib/global.h" |
---|
34 | #include "lib/strutil.h" |
---|
35 | #include "lib/search.h" |
---|
36 | #include "lib/strescape.h" |
---|
37 | |
---|
38 | #include "internal.h" |
---|
39 | |
---|
40 | /*** global variables ****************************************************************************/ |
---|
41 | |
---|
42 | /*** file scope macro definitions ****************************************************************/ |
---|
43 | |
---|
/* Negative result codes of mc_search_regex__process_replace_str(); non-negative
 * results of that function are capture buffer indices.
 * The values are parenthesized so that the negative literals cannot merge with
 * neighbouring tokens wherever the macros are expanded. */
#define REPLACE_PREPARE_T_NOTHING_SPECIAL (-1)
#define REPLACE_PREPARE_T_REPLACE_FLAG    (-2)
#define REPLACE_PREPARE_T_ESCAPE_SEQ      (-3)
---|
47 | |
---|
48 | /*** file scope type declarations ****************************************************************/ |
---|
49 | |
---|
/* Case transformation requested by \u, \l, \U, \L and \E in a replacement string.
 * Every value occupies its own bit, so several requests can be pending at once. */
typedef enum
{
    REPLACE_T_NO_TRANSFORM = 0,                 /* copy text unchanged */
    REPLACE_T_UPP_TRANSFORM_CHAR = 1 << 0,      /* \u: upper-case the next symbol only */
    REPLACE_T_LOW_TRANSFORM_CHAR = 1 << 1,      /* \l: lower-case the next symbol only */
    REPLACE_T_UPP_TRANSFORM = 1 << 2,           /* \U: upper-case until switched off */
    REPLACE_T_LOW_TRANSFORM = 1 << 3            /* \L: lower-case until switched off */
} replace_transform_type_t;
---|
58 | |
---|
59 | |
---|
60 | /*** file scope variables ************************************************************************/ |
---|
61 | |
---|
62 | /*** file scope functions ************************************************************************/ |
---|
63 | |
---|
64 | #ifndef SEARCH_TYPE_GLIB |
---|
65 | static gboolean |
---|
66 | mc_search__regex_str_append_if_special (GString * copy_to, const GString * regex_str, |
---|
67 | gsize * offset) |
---|
68 | { |
---|
69 | const char *special_chars[] = { |
---|
70 | "\\s", "\\S", |
---|
71 | "\\d", "\\D", |
---|
72 | "\\b", "\\B", |
---|
73 | "\\w", "\\W", |
---|
74 | "\\t", "\\n", |
---|
75 | "\\r", "\\f", |
---|
76 | "\\a", "\\e", |
---|
77 | "\\x", "\\X", |
---|
78 | "\\c", "\\C", |
---|
79 | "\\l", "\\L", |
---|
80 | "\\u", "\\U", |
---|
81 | "\\E", "\\Q", |
---|
82 | NULL |
---|
83 | }; |
---|
84 | |
---|
85 | char *tmp_regex_str; |
---|
86 | const char **spec_chr; |
---|
87 | |
---|
88 | tmp_regex_str = &(regex_str->str[*offset]); |
---|
89 | |
---|
90 | for (spec_chr = special_chars; *spec_chr != NULL; spec_chr++) |
---|
91 | { |
---|
92 | gsize spec_chr_len; |
---|
93 | |
---|
94 | spec_chr_len = strlen (*spec_chr); |
---|
95 | |
---|
96 | if (strncmp (tmp_regex_str, *spec_chr, spec_chr_len) == 0 |
---|
97 | && !strutils_is_char_escaped (regex_str->str, tmp_regex_str)) |
---|
98 | { |
---|
99 | if (strncmp ("\\x", *spec_chr, spec_chr_len) == 0) |
---|
100 | { |
---|
101 | if (tmp_regex_str[spec_chr_len] != '{') |
---|
102 | spec_chr_len += 2; |
---|
103 | else |
---|
104 | { |
---|
105 | while ((spec_chr_len < regex_str->len - *offset) |
---|
106 | && tmp_regex_str[spec_chr_len] != '}') |
---|
107 | spec_chr_len++; |
---|
108 | if (tmp_regex_str[spec_chr_len] == '}') |
---|
109 | spec_chr_len++; |
---|
110 | } |
---|
111 | } |
---|
112 | g_string_append_len (copy_to, tmp_regex_str, spec_chr_len); |
---|
113 | *offset += spec_chr_len; |
---|
114 | return TRUE; |
---|
115 | } |
---|
116 | } |
---|
117 | |
---|
118 | return FALSE; |
---|
119 | } |
---|
120 | |
---|
121 | /* --------------------------------------------------------------------------------------------- */ |
---|
122 | |
---|
123 | static void |
---|
124 | mc_search__cond_struct_new_regex_hex_add (const char *charset, GString * str_to, |
---|
125 | const char *one_char, gsize str_len) |
---|
126 | { |
---|
127 | // GString *upp, *low; |
---|
128 | gsize loop; |
---|
129 | |
---|
130 | // upp = mc_search__toupper_case_str (charset, one_char, str_len); |
---|
131 | // low = mc_search__tolower_case_str (charset, one_char, str_len); |
---|
132 | |
---|
133 | // for (loop = 0; loop < upp->len; loop++) |
---|
134 | for (loop = 0; loop < str_len; loop++) |
---|
135 | { |
---|
136 | gchar tmp_str[10 + 1]; /* longest content is "[\\x%02X\\x%02X]" */ |
---|
137 | gint tmp_len; |
---|
138 | |
---|
139 | // if (loop >= low->len || upp->str[loop] == low->str[loop]) |
---|
140 | // tmp_len = |
---|
141 | // g_snprintf (tmp_str, sizeof (tmp_str), "\\x%02X", (unsigned char) upp->str[loop]); |
---|
142 | // else |
---|
143 | // tmp_len = |
---|
144 | // g_snprintf (tmp_str, sizeof (tmp_str), "[\\x%02X\\x%02X]", |
---|
145 | // (unsigned char) upp->str[loop], (unsigned char) low->str[loop]); |
---|
146 | tmp_len = |
---|
147 | g_snprintf (tmp_str, sizeof (tmp_str), "\\x%02X", one_char [loop]); |
---|
148 | |
---|
149 | g_string_append_len (str_to, tmp_str, tmp_len); |
---|
150 | } |
---|
151 | |
---|
152 | // g_string_free (upp, TRUE); |
---|
153 | // g_string_free (low, TRUE); |
---|
154 | } |
---|
155 | |
---|
156 | /* --------------------------------------------------------------------------------------------- */ |
---|
157 | |
---|
158 | static void |
---|
159 | mc_search__cond_struct_new_regex_accum_append (const char *charset, GString * str_to, |
---|
160 | GString * str_from) |
---|
161 | { |
---|
162 | GString *recoded_part; |
---|
163 | gsize loop = 0; |
---|
164 | |
---|
165 | recoded_part = g_string_sized_new (32); |
---|
166 | |
---|
167 | while (loop < str_from->len) |
---|
168 | { |
---|
169 | gchar *one_char; |
---|
170 | gsize one_char_len; |
---|
171 | gboolean just_letters; |
---|
172 | |
---|
173 | one_char = |
---|
174 | mc_search__get_one_symbol (charset, &(str_from->str[loop]), |
---|
175 | min (str_from->len - loop, 6), &just_letters); |
---|
176 | one_char_len = strlen (one_char); |
---|
177 | |
---|
178 | if (one_char_len == 0) |
---|
179 | loop++; |
---|
180 | else |
---|
181 | { |
---|
182 | loop += one_char_len; |
---|
183 | |
---|
184 | if (just_letters) |
---|
185 | mc_search__cond_struct_new_regex_hex_add (charset, recoded_part, one_char, |
---|
186 | one_char_len); |
---|
187 | else |
---|
188 | g_string_append_len (recoded_part, one_char, one_char_len); |
---|
189 | } |
---|
190 | |
---|
191 | g_free (one_char); |
---|
192 | } |
---|
193 | |
---|
194 | g_string_append_len (str_to, recoded_part->str, recoded_part->len); |
---|
195 | g_string_free (recoded_part, TRUE); |
---|
196 | g_string_set_size (str_from, 0); |
---|
197 | } |
---|
198 | |
---|
199 | /* --------------------------------------------------------------------------------------------- */ |
---|
200 | |
---|
201 | static GString * |
---|
202 | mc_search__cond_struct_new_regex_ci_str (const char *charset, const GString * astr) |
---|
203 | { |
---|
204 | GString *accumulator, *spec_char, *ret_str; |
---|
205 | gsize loop; |
---|
206 | |
---|
207 | ret_str = g_string_sized_new (64); |
---|
208 | accumulator = g_string_sized_new (64); |
---|
209 | spec_char = g_string_sized_new (64); |
---|
210 | loop = 0; |
---|
211 | |
---|
212 | // while (loop <= astr->len) |
---|
213 | while (loop < astr->len) |
---|
214 | { |
---|
215 | if (mc_search__regex_str_append_if_special (spec_char, astr, &loop)) |
---|
216 | { |
---|
217 | mc_search__cond_struct_new_regex_accum_append (charset, ret_str, accumulator); |
---|
218 | g_string_append_len (ret_str, spec_char->str, spec_char->len); |
---|
219 | g_string_set_size (spec_char, 0); |
---|
220 | continue; |
---|
221 | } |
---|
222 | |
---|
223 | if (astr->str[loop] == '[' && !strutils_is_char_escaped (astr->str, &(astr->str[loop]))) |
---|
224 | { |
---|
225 | mc_search__cond_struct_new_regex_accum_append (charset, ret_str, accumulator); |
---|
226 | |
---|
227 | while (loop < astr->len && !(astr->str[loop] == ']' |
---|
228 | && !strutils_is_char_escaped (astr->str, |
---|
229 | &(astr->str[loop])))) |
---|
230 | { |
---|
231 | g_string_append_c (ret_str, astr->str[loop]); |
---|
232 | loop++; |
---|
233 | } |
---|
234 | |
---|
235 | g_string_append_c (ret_str, astr->str[loop]); |
---|
236 | loop++; |
---|
237 | continue; |
---|
238 | } |
---|
239 | /* |
---|
240 | TODO: handle [ and ] |
---|
241 | */ |
---|
242 | g_string_append_c (accumulator, astr->str[loop]); |
---|
243 | loop++; |
---|
244 | } |
---|
245 | mc_search__cond_struct_new_regex_accum_append (charset, ret_str, accumulator); |
---|
246 | |
---|
247 | g_string_free (accumulator, TRUE); |
---|
248 | g_string_free (spec_char, TRUE); |
---|
249 | |
---|
250 | return ret_str; |
---|
251 | } |
---|
252 | #endif /* !SEARCH_TYPE_GLIB */ |
---|
253 | |
---|
254 | /* --------------------------------------------------------------------------------------------- */ |
---|
255 | |
---|
256 | #ifdef SEARCH_TYPE_GLIB |
---|
257 | /* A thin wrapper above g_regex_match_full that makes sure the string passed |
---|
258 | * to it is valid UTF-8 (unless G_REGEX_RAW compile flag was set), as it is a |
---|
259 | * requirement by glib and it might crash otherwise. See: mc ticket 3449. |
---|
260 | * Be careful: there might be embedded NULs in the strings. */ |
---|
261 | static gboolean |
---|
262 | mc_search__g_regex_match_full_safe (const GRegex * regex, |
---|
263 | const gchar * string, |
---|
264 | gssize string_len, |
---|
265 | gint start_position, |
---|
266 | GRegexMatchFlags match_options, |
---|
267 | GMatchInfo ** match_info, GError ** error) |
---|
268 | { |
---|
269 | char *string_safe, *p, *end; |
---|
270 | gboolean ret; |
---|
271 | |
---|
272 | // The processing below causes problems when 0x00 in hex arguments. |
---|
273 | // So we always just call the match function with the input argument. |
---|
274 | |
---|
275 | // if ((g_regex_get_compile_flags (regex) & G_REGEX_RAW) |
---|
276 | // || g_utf8_validate (string, string_len, NULL)) |
---|
277 | { |
---|
278 | return g_regex_match_full (regex, string, string_len, start_position, match_options, |
---|
279 | match_info, error); |
---|
280 | } |
---|
281 | |
---|
282 | if (string_len < 0) |
---|
283 | { |
---|
284 | string_len = strlen (string); |
---|
285 | } |
---|
286 | |
---|
287 | /* Correctly handle embedded NULs while copying */ |
---|
288 | p = string_safe = g_malloc (string_len); |
---|
289 | memcpy (string_safe, string, string_len); |
---|
290 | end = p + string_len; |
---|
291 | |
---|
292 | while (p < end) |
---|
293 | { |
---|
294 | gunichar c = g_utf8_get_char_validated (p, -1); |
---|
295 | if (c != (gunichar) (-1) && c != (gunichar) (-2)) |
---|
296 | { |
---|
297 | p = g_utf8_next_char (p); |
---|
298 | } |
---|
299 | else |
---|
300 | { |
---|
301 | /* U+FFFD would be the proper choice, but then we'd have to |
---|
302 | maintain mapping between old and new offsets. |
---|
303 | So rather do a byte by byte replacement. */ |
---|
304 | *p++ = '\0'; |
---|
305 | } |
---|
306 | } |
---|
307 | |
---|
308 | ret = |
---|
309 | g_regex_match_full (regex, string_safe, string_len, start_position, match_options, |
---|
310 | match_info, error); |
---|
311 | g_free (string_safe); |
---|
312 | return ret; |
---|
313 | } |
---|
314 | #endif /* SEARCH_TYPE_GLIB */ |
---|
315 | |
---|
316 | /* --------------------------------------------------------------------------------------------- */ |
---|
317 | |
---|
318 | static mc_search__found_cond_t |
---|
319 | mc_search__regex_found_cond_one (mc_search_t * lc_mc_search, mc_search_regex_t * regex, |
---|
320 | GString * search_str) |
---|
321 | { |
---|
322 | #ifdef SEARCH_TYPE_GLIB |
---|
323 | GError *mcerror = NULL; |
---|
324 | |
---|
325 | if (!mc_search__g_regex_match_full_safe |
---|
326 | (regex, search_str->str, search_str->len, 0, G_REGEX_MATCH_NEWLINE_ANY, |
---|
327 | &lc_mc_search->regex_match_info, &mcerror)) |
---|
328 | { |
---|
329 | g_match_info_free (lc_mc_search->regex_match_info); |
---|
330 | lc_mc_search->regex_match_info = NULL; |
---|
331 | if (mcerror != NULL) |
---|
332 | { |
---|
333 | lc_mc_search->error = MC_SEARCH_E_REGEX; |
---|
334 | lc_mc_search->error_str = |
---|
335 | str_conv_gerror_message (mcerror, _("Regular expression error")); |
---|
336 | g_error_free (mcerror); |
---|
337 | return COND__FOUND_ERROR; |
---|
338 | } |
---|
339 | return COND__NOT_FOUND; |
---|
340 | } |
---|
341 | lc_mc_search->num_results = g_match_info_get_match_count (lc_mc_search->regex_match_info); |
---|
342 | #else /* SEARCH_TYPE_GLIB */ |
---|
343 | lc_mc_search->num_results = pcre_exec (regex, lc_mc_search->regex_match_info, |
---|
344 | search_str->str, search_str->len, 0, 0, |
---|
345 | lc_mc_search->iovector, MC_SEARCH__NUM_REPLACE_ARGS); |
---|
346 | if (lc_mc_search->num_results < 0) |
---|
347 | { |
---|
348 | return COND__NOT_FOUND; |
---|
349 | } |
---|
350 | #endif /* SEARCH_TYPE_GLIB */ |
---|
351 | return COND__FOUND_OK; |
---|
352 | |
---|
353 | } |
---|
354 | |
---|
355 | /* --------------------------------------------------------------------------------------------- */ |
---|
356 | |
---|
357 | static mc_search__found_cond_t |
---|
358 | mc_search__regex_found_cond (mc_search_t * lc_mc_search, GString * search_str) |
---|
359 | { |
---|
360 | gsize loop1; |
---|
361 | |
---|
362 | for (loop1 = 0; loop1 < lc_mc_search->conditions->len; loop1++) |
---|
363 | { |
---|
364 | mc_search_cond_t *mc_search_cond; |
---|
365 | mc_search__found_cond_t ret; |
---|
366 | |
---|
367 | mc_search_cond = (mc_search_cond_t *) g_ptr_array_index (lc_mc_search->conditions, loop1); |
---|
368 | |
---|
369 | if (!mc_search_cond->regex_handle) |
---|
370 | continue; |
---|
371 | |
---|
372 | ret = |
---|
373 | mc_search__regex_found_cond_one (lc_mc_search, mc_search_cond->regex_handle, |
---|
374 | search_str); |
---|
375 | if (ret != COND__NOT_FOUND) |
---|
376 | return ret; |
---|
377 | } |
---|
378 | return COND__NOT_ALL_FOUND; |
---|
379 | } |
---|
380 | |
---|
381 | /* --------------------------------------------------------------------------------------------- */ |
---|
382 | |
---|
383 | static int |
---|
384 | mc_search_regex__get_max_num_of_replace_tokens (const gchar * str, gsize len) |
---|
385 | { |
---|
386 | int max_token = 0; |
---|
387 | gsize loop; |
---|
388 | for (loop = 0; loop < len - 1; loop++) |
---|
389 | { |
---|
390 | if (str[loop] == '\\' && g_ascii_isdigit (str[loop + 1])) |
---|
391 | { |
---|
392 | if (strutils_is_char_escaped (str, &str[loop])) |
---|
393 | continue; |
---|
394 | if (max_token < str[loop + 1] - '0') |
---|
395 | max_token = str[loop + 1] - '0'; |
---|
396 | continue; |
---|
397 | } |
---|
398 | if (str[loop] == '$' && str[loop + 1] == '{') |
---|
399 | { |
---|
400 | gsize tmp_len; |
---|
401 | |
---|
402 | if (strutils_is_char_escaped (str, &str[loop])) |
---|
403 | continue; |
---|
404 | |
---|
405 | for (tmp_len = 0; |
---|
406 | loop + tmp_len + 2 < len && (str[loop + 2 + tmp_len] & (char) 0xf0) == 0x30; |
---|
407 | tmp_len++); |
---|
408 | |
---|
409 | if (str[loop + 2 + tmp_len] == '}') |
---|
410 | { |
---|
411 | int tmp_token; |
---|
412 | char *tmp_str; |
---|
413 | |
---|
414 | tmp_str = g_strndup (&str[loop + 2], tmp_len); |
---|
415 | tmp_token = atoi (tmp_str); |
---|
416 | if (max_token < tmp_token) |
---|
417 | max_token = tmp_token; |
---|
418 | g_free (tmp_str); |
---|
419 | } |
---|
420 | } |
---|
421 | } |
---|
422 | return max_token; |
---|
423 | } |
---|
424 | |
---|
425 | /* --------------------------------------------------------------------------------------------- */ |
---|
426 | |
---|
427 | static char * |
---|
428 | mc_search_regex__get_token_by_num (const mc_search_t * lc_mc_search, gsize lc_index) |
---|
429 | { |
---|
430 | int fnd_start = 0, fnd_end = 0; |
---|
431 | |
---|
432 | #ifdef SEARCH_TYPE_GLIB |
---|
433 | g_match_info_fetch_pos (lc_mc_search->regex_match_info, lc_index, &fnd_start, &fnd_end); |
---|
434 | #else /* SEARCH_TYPE_GLIB */ |
---|
435 | fnd_start = lc_mc_search->iovector[lc_index * 2 + 0]; |
---|
436 | fnd_end = lc_mc_search->iovector[lc_index * 2 + 1]; |
---|
437 | #endif /* SEARCH_TYPE_GLIB */ |
---|
438 | |
---|
439 | if (fnd_end == fnd_start) |
---|
440 | return g_strdup (""); |
---|
441 | |
---|
442 | return g_strndup (lc_mc_search->regex_buffer->str + fnd_start, fnd_end - fnd_start); |
---|
443 | |
---|
444 | } |
---|
445 | |
---|
446 | /* --------------------------------------------------------------------------------------------- */ |
---|
447 | |
---|
448 | static gboolean |
---|
449 | mc_search_regex__replace_handle_esc_seq (const GString * replace_str, const gsize current_pos, |
---|
450 | gsize * skip_len, int *ret) |
---|
451 | { |
---|
452 | char *curr_str = &(replace_str->str[current_pos]); |
---|
453 | char c = curr_str[1]; |
---|
454 | |
---|
455 | if (replace_str->len > current_pos + 2) |
---|
456 | { |
---|
457 | if (c == '{') |
---|
458 | { |
---|
459 | for (*skip_len = 2; /* \{ */ |
---|
460 | current_pos + *skip_len < replace_str->len && curr_str[*skip_len] >= '0' |
---|
461 | && curr_str[*skip_len] <= '7'; (*skip_len)++) |
---|
462 | ; |
---|
463 | |
---|
464 | if (current_pos + *skip_len < replace_str->len && curr_str[*skip_len] == '}') |
---|
465 | { |
---|
466 | (*skip_len)++; |
---|
467 | *ret = REPLACE_PREPARE_T_ESCAPE_SEQ; |
---|
468 | return FALSE; |
---|
469 | } |
---|
470 | else |
---|
471 | { |
---|
472 | *ret = REPLACE_PREPARE_T_NOTHING_SPECIAL; |
---|
473 | return TRUE; |
---|
474 | } |
---|
475 | } |
---|
476 | |
---|
477 | if (c == 'x') |
---|
478 | { |
---|
479 | *skip_len = 2; /* \x */ |
---|
480 | c = curr_str[2]; |
---|
481 | if (c == '{') |
---|
482 | { |
---|
483 | for (*skip_len = 3; /* \x{ */ |
---|
484 | current_pos + *skip_len < replace_str->len |
---|
485 | && g_ascii_isxdigit ((guchar) curr_str[*skip_len]); (*skip_len)++) |
---|
486 | ; |
---|
487 | |
---|
488 | if (current_pos + *skip_len < replace_str->len && curr_str[*skip_len] == '}') |
---|
489 | { |
---|
490 | (*skip_len)++; |
---|
491 | *ret = REPLACE_PREPARE_T_ESCAPE_SEQ; |
---|
492 | return FALSE; |
---|
493 | } |
---|
494 | else |
---|
495 | { |
---|
496 | *ret = REPLACE_PREPARE_T_NOTHING_SPECIAL; |
---|
497 | return TRUE; |
---|
498 | } |
---|
499 | } |
---|
500 | else if (!g_ascii_isxdigit ((guchar) c)) |
---|
501 | { |
---|
502 | *skip_len = 2; /* \x without number behind */ |
---|
503 | *ret = REPLACE_PREPARE_T_NOTHING_SPECIAL; |
---|
504 | return FALSE; |
---|
505 | } |
---|
506 | else |
---|
507 | { |
---|
508 | c = curr_str[3]; |
---|
509 | if (!g_ascii_isxdigit ((guchar) c)) |
---|
510 | *skip_len = 3; /* \xH */ |
---|
511 | else |
---|
512 | *skip_len = 4; /* \xHH */ |
---|
513 | *ret = REPLACE_PREPARE_T_ESCAPE_SEQ; |
---|
514 | return FALSE; |
---|
515 | } |
---|
516 | } |
---|
517 | } |
---|
518 | |
---|
519 | if (strchr ("ntvbrfa", c) != NULL) |
---|
520 | { |
---|
521 | *skip_len = 2; |
---|
522 | *ret = REPLACE_PREPARE_T_ESCAPE_SEQ; |
---|
523 | return FALSE; |
---|
524 | } |
---|
525 | return TRUE; |
---|
526 | } |
---|
527 | |
---|
528 | /* --------------------------------------------------------------------------------------------- */ |
---|
529 | |
---|
530 | static int |
---|
531 | mc_search_regex__process_replace_str (const GString * replace_str, const gsize current_pos, |
---|
532 | gsize * skip_len, replace_transform_type_t * replace_flags) |
---|
533 | { |
---|
534 | int ret = -1; |
---|
535 | const char *curr_str = &(replace_str->str[current_pos]); |
---|
536 | |
---|
537 | if (current_pos > replace_str->len) |
---|
538 | return REPLACE_PREPARE_T_NOTHING_SPECIAL; |
---|
539 | |
---|
540 | *skip_len = 0; |
---|
541 | |
---|
542 | if (replace_str->len > current_pos + 2 && curr_str[0] == '$' && curr_str[1] == '{' |
---|
543 | && (curr_str[2] & (char) 0xf0) == 0x30) |
---|
544 | { |
---|
545 | char *tmp_str; |
---|
546 | |
---|
547 | if (strutils_is_char_escaped (replace_str->str, curr_str)) |
---|
548 | { |
---|
549 | *skip_len = 1; |
---|
550 | return REPLACE_PREPARE_T_NOTHING_SPECIAL; |
---|
551 | } |
---|
552 | |
---|
553 | for (*skip_len = 0; |
---|
554 | current_pos + *skip_len + 2 < replace_str->len |
---|
555 | && (curr_str[2 + *skip_len] & (char) 0xf0) == 0x30; (*skip_len)++) |
---|
556 | ; |
---|
557 | |
---|
558 | if (curr_str[2 + *skip_len] != '}') |
---|
559 | return REPLACE_PREPARE_T_NOTHING_SPECIAL; |
---|
560 | |
---|
561 | tmp_str = g_strndup (curr_str + 2, *skip_len); |
---|
562 | if (tmp_str == NULL) |
---|
563 | return REPLACE_PREPARE_T_NOTHING_SPECIAL; |
---|
564 | |
---|
565 | ret = atoi (tmp_str); |
---|
566 | g_free (tmp_str); |
---|
567 | |
---|
568 | *skip_len += 3; /* ${} */ |
---|
569 | return ret; /* capture buffer index >= 0 */ |
---|
570 | } |
---|
571 | |
---|
572 | if (curr_str[0] == '\\' && replace_str->len > current_pos + 1) |
---|
573 | { |
---|
574 | if (strutils_is_char_escaped (replace_str->str, curr_str)) |
---|
575 | { |
---|
576 | *skip_len = 1; |
---|
577 | return REPLACE_PREPARE_T_NOTHING_SPECIAL; |
---|
578 | } |
---|
579 | |
---|
580 | if (g_ascii_isdigit (curr_str[1])) |
---|
581 | { |
---|
582 | ret = g_ascii_digit_value (curr_str[1]); /* capture buffer index >= 0 */ |
---|
583 | *skip_len = 2; /* \\ and one digit */ |
---|
584 | return ret; |
---|
585 | } |
---|
586 | |
---|
587 | if (!mc_search_regex__replace_handle_esc_seq (replace_str, current_pos, skip_len, &ret)) |
---|
588 | return ret; |
---|
589 | |
---|
590 | ret = REPLACE_PREPARE_T_REPLACE_FLAG; |
---|
591 | *skip_len += 2; |
---|
592 | |
---|
593 | switch (curr_str[1]) |
---|
594 | { |
---|
595 | case 'U': |
---|
596 | *replace_flags |= REPLACE_T_UPP_TRANSFORM; |
---|
597 | *replace_flags &= ~REPLACE_T_LOW_TRANSFORM; |
---|
598 | break; |
---|
599 | case 'u': |
---|
600 | *replace_flags |= REPLACE_T_UPP_TRANSFORM_CHAR; |
---|
601 | break; |
---|
602 | case 'L': |
---|
603 | *replace_flags |= REPLACE_T_LOW_TRANSFORM; |
---|
604 | *replace_flags &= ~REPLACE_T_UPP_TRANSFORM; |
---|
605 | break; |
---|
606 | case 'l': |
---|
607 | *replace_flags |= REPLACE_T_LOW_TRANSFORM_CHAR; |
---|
608 | break; |
---|
609 | case 'E': |
---|
610 | *replace_flags = REPLACE_T_NO_TRANSFORM; |
---|
611 | break; |
---|
612 | default: |
---|
613 | ret = REPLACE_PREPARE_T_NOTHING_SPECIAL; |
---|
614 | break; |
---|
615 | } |
---|
616 | } |
---|
617 | return ret; |
---|
618 | } |
---|
619 | |
---|
620 | /* --------------------------------------------------------------------------------------------- */ |
---|
621 | |
---|
622 | static void |
---|
623 | mc_search_regex__process_append_str (GString * dest_str, const char *from, gsize len, |
---|
624 | replace_transform_type_t * replace_flags) |
---|
625 | { |
---|
626 | gsize loop; |
---|
627 | gsize char_len; |
---|
628 | |
---|
629 | if (len == (gsize) (-1)) |
---|
630 | len = strlen (from); |
---|
631 | |
---|
632 | if (*replace_flags == REPLACE_T_NO_TRANSFORM) |
---|
633 | { |
---|
634 | g_string_append_len (dest_str, from, len); |
---|
635 | return; |
---|
636 | } |
---|
637 | |
---|
638 | for (loop = 0; loop < len; loop += char_len) |
---|
639 | { |
---|
640 | GString *tmp_string = NULL; |
---|
641 | char *tmp_str; |
---|
642 | |
---|
643 | tmp_str = mc_search__get_one_symbol (NULL, from + loop, len - loop, NULL); |
---|
644 | char_len = strlen (tmp_str); |
---|
645 | |
---|
646 | if ((*replace_flags & REPLACE_T_UPP_TRANSFORM_CHAR) != 0) |
---|
647 | { |
---|
648 | *replace_flags &= ~REPLACE_T_UPP_TRANSFORM_CHAR; |
---|
649 | tmp_string = mc_search__toupper_case_str (NULL, tmp_str, char_len); |
---|
650 | g_string_append_len (dest_str, tmp_string->str, tmp_string->len); |
---|
651 | g_string_free (tmp_string, TRUE); |
---|
652 | } |
---|
653 | else if ((*replace_flags & REPLACE_T_LOW_TRANSFORM_CHAR) != 0) |
---|
654 | { |
---|
655 | *replace_flags &= ~REPLACE_T_LOW_TRANSFORM_CHAR; |
---|
656 | tmp_string = mc_search__tolower_case_str (NULL, tmp_str, char_len); |
---|
657 | g_string_append_len (dest_str, tmp_string->str, tmp_string->len); |
---|
658 | g_string_free (tmp_string, TRUE); |
---|
659 | } |
---|
660 | else if ((*replace_flags & REPLACE_T_UPP_TRANSFORM) != 0) |
---|
661 | { |
---|
662 | tmp_string = mc_search__toupper_case_str (NULL, tmp_str, char_len); |
---|
663 | g_string_append_len (dest_str, tmp_string->str, tmp_string->len); |
---|
664 | g_string_free (tmp_string, TRUE); |
---|
665 | } |
---|
666 | else if ((*replace_flags & REPLACE_T_LOW_TRANSFORM) != 0) |
---|
667 | { |
---|
668 | tmp_string = mc_search__tolower_case_str (NULL, tmp_str, char_len); |
---|
669 | g_string_append_len (dest_str, tmp_string->str, tmp_string->len); |
---|
670 | g_string_free (tmp_string, TRUE); |
---|
671 | } |
---|
672 | |
---|
673 | g_free (tmp_str); |
---|
674 | } |
---|
675 | } |
---|
676 | |
---|
677 | /* --------------------------------------------------------------------------------------------- */ |
---|
678 | |
---|
679 | static void |
---|
680 | mc_search_regex__process_escape_sequence (GString * dest_str, const char *from, gsize len, |
---|
681 | replace_transform_type_t * replace_flags, |
---|
682 | gboolean is_utf8) |
---|
683 | { |
---|
684 | gsize i = 0; |
---|
685 | unsigned int c = 0; |
---|
686 | char b; |
---|
687 | |
---|
688 | if (len == (gsize) (-1)) |
---|
689 | len = strlen (from); |
---|
690 | if (len == 0) |
---|
691 | return; |
---|
692 | |
---|
693 | if (from[i] == '{') |
---|
694 | i++; |
---|
695 | if (i >= len) |
---|
696 | return; |
---|
697 | |
---|
698 | if (from[i] == 'x') |
---|
699 | { |
---|
700 | i++; |
---|
701 | if (i < len && from[i] == '{') |
---|
702 | i++; |
---|
703 | for (; i < len; i++) |
---|
704 | { |
---|
705 | if (from[i] >= '0' && from[i] <= '9') |
---|
706 | c = c * 16 + from[i] - '0'; |
---|
707 | else if (from[i] >= 'a' && from[i] <= 'f') |
---|
708 | c = c * 16 + 10 + from[i] - 'a'; |
---|
709 | else if (from[i] >= 'A' && from[i] <= 'F') |
---|
710 | c = c * 16 + 10 + from[i] - 'A'; |
---|
711 | else |
---|
712 | break; |
---|
713 | } |
---|
714 | } |
---|
715 | else if (from[i] >= '0' && from[i] <= '7') |
---|
716 | for (; i < len && from[i] >= '0' && from[i] <= '7'; i++) |
---|
717 | c = c * 8 + from[i] - '0'; |
---|
718 | else |
---|
719 | { |
---|
720 | switch (from[i]) |
---|
721 | { |
---|
722 | case 'n': |
---|
723 | c = '\n'; |
---|
724 | break; |
---|
725 | case 't': |
---|
726 | c = '\t'; |
---|
727 | break; |
---|
728 | case 'v': |
---|
729 | c = '\v'; |
---|
730 | break; |
---|
731 | case 'b': |
---|
732 | c = '\b'; |
---|
733 | break; |
---|
734 | case 'r': |
---|
735 | c = '\r'; |
---|
736 | break; |
---|
737 | case 'f': |
---|
738 | c = '\f'; |
---|
739 | break; |
---|
740 | case 'a': |
---|
741 | c = '\a'; |
---|
742 | break; |
---|
743 | default: |
---|
744 | mc_search_regex__process_append_str (dest_str, from, len, replace_flags); |
---|
745 | return; |
---|
746 | } |
---|
747 | } |
---|
748 | |
---|
749 | if (c < 0x80 || !is_utf8) |
---|
750 | g_string_append_c (dest_str, (char) c); |
---|
751 | else if (c < 0x800) |
---|
752 | { |
---|
753 | b = 0xC0 | (c >> 6); |
---|
754 | g_string_append_c (dest_str, b); |
---|
755 | b = 0x80 | (c & 0x3F); |
---|
756 | g_string_append_c (dest_str, b); |
---|
757 | } |
---|
758 | else if (c < 0x10000) |
---|
759 | { |
---|
760 | b = 0xE0 | (c >> 12); |
---|
761 | g_string_append_c (dest_str, b); |
---|
762 | b = 0x80 | ((c >> 6) & 0x3F); |
---|
763 | g_string_append_c (dest_str, b); |
---|
764 | b = 0x80 | (c & 0x3F); |
---|
765 | g_string_append_c (dest_str, b); |
---|
766 | } |
---|
767 | else if (c < 0x10FFFF) |
---|
768 | { |
---|
769 | b = 0xF0 | (c >> 16); |
---|
770 | g_string_append_c (dest_str, b); |
---|
771 | b = 0x80 | ((c >> 12) & 0x3F); |
---|
772 | g_string_append_c (dest_str, b); |
---|
773 | b = 0x80 | ((c >> 6) & 0x3F); |
---|
774 | g_string_append_c (dest_str, b); |
---|
775 | b = 0x80 | (c & 0x3F); |
---|
776 | g_string_append_c (dest_str, b); |
---|
777 | } |
---|
778 | } |
---|
779 | |
---|
780 | /* --------------------------------------------------------------------------------------------- */ |
---|
781 | /** |
---|
782 | * Get regex flags for compilation of expressions. |
---|
783 | * @param charset the charset |
---|
784 | * |
---|
785 | * @return regex flags |
---|
786 | */ |
---|
787 | |
---|
788 | static GRegexCompileFlags |
---|
789 | mc_search__regex_get_compile_flags (const char *charset, gboolean is_case_sensitive) |
---|
790 | { |
---|
791 | GRegexCompileFlags g_regex_options = G_REGEX_OPTIMIZE | G_REGEX_DOTALL; |
---|
792 | |
---|
793 | if (!(mc_global.utf8_display && str_isutf8 (charset))) |
---|
794 | g_regex_options |= G_REGEX_RAW; |
---|
795 | |
---|
796 | if (!is_case_sensitive) |
---|
797 | g_regex_options |= G_REGEX_CASELESS; |
---|
798 | |
---|
799 | return g_regex_options; |
---|
800 | } |
---|
801 | |
---|
802 | /* --------------------------------------------------------------------------------------------- */ |
---|
803 | /*** public functions ****************************************************************************/ |
---|
804 | /* --------------------------------------------------------------------------------------------- */ |
---|
805 | |
---|
806 | void |
---|
807 | mc_search__cond_struct_new_init_regex (const char *charset, mc_search_t * lc_mc_search, |
---|
808 | mc_search_cond_t * mc_search_cond) |
---|
809 | { |
---|
810 | if (lc_mc_search->whole_words && !lc_mc_search->is_entire_line) |
---|
811 | { |
---|
812 | /* NOTE: \b as word boundary doesn't allow search |
---|
813 | * whole words with non-ASCII symbols. |
---|
814 | * Update: Is it still true nowadays? Probably not. #2396, #3524 */ |
---|
815 | g_string_prepend (mc_search_cond->str, "(?<![\\p{L}\\p{N}_])"); |
---|
816 | g_string_append (mc_search_cond->str, "(?![\\p{L}\\p{N}_])"); |
---|
817 | } |
---|
818 | |
---|
819 | { |
---|
820 | #ifdef SEARCH_TYPE_GLIB |
---|
821 | GError *mcerror = NULL; |
---|
822 | |
---|
823 | GRegexCompileFlags CompileFlags; |
---|
824 | |
---|
825 | CompileFlags = mc_search__regex_get_compile_flags (charset, lc_mc_search->is_case_sensitive); |
---|
826 | if (lc_mc_search->search_type == MC_SEARCH_T_HEX) // if hex mode search, |
---|
827 | CompileFlags |= G_REGEX_RAW; // use raw mode here |
---|
828 | |
---|
829 | mc_search_cond->regex_handle = |
---|
830 | g_regex_new (mc_search_cond->str->str, |
---|
831 | CompileFlags, |
---|
832 | 0, |
---|
833 | &mcerror); |
---|
834 | |
---|
835 | if (mcerror != NULL) |
---|
836 | { |
---|
837 | lc_mc_search->error = MC_SEARCH_E_REGEX_COMPILE; |
---|
838 | lc_mc_search->error_str = |
---|
839 | str_conv_gerror_message (mcerror, _("Regular expression error")); |
---|
840 | g_error_free (mcerror); |
---|
841 | return; |
---|
842 | } |
---|
843 | #else /* SEARCH_TYPE_GLIB */ |
---|
844 | const char *error; |
---|
845 | int erroffset; |
---|
846 | int pcre_options = PCRE_EXTRA | PCRE_MULTILINE; |
---|
847 | |
---|
848 | if (str_isutf8 (charset) && mc_global.utf8_display) |
---|
849 | { |
---|
850 | pcre_options |= PCRE_UTF8; |
---|
851 | if (!lc_mc_search->is_case_sensitive) |
---|
852 | pcre_options |= PCRE_CASELESS; |
---|
853 | } |
---|
854 | else |
---|
855 | { |
---|
856 | if (!lc_mc_search->is_case_sensitive) |
---|
857 | { |
---|
858 | GString *tmp; |
---|
859 | |
---|
860 | tmp = mc_search_cond->str; |
---|
861 | mc_search_cond->str = mc_search__cond_struct_new_regex_ci_str (charset, tmp); |
---|
862 | g_string_free (tmp, TRUE); |
---|
863 | } |
---|
864 | } |
---|
865 | |
---|
866 | mc_search_cond->regex_handle = |
---|
867 | pcre_compile (mc_search_cond->str->str, pcre_options, &error, &erroffset, NULL); |
---|
868 | if (mc_search_cond->regex_handle == NULL) |
---|
869 | { |
---|
870 | lc_mc_search->error = MC_SEARCH_E_REGEX_COMPILE; |
---|
871 | lc_mc_search->error_str = g_strdup (error); |
---|
872 | return; |
---|
873 | } |
---|
874 | lc_mc_search->regex_match_info = pcre_study (mc_search_cond->regex_handle, 0, &error); |
---|
875 | if (lc_mc_search->regex_match_info == NULL) |
---|
876 | { |
---|
877 | if (error != NULL) |
---|
878 | { |
---|
879 | lc_mc_search->error = MC_SEARCH_E_REGEX_COMPILE; |
---|
880 | lc_mc_search->error_str = g_strdup (error); |
---|
881 | g_free (mc_search_cond->regex_handle); |
---|
882 | mc_search_cond->regex_handle = NULL; |
---|
883 | return; |
---|
884 | } |
---|
885 | } |
---|
886 | #endif /* SEARCH_TYPE_GLIB */ |
---|
887 | } |
---|
888 | |
---|
889 | lc_mc_search->is_utf8 = str_isutf8 (charset); |
---|
890 | } |
---|
891 | |
---|
892 | /* --------------------------------------------------------------------------------------------- */ |
---|
893 | |
---|
894 | gboolean |
---|
895 | mc_search__run_regex (mc_search_t * lc_mc_search, const void *user_data, |
---|
896 | gsize start_search, gsize end_search, gsize * found_len) |
---|
897 | { |
---|
898 | mc_search_cbret_t ret = MC_SEARCH_CB_ABORT; |
---|
899 | gsize current_pos, virtual_pos; |
---|
900 | gint start_pos; |
---|
901 | gint end_pos; |
---|
902 | |
---|
903 | if (lc_mc_search->regex_buffer != NULL) |
---|
904 | g_string_free (lc_mc_search->regex_buffer, TRUE); |
---|
905 | |
---|
906 | lc_mc_search->regex_buffer = g_string_sized_new (64); |
---|
907 | |
---|
908 | virtual_pos = current_pos = start_search; |
---|
909 | while (virtual_pos <= end_search) |
---|
910 | { |
---|
911 | g_string_set_size (lc_mc_search->regex_buffer, 0); |
---|
912 | lc_mc_search->start_buffer = current_pos; |
---|
913 | |
---|
914 | if (lc_mc_search->search_fn != NULL) |
---|
915 | { |
---|
916 | while (TRUE) |
---|
917 | { |
---|
918 | int current_chr = '\n'; /* stop search symbol */ |
---|
919 | |
---|
920 | ret = lc_mc_search->search_fn (user_data, current_pos, ¤t_chr); |
---|
921 | |
---|
922 | if (ret == MC_SEARCH_CB_ABORT) |
---|
923 | break; |
---|
924 | |
---|
925 | if (ret == MC_SEARCH_CB_INVALID) |
---|
926 | continue; |
---|
927 | |
---|
928 | current_pos++; |
---|
929 | |
---|
930 | if (ret == MC_SEARCH_CB_SKIP) |
---|
931 | continue; |
---|
932 | |
---|
933 | virtual_pos++; |
---|
934 | |
---|
935 | g_string_append_c (lc_mc_search->regex_buffer, (char) current_chr); |
---|
936 | |
---|
937 | if ((char) current_chr == '\n' || virtual_pos > end_search) |
---|
938 | break; |
---|
939 | } |
---|
940 | } |
---|
941 | else |
---|
942 | { |
---|
943 | /* optimization for standard case (for search from file manager) |
---|
944 | * where there is no MC_SEARCH_CB_INVALID or MC_SEARCH_CB_SKIP |
---|
945 | * return codes, so we can copy line at regex buffer all at once |
---|
946 | */ |
---|
947 | while (TRUE) |
---|
948 | { |
---|
949 | char current_chr; |
---|
950 | |
---|
951 | current_chr = ((char *) user_data)[current_pos]; |
---|
952 | if (current_chr == '\0') |
---|
953 | break; |
---|
954 | |
---|
955 | current_pos++; |
---|
956 | |
---|
957 | if (current_chr == '\n' || current_pos > end_search) |
---|
958 | break; |
---|
959 | } |
---|
960 | |
---|
961 | /* use virtual_pos as index of start of current chunk */ |
---|
962 | g_string_append_len (lc_mc_search->regex_buffer, (char *) user_data + virtual_pos, |
---|
963 | current_pos - virtual_pos); |
---|
964 | virtual_pos = current_pos; |
---|
965 | } |
---|
966 | |
---|
967 | switch (mc_search__regex_found_cond (lc_mc_search, lc_mc_search->regex_buffer)) |
---|
968 | { |
---|
969 | case COND__FOUND_OK: |
---|
970 | #ifdef SEARCH_TYPE_GLIB |
---|
971 | g_match_info_fetch_pos (lc_mc_search->regex_match_info, 0, &start_pos, &end_pos); |
---|
972 | #else /* SEARCH_TYPE_GLIB */ |
---|
973 | start_pos = lc_mc_search->iovector[0]; |
---|
974 | end_pos = lc_mc_search->iovector[1]; |
---|
975 | #endif /* SEARCH_TYPE_GLIB */ |
---|
976 | if (found_len != NULL) |
---|
977 | *found_len = end_pos - start_pos; |
---|
978 | lc_mc_search->normal_offset = lc_mc_search->start_buffer + start_pos; |
---|
979 | return TRUE; |
---|
980 | case COND__NOT_ALL_FOUND: |
---|
981 | break; |
---|
982 | default: |
---|
983 | g_string_free (lc_mc_search->regex_buffer, TRUE); |
---|
984 | lc_mc_search->regex_buffer = NULL; |
---|
985 | return FALSE; |
---|
986 | } |
---|
987 | |
---|
988 | if ((lc_mc_search->update_fn != NULL) && |
---|
989 | ((lc_mc_search->update_fn) (user_data, current_pos) == MC_SEARCH_CB_ABORT)) |
---|
990 | ret = MC_SEARCH_CB_ABORT; |
---|
991 | |
---|
992 | if (ret == MC_SEARCH_CB_ABORT) |
---|
993 | break; |
---|
994 | } |
---|
995 | |
---|
996 | g_string_free (lc_mc_search->regex_buffer, TRUE); |
---|
997 | lc_mc_search->regex_buffer = NULL; |
---|
998 | lc_mc_search->error = MC_SEARCH_E_NOTFOUND; |
---|
999 | |
---|
1000 | if (ret != MC_SEARCH_CB_ABORT) |
---|
1001 | lc_mc_search->error_str = g_strdup (_(STR_E_NOTFOUND)); |
---|
1002 | else |
---|
1003 | lc_mc_search->error_str = NULL; |
---|
1004 | |
---|
1005 | return FALSE; |
---|
1006 | } |
---|
1007 | |
---|
1008 | /* --------------------------------------------------------------------------------------------- */ |
---|
1009 | |
---|
1010 | GString * |
---|
1011 | mc_search_regex_prepare_replace_str (mc_search_t * lc_mc_search, GString * replace_str) |
---|
1012 | { |
---|
1013 | GString *ret; |
---|
1014 | |
---|
1015 | int num_replace_tokens; |
---|
1016 | gsize loop; |
---|
1017 | gsize prev = 0; |
---|
1018 | replace_transform_type_t replace_flags = REPLACE_T_NO_TRANSFORM; |
---|
1019 | |
---|
1020 | num_replace_tokens = |
---|
1021 | mc_search_regex__get_max_num_of_replace_tokens (replace_str->str, replace_str->len); |
---|
1022 | |
---|
1023 | if (lc_mc_search->num_results < 0) |
---|
1024 | return g_string_new_len (replace_str->str, replace_str->len); |
---|
1025 | |
---|
1026 | if (num_replace_tokens > lc_mc_search->num_results - 1 |
---|
1027 | || num_replace_tokens > MC_SEARCH__NUM_REPLACE_ARGS) |
---|
1028 | { |
---|
1029 | lc_mc_search->error = MC_SEARCH_E_REGEX_REPLACE; |
---|
1030 | lc_mc_search->error_str = g_strdup (_(STR_E_RPL_NOT_EQ_TO_FOUND)); |
---|
1031 | return NULL; |
---|
1032 | } |
---|
1033 | |
---|
1034 | ret = g_string_sized_new (64); |
---|
1035 | |
---|
1036 | for (loop = 0; loop < replace_str->len - 1; loop++) |
---|
1037 | { |
---|
1038 | int lc_index; |
---|
1039 | gchar *tmp_str; |
---|
1040 | gsize len = 0; |
---|
1041 | |
---|
1042 | lc_index = mc_search_regex__process_replace_str (replace_str, loop, &len, &replace_flags); |
---|
1043 | |
---|
1044 | if (lc_index == REPLACE_PREPARE_T_NOTHING_SPECIAL) |
---|
1045 | { |
---|
1046 | if (len != 0) |
---|
1047 | { |
---|
1048 | mc_search_regex__process_append_str (ret, replace_str->str + prev, loop - prev, |
---|
1049 | &replace_flags); |
---|
1050 | mc_search_regex__process_append_str (ret, replace_str->str + loop + 1, len - 1, |
---|
1051 | &replace_flags); |
---|
1052 | prev = loop + len; |
---|
1053 | loop = prev - 1; /* prepare to loop++ */ |
---|
1054 | } |
---|
1055 | |
---|
1056 | continue; |
---|
1057 | } |
---|
1058 | |
---|
1059 | if (lc_index == REPLACE_PREPARE_T_REPLACE_FLAG) |
---|
1060 | { |
---|
1061 | if (loop != 0) |
---|
1062 | mc_search_regex__process_append_str (ret, replace_str->str + prev, loop - prev, |
---|
1063 | &replace_flags); |
---|
1064 | prev = loop + len; |
---|
1065 | loop = prev - 1; /* prepare to loop++ */ |
---|
1066 | continue; |
---|
1067 | } |
---|
1068 | |
---|
1069 | /* escape sequence */ |
---|
1070 | if (lc_index == REPLACE_PREPARE_T_ESCAPE_SEQ) |
---|
1071 | { |
---|
1072 | mc_search_regex__process_append_str (ret, replace_str->str + prev, loop - prev, |
---|
1073 | &replace_flags); |
---|
1074 | /* call process_escape_sequence without starting '\\' */ |
---|
1075 | mc_search_regex__process_escape_sequence (ret, replace_str->str + loop + 1, len - 1, |
---|
1076 | &replace_flags, lc_mc_search->is_utf8); |
---|
1077 | prev = loop + len; |
---|
1078 | loop = prev - 1; /* prepare to loop++ */ |
---|
1079 | continue; |
---|
1080 | } |
---|
1081 | |
---|
1082 | /* invalid capture buffer number */ |
---|
1083 | if (lc_index > lc_mc_search->num_results) |
---|
1084 | { |
---|
1085 | g_string_free (ret, TRUE); |
---|
1086 | lc_mc_search->error = MC_SEARCH_E_REGEX_REPLACE; |
---|
1087 | lc_mc_search->error_str = g_strdup_printf (_(STR_E_RPL_INVALID_TOKEN), lc_index); |
---|
1088 | return NULL; |
---|
1089 | } |
---|
1090 | |
---|
1091 | tmp_str = mc_search_regex__get_token_by_num (lc_mc_search, lc_index); |
---|
1092 | |
---|
1093 | if (loop != 0) |
---|
1094 | mc_search_regex__process_append_str (ret, replace_str->str + prev, loop - prev, |
---|
1095 | &replace_flags); |
---|
1096 | |
---|
1097 | mc_search_regex__process_append_str (ret, tmp_str, -1, &replace_flags); |
---|
1098 | g_free (tmp_str); |
---|
1099 | |
---|
1100 | prev = loop + len; |
---|
1101 | loop = prev - 1; /* prepare to loop++ */ |
---|
1102 | } |
---|
1103 | |
---|
1104 | mc_search_regex__process_append_str (ret, replace_str->str + prev, replace_str->len - prev, |
---|
1105 | &replace_flags); |
---|
1106 | |
---|
1107 | return ret; |
---|
1108 | } |
---|