Ticket #3449: mc-3449-glib-crashes-on-invalid-utf8-v2.patch

File mc-3449-glib-crashes-on-invalid-utf8-v2.patch, 2.9 KB (added by egmont, 9 years ago)

Fix, v2

  • lib/search/regex.c

    diff --git a/lib/search/regex.c b/lib/search/regex.c
    index f6eb24e..53e3b6f 100644
    a b mc_search__cond_struct_new_regex_ci_str (const char *charset, const GString * as 
    249249 
    250250/* --------------------------------------------------------------------------------------------- */ 
    251251 
     252#ifdef SEARCH_TYPE_GLIB 
     253/* Glib doesn't like invalid UTF-8 so sanitize it first: ticket 3449. 
     254 * Be careful: there might be embedded NULs in the strings. */ 
     255static gboolean 
     256mc_search__g_regex_match_full_safe (const GRegex * regex, 
     257                                    const gchar * string, 
     258                                    gssize string_len, 
     259                                    gint start_position, 
     260                                    GRegexMatchFlags match_options, 
     261                                    GMatchInfo ** match_info, GError ** error) 
     262{ 
     263    char *string_safe, *p, *end; 
     264    gboolean ret; 
     265 
     266    if ((g_regex_get_compile_flags (regex) & G_REGEX_RAW) 
     267        || g_utf8_validate (string, string_len, NULL)) 
     268    { 
     269        return g_regex_match_full (regex, string, string_len, start_position, match_options, 
     270                                   match_info, error); 
     271    } 
     272 
     273    if (string_len < 0) 
     274    { 
     275        string_len = strlen (string); 
     276    } 
     277    p = string_safe = g_strndup (string, string_len); 
     278    end = p + string_len; 
     279 
     280    while (p < end) 
     281    { 
     282        gunichar c = g_utf8_get_char_validated (p, -1); 
     283        if (c != (gunichar) (-1) && c != (gunichar) (-2)) 
     284        { 
     285            p = g_utf8_next_char (p); 
     286        } 
     287        else 
     288        { 
     289            /* U+FFFD would be the proper choice, but then we'd have to 
     290               maintain mapping between old and new offsets. 
     291               So rather do a byte by byte replacement. */ 
     292            *p++ = '\0'; 
     293        } 
     294    } 
     295 
     296    ret = 
     297        g_regex_match_full (regex, string_safe, string_len, start_position, match_options, 
     298                            match_info, error); 
     299    g_free (string_safe); 
     300    return ret; 
     301} 
     302#endif /* SEARCH_TYPE_GLIB */ 
     303 
     304/* --------------------------------------------------------------------------------------------- */ 
     305 
    252306static mc_search__found_cond_t 
    253307mc_search__regex_found_cond_one (mc_search_t * lc_mc_search, mc_search_regex_t * regex, 
    254308                                 GString * search_str) 
    mc_search__regex_found_cond_one (mc_search_t * lc_mc_search, mc_search_regex_t * 
    256310#ifdef SEARCH_TYPE_GLIB 
    257311    GError *mcerror = NULL; 
    258312 
    259     if (!g_regex_match_full (regex, search_str->str, search_str->len, 0, G_REGEX_MATCH_NEWLINE_ANY, 
    260                              &lc_mc_search->regex_match_info, &mcerror)) 
     313    if (!mc_search__g_regex_match_full_safe 
     314        (regex, search_str->str, search_str->len, 0, G_REGEX_MATCH_NEWLINE_ANY, 
     315         &lc_mc_search->regex_match_info, &mcerror)) 
    261316    { 
    262317        g_match_info_free (lc_mc_search->regex_match_info); 
    263318        lc_mc_search->regex_match_info = NULL;