Ticket #3449: mc-3449-glib-crashes-on-invalid-utf8-v3.patch

File mc-3449-glib-crashes-on-invalid-utf8-v3.patch, 3.1 KB (added by egmont, 9 years ago)

Fix, v3

  • lib/search/regex.c

    diff --git a/lib/search/regex.c b/lib/search/regex.c
    index f6eb24e..39975b8 100644
    a b mc_search__cond_struct_new_regex_ci_str (const char *charset, const GString * as 
    249249 
    250250/* --------------------------------------------------------------------------------------------- */ 
    251251 
     252#ifdef SEARCH_TYPE_GLIB 
     253/* A thin wrapper above g_regex_match_full that makes sure the string passed 
     254 * to it is valid UTF-8 (unless G_REGEX_RAW compile flag was set), as it is a 
     255 * requirement by glib and it might crash otherwise. See: mc ticket 3449. 
     256 * Be careful: there might be embedded NULs in the strings. */ 
     257static gboolean 
     258mc_search__g_regex_match_full_safe (const GRegex * regex, 
     259                                    const gchar * string, 
     260                                    gssize string_len, 
     261                                    gint start_position, 
     262                                    GRegexMatchFlags match_options, 
     263                                    GMatchInfo ** match_info, GError ** error) 
     264{ 
     265    char *string_safe, *p, *end; 
     266    gboolean ret; 
     267 
     268    if ((g_regex_get_compile_flags (regex) & G_REGEX_RAW) 
     269        || g_utf8_validate (string, string_len, NULL)) 
     270    { 
     271        return g_regex_match_full (regex, string, string_len, start_position, match_options, 
     272                                   match_info, error); 
     273    } 
     274 
     275    if (string_len < 0) 
     276    { 
     277        string_len = strlen (string); 
     278    } 
     279    p = string_safe = g_malloc (string_len); 
     280    memcpy (string_safe, string, string_len); 
     281    end = p + string_len; 
     282 
     283    while (p < end) 
     284    { 
     285        gunichar c = g_utf8_get_char_validated (p, -1); 
     286        if (c != (gunichar) (-1) && c != (gunichar) (-2)) 
     287        { 
     288            p = g_utf8_next_char (p); 
     289        } 
     290        else 
     291        { 
     292            /* U+FFFD would be the proper choice, but then we'd have to 
     293               maintain mapping between old and new offsets. 
     294               So rather do a byte by byte replacement. */ 
     295            *p++ = '\0'; 
     296        } 
     297    } 
     298 
     299    ret = 
     300        g_regex_match_full (regex, string_safe, string_len, start_position, match_options, 
     301                            match_info, error); 
     302    g_free (string_safe); 
     303    return ret; 
     304} 
     305#endif /* SEARCH_TYPE_GLIB */ 
     306 
     307/* --------------------------------------------------------------------------------------------- */ 
     308 
    252309static mc_search__found_cond_t 
    253310mc_search__regex_found_cond_one (mc_search_t * lc_mc_search, mc_search_regex_t * regex, 
    254311                                 GString * search_str) 
    mc_search__regex_found_cond_one (mc_search_t * lc_mc_search, mc_search_regex_t * 
    256313#ifdef SEARCH_TYPE_GLIB 
    257314    GError *mcerror = NULL; 
    258315 
    259     if (!g_regex_match_full (regex, search_str->str, search_str->len, 0, G_REGEX_MATCH_NEWLINE_ANY, 
    260                              &lc_mc_search->regex_match_info, &mcerror)) 
     316    if (!mc_search__g_regex_match_full_safe 
     317        (regex, search_str->str, search_str->len, 0, G_REGEX_MATCH_NEWLINE_ANY, 
     318         &lc_mc_search->regex_match_info, &mcerror)) 
    261319    { 
    262320        g_match_info_free (lc_mc_search->regex_match_info); 
    263321        lc_mc_search->regex_match_info = NULL;