https://bugs.gentoo.org/974285 https://gstreamer.freedesktop.org/security/sa-2026-0023.html https://gitlab.freedesktop.org/gstreamer/gstreamer/-/merge_requests/11237 From a26b58bfef4e8562c5e85b57896ba5fc1f7c64a4 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sebastian=20Dr=C3=B6ge?= Date: Mon, 30 Mar 2026 20:40:26 +0300 Subject: [PATCH 1/3] subparse: Replace a `g_strv_length()` in a loop header with more efficient code It doesn't really matter in this case as the array is small. Part-of: --- a/gst/subparse/gstsubparse.c +++ b/gst/subparse/gstsubparse.c @@ -915,56 +915,53 @@ static void parse_webvtt_cue_settings (ParserState * state, const gchar * settings) { gchar **splitted_settings = g_strsplit_set (settings, " \t", -1); - gint i = 0; gint16 text_position, text_size; gint16 line_position; gboolean vertical_found = FALSE; gboolean alignment_found = FALSE; - while (i < g_strv_length (splitted_settings)) { + for (gchar ** setting_ptr = splitted_settings; *setting_ptr; setting_ptr++) { gboolean valid_tag = FALSE; - switch (splitted_settings[i][0]) { + switch ((*setting_ptr)[0]) { case 'T': - if (sscanf (splitted_settings[i], "T:%" G_GINT16_FORMAT "%%", + if (sscanf (*setting_ptr, "T:%" G_GINT16_FORMAT "%%", &text_position) > 0) { state->text_position = (guint8) text_position; valid_tag = TRUE; } break; case 'D': - if (strlen (splitted_settings[i]) > 2) { + if (strlen (*setting_ptr) > 2) { vertical_found = TRUE; g_free (state->vertical); - state->vertical = g_strdup (splitted_settings[i] + 2); + state->vertical = g_strdup (*setting_ptr + 2); valid_tag = TRUE; } break; case 'L': - if (g_str_has_suffix (splitted_settings[i], "%")) { - if (sscanf (splitted_settings[i], "L:%" G_GINT16_FORMAT "%%", + if (g_str_has_suffix (*setting_ptr, "%")) { + if (sscanf (*setting_ptr, "L:%" G_GINT16_FORMAT "%%", &line_position) > 0) { state->line_position = line_position; valid_tag = TRUE; } } else { - if (sscanf (splitted_settings[i], "L:%" G_GINT16_FORMAT, - &line_position) > 0) { + if (sscanf (*setting_ptr, "L:%" G_GINT16_FORMAT, &line_position) > 0) { state->line_number = line_position; valid_tag = TRUE; } } break; case 'S': - if (sscanf (splitted_settings[i], "S:%" G_GINT16_FORMAT "%%", - &text_size) > 0) { + if (sscanf (*setting_ptr, "S:%" G_GINT16_FORMAT "%%", &text_size) > 0) { state->text_size = (guint8) text_size; valid_tag = TRUE; } break; case 'A': - if (strlen (splitted_settings[i]) > 2) { + if (strlen (*setting_ptr) > 2) { g_free (state->alignment); - state->alignment = g_strdup (splitted_settings[i] + 2); + state->alignment = g_strdup (*setting_ptr + 2); alignment_found = TRUE; valid_tag = TRUE; } @@ -973,10 +970,8 @@ parse_webvtt_cue_settings (ParserState * state, const gchar * settings) break; } if (!valid_tag) { - GST_LOG ("Invalid or unrecognised setting found: %s", - splitted_settings[i]); + GST_LOG ("Invalid or unrecognised setting found: %s", *setting_ptr); } - i++; } g_strfreev (splitted_settings); if (!vertical_found) { -- GitLab From 447191a31806372d200922086213c687ba4f30ae Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sebastian=20Dr=C3=B6ge?= Date: Mon, 30 Mar 2026 20:39:08 +0300 Subject: [PATCH 2/3] subparse: Replace regex string matching / replacing with plain C string parsing The regexes had suboptimal behaviour that easily took 30s on a 1kB long line and probably had quadratic behaviour in the length of the string. Fixes https://gitlab.freedesktop.org/gstreamer/gstreamer/-/work_items/5002 Part-of: --- a/gst/subparse/gstsubparse.c +++ b/gst/subparse/gstsubparse.c @@ -41,7 +41,6 @@ #include "gstsubparseelements.h" #define DEFAULT_ENCODING NULL -#define ATTRIBUTE_REGEX "\\s?[a-zA-Z0-9\\. \t\\(\\)]*" static const gchar *allowed_srt_tags[] = { "i", "b", "u", NULL }; static const gchar *allowed_vtt_tags[] = { "i", "b", "c", "u", "v", "ruby", "rt", NULL }; @@ -639,50 +638,112 @@ strip_trailing_newlines (gchar * txt) * escaping everything (the text between these simple markers isn't * necessarily escaped, so it seems best to do it like this) */ static void -subrip_unescape_formatting (gchar * txt, gconstpointer allowed_tags_ptr, +subrip_unescape_formatting (gchar * txt, gchar ** allowed_tags, gboolean allows_tag_attributes) { - gchar *res; - GRegex *tag_regex; - gchar *allowed_tags_pattern, *search_pattern; - const gchar *replace_pattern; + const gchar *p; + GString *out; /* No processing needed if no escaped tag marker found in the string. */ if (strstr (txt, "<") == NULL) return; - /* Build a list of alternates for our regexp. - * FIXME: Could be built once and stored */ - allowed_tags_pattern = g_strjoinv ("|", (gchar **) allowed_tags_ptr); - /* Look for starting/ending escaped tags with optional attributes. */ - search_pattern = g_strdup_printf ("<(/)?\\ *(%s)(%s)>", - allowed_tags_pattern, ATTRIBUTE_REGEX); - /* And unescape appropriately */ - if (allows_tag_attributes) { - replace_pattern = "<\\1\\2\\3>"; - } else { - replace_pattern = "<\\1\\2>"; - } + out = g_string_new (""); + p = txt; - tag_regex = g_regex_new (search_pattern, 0, 0, NULL); - res = g_regex_replace (tag_regex, txt, strlen (txt), 0, - replace_pattern, 0, NULL); + while (*p) { + const gchar *lt; + const gchar *gt; - /* Replacing can fail. Return an empty string in that case. */ - if (!res) { - strcpy (txt, ""); - return; - } + /* Find next < */ + lt = strstr (p, "<"); + if (!lt) { + /* No more < found - copy remainder and done */ + g_string_append (out, p); + break; + } - /* res will always be shorter than the input or identical, so this - * copy is OK */ - strcpy (txt, res); + /* Copy everything before < */ + g_string_append_len (out, p, lt - p); + + /* Skip < */ + lt += 4; + + /* Find matching > */ + gt = strstr (lt, ">"); + if (!gt) { + /* No closing > - copy everything until the end as is and end */ + g_string_append (out, lt - 4); + break; + } + + /* Check for optional closing tag / */ + gboolean is_closing = FALSE; + const gchar *tag_start = lt; + if (*tag_start == '/') { + is_closing = TRUE; + tag_start++; + } + + /* Skip optional whitespace before tag name */ + while (*tag_start == ' ' || *tag_start == '\t') + tag_start++; + + /* Extract tag name */ + const gchar *tag_end = tag_start; + while (g_ascii_isalnum (*tag_end)) + tag_end++; + gsize tag_len = tag_end - tag_start; + + /* Check if tag is allowed */ + gboolean allowed = FALSE; + gchar **tag_ptr; + for (tag_ptr = allowed_tags; *tag_ptr; tag_ptr++) { + if (strlen (*tag_ptr) == tag_len && + strncmp (*tag_ptr, tag_start, tag_len) == 0) { + allowed = TRUE; + break; + } + } + + if (!allowed) { + /* Tag not allowed - copy everything between and including <...> as is */ + g_string_append_len (out, lt - 4, gt + 4 - (lt - 4)); + p = gt + 4; + continue; + } - g_free (res); - g_free (search_pattern); - g_free (allowed_tags_pattern); + /* Otherwise handle allowed tag by unescaping < */ + g_string_append_c (out, '<'); + if (is_closing) + g_string_append_c (out, '/'); + g_string_append_len (out, tag_start, tag_len); + + /* If attributes allowed then copy them over, otherwise ignore them */ + if (allows_tag_attributes) { + /* Scan for optional attributes */ + const gchar *attr_start = tag_end; + + /* Find attributes end */ + while (tag_end < gt && + (g_ascii_isalnum (*tag_end) || *tag_end == '.' || + *tag_end == ' ' || *tag_end == '\t' || + *tag_end == '(' || *tag_end == ')')) + tag_end++; + + /* Copy attributes */ + g_string_append_len (out, attr_start, tag_end - attr_start); + } - g_regex_unref (tag_regex); + /* Append closing > and skip to next */ + g_string_append_c (out, '>'); + p = gt + 4; + } + + /* out will always be shorter than the input or identical, so this + * copy is OK */ + strcpy (txt, out->str); + g_string_free (out, TRUE); } @@ -727,19 +788,11 @@ subrip_remove_unhandled_tags (gchar * txt) * input! This function adds missing closing markup tags and removes * broken closing tags for tags that have never been opened. */ static void -subrip_fix_up_markup (gchar ** p_txt, gconstpointer allowed_tags_ptr) +subrip_fix_up_markup (gchar ** p_txt, gchar ** allowed_tags) { gchar *cur, *next_tag; GPtrArray *open_tags = NULL; guint num_open_tags = 0; - const gchar *iter_tag; - guint offset = 0; - guint index; - gchar *cur_tag; - gchar *end_tag; - GRegex *tag_regex; - GMatchInfo *match_info; - gchar **allowed_tags = (gchar **) allowed_tags_ptr; g_assert (*p_txt != NULL); @@ -749,44 +802,75 @@ subrip_fix_up_markup (gchar ** p_txt, gconstpointer allowed_tags_ptr) next_tag = strchr (cur, '<'); if (next_tag == NULL) break; - offset = 0; - index = 0; - while (index < g_strv_length (allowed_tags)) { - iter_tag = allowed_tags[index]; - /* Look for a white listed tag */ - cur_tag = g_strconcat ("<", iter_tag, ATTRIBUTE_REGEX, ">", NULL); - tag_regex = g_regex_new (cur_tag, 0, 0, NULL); - (void) g_regex_match (tag_regex, next_tag, 0, &match_info); - - if (g_match_info_matches (match_info)) { - gint start_pos, end_pos; - gchar *word = g_match_info_fetch (match_info, 0); - g_match_info_fetch_pos (match_info, 0, &start_pos, &end_pos); - if (start_pos == 0) { - offset = strlen (word); + + /* Look for allowed tag */ + guint offset = 0; + gboolean is_closing = FALSE; + for (gchar ** tag_ptr = allowed_tags; *tag_ptr; tag_ptr++) { + const gchar *tag_start = next_tag + 1; + + is_closing = (*tag_start == '/'); + if (is_closing) + tag_start++; + + /* Skip optional whitespace before tag start */ + while (*tag_start == ' ' || *tag_start == '\t') + tag_start++; + + /* Extract tag name length */ + const gchar *tag_end = tag_start; + while (g_ascii_isalnum (*tag_end)) + tag_end++; + gsize tag_len = tag_end - tag_start; + + /* Check if tag name matches */ + if (strlen (*tag_ptr) == tag_len && + g_ascii_strncasecmp (*tag_ptr, tag_start, tag_len) == 0) { + /* Found allowed tag - calculate offset to position after tag */ + + /* Check for optional attributes */ + if (*tag_end == ' ' || *tag_end == '\t' || *tag_end == '.') { + while (*tag_end && *tag_end != '>' && + (g_ascii_isalnum (*tag_end) || *tag_end == '.' || + *tag_end == ' ' || *tag_end == '\t' || + *tag_end == '(' || *tag_end == ')')) { + tag_end++; + } + } + + /* Check for closing >, if not found skip over this */ + if (*tag_end == '>') { + offset = tag_end - (next_tag + 1); + + /* Full opening tag found, let's keep track of it and continue */ + if (!is_closing) { + g_ptr_array_add (open_tags, g_ascii_strdown (*tag_ptr, -1)); + ++num_open_tags; + } + break; } - g_free (word); - } - g_match_info_free (match_info); - g_regex_unref (tag_regex); - g_free (cur_tag); - index++; - if (offset) { - /* OK we found a tag, let's keep track of it */ - g_ptr_array_add (open_tags, g_ascii_strdown (iter_tag, -1)); - ++num_open_tags; - break; } + + /* No closing > found, continue */ + offset = 0; + } + + /* Not a valid tag - skip to next */ + if (offset == 0) { + ++next_tag; + cur = next_tag; + continue; } - if (offset) { + /* Not a closing tag - skip to the next */ + if (!is_closing) { next_tag += offset; cur = next_tag; continue; } if (*next_tag == '<' && *(next_tag + 1) == '/') { - end_tag = strchr (next_tag, '>'); + gchar *end_tag = strchr (next_tag, '>'); if (end_tag) { const gchar *last = NULL; if (num_open_tags > 0) @@ -810,6 +894,7 @@ subrip_fix_up_markup (gchar ** p_txt, gconstpointer allowed_tags_ptr) cur = next_tag; } + /* if there are still open tags, close them all at the end */ if (num_open_tags > 0) { GString *s; --- a/gst/subparse/gstsubparse.h +++ b/gst/subparse/gstsubparse.h @@ -48,7 +48,7 @@ typedef struct { guint8 text_size; /* percent value */ gchar *vertical; /* "", "vertical", "vertical-lr" */ gchar *alignment; /* "", "start", "middle", "end" */ - gconstpointer allowed_tags; /* list of markup tags allowed in the cue text. */ + gchar **allowed_tags; /* list of markup tags allowed in the cue text. */ gboolean allows_tag_attributes; } ParserState; -- GitLab From 9c4f69eebc86eca2b3881a9ea7a524eb70687793 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sebastian=20Dr=C3=B6ge?= Date: Mon, 30 Mar 2026 23:37:04 +0300 Subject: [PATCH 3/3] subparse: Fix handling of closing of multi-byte tags They were previously wrongly considered as wrong tags due to using wrong byte offsets into the string that only happened to match for single-byte tags. Add a test for this case too. Part-of: --- a/gst/subparse/gstsubparse.c +++ b/gst/subparse/gstsubparse.c @@ -869,29 +869,24 @@ subrip_fix_up_markup (gchar ** p_txt, gchar ** allowed_tags) continue; } - if (*next_tag == '<' && *(next_tag + 1) == '/') { - gchar *end_tag = strchr (next_tag, '>'); - if (end_tag) { - const gchar *last = NULL; - if (num_open_tags > 0) - last = g_ptr_array_index (open_tags, num_open_tags - 1); - if (num_open_tags == 0 - || g_ascii_strncasecmp (end_tag - 1, last, strlen (last))) { - GST_LOG ("broken input, closing tag '%s' is not open", next_tag); - /* Move everything after the tag end, including closing \0 */ - memmove (next_tag, end_tag + 1, strlen (end_tag)); - cur = next_tag; - continue; - } else { - --num_open_tags; - g_ptr_array_remove_index (open_tags, num_open_tags); - cur = end_tag + 1; - continue; - } - } + /* Otherwise a closing tag */ + gchar *tag_end = strchr (next_tag, '>'); + const gchar *last = NULL; + if (num_open_tags > 0) + last = g_ptr_array_index (open_tags, num_open_tags - 1); + /* Check if the closing tag is the last tag that was opened */ + if (num_open_tags == 0 + || g_ascii_strncasecmp (next_tag + 2, last, strlen (last)) != 0) { + GST_LOG ("broken input, closing tag '%s' is not open", next_tag); + /* Skip over the tag by moving everything after the tag end, including closing \0 */ + memmove (next_tag, tag_end + 1, strlen (tag_end + 1) + 1); + cur = next_tag; + continue; } - ++next_tag; - cur = next_tag; + + --num_open_tags; + g_ptr_array_remove_index (open_tags, num_open_tags); + cur = tag_end + 1; } /* if there are still open tags, close them all at the end */ --- a/tests/check/elements/subparse.c +++ b/tests/check/elements/subparse.c @@ -504,9 +504,25 @@ GST_START_TEST (test_webvtt) , }; + /* Tests with a wrong multi-character closing tags before the end of the line */ + SubParseInputChunk webvtt_input3[] = { + { + "1\n00:00:00,000 --> 00:00:01,000\nHello!World!\n\n", + 0 * GST_SECOND, 1 * GST_SECOND, "Hello!World!"}, + { + "1\n00:00:01,000 --> 00:00:02,000\nHello!World!\n\n", + 1 * GST_SECOND, 2 * GST_SECOND, "Hello!World!"} + , + { + "1\n00:00:02,000 --> 00:00:03,000\nWorld!Hello!\n\n", + 2 * GST_SECOND, 3 * GST_SECOND, "World!Hello!"} + , + }; + test_vtt_do_test (webvtt_input, 0, G_N_ELEMENTS (webvtt_input)); test_vtt_do_test (webvtt_input1, 0, G_N_ELEMENTS (webvtt_input1)); test_vtt_do_test (webvtt_input2, 0, G_N_ELEMENTS (webvtt_input2)); + test_vtt_do_test (webvtt_input3, 0, G_N_ELEMENTS (webvtt_input3)); } GST_END_TEST; -- GitLab