xdg-toplevel: check that the provided title is actually valid UTF-8

While the xdg-shell protocol requires titles to be valid UTF-8, it does not yet have a dedicated error code for invalid titles; this commit makes wlroots send a generic error instead.
2024-11-24 16:12:23 +00:00 · 2023-11-11 16:55:41 -05:00 · 2023-11-11 16:55:41 -05:00 · a3d22dbe97
commit a3d22dbe97
parent 41494244df
4 changed files with 85 additions and 0 deletions
--- a/include/util/utf8.h
+++ b/include/util/utf8.h
@ -0,0 +1,11 @@
+#ifndef UTIL_UTF8_H
+#define UTIL_UTF8_H
+
+#include <stdbool.h>
+
+/**
+ * Return true if and only if the NUL-terminated string is well-formed UTF-8 (an empty string counts as valid).
+ */
+bool is_utf8(const char *string);
+
+#endif
--- a/types/xdg_shell/wlr_xdg_toplevel.c
+++ b/types/xdg_shell/wlr_xdg_toplevel.c
@ -5,6 +5,7 @@
 #include <wlr/util/log.h>
 #include <wlr/util/edges.h>
 #include "types/wlr_xdg_shell.h"
+#include "util/utf8.h"

 void handle_xdg_toplevel_ack_configure(
 		struct wlr_xdg_toplevel *toplevel,
@ -224,6 +225,12 @@ static void xdg_toplevel_handle_set_title(struct wl_client *client,
 		wlr_xdg_toplevel_from_resource(resource);
 	char *tmp;

+	if (!is_utf8(title)) {
+		// TODO: update when xdg_toplevel has a dedicated error code for this
+		wl_resource_post_error(resource, (uint32_t)-1, "xdg_toplevel title is not valid UTF-8");
+		return;
+	}
+
 	tmp = strdup(title);
 	if (tmp == NULL) {
 		wl_resource_post_no_memory(resource);
--- a/util/meson.build
+++ b/util/meson.build
@ -11,5 +11,6 @@ wlr_files += files(
 	'shm.c',
 	'time.c',
 	'token.c',
+	'utf8.c',
 )

--- a/util/utf8.c
+++ b/util/utf8.c
@ -0,0 +1,66 @@
+#include <stdint.h>
+#include "util/utf8.h"
+
+static bool in_range(char x, uint8_t low, uint8_t high) { // is byte x within [low, high]?
+	uint8_t v = (uint8_t)x; // compare as an unsigned byte regardless of char signedness
+	return low <= v && v <= high; // both bounds are inclusive
+}
+
+bool is_utf8(const char *string) {
+	/* Returns true iff the string is 'well-formed', as defined by
+	 * Unicode Standard 15.0.0. See Chapter 3, D92 and Table 3.7.
+	 *
+	 * UTF-8 strings are sequences of code points encoded in one of the
+	 * following ways. The first byte determines the pattern.
+	 *
+	 * 00..7F
+	 * C2..DF 80..BF
+	 * E0     A0..BF 80..BF
+	 * E1..EC 80..BF 80..BF
+	 * ED     80..9F 80..BF
+	 * EE..EF 80..BF 80..BF
+	 * F0     90..BF 80..BF 80..BF
+	 * F1..F3 80..BF 80..BF 80..BF
+	 * F4     80..8F 80..BF 80..BF
+	 */
+	uint8_t range_table[9][8] = { // row i holds inclusive {low, high} pairs, one pair per byte of pattern i
+		{0x00, 0x7F},
+		{0xC2, 0xDF, 0x80, 0xBF},
+		{0xE0, 0xE0, 0xA0, 0xBF, 0x80, 0xBF},
+		{0xE1, 0xEC, 0x80, 0xBF, 0x80, 0xBF},
+		{0xED, 0xED, 0x80, 0x9F, 0x80, 0xBF},
+		{0xEE, 0xEF, 0x80, 0xBF, 0x80, 0xBF},
+		{0xF0, 0xF0, 0x90, 0xBF, 0x80, 0xBF, 0x80, 0xBF},
+		{0xF1, 0xF3, 0x80, 0xBF, 0x80, 0xBF, 0x80, 0xBF},
+		{0xF4, 0xF4, 0x80, 0x8F, 0x80, 0xBF, 0x80, 0xBF},
+	};
+	int lengths[9] = { // number of bytes in the pattern of the matching range_table row
+		1, 2, 3, 3, 3, 3, 4, 4, 4
+	};
+
+	while (string[0]) { // one iteration per encoded code point; stops at the NUL terminator
+		bool accept = false;
+		for (int i = 0; i < 9; i++) { // find the row whose first-byte range contains string[0]
+			if (!in_range(string[0], range_table[i][0],
+					range_table[i][1])) {
+				continue;
+			}
+			for (int j = 1; j < lengths[i]; j++) { // check each following byte against its own range
+				if (!in_range(string[j], range_table[i][2 * j],
+						range_table[i][2 * j + 1])) {
+					// A NUL byte is below every continuation range (all start at 0x80 or
+					// higher), so returning here stops before any read past the terminator
+					return false;
+				}
+			}
+			string += lengths[i]; // advance past the code point that was just validated
+			accept = true;
+			break;
+		}
+		if (!accept) { // first byte matched no row (80..C1 or F5..FF)
+			return false;
+		}
+	}
+
+	return true; // every code point matched a row; also reached immediately for ""
+}