Commit Diff


commit - 87890bc26c1c6958bd64bb9d46fbc29ba6a92d95
commit + ea5e974da9b1047689411a00ecc0a9c1fb101d73
blob - 80cfc6f803bbf046beb7285ddb64fd7ef3169391
blob + e1dd226bd532d7adaf1e4712e8c13bf3cc29ffba
--- gotd/libexec/got-notify-http/got-notify-http.c
+++ gotd/libexec/got-notify-http/got-notify-http.c
@@ -33,6 +33,7 @@
 #include "got_version.h"
 
 #include "bufio.h"
+#include "utf8d.h"
 
 #define USERAGENT	 "got-notify-http/" GOT_VERSION_STR
 
@@ -91,39 +92,50 @@ dial(const char *host, const char *port)
 static void
 escape(FILE *fp, const uint8_t *s)
 {
-	for (; *s; ++s) {
-		/*
-		 * XXX: this is broken for UNICODE: we should leave
-		 * the multibyte characters as-is.
-		 */
+	uint32_t codepoint, state;
+	const uint8_t *start = s;
 
-		if (*s >= ' ' && *s <= '~') {
-			fputc(*s, fp);
-			continue;
-		}
-
-		switch (*s) {
-		case '"':
-		case '\\':
-			fprintf(fp, "\\%c", *s);
-			break;
-		case '\b':
-			fprintf(fp, "\\b");
-			break;
-		case '\f':
-			fprintf(fp, "\\f");
-			break;
-		case '\n':
-			fprintf(fp, "\\n");
-			break;
-		case '\r':
-			fprintf(fp, "\\r");
-			break;
-		case '\t':
-			fprintf(fp, "\\t");
+	state = 0;
+	for (; *s; ++s) {
+		switch (decode(&state, &codepoint, *s)) {
+		case UTF8_ACCEPT:
+			switch (codepoint) {
+			case '"':
+			case '\\':
+				fprintf(fp, "\\%c", *s);
+				break;
+			case '\b':
+				fprintf(fp, "\\b");
+				break;
+			case '\f':
+				fprintf(fp, "\\f");
+				break;
+			case '\n':
+				fprintf(fp, "\\n");
+				break;
+			case '\r':
+				fprintf(fp, "\\r");
+				break;
+			case '\t':
+				fprintf(fp, "\\t");
+				break;
+			default:
+				/* other control characters */
+				if (codepoint < ' ' || codepoint == 0x7F) {
+					fprintf(fp, "\\u%04x", codepoint);
+					break;
+				}
+				fwrite(start, 1, s - start + 1, fp);
+				break;
+			}
+			start = s + 1;
 			break;
-		default:
-			fprintf(fp, "\\u%04X", *s);
+
+		case UTF8_REJECT:
+			/* bad UTF-8 sequence; try to recover */
+			fputs("\\uFFFD", fp);
+			state = UTF8_ACCEPT;
+			start = s + 1;
 			break;
 		}
         }
blob - /dev/null
blob + 480e9c55ac4a6aa58414ec65e0053dd620b5b01d (mode 644)
--- /dev/null
+++ gotd/libexec/got-notify-http/utf8d.h
@@ -0,0 +1,55 @@
+/*
+ * Copyright (c) 2008-2009 Bjoern Hoehrmann <bjoern@hoehrmann.de>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ * DEALINGS IN THE SOFTWARE.
+ */
+
+// See http://bjoern.hoehrmann.de/utf-8/decoder/dfa/ for details.
+
+#define UTF8_ACCEPT 0
+#define UTF8_REJECT 1
+
+static const uint8_t utf8d[] = {
+  0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, // 00..1f
+  0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, // 20..3f
+  0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, // 40..5f
+  0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, // 60..7f
+  1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,9,9,9,9,9,9,9,9,9,9,9,9,9,9,9,9, // 80..9f
+  7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7, // a0..bf
+  8,8,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2, // c0..df
+  0xa,0x3,0x3,0x3,0x3,0x3,0x3,0x3,0x3,0x3,0x3,0x3,0x3,0x4,0x3,0x3, // e0..ef
+  0xb,0x6,0x6,0x6,0x5,0x8,0x8,0x8,0x8,0x8,0x8,0x8,0x8,0x8,0x8,0x8, // f0..ff
+  0x0,0x1,0x2,0x3,0x5,0x8,0x7,0x1,0x1,0x1,0x4,0x6,0x1,0x1,0x1,0x1, // s0..s0
+  1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,0,1,1,1,1,1,0,1,0,1,1,1,1,1,1, // s1..s2
+  1,2,1,1,1,1,1,2,1,2,1,1,1,1,1,1,1,1,1,1,1,1,1,2,1,1,1,1,1,1,1,1, // s3..s4
+  1,2,1,1,1,1,1,1,1,2,1,1,1,1,1,1,1,1,1,1,1,1,1,3,1,3,1,1,1,1,1,1, // s5..s6
+  1,3,1,1,1,1,1,3,1,3,1,1,1,1,1,1,1,3,1,1,1,1,1,1,1,1,1,1,1,1,1,1, // s7..s8
+};
+
+static uint32_t inline
+decode(uint32_t* state, uint32_t* codep, uint32_t byte) {
+  uint32_t type = utf8d[byte];
+
+  *codep = (*state != UTF8_ACCEPT) ?
+    (byte & 0x3fu) | (*codep << 6) :
+    (0xff >> type) & (byte);
+
+  *state = utf8d[256 + *state*16 + type];
+  return *state;
+}
blob - 6892df7536c1a19134db4827c42f870d903c65a7
blob + 3255fa55f2841d50b5170e35303b4e7622843e04
--- regress/gotd/http_notification.sh
+++ regress/gotd/http_notification.sh
@@ -65,6 +65,75 @@ test_file_changed() {
 		"author":"$GOT_AUTHOR",
 		"date":"$d",
 		"message":"make changes\n",
+		"diffstat":{},
+		"changes":{}
+	}]}
+	.
+	,j
+	w
+	EOF
+
+	cmp -s $testroot/stdout.expected $testroot/stdout
+	ret=$?
+	if [ $ret -ne 0 ]; then
+		diff -u $testroot/stdout.expected $testroot/stdout
+		test_done "$testroot" "$ret"
+		return 1
+	fi
+
+	test_done "$testroot" "$ret"
+}
+
+test_bad_utf8() {
+	local testroot=`test_init bad_utf8 1`
+
+	got clone -a -q ${GOTD_TEST_REPO_URL} $testroot/repo-clone
+	ret=$?
+	if [ $ret -ne 0 ]; then
+		echo "got clone failed unexpectedly" >&2
+		test_done "$testroot" 1
+		return 1
+	fi
+
+	got checkout -q $testroot/repo-clone $testroot/wt >/dev/null
+	ret=$?
+	if [ $ret -ne 0 ]; then
+		echo "got checkout failed unexpectedly" >&2
+		test_done "$testroot" 1
+	fi
+
+	# invalid utf8 sequenc
+	commit_msg="make$(printf '\xED\xA0\x80')changes"
+
+	echo "changed" > $testroot/wt/alpha
+	(cd $testroot/wt && got commit -m "$commit_msg" > /dev/null)
+	local commit_id=`git_show_head $testroot/repo-clone`
+	local author_time=`git_show_author_time $testroot/repo-clone`
+
+	timeout 5 ./http-server -p $GOTD_TEST_HTTP_PORT \
+	    > $testroot/stdout &
+
+	got send -b main -q -r $testroot/repo-clone
+	ret=$?
+	if [ $ret -ne 0 ]; then
+		echo "got send failed unexpectedly" >&2
+		test_done "$testroot" "1"
+		return 1
+	fi
+
+	wait %1 # wait for the http "server"
+
+	d=`date -u -r $author_time +"%a %b %e %X %Y UTC"`
+
+	touch "$testroot/stdout.expected"
+	ed -s "$testroot/stdout.expected" <<-EOF
+	a
+	{"notifications":[{
+		"short":false,
+		"id":"$commit_id",
+		"author":"$GOT_AUTHOR",
+		"date":"$d",
+		"message":"make\uFFFD\uFFFDchanges\n",
 		"diffstat":{},
 		"changes":{}
 	}]}
@@ -245,5 +314,6 @@ test_many_commits_summarized() {
 
 test_parseargs "$@"
 run_test test_file_changed
+run_test test_bad_utf8
 run_test test_many_commits_not_summarized
 run_test test_many_commits_summarized