check 1 '\xc2\x20'
check 1 '\x20\xc2'
check 1 '\300\200'
+check 1 '\xed\xa0\x88\xed\xbd\x85' # UTF-16 surrogates
+check 1 '\xef\xbf\xbe' # 0xFFFE
+check 1 '\xef\xbf\xbf' # 0xFFFF
exit $failed
* ifdata: Don't assume that all interface names are 6 characters or fewer;
for instance, "wmaster0" is longer. Increase the limit to 20 characters.
Closes: #526654 (Thanks, Alan Pope)
+ * isutf8: Reject UTF-8-encoded UTF-16 surrogates, as well as 0xFFFE and
+ 0xFFFF. Closes: #525301 (Thanks, Jakub Wilk and liw)
-- Joey Hess <joeyh@debian.org> Sat, 02 May 2009 20:40:23 -0400
return INVALID_CHAR;
u = (u << 6) | (buf[j] & 0x3f);
}
+
+ /* Conforming UTF-8 cannot contain the codes 0xd800-0xdfff (UTF-16
+ surrogates), nor 0xfffe or 0xffff. */
+ if (u >= 0xD800 && u <= 0xDFFF)
+ return INVALID_CHAR;
+ if (u == 0xFFFE || u == 0xFFFF)
+ return INVALID_CHAR;
+
return u;
}
int nbytes, nbytes2;
int c;
unsigned long code;
- unsigned long line, col, byteoff;
+ unsigned long line, col, byteoff;
nbytes = 0;
line = 1;