Make the lexer Unicode-aware for strings and char literals

This commit is contained in:
Marijn Haverbeke 2011-03-24 16:07:43 +01:00 committed by Graydon Hoare
parent a045514477
commit d4c3059e1e
3 changed files with 7 additions and 8 deletions

View File

@ -76,7 +76,7 @@ impure fn new_reader(io.reader rdr, str filename) -> reader
col += 1u;
}
n = rdr.read_byte() as char;
n = rdr.read_char();
}
fn mark() {
@ -204,8 +204,8 @@ impure fn new_reader(io.reader rdr, str filename) -> reader
reserved.insert("m128", ()); // IEEE 754-2008 'decimal128'
reserved.insert("dec", ()); // One of m32, m64, m128
ret reader(rdr, filename, rdr.read_byte() as char,
rdr.read_byte() as char, 1u, 0u, 1u, 0u, keywords, reserved);
ret reader(rdr, filename, rdr.read_char(),
rdr.read_char(), 1u, 0u, 1u, 0u, keywords, reserved);
}
@ -505,7 +505,7 @@ impure fn next_token(reader rdr) -> token.token {
if (is_alpha(c) || c == '_') {
while (is_alnum(c) || c == '_') {
_str.push_byte(accum_str, (c as u8));
_str.push_char(accum_str, c);
rdr.bump();
c = rdr.curr();
}
@ -692,7 +692,7 @@ impure fn next_token(reader rdr) -> token.token {
}
}
case (_) {
_str.push_byte(accum_str, rdr.curr() as u8);
_str.push_char(accum_str, rdr.curr());
}
}
rdr.bump();

View File

@ -303,9 +303,9 @@ fn to_str(token t) -> str {
ret "\"" + s + "\"";
}
case (LIT_CHAR(?c)) {
// FIXME: escape and encode.
// FIXME: escape.
auto tmp = "'";
_str.push_byte(tmp, c as u8);
_str.push_char(tmp, c);
_str.push_byte(tmp, '\'' as u8);
ret tmp;
}

View File

@ -718,7 +718,6 @@ impure fn print_maybe_parens(ps s, @ast.expr expr, int outer_prec) {
if (add_them) {pclose(s);}
}
// TODO non-ascii
fn escape_str(str st, char to_escape) -> str {
let str out = "";
auto len = _str.byte_len(st);