Add support for a reserved-words list and reserve the various not-supported-but-plausible IEEE datatypes.

This commit is contained in:
Graydon Hoare 2010-08-17 14:13:58 -07:00
parent 560d7b4983
commit 766b91d88b
2 changed files with 108 additions and 17 deletions

View File

@ -599,6 +599,7 @@ Unicode characters.
* Ref.Lex.Ignore:: Ignored characters.
* Ref.Lex.Ident:: Identifier tokens.
* Ref.Lex.Key:: Keyword tokens.
* Ref.Lex.Res:: Reserved tokens.
* Ref.Lex.Num:: Numeric tokens.
* Ref.Lex.Text:: String and character tokens.
* Ref.Lex.Syntax:: Syntactic extension tokens.
@ -636,7 +637,7 @@ token or a syntactic extension token. Multi-line comments may be nested.
Identifiers follow the pattern of C identifiers: they begin with a
@emph{letter} or @emph{underscore}, and continue with any combination of
@emph{letters}, @emph{decimal digits} and underscores, and must not be equal
to any keyword. @xref{Ref.Lex.Key}.
to any keyword or reserved token. @xref{Ref.Lex.Key}. @xref{Ref.Lex.Res}.
A @emph{letter} is a Unicode character in the ranges U+0061-U+007A and
U+0041-U+005A (@code{'a'}-@code{'z'} and @code{'A'}-@code{'Z'}).
@ -728,6 +729,35 @@ The keywords are:
@tab @code{be}
@end multitable
@node Ref.Lex.Res
@subsection Ref.Lex.Res
@c * Ref.Lex.Res:: Reserved tokens.
The reserved tokens are:
@cindex Reserved
@sp 2
@multitable @columnfractions .15 .15 .15 .15 .15
@item @code{f16}
@tab @code{f80}
@tab @code{f128}
@item @code{m32}
@tab @code{m64}
@tab @code{m128}
@tab @code{dec}
@end multitable
@sp 2
At present these tokens have no defined meaning in the Rust language.
These tokens may correspond, in some current or future implementation,
to additional built-in types for decimal floating-point, extended
binary and interchange floating-point formats, as defined in the IEEE
754-1985 and IEEE 754-2008 specifications.
@node Ref.Lex.Num
@subsection Ref.Lex.Num
@c * Ref.Lex.Num:: Numeric tokens.
@ -785,6 +815,10 @@ only two floating-point suffixes: @code{f32} and @code{f64}. Each of these
gives the floating point literal the associated type, rather than
@code{float}.
A set of suffixes is also reserved to accommodate literal support for
types corresponding to reserved tokens. The reserved suffixes are @code{f16},
@code{f80}, @code{f128}, @code{m}, @code{m32}, @code{m64} and @code{m128}.
@sp 1
A @dfn{hex digit} is either a @emph{decimal digit} or else a character in the
ranges U+0061-U+0066 and U+0041-U+0046 (@code{'a'}-@code{'f'},
@ -2024,7 +2058,7 @@ The signed two's complement word types @code{i8}, @code{i16}, @code{i32} and
@end ifhtml
respectively.
@item
The IEEE 754 single-precision and double-precision floating-point types:
The IEEE 754-2008 @code{binary32} and @code{binary64} floating-point types:
@code{f32} and @code{f64}, respectively.
@end itemize

View File

@ -27,8 +27,12 @@
<- (bump_line lexbuf.Lexing.lex_curr_p)
;;
let mach_suf_table = Hashtbl.create 0
let mach_suf_table = Hashtbl.create 10
;;
(* Set of reserved numeric-literal suffixes, stored as suffix -> unit.
 * The literal rules probe it with Hashtbl.mem when a suffix is not found
 * in mach_suf_table, to choose between the "reserved ... suffix" and
 * "bad ... suffix" failure messages. *)
let reserved_suf_table = Hashtbl.create 10
;;
let _ =
List.iter (fun (suf, ty) -> Common.htab_put mach_suf_table suf ty)
[ ("u8", Common.TY_u8);
@ -43,8 +47,24 @@
("f64", Common.TY_f64); ]
;;
(* Register every reserved literal suffix in reserved_suf_table.  The table
 * is used as a set, so each suffix is bound to unit. *)
let () =
  let reserve suf = Common.htab_put reserved_suf_table suf () in
  List.iter reserve
    [
      (* Binary floating-point formats. *)
      "f16";   (* IEEE 754-2008 'binary16' interchange format. *)
      "f80";   (* IEEE 754-1985 'extended' *)
      "f128";  (* IEEE 754-2008 'binary128' *)

      (* Decimal floating-point formats. *)
      "m32";   (* IEEE 754-2008 'decimal32' *)
      "m64";   (* IEEE 754-2008 'decimal64' *)
      "m128";  (* IEEE 754-2008 'decimal128' *)
      "m";     (* One of m32, m64, m128. *)
    ]
;;
(* Maps each keyword's spelling to the token it lexes to.  The identifier
 * rule looks a lexeme up here first, before treating it as a reserved word
 * or a plain IDENT. *)
let keyword_table = Hashtbl.create 100
;;
(* Set of reserved words, stored as spelling -> unit.  An identifier-shaped
 * lexeme that is not a keyword but is present here is passed to `fail`
 * with the message "reserved keyword" instead of becoming an IDENT. *)
let reserved_table = Hashtbl.create 10
;;
let _ =
List.iter (fun (kwd, tok) -> Common.htab_put keyword_table kwd tok)
[ ("mod", MOD);
@ -141,6 +161,19 @@
("f64", MACH TY_f64)
]
;;
(* Register every reserved word in reserved_table.  The table is used as a
 * set, so each word is bound to unit. *)
let () =
  let reserve word = Common.htab_put reserved_table word () in
  List.iter reserve
    [
      (* Binary floating-point formats. *)
      "f16";   (* IEEE 754-2008 'binary16' interchange format. *)
      "f80";   (* IEEE 754-1985 'extended' *)
      "f128";  (* IEEE 754-2008 'binary128' *)

      (* Decimal floating-point formats. *)
      "m32";   (* IEEE 754-2008 'decimal32' *)
      "m64";   (* IEEE 754-2008 'decimal64' *)
      "m128";  (* IEEE 754-2008 'decimal128' *)
      "dec";   (* One of m32, m64, m128. *)
    ]
;;
}
let hexdig = ['0'-'9' 'a'-'f' 'A'-'F']
@ -153,6 +186,7 @@ let flo = (dec '.' dec (exp?)) | (dec exp)
let mach_float_suf = "f32"|"f64"
let mach_int_suf = ['u''i']('8'|"16"|"32"|"64")
let flo_suf = ['m''f']("16"|"32"|"64"|"80"|"128")
let ws = [ ' ' '\t' '\r' ]
@ -218,26 +252,39 @@ rule token = parse
| ']' { RBRACKET }
| id as i
{ try
Hashtbl.find keyword_table i
with
Not_found -> IDENT (i) }
{
match Common.htab_search keyword_table i with
Some tok -> tok
| None ->
if Hashtbl.mem reserved_table i
then fail lexbuf "reserved keyword"
else IDENT (i)
}
| (bin|hex|dec) as n { LIT_INT (Int64.of_string n) }
| ((bin|hex|dec) as n) 'u' { LIT_UINT (Int64.of_string n) }
| ((bin|hex|dec) as n)
(mach_int_suf as s) { try
let tm =
Hashtbl.find mach_suf_table s
in
LIT_MACH_INT
(tm, Int64.of_string n)
with
Not_found ->
fail lexbuf
"bad mach-int suffix" }
(mach_int_suf as s)
{
match Common.htab_search mach_suf_table s with
Some tm -> LIT_MACH_INT (tm, Int64.of_string n)
| None ->
if Hashtbl.mem reserved_suf_table s
then fail lexbuf "reserved mach-int suffix"
else fail lexbuf "bad mach-int suffix"
}
| flo as n { LIT_FLOAT (float_of_string n) }
(* A float literal followed by a bare 'm': 'm' is a reserved suffix, so the
 * literal is rejected.  Message spelling fixed ("reseved" -> "reserved"). *)
| flo 'm' { fail lexbuf "reserved mach-float suffix" }
| (flo as n) (flo_suf as s)
{
match Common.htab_search mach_suf_table s with
Some tm -> LIT_MACH_FLOAT (tm, float_of_string n)
| None ->
if Hashtbl.mem reserved_suf_table s
then fail lexbuf "reserved mach-float suffix"
else fail lexbuf "bad mach-float suffix"
}
| '\'' { char lexbuf }
| '"' { let buf = Buffer.create 32 in
@ -411,3 +458,13 @@ and comment depth = parse
comment depth lexbuf }
| _ { comment depth lexbuf }
(*
* Local Variables:
* fill-column: 78;
* indent-tabs-mode: nil
* buffer-file-coding-system: utf-8-unix
* compile-command: "make -k -C ../.. 2>&1 | sed -e 's/\\/x\\//x:\\//g'";
* End:
*)