Add support for a reserved-words list and reserve the various not-supported-but-plausible IEEE datatypes.

2024-11-01 15:01:51 +00:00 · 2010-08-17 14:13:58 -07:00 · 2010-08-17 14:13:58 -07:00 · 766b91d88b
commit 766b91d88b
parent 560d7b4983
2 changed files with 108 additions and 17 deletions
--- a/doc/rust.texi
+++ b/doc/rust.texi
@ -599,6 +599,7 @@ Unicode characters.
 * Ref.Lex.Ignore::       Ignored characters.
 * Ref.Lex.Ident::        Identifier tokens.
 * Ref.Lex.Key::          Keyword tokens.
+* Ref.Lex.Res::          Reserved tokens.
 * Ref.Lex.Num::          Numeric tokens.
 * Ref.Lex.Text::         String and character tokens.
 * Ref.Lex.Syntax::       Syntactic extension tokens.
@ -636,7 +637,7 @@ token or a syntactic extension token.  Multi-line comments may be nested.
 Identifiers follow the pattern of C identifiers: they begin with a
@emph{letter} or @emph{underscore}, and continue with any combination of
@emph{letters}, @emph{decimal digits} and underscores, and must not be equal
-to any keyword. @xref{Ref.Lex.Key}.
+to any keyword or reserved token. @xref{Ref.Lex.Key}. @xref{Ref.Lex.Res}.

 A @emph{letter} is a Unicode character in the ranges U+0061-U+007A and
 U+0041-U+005A (@code{'a'}-@code{'z'} and @code{'A'}-@code{'Z'}).
@ -728,6 +729,35 @@ The keywords are:
@tab @code{be}
@end multitable

+@node       Ref.Lex.Res
+@subsection Ref.Lex.Res
+@c * Ref.Lex.Res::                Reserved tokens.
+
+The reserved tokens are:
+@cindex Reserved
+
+@sp 2
+
+@multitable @columnfractions .15 .15 .15 .15 .15
+@item @code{f16}
+@tab @code{f80}
+@tab @code{f128}
+@item @code{m32}
+@tab @code{m64}
+@tab @code{m128}
+@tab @code{dec}
+@end multitable
+
+@sp 2
+
+At present these tokens have no defined meaning in the Rust language.
+
+These tokens may correspond, in some current or future implementation,
+to additional built-in types for decimal floating-point, extended
+binary and interchange floating-point formats, as defined in the IEEE
+754-1985 and IEEE 754-2008 specifications.
+
+
@node       Ref.Lex.Num
@subsection Ref.Lex.Num
@c * Ref.Lex.Num::                 Numeric tokens.
@ -785,6 +815,10 @@ only two floating-point suffixes: @code{f32} and @code{f64}. Each of these
 gives the floating point literal the associated type, rather than
@code{float}.

+A set of suffixes is also reserved to accommodate literal support for
+types corresponding to reserved tokens. The reserved suffixes are @code{f16},
+@code{f80}, @code{f128}, @code{m}, @code{m32}, @code{m64} and @code{m128}.
+
@sp 1
 A @dfn{hex digit} is either a @emph{decimal digit} or else a character in the
 ranges U+0061-U+0066 and U+0041-U+0046 (@code{'a'}-@code{'f'},
@ -2024,7 +2058,7 @@ The signed two's complement word types @code{i8}, @code{i16}, @code{i32} and
@end ifhtml
 respectively.
@item
-The IEEE 754 single-precision and double-precision floating-point types:
+The IEEE 754-2008 @code{binary32} and @code{binary64} floating-point types:
@code{f32} and @code{f64}, respectively.
@end itemize

--- a/src/boot/fe/lexer.mll
+++ b/src/boot/fe/lexer.mll
@ -27,8 +27,12 @@
    <- (bump_line lexbuf.Lexing.lex_curr_p)
  ;;

-  let mach_suf_table = Hashtbl.create 0
+  let mach_suf_table = Hashtbl.create 10 (* literal suffix -> machine type *)
  ;;
+
+  let reserved_suf_table = Hashtbl.create 10 (* reserved literal suffixes *)
+  ;;
+
  let _ =
    List.iter (fun (suf, ty) -> Common.htab_put mach_suf_table suf ty)
      [ ("u8", Common.TY_u8);
@ -43,8 +47,24 @@
        ("f64", Common.TY_f64); ]
  ;;

+  let _ = (* Fill reserved_suf_table; each suffix is bound to unit. *)
+    List.iter (fun suf -> Common.htab_put reserved_suf_table suf ())
+      [ "f16";  (* IEEE 754-2008 'binary16' interchange format. *)
+        "f80";  (* IEEE 754-1985 'extended'   *)
+        "f128"; (* IEEE 754-2008 'binary128'  *)
+        "m32";  (* IEEE 754-2008 'decimal32'  *)
+        "m64";  (* IEEE 754-2008 'decimal64'  *)
+        "m128"; (* IEEE 754-2008 'decimal128' *)
+        "m";  (* Width left unspecified: one of m32, m64, m128. *)
+      ]
+  ;;
+
  let keyword_table = Hashtbl.create 100 (* identifier text -> keyword token *)
  ;;
+
+  let reserved_table = Hashtbl.create 10 (* reserved (non-keyword) words *)
+  ;;
+
  let _ =
    List.iter (fun (kwd, tok) -> Common.htab_put keyword_table kwd tok)
              [ ("mod", MOD);
@ -141,6 +161,19 @@
                ("f64", MACH TY_f64)
              ]
 ;;
+
+  (* Fill reserved_table with the reserved words.  Each word is bound to
+   * unit; the identifier rule only tests membership, after the keyword
+   * lookup has missed. *)
+  let _ =
+    List.iter (fun kwd -> Common.htab_put reserved_table kwd ())
+              [ "f16";  (* IEEE 754-2008 'binary16' interchange format. *)
+                "f80";  (* IEEE 754-1985 'extended'   *)
+                "f128"; (* IEEE 754-2008 'binary128'  *)
+                "m32";  (* IEEE 754-2008 'decimal32'  *)
+                "m64";  (* IEEE 754-2008 'decimal64'  *)
+                "m128"; (* IEEE 754-2008 'decimal128' *)
+                "dec";  (* One of m32, m64, m128.     *)
+              ]
+  ;;
+
 }

 let hexdig = ['0'-'9' 'a'-'f' 'A'-'F']
@ -153,6 +186,7 @@ let flo = (dec '.' dec (exp?)) | (dec exp)

 let mach_float_suf = "f32"|"f64"
 let mach_int_suf = ['u''i']('8'|"16"|"32"|"64")
+let flo_suf = ['m''f']("16"|"32"|"64"|"80"|"128") (* supported, reserved and unsupported widths alike; the token rule sorts them out *)

 let ws = [ ' ' '\t' '\r' ]

@ -218,26 +252,39 @@ rule token = parse
 | ']'                          { RBRACKET   }

 | id as i
-                               { try
-                                     Hashtbl.find keyword_table i
-                                 with
-                                     Not_found -> IDENT (i)        }
+    { (* keyword token if known; reserved words fail; otherwise IDENT *)
+      match Common.htab_search keyword_table i with
+          Some tok -> tok
+        | None ->
+            if Hashtbl.mem reserved_table i
+            then fail lexbuf "reserved keyword"
+            else IDENT (i)
+    }

 | (bin|hex|dec) as n           { LIT_INT (Int64.of_string n)       }
 | ((bin|hex|dec) as n) 'u'     { LIT_UINT (Int64.of_string n)      }
 | ((bin|hex|dec) as n)
-  (mach_int_suf as s)          { try
-                                   let tm =
-                                     Hashtbl.find mach_suf_table s
-                                   in
-                                     LIT_MACH_INT
-                                       (tm, Int64.of_string n)
-                                 with
-                                     Not_found ->
-                                       fail lexbuf
-                                         "bad mach-int suffix"     }
+  (mach_int_suf as s)
+  { (* typed int literal when the suffix is in mach_suf_table; else fail *)
+    match Common.htab_search mach_suf_table s with
+        Some tm -> LIT_MACH_INT (tm, Int64.of_string n)
+      | None -> (* NOTE(review): mach_int_suf is u/i-prefixed while the reserved suffixes listed in this file are f/m-prefixed, so the "reserved" arm looks unreachable; confirm *)
+          if Hashtbl.mem reserved_suf_table s
+          then fail lexbuf "reserved mach-int suffix"
+          else fail lexbuf "bad mach-int suffix"
+  }

 | flo as n                     { LIT_FLOAT (float_of_string n)     }
+(* A float followed by a bare 'm' uses a reserved suffix: always rejected. *)
+| flo 'm'                      { fail lexbuf "reserved mach-float suffix" }
+| (flo as n) (flo_suf as s)
+  {
+    (* A suffix found in mach_suf_table yields a typed float literal.  Any
+     * other suffix fails, with a distinct message when it is one of the
+     * suffixes held in reserved_suf_table. *)
+    match Common.htab_search mach_suf_table s with
+        Some tm -> LIT_MACH_FLOAT (tm, float_of_string n)
+      | None ->
+          if Hashtbl.mem reserved_suf_table s
+          then fail lexbuf "reserved mach-float suffix"
+          else fail lexbuf "bad mach-float suffix"
+  }

 | '\''                         { char lexbuf                       }
 | '"'                          { let buf = Buffer.create 32 in
@ -411,3 +458,13 @@ and comment depth = parse
                                  comment depth lexbuf           }

 | _                             { comment depth lexbuf           }
+
+
+(*
+ * Local Variables:
+ * fill-column: 78;
+ * indent-tabs-mode: nil
+ * buffer-file-coding-system: utf-8-unix
+ * compile-command: "make -k -C ../.. 2>&1 | sed -e 's/\\/x\\//x:\\//g'";
+ * End:
+ *)