diff --git a/.gitignore b/.gitignore index 850bcb6a92c..c7d56e16dae 100644 --- a/.gitignore +++ b/.gitignore @@ -1,4 +1,6 @@ *~ +*.x86 +*.llvm *.out *.exe *.orig diff --git a/AUTHORS.txt b/AUTHORS.txt index 16686027022..82e593cae42 100644 --- a/AUTHORS.txt +++ b/AUTHORS.txt @@ -1,7 +1,12 @@ -Rust authors: +Initial author, project lead, target of blame: Graydon Hoare + +Other authors: + Andreas Gal +Brendan Eich Dave Herman +Michael Bebenita Patrick Walton -Brendan Eich +Roy Frostig diff --git a/LICENSE.txt b/LICENSE.txt index efe7e76ec64..9cab1f89f2c 100644 --- a/LICENSE.txt +++ b/LICENSE.txt @@ -53,7 +53,8 @@ The following third party packages are included: All rights reserved. Redistribution and use in source and binary forms, with or without - modification, are permitted provided that the following conditions are met: + modification, are permitted provided that the following conditions are + met: * Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. @@ -71,9 +72,10 @@ The following third party packages are included: SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -* Two header files that are part of the Valgrind package. These files are found - at src/rt/valgrind.h and src/rt/memcheck.h, within this distribution. These - files are redistributed under the following terms, as noted in them: +* Two header files that are part of the Valgrind package. These files are + found at src/rt/valgrind.h and src/rt/memcheck.h, within this + distribution. 
These files are redistributed under the following terms, as + noted in them: for src/rt/valgrind.h: @@ -158,20 +160,20 @@ well as the collective work itslf, is distributed under the following terms: Copyright (c) 2006-2010 Graydon Hoare Copyright (c) 2009-2010 Mozilla Foundation - Permission is hereby granted, free of charge, to any person obtaining a copy - of this software and associated documentation files (the "Software"), to deal - in the Software without restriction, including without limitation the rights - to use, copy, modify, merge, publish, distribute, sublicense, and/or sell - copies of the Software, and to permit persons to whom the Software is - furnished to do so, subject to the following conditions: + Permission is hereby granted, free of charge, to any person obtaining a + copy of this software and associated documentation files (the "Software"), + to deal in the Software without restriction, including without limitation + the rights to use, copy, modify, merge, publish, distribute, sublicense, + and/or sell copies of the Software, and to permit persons to whom the + Software is furnished to do so, subject to the following conditions: The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software. THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN - THE SOFTWARE. + FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL + THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + DEALINGS IN THE SOFTWARE. diff --git a/doc/Makefile b/doc/Makefile index 4ac419c2e18..081a723b9d6 100644 --- a/doc/Makefile +++ b/doc/Makefile @@ -5,7 +5,8 @@ all: rust.pdf rust.html texi2pdf $< %.html: %.texi - makeinfo --html --force --no-split --output=$@ $< + makeinfo --html --ifhtml --force --no-split --output=$@ $< clean: - rm -f rust.aux rust.cp rust.fn rust.ky rust.log rust.pdf rust.html rust.pg rust.toc rust.tp rust.vr \ No newline at end of file + rm -f rust.aux rust.cp rust.fn rust.ky rust.log rust.pdf \ + rust.html rust.pg rust.toc rust.tp rust.vr \ No newline at end of file diff --git a/doc/rust.texi b/doc/rust.texi new file mode 100644 index 00000000000..a53520611eb --- /dev/null +++ b/doc/rust.texi @@ -0,0 +1,3244 @@ +\input texinfo @c -*-texinfo-*- +@c %**start of header +@setfilename rust.info +@settitle Rust Documentation +@setchapternewpage odd +@c %**end of header + +@syncodeindex fn cp + +@ifinfo +This manual is for the ``Rust'' programming language. + +Copyright 2006-2010 Graydon Hoare + +Copyright 2009-2010 Mozilla Foundation + +All rights reserved (for the time being). +@end ifinfo + +@dircategory Programming +@direntry +* rust: (rust). Rust programming language +@end direntry + +@titlepage +@title Rust +@subtitle A safe, concurrent, practical language. +@author Graydon Hoare +@author Mozilla Foundation + +@page +@vskip 0pt plus 1filll +Copyright @copyright{} 2006-2010 Graydon Hoare + +Copyright @copyright{} 2009-2010 Mozilla Foundation + +See accompanying LICENSE.txt for terms. +@end titlepage + +@ifnottex +@node Top +@top Top + +Rust Documentation + +@end ifnottex + +@menu +* Disclaimer:: Notes on a work in progress. +* Introduction:: Background, intentions, lineage. 
+* Tutorial:: Gentle introduction to reading Rust code. +* Reference:: Systematic reference of language elements. +* Index:: Index +@end menu + +@ifnottex +Complete table of contents +@end ifnottex + +@contents + +@c ############################################################ +@c Disclaimer +@c ############################################################ + +@node Disclaimer +@chapter Disclaimer + +To the reader, + +Rust is a work in progress. The language continues to evolve as the design +shifts and is fleshed out in working code. Certain parts work, certain parts +do not, certain parts will be removed or changed. + +This manual is a snapshot written in the present tense. Some features +described do not yet exist in working code. Some may be temporary. It +is a @emph{draft}, and we ask that you not take anything you read here +as either definitive or final. The manual is to help you get a sense +of the language and its organization, not to serve as a complete +specification. At least not yet. + +If you have suggestions to make, please try to focus them on @emph{reductions} +to the language: possible features that can be combined or omitted. At this +point, every ``additive'' feature we're likely to support is already on the +table. The task ahead involves combining, trimming, and implementing. + + +@c ############################################################ +@c Introduction +@c ############################################################ + +@node Introduction +@chapter Introduction + +@quotation + We have to fight chaos, and the most effective way of doing that is + to prevent its emergence. +@flushright + - Edsger Dijkstra +@end flushright +@end quotation +@sp 2 + +Rust is a curly-brace, block-structured statement language. It visually +resembles the C language family, but differs significantly in syntactic and +semantic details. 
Its design is oriented toward concerns of ``programming in +the large'', that is, of creating and maintaining @emph{boundaries} -- both +abstract and operational -- that preserve large-system @emph{integrity}, +@emph{availability} and @emph{concurrency}. + +It supports a mixture of imperative procedural, concurrent actor, object +oriented and pure functional styles. Rust also supports generic programming +and metaprogramming, in both static and dynamic styles. + +@menu +* Goals:: Intentions, motivations. +* Sales Pitch:: A summary for the impatient. +* Influences:: Relationship to past languages. +@end menu + + +@node Goals +@section Goals + +The language design pursues the following goals: + +@sp 1 +@itemize +@item Compile-time error detection and prevention. +@item Run-time fault tolerance and containment. +@item System building, analysis and maintenance affordances. +@item Clarity and precision of expression. +@item Implementation simplicity. +@item Run-time efficiency. +@item High concurrency. +@end itemize +@sp 1 + +Note that most of these goals are @emph{engineering} goals, not showcases for +sophisticated language technology. Most of the technology in Rust is +@emph{old} and has been seen decades earlier in other languages. + +All new languages are developed in a technological context. Rust's goals arise +from the context of writing large programs that interact with the internet -- +both servers and clients -- and are thus much more concerned with +@emph{safety} and @emph{concurrency} than older generations of program. Our +experience is that these two forces do not conflict; rather they drive system +design decisions toward extensive use of @emph{partitioning} and +@emph{statelessness}. Rust aims to make these a more natural part of writing +programs, within the niche of lower-level, practical, resource-conscious +languages. 
+ + +@page +@node Sales Pitch +@section Sales Pitch + +The following comprises a brief ``sales pitch'' overview of the salient +features of Rust, relative to other languages. + +@itemize + +@sp 1 +@item No @code{null} pointers + +The initialization state of every slot is statically computed as part of the +typestate system (see below), and requires that all slots are initialized +before use. There is no @code{null} value; uninitialized slots are +uninitialized, and can only be written to, not read. + +The common use for @code{null} in other languages -- as a sentinel value -- is +subsumed into the more general facility of disjoint union types. A program +must explicitly model its use of such types. + +@sp 1 +@item Lightweight tasks with no shared mutable state + +Like many @emph{actor} languages, Rust provides an isolation (and concurrency) +model based on lightweight tasks scheduled by the language runtime. These +tasks are very inexpensive and statically unable to mutate one another's local +memory. Breaking the rule of task isolation is only possible by calling +external (C/C++) code. + +Inter-task communication is typed, asynchronous and simplex, based on passing +messages over channels to ports. Transmission can be rate-limited or +rate-unlimited. Selection between multiple senders is pseudo-randomized on the +receiver side. + +@sp 1 +@item Predictable native code, simple runtime + +The meaning and cost of every operation within a Rust program is intended to +be easy to model for the reader. The code should not ``surprise'' the +programmer once it has been compiled. + +Rust compiles to native code. Rust compilation units are large and the +compilation model is designed around multi-file, whole-library or +whole-program optimization. The compiled units are standard loadable objects +(ELF, PE, Mach-O) containing standard metadata (DWARF) and are compatible with +existing, standard low-level tools (disassemblers, debuggers, profilers, +dynamic loaders). 
+ +The Rust runtime library is a small collection of support code for scheduling, +memory management, inter-task communication, reflection and runtime +linkage. This library is written in standard C++ and is quite +straightforward. It presents a simple interface to embeddings. No +research-level virtual machine, JIT or garbage collection technology is +required. It should be relatively easy to adapt a Rust front-end on to many +existing native toolchains. + +@sp 1 +@item Integrated system-construction facility + +The units of compilation of Rust are multi-file amalgamations called +@emph{crates}. A crate is described by a separate, declarative type of source +file that guides the compilation of the crate, its packaging, its versioning, +and its external dependencies. Crates are also the units of distribution and +loading. Significantly: the dependency graph of crates is @emph{acyclic} and +@emph{anonymous}: there is no global namespace for crates, and module-level +recursion cannot cross crate barriers. + +Unlike many languages, individual modules do @emph{not} carry all the +mechanisms or restrictions of crates. Modules and crates serve different +roles. + +@sp 1 +@item Stack-based iterators + +Rust provides a type of function-like multiple-invocation iterator that is +very efficient: the iterator state lives only on the stack and is tightly +coupled to the loop that invoked it. + +@sp 1 +@item Direct interface to C code + +Rust can load and call many C library functions simply by declaring +them. Calling a C function statically marks a function as ``unsafe'', unless +the task calling the unsafe function is further isolated within an external +``heavyweight'' operating-system subprocess. Every ``unsafe'' function or +module in a Rust compilation unit must be explicitly authorized in the crate +file. 
+ +@sp 1 +@item Structural algebraic data types + +The Rust type system is structural rather than nominal, and contains the +standard assortment of useful ``algebraic'' type constructors from functional +languages, such as function types, tuples, record types, vectors, and tagged +disjoint unions. Structural types may be @emph{pattern-matched} in an +@code{alt} statement. + +@sp 1 +@item Generic code + +Rust supports a simple form of parametric polymorphism: functions, iterators, +types and objects can be parametrized by other types. + +@sp 1 +@item Argument binding + +Rust provides a mechanism of partially binding arguments to functions, +producing new functions that accept the remaining un-bound arguments. This +mechanism combines some of the features of lexical closures with some of the +features of currying, in a smaller and simpler package. + +@sp 1 +@item Local type inference + +To save some quantity of programmer key-pressing, Rust supports local type +inference: signatures of functions, objects and iterators always require type +annotation, but within the body of a function or iterator many slots can be +declared @code{auto} and Rust will infer the slot's type from its uses. + +@sp 1 +@item Structural object system + +Rust has a lightweight object system based on structural object types: there +is no ``class hierarchy'' nor any concept of inheritance. Method overriding +and object restriction are performed explicitly on object values, which are +little more than order-insensitive records of methods sharing a common private +value. Objects can be mutable or immutable, and immutable objects can have +destructors. + +@sp 1 +@item Dynamic type + +Rust includes support for slots of a top type, @code{any}, that can hold any +type of value whatsoever. An @code{any} slot is a pair of a type code and an +exterior value of that type. Injection into an @code{any} and projection by +type-case-selection is integrated into the language. 
+ +@sp 1 +@item Dynamic metaprogramming (reflection) + +Rust supports run-time reflection on the structure of a crate, using a +combination of custom descriptor structures and the DWARF metadata tables used +to support crate linkage and other runtime services. + +@sp 1 +@item Static metaprogramming (syntactic extension) + +Rust supports a system for syntactic extensions that can be loaded into the +compiler, to implement user-defined notations, macros, program-generators and +the like. These notations are @emph{marked} using a special form of +bracketing, such that a reader unfamiliar with the extension can still parse +the surrounding text by skipping over the bracketed ``extension text''. + +@sp 1 +@item Idempotent failure + +If a task fails due to a signal, or if it executes the special @code{fail} +statement, it enters the @emph{failing} state. A failing task unwinds its +control stack, frees all of its owned resources (executing destructors) and +enters the @emph{dead} state. Failure is idempotent and non-recoverable. + +@sp 1 +@item Signal handling + +Rust has a system for propagating task-failures and other spontaneous +events between tasks. Some signals can be trapped and redirected to +channels; other signals are fatal and result in task-failure. Tasks +can designate other tasks to handle signals for them. This permits +organizing tasks into mutually-supervising or mutually-failing groups. + +@sp 1 +@item Deterministic destruction + +Immutable objects can have destructor functions, which are executed +deterministically in top-down ownership order, as control frames are exited +and/or objects are otherwise freed from data structures holding them. The same +destructors are run in the same order whether the object is deleted by +unwinding during failure or normal execution. 
+ +Similarly, the rules for freeing immutable memory are deterministic and +predictable: on scope-exit or structure-release, interior slots are released +immediately, exterior slots have their reference count decreased and are +released if the count drops to zero. Alias slots are not affected by scope +exit. + +Mutable memory is local to a task, and is subject to per-task garbage +collection. As a result, unreferenced mutable memory is not necessarily freed +immediately; if it is acyclic it is freed when the last reference to it drops, +but if it is part of a reference cycle it will be freed when the GC collects +it (or when the owning task terminates, at the latest). + +Mutable memory can point to immutable memory but not vice-versa. Doing so +merely delays (to an undefined future time) the moment when the deterministic, +top-down destruction sequence for the referenced immutable memory +@emph{starts}. In other words, the immutable ``leaves'' of a mutable structure +are released in a locally-predictable order, even if the ``interior'' of the +mutable structure is released in an unpredictable order. + +@sp 1 +@item Typestate system + +Every storage slot in Rust participates in not only a conventional structural +static type system, describing the interpretation of memory in the slot, but +also a @emph{typestate} system. The static typestates of a program describe +the set of @emph{pure, dynamic predicates} that provably hold over some set of +slots, at each point in the program's control flow graph. The static +calculation of the typestates of a program is a dataflow problem, and handles +user-defined predicates in a similar fashion to the way the type system +permits user-defined types. + +A short way of thinking of this is: types statically model the kinds of values +held in slots, typestates statically model @emph{assertions that hold} before +and after statements. + +@sp 1 +@item Static control over memory allocation, packing and aliasing. 
+ +Every variable or field in Rust is a combination of a type, a mutability flag +and a @emph{mode}; this combination is called a @emph{slot}. There are 3 kinds +of @dfn{slot mode}, denoting 3 ways of referring to a value: + +@itemize +@item ``interior'' (slot contains value) +@item ``exterior'', (slot points to managed heap allocation) +@item ``alias'', (slot points directly to provably-live address) +@end itemize + +Interior slots declared as variables in a function are allocated very quickly +on the stack, as part of a local activation frame, as in C or C++. Alias slots +permit efficient by-reference parameter passing without adjusting heap +reference counts or interacting with garbage collection, as alias lifetimes +are statically guaranteed to outlive callee lifetimes. + +Copying data between slots of different modes may cause either a simple +address assignment or reference-count adjustment, or may cause a value to be +``transplanted'': copied by value from the interior of one memory structure to +another, or between stack and heap. Transplanting, when necessary, is +predictable and automatic, as part of the definition of the copy operator +(@code{=}). + +In addition, slots have a static initialization state that is calculated by +the typestate system. This permits late initialization of variables in +functions with complex control-flow, while still guaranteeing that every use +of a slot occurs after it has been initialized. + +@sp 1 +@item Static control over mutability. + +Slots in Rust are classified as either immutable or mutable. By default, +all slots are immutable. + +If a slot within a type is declared as @code{mutable}, the type is a +@code{state} type and must be declared as such. + +This classification of data types in Rust interacts with the memory allocation +and transmission rules. In particular: + +@itemize +@item Only immutable (non-state) values can be sent over channels. 
+@item Only immutable (non-state) objects can have destructor functions. +@end itemize + +State values are subject to local (per-task) garbage-collection. Garbage +collection costs are therefore also task-local and do not interrupt or suspend +other tasks. + +Immutable values are reference-counted and have a deterministic destruction +order: top-down, immediately upon release of the last live reference. + +State values can refer to immutable values, but not vice-versa. Rust therefore +encourages the programmer to write in a style that consists primarily of +immutable types, but also permits limited, local (per-task) mutability. + +@end itemize + + +@page +@node Influences +@section Influences +@sp 2 + +@quotation + The essential problem that must be solved in making a fault-tolerant + software system is therefore that of fault-isolation. Different programmers + will write different modules, some modules will be correct, others will have + errors. We do not want the errors in one module to adversely affect the + behaviour of a module which does not have any errors. + +@flushright + - Joe Armstrong +@end flushright +@end quotation +@sp 2 + +@quotation + In our approach, all data is private to some process, and processes can + only communicate through communications channels. @emph{Security}, as used + in this paper, is the property which guarantees that processes in a system + cannot affect each other except by explicit communication. + + When security is absent, nothing which can be proven about a single module + in isolation can be guaranteed to hold when that module is embedded in a + system [...] +@flushright + - Robert Strom and Shaula Yemini +@end flushright +@end quotation +@sp 2 + +@quotation + Concurrent and applicative programming complement each other. The + ability to send messages on channels provides I/O without side effects, + while the avoidance of shared data helps keep concurrent processes from + colliding. 
+@flushright + - Rob Pike +@end flushright +@end quotation +@sp 2 + +@page +Rust is not a particularly original language. It may however appear unusual by +contemporary standards, as its design elements are drawn from a number of +``historical'' languages that have, with a few exceptions, fallen out of +favour. Five prominent lineages contribute the most: + +@itemize +@sp 1 +@item +The NIL (1981) and Hermes (1990) family. These languages were developed by +Robert Strom, Shaula Yemini, David Bacon and others in their group at IBM +Watson Research Center (Yorktown Heights, NY, USA). + +@sp 1 +@item +The Erlang (1987) language, developed by Joe Armstrong, Robert Virding, Claes +Wikstr@"om, Mike Williams and others in their group at the Ericsson Computer +Science Laboratory (@"Alvsj@"o, Stockholm, Sweden). + +@sp 1 +@item +The Sather (1990) language, developed by Stephen Omohundro, Chu-Cheow Lim, +Heinz Schmidt and others in their group at The International Computer Science +Institute of the University of California, Berkeley (Berkeley, CA, USA). + +@sp 1 +@item +The Newsqueak (1988), Alef (1995), and Limbo (1996) family. These languages +were developed by Rob Pike, Phil Winterbottom, Sean Dorward and others in +their group at Bell Labs Computing Sciences Research Center (Murray Hill, NJ, +USA). + +@sp 1 +@item +The Napier (1985) and Napier88 (1988) family. These languages were developed +by Malcolm Atkinson, Ron Morrison and others in their group at the University +of St. Andrews (St. Andrews, Fife, UK). +@end itemize + +@sp 1 +Additional specific influences can be seen from the following languages: +@itemize +@item The structural algebraic types and compilation manager of SML. +@item The syntax-extension systems of Camlp4 and the Common Lisp readtable. +@item The deterministic destructor system of C++. 
+@end itemize + +@c ############################################################ +@c Tutorial +@c ############################################################ + +@node Tutorial +@chapter Tutorial + +@emph{TODO}. + +@c ############################################################ +@c Reference +@c ############################################################ + +@node Reference +@chapter Reference + +@menu +* Ref.Lex:: Lexical structure. +* Ref.Path:: References to slots and items. +* Ref.Gram:: Grammar. +* Ref.Comp:: Compilation and component model. +* Ref.Mem:: Semantic model of memory. +* Ref.Task:: Semantic model of tasks. +* Ref.Item:: The components of a module. +* Ref.Type:: The types of values held in memory. +* Ref.Expr:: Parsed and primitive expressions. +* Ref.Stmt:: Executable statements. +* Ref.Run:: Organization of runtime services. +@end menu + +@page +@node Ref.Lex +@section Ref.Lex +@c * Ref.Lex:: Lexical structure. + +The lexical structure of a Rust source file or crate file is defined in terms +of Unicode character codes and character properties. + +Groups of Unicode character codes and characters are organized into +@emph{tokens}. Tokens are defined as the longest contiguous sequence of +characters within the same token type (identifier, keyword, literal, symbol), +or interrupted by ignored characters. + +Most tokens in Rust follow rules similar to the C family. + +Most tokens (including identifiers, whitespace, keywords, operators and +structural symbols) are drawn from the ASCII-compatible range of +Unicode. String and character literals, however, may include the full range of +Unicode characters. + +@emph{TODO: formalize this section much more}. + +@menu +* Ref.Lex.Ignore:: Ignored characters. +* Ref.Lex.Ident:: Identifier tokens. +* Ref.Lex.Key:: Keyword tokens. +* Ref.Lex.Num:: Numeric tokens. +* Ref.Lex.Text:: String and character tokens. +* Ref.Lex.Syntax:: Syntactic extension tokens. +* Ref.Lex.Sym:: Special symbol tokens. 
+@end menu + +@page +@node Ref.Lex.Ignore +@subsection Ref.Lex.Ignore +@c * Ref.Lex.Ignore:: Ignored tokens. + +The classes of @emph{whitespace} and @emph{comment} are ignored, and are not +considered as tokens. + +@dfn{Whitespace} is any of the following Unicode characters: U+0020 (space), +U+0009 (tab, @code{'\t'}), U+000A (LF, @code{'\n'}), U+000D (CR, @code{'\r'}). + +@dfn{Comments} are any sequence of Unicode characters beginning with U+002F +U+002F (@code{//}) and extending to the next U+000a character, +@emph{excluding} cases in which such a sequence occurs within a string literal +token or a syntactic extension token. + + +@page +@node Ref.Lex.Ident +@subsection Ref.Lex.Ident +@c * Ref.Lex.Ident:: Identifier tokens. + +Identifiers follow the pattern of C identifiers: they begin with a +@emph{letter} or underscore character @code{_} (Unicode character U+005f), and +continue with any combination of @emph{letters}, @emph{digits} and +underscores, and must not be equal to any keyword. @xref{Ref.Lex.Key}. + +A @emph{letter} is a Unicode character in the ranges U+0061-U+007A and +U+0041-U+005A (@code{a-z} and @code{A-Z}). + +A @emph{digit} is a Unicode character in the range U+0030-U+0039 (@code{0-9}). + +@page +@node Ref.Lex.Key +@subsection Ref.Lex.Key +@c * Ref.Lex.Key:: Keyword tokens. 
+ +The keywords are: + +@sp 2 + +@multitable @columnfractions .15 .15 .15 .15 .15 +@item @code{use} +@tab @code{meta} +@tab @code{syntax} +@tab @code{mutable} +@tab @code{native} +@item @code{mod} +@tab @code{import} +@tab @code{export} +@tab @code{let} +@tab @code{auto} +@item @code{io} +@tab @code{state} +@tab @code{unsafe} +@tab @code{auth} +@tab @code{with} +@item @code{bind} +@tab @code{type} +@tab @code{true} +@tab @code{false} +@item @code{any} +@tab @code{int} +@tab @code{uint} +@tab @code{char} +@tab @code{bool} +@item @code{u8} +@tab @code{u16} +@tab @code{u32} +@tab @code{u64} +@tab @code{f32} +@item @code{i8} +@tab @code{i16} +@tab @code{i32} +@tab @code{i64} +@tab @code{f64} +@item @code{rec} +@tab @code{tup} +@tab @code{tag} +@tab @code{vec} +@tab @code{str} +@item @code{fn} +@tab @code{iter} +@tab @code{obj} +@tab @code{as} +@tab @code{drop} +@item @code{task} +@tab @code{port} +@tab @code{chan} +@tab @code{flush} +@tab @code{spawn} +@item @code{if} +@tab @code{else} +@tab @code{alt} +@tab @code{case} +@tab @code{in} +@item @code{do} +@tab @code{while} +@tab @code{break} +@tab @code{cont} +@tab @code{fail} +@item @code{log} +@tab @code{note} +@tab @code{claim} +@tab @code{check} +@tab @code{prove} +@item @code{for} +@tab @code{each} +@tab @code{ret} +@tab @code{put} +@tab @code{be} +@end multitable + +@page +@node Ref.Lex.Num +@subsection Ref.Lex.Num +@c * Ref.Lex.Num:: Numeric tokens. + +@emph{TODO: describe numeric literals}. + +@page +@node Ref.Lex.Text +@subsection Ref.Lex.Text +@c * Ref.Lex.Text:: String and character tokens. + +@emph{TODO: describe string and character literals}. + +@page +@node Ref.Lex.Syntax +@subsection Ref.Lex.Syntax +@c * Ref.Lex.Syntax:: Syntactic extension tokens. 
+ +Syntactic extensions are marked with the @emph{pound} sigil @code{#} (U+0023), +followed by a qualified name of a compile-time imported module item, an +optional parenthesized list of @emph{tokens}, and an optional brace-enclosed +region of free-form text (with brace-matching and brace-escaping used to +determine the limit of the region). @xref{Ref.Comp.Syntax}. + +@emph{TODO: formalize those terms more}. + +@page +@node Ref.Lex.Sym +@subsection Ref.Lex.Sym +@c * Ref.Lex.Sym:: Special symbol tokens. + +The special symbols are: + +@sp 2 + +@multitable @columnfractions .1 .1 .1 .1 .1 .1 + +@item @code{@@} +@tab @code{_} +@item @code{#} +@tab @code{:} +@tab @code{.} +@tab @code{;} +@tab @code{,} +@item @code{[} +@tab @code{]} +@tab @code{@{} +@tab @code{@}} +@tab @code{(} +@tab @code{)} +@item @code{=} +@tab @code{<-} +@tab @code{<|} +@tab @code{<+} +@tab @code{->} +@item @code{+} +@tab @code{++} +@tab @code{+=} +@tab @code{-} +@tab @code{--} +@tab @code{-=} +@item @code{*} +@tab @code{/} +@tab @code{%} +@tab @code{*=} +@tab @code{/=} +@tab @code{%=} +@item @code{&} +@tab @code{|} +@tab @code{!} +@tab @code{~} +@tab @code{^} +@item @code{&=} +@tab @code{|=} +@tab @code{^=} +@tab @code{!=} +@item @code{>>} +@tab @code{>>>} +@tab @code{<<} +@tab @code{<<=} +@tab @code{>>=} +@tab @code{>>>=} +@item @code{<} +@tab @code{<=} +@tab @code{==} +@tab @code{>=} +@tab @code{>} +@item @code{&&} +@tab @code{||} +@end multitable + +@page +@page +@node Ref.Path +@section Ref.Path +@c * Ref.Path:: References to slots and items. + +A @dfn{path} is a ubiquitous syntactic form in Rust that deserves special +attention. A path denotes a slot or an +item. @xref{Ref.Mem.Slot}. @xref{Ref.Item}. Every slot and item in a Rust +crate has a @emph{canonical path} that refers to it from the crate top-level, +as well as a number of shorter @emph{relative paths} that may also denote it +in inner scopes of the crate. 
There is no way to define a slot or item without +a canonical path within its crate (with the exception of the crate's implicit +top-level module). Paths have meaning only within a specific +crate. @xref{Ref.Comp.Crate}. + +Paths consist of period-separated components. In the simplest form, path +components are identifiers. @xref{Ref.Lex.Ident}. + +Two examples of simple paths consisting of only identifier components: +@example +x; +x.y.z; +@end example + +Paths fall into two important categories: @emph{names} and +@emph{lvals}. + +A @dfn{name} denotes an item, and is statically resolved to its +referent at compile time. + +An @dfn{lval} denotes a slot, and is statically resolved to a sequence of +memory operations and primitive (arithmetic) expressions required to load or +store to the slot at compile time. + +In some contexts, the Rust grammar accepts a general @emph{path}, but a +subsequent syntactic restriction requires the path to be an lval or a name. In +other words: in some contexts an lval is required (for example, on the left +hand side of the copy operator, @pxref{Ref.Stmt.Copy}) and in other contexts a +name is required (for example, as a type parameter, @pxref{Ref.Item}). In no +case is the grammar made ambiguous by accepting a general path and restricting +allowed paths to names or lvals after parsing. These restrictions are noted in +the grammar. @xref{Ref.Gram}. + +A name component may include type parameters. Type parameters are denoted by +square brackets. Square brackets are used @emph{only} to denote type +parameters in Rust. If a name component includes a type parameter, the type +parameter must also resolve statically to a type in the environment of the +name. Type parameters are only part of the names of items. @xref{Ref.Item}. + +An example of a name with type parameters: +@example +m.map[int,str]; +@end example + +An lval component may include an indexing operator. 
Index operators are +enclosed in parentheses and can include any integral expression. Indexing +operators can only be applied to vectors or strings, and imply a run-time +bounds-check. @xref{Ref.Type.Vec}. + +An example of an lval with a dynamic indexing operator: +@example +x.y.(1 + v).z; +@end example + +@page +@node Ref.Gram +@section Ref.Gram +@c * Ref.Gram:: Grammar. + +@emph{TODO: LL(1), it reads like C, Alef and bits of Napier; formalize here}. + +@page +@node Ref.Comp +@section Ref.Comp +@c * Ref.Comp:: Compilation and component model. + +Rust is a @emph{compiled} language. Its semantics are divided along a +@emph{phase distinction} between compile-time and run-time. Those semantic +rules that have a @emph{static interpretation} govern the success or failure +of compilation. A program that fails to compile due to violation of a +compile-time rule has no defined semantics at run-time; the compiler should +halt with an error report, and produce no executable artifact. + +The compilation model centres on artifacts called @emph{crates}. Each +compilation is directed towards a single crate in source form, and if +successful produces a single crate in executable form. + +@menu +* Ref.Comp.Crate:: Units of compilation and linking. +* Ref.Comp.Meta:: Metadata about a crate. +* Ref.Comp.Syntax:: Syntax extensions. +@end menu + +@page +@node Ref.Comp.Crate +@subsection Ref.Comp.Crate +@c * Ref.Comp.Crate:: Units of compilation and linking. + +A @dfn{crate} is a unit of compilation and linking, as well as versioning, +distribution and runtime loading. 
Crates are defined by @emph{crate source +files}, which are a type of source file written in a special declarative +language: @emph{crate language}.@footnote{A crate is somewhat analogous to an +@emph{assembly} in the ECMA-335 CLI model, a @emph{library} in the SML/NJ +Compilation Manager, a @emph{unit} in the Owens and Flatt module system, or a +@emph{configuration} in Mesa.} A crate source file describes: + +@itemize +@item Metadata about the crate, such as author, name, version, and copyright. +@item The source-file and directory modules that make up the crate. +@item The set of syntax extensions to enable for the crate. +@item Any external crates or native modules that the crate imports to its top level. +@item The organization of the crate's internal namespace. +@item The set of names exported from the crate. +@end itemize + +A single crate source file may describe the compilation of a large number of +Rust source files; it is compiled in its entirety, as a single indivisible +unit. The compilation phase attempts to transform a single crate source file, +and its referenced contents, into a single compiled crate. Crate source files +and compiled crates have a 1:1 relationship. + +The syntactic form of a crate is a sequence of @emph{directives}, some of +which have nested sub-directives. + +A crate defines an implicit top-level anonymous module: within this module, +all members of the crate have canonical path names. @xref{Ref.Path}. The +@code{mod} directives within a crate file specify sub-modules to include in +the crate: these are either directory modules, corresponding to directories in +the filesystem of the compilation environment, or file modules, corresponding +to Rust source files. The names given to such modules in @code{mod} directives +become prefixes of the paths of items and slots defined within any included +Rust source files. 
+ +The @code{use} directives within the crate specify @emph{other crates} to scan +for, locate, import into the crate's module namespace during compilation, and +link against at runtime. Use directives may also occur independently in Rust +source files. These directives may specify loose or tight ``matching +criteria'' for imported crates, depending on the preferences of the crate +developer. In the simplest case, a @code{use} directive may only specify a +symbolic name and leave the task of locating and binding an appropriate crate +to a compile-time heuristic. In a more controlled case, a @code{use} directive +may specify any metadata as matching criteria, such as a URI, an author name +or version number, a checksum or even a cryptographic signature, in order to +select an appropriate imported crate. @xref{Ref.Comp.Meta}. + +The compiled form of a crate is a loadable and executable object file full of +machine code, in a standard loadable operating-system format such as ELF, PE +or Mach-O. The loadable object contains extensive DWARF metadata, describing: +@itemize +@item Metadata required for type reflection. +@item The publicly exported module structure of the crate. +@item Any metadata about the crate, defined by @code{meta} directives. +@item The crates to dynamically link with at run-time, with matching criteria +derived from the same @code{use} directives that guided compile-time imports. +@end itemize + +The @code{syntax} directives of a crate are similar to the @code{use} +directives, except they govern the syntax extension namespace (accessed +through the syntax-extension sigil @code{#}, @pxref{Ref.Comp.Syntax}) +available only at compile time. A @code{syntax} directive also makes its +extension available to all subsequent directives in the crate file. + +An example of a crate: + +@example +// Metadata about this crate +meta (author = "Jane Doe", + name = "projx", + desc = "Project X", + ver = "2.5"); + +// Import a module.
+use std (ver = "1.0"); + +// Activate a syntax-extension. +syntax re; + +// Define some modules. +mod foo = "foo.rs"; +mod bar @{ + mod quux = "quux.rs"; +@} +@end example + +@page +@node Ref.Comp.Meta +@subsection Ref.Comp.Meta + +In a crate, a @code{meta} directive associates free form key-value metadata +with the crate. This metadata can, in turn, be used in providing partial +matching parameters to syntax-extension loading and crate importing +directives, denoted by @code{syntax} and @code{use} keywords respectively. + +Alternatively, metadata can serve as a simple form of documentation. + +@page +@node Ref.Comp.Syntax +@subsection Ref.Comp.Syntax +@c * Ref.Comp.Syntax:: Syntax extension. + +Rust provides a notation for @dfn{syntax extension}. The notation is a marked +syntactic form that can appear as an expression, statement or item in the body +of a Rust program, or as a directive in a Rust crate, and which causes the +text enclosed within the marked form to be translated through a named +extension function loaded into the compiler at compile-time. + +The compile-time extension function must return a value of the corresponding +Rust AST type, either an expression node, a statement node or an item +node. @footnote{The syntax-extension system is analogous to the extensible +reader system provided by Lisp @emph{readtables}, or the Camlp4 system of +Objective Caml.} @xref{Ref.Lex.Syntax}. + +A syntax extension is enabled by a @code{syntax} directive, which must occur +in a crate file. When the Rust compiler encounters a @code{syntax} directive +in a crate file, it immediately loads the named syntax extension, and makes it +available for all subsequent crate directives within the enclosing block scope +of the crate file, and all Rust source files referenced as modules from the +enclosing block scope of the crate file. 
+ +For example, this extension might provide a syntax for regular +expression literals: + +@example +// In a crate file: + +// Requests the 're' syntax extension from the compilation environment. +syntax re; + +// Also declares an import dependency on the module 're'. +use re; + +// Reference to a Rust source file as a module in the crate. +mod foo = "foo.rs"; + +@dots{} + +// In the source file "foo.rs", use the #re syntax extension and +// the re module at run-time. +let str s = get_string(); +let regex pattern = #re.pat@{ aa+b? @}; +let bool matched = re.match(pattern, s); +@end example + +@page +@node Ref.Mem +@section Ref.Mem +@c * Ref.Mem:: Semantic model of memory. + +A Rust program's memory consists of a static set of @emph{items}, a set of tasks +each with its own @emph{stack}, and a @emph{heap}. Immutable portions of the +heap may be shared between tasks; mutable portions may not. + +Allocations in the stack and the heap consist of @emph{slots}. + +@menu +* Ref.Mem.Alloc:: Memory allocation model. +* Ref.Mem.Own:: Memory ownership model. +* Ref.Mem.Slot:: Memory containment and reference model. +* Ref.Mem.Init:: Initialization state of memory. +* Ref.Mem.Acct:: Memory accounting model. +@end menu + +@page +@node Ref.Mem.Alloc +@subsection Ref.Mem.Alloc +@c * Ref.Mem.Alloc:: Memory allocation model. + +The @dfn{items} of a program are those functions, iterators, objects, modules +and types that have their value calculated at compile-time and stored uniquely +in the memory image of the Rust process. Items are neither dynamically +allocated nor freed. + +A task's @dfn{stack} consists of activation frames automatically allocated on +entry to each function as the task executes. A stack allocation is reclaimed +when control leaves the frame containing it. + +The @dfn{heap} is a general term that describes two separate sets of exterior +allocations: @emph{local heap} allocations and the @emph{shared heap} +allocations.
+ +Exterior allocations of mutable types are @dfn{local heap} allocations, +owned by the task. Such @dfn{local allocations} cannot pass over channels and +do not outlive the task that owns them. When unreferenced, they are collected +using a general (cycle-aware) garbage-collector local to each task. Garbage +collection within a local heap does not interrupt execution of other tasks. + +Exterior allocations of immutable types are @dfn{shared heap} allocations, +and can be multiply-referenced by many different tasks. Such @dfn{shared +allocations} can pass over channels, and live as long as the last task +referencing them. When unreferenced, they are collected immediately using +reference-counting. + + + +@page +@node Ref.Mem.Own +@subsection Ref.Mem.Own +@c * Ref.Mem.Own:: Memory ownership model. + +A task @emph{owns} all the interior allocations in its stack and @emph{local} +exterior allocations. A task @emph{shares} ownership of @emph{shared} exterior +allocations. A task does not own any items. + +@dfn{Ownership} of an allocation means that the owning task is the only task +that can access the allocation. + +@dfn{Sharing} of an allocation means that the same allocation may be +concurrently referenced by multiple tasks. The only shared allocations are +those that are immutable. + +When a stack frame is exited, its interior allocations are all released, and +its references to heap allocations (both shared and owned) are dropped. + +When a task finishes, its stack is necessarily empty. The task's interior +slots are released as the task itself is released, and its references to heap +allocations are dropped. + +@page +@node Ref.Mem.Slot +@subsection Ref.Mem.Slot +@c * Ref.Mem.Slot:: Memory containment and reference model. + +A @dfn{slot} is a component of an allocation. A slot either holds a value or +the address of another allocation. Every slot has one of three possible +@emph{modes}. 
+ +The possible @dfn{modes} of a slot are: + +@itemize +@sp 1 +@item @dfn{Interior mode} + +The slot holds the value of the slot. + +@sp 1 +@item @dfn{Exterior mode} + +The slot holds the address of a heap allocation that holds the value of the +slot. + +Exterior slots are indicated by the @emph{at} sigil @code{@@}. + +For example, the following code allocates an exterior record, copies it by +counted-reference to a second exterior slot, then modifies the record through +the second exterior slot that points to the same exterior allocation. +@example +type point3d = rec(int x, int y, int z); +let @@point3d pt1 = rec(x=1, y=2, z=3); +let @@point3d pt2 = pt1; +pt2.z = 4; +@end example + +@sp 1 +@item @dfn{Alias mode} + +The slot holds the address of a value. The referenced value may reside within +a stack allocation @emph{or} a heap allocation. + +Alias slots can @emph{only} be declared as members of a function or iterator +signature, bound to the lifetime of a stack frame. Alias slots cannot be +declared as members of general data types. + +Alias slots are indicated by the @emph{ampersand} sigil @code{&}. + +The following example function accepts a single read-only alias parameter: +@example +type point3d = rec(int x, int y, int z); + +fn extract_z(&point3d p) -> int @{ + ret p.z; +@} +@end example + +The following example function accepts a single mutable alias +parameter: +@example +fn incr(mutable &int i) @{ + i = i + 1; +@} +@end example + +@end itemize + +@page +@node Ref.Mem.Init +@subsection Ref.Mem.Init +@c * Ref.Mem.Init:: Initialization state of memory. + +A slot is either initialized or uninitialized at every point in a program. An +@dfn{initialized} slot is one that holds a value. An @dfn{uninitialized} slot +is one that has not yet had a value written into it, or has had its value +deleted, and so holds undefined memory. The typestate system ensures that an +uninitialized slot cannot be read, but can be written to. 
A slot becomes +initialized in any statement that writes to it, and remains initialized until +explicitly destroyed or until its enclosing allocation is destroyed. + +@page +@node Ref.Mem.Acct +@subsection Ref.Mem.Acct +@c * Ref.Mem.Acct:: Memory accounting model. + +Every task belongs to a domain, and that domain tracks the amount of memory +allocated and not yet released by tasks within it. @xref{Ref.Task.Dom}. Each +domain has a memory budget. The @dfn{budget} of a domain is the maximum amount +of memory that can be simultaneously allocated in the domain. If a task tries +to allocate memory within a domain with an exceeded budget, the task will +receive a signal. + +Within a task, accounting is strictly enforced: all memory allocated through +the runtime library, including user data, sub-domains and runtime-support +structures such as channel and signal queues, is charged to the task's domain. + +When a communication channel crosses from one domain to another, any value +sent over the channel is guaranteed to have been @emph{detached} from the +domain's memory graph (singly referenced, and/or deep-copied), so its memory +cost is transferred to the receiving domain. + + +@page +@node Ref.Task +@section Ref.Task +@c * Ref.Task:: Semantic model of tasks. + +An executing Rust program consists of a tree of tasks. A Rust @dfn{task} +consists of an entry function, a stack, a set of outgoing communication +channels and incoming communication ports, and ownership of some portion of +the heap of a single operating-system process. + +Multiple Rust tasks may coexist in a single operating-system +process. Execution of multiple Rust tasks in a single operating-system process +may be either truly concurrent or interleaved by the runtime scheduler. Rust +tasks are lightweight: each consumes less memory than an operating-system +process, and switching between Rust tasks is faster than switching between +operating-system processes. + +@menu +* Ref.Task.Comm:: Inter-task communication.
+* Ref.Task.Life:: Task lifecycle and state transitions. +* Ref.Task.Dom:: Task domains. +* Ref.Task.Sched:: Task scheduling model. +@end menu + +@page +@node Ref.Task.Comm +@subsection Ref.Task.Comm +@c * Ref.Task.Comm:: Inter-task communication. + +With the exception of @emph{unsafe} constructs, Rust tasks are isolated from +interfering with one another's memory directly. Instead of manipulating shared +storage, Rust tasks communicate with one another using a typed, asynchronous, +simplex message-passing system. + +A @dfn{port} is a communication endpoint that can @emph{receive} +messages. Ports receive messages from channels. + +A @dfn{channel} is a communication endpoint that can @emph{send} +messages. Channels send messages to ports. + +Each port has a unique identity and cannot be replicated. If a port value is +copied from one slot to another, both slots refer to the @emph{same} port, +even if the slots are declared as interior-mode. New ports can be constructed +dynamically and stored in data structures. + +Each channel is bound to a port when the channel is constructed, so the +destination port for a channel must exist before the channel itself. A channel +cannot be rebound to a different port from the one it was constructed with. + +Many channels can be bound to the same port, but each channel is bound to a +single port. In other words, channels and ports exist in an N:1 relationship, +N channels to 1 port. @footnote{It may help to remember nautical terminology +when differentiating channels from ports. Many different waterways -- +channels -- may lead to the same port.} + +Each port and channel can carry only one type of message. The message type is +encoded as a parameter of the channel or port type. The message type of a +channel is equal to the message type of the port it is bound to. + +Messages are sent asynchronously or semi-synchronously. 
A channel contains a +message queue and asynchronously sending a message merely inserts it into the +channel's queue; message receipt is the responsibility of the receiving task. + +Queued messages in channels are charged to the domain of the @emph{sending} +task. If too many messages are queued for transmission from a single sending +task, without being received by a receiving task, the sending task may exceed +its memory budget, which causes a run-time signal. To help control this +possibility, a semi-synchronous send operation is possible, which blocks until +there is room in the existing queue and then executes an asynchronous send. A +full @code{flush} operation is also available, which blocks until a channel's +queue is @emph{empty}. A @code{flush} does @emph{not} guarantee that a message +has been @emph{received} by any particular recipient when the sending task is +unblocked. @xref{Ref.Stmt.Flush}. + +The asynchronous message-send operator is @code{<+}. The semi-synchronous +message-send operator is @code{<|}. @xref{Ref.Stmt.Send}. The message-receive +operator is @code{<-}. @xref{Ref.Stmt.Recv}. + +@page +@node Ref.Task.Life +@subsection Ref.Task.Life +@c * Ref.Task.Life:: Task lifecycle and state transitions. + +The @dfn{lifecycle} of a task consists of a finite set of states and events +that cause transitions between the states. The lifecycle states of a task are: + +@itemize +@item running +@item blocked +@item failing +@item dead +@end itemize + +A task begins its lifecycle -- once it has been spawned -- in the +@emph{running} state. In this state it executes the statements of its entry +function, and any functions called by the entry function. + +A task may transition from the @emph{running} state to the @emph{blocked} +state any time it executes a communication statement on a port or channel that +cannot be immediately completed. 
When the communication statement can be +completed -- when a message arrives at a sender, or a queue drains +sufficiently to complete a semi-synchronous send -- then the blocked task will +unblock and transition back to @emph{running}. + +A task may transition to the @emph{failing} state at any time, due to an +un-trapped signal or the execution of a @code{fail} statement. Once +@emph{failing}, a task unwinds its stack and transitions to the @emph{dead} +state. Unwinding the stack of a task is done by the task itself, on its own +control stack. If a value with a destructor is freed during unwinding, the +code for the destructor is run, also on the task's control stack. If the +destructor code causes any subsequent state transitions, the task of unwinding +and failing may suspend temporarily, and may involve (recursive) unwinding of +the stack of a failed destructor. Nonetheless, the outermost unwinding +activity will continue until the stack is unwound and the task transitions to +the @emph{dead} state. There is no way to ``recover'' from task failure. + +A task in the @emph{dead} state cannot transition to other states; it exists +only to have its termination status inspected by other tasks, and/or to await +reclamation when the last reference to it drops. + +@page +@node Ref.Task.Dom +@subsection Ref.Task.Dom +@c * Ref.Task.Dom:: Task domains + +Every task belongs to a domain. A @dfn{domain} is a structure that owns tasks, +schedules tasks, tracks memory allocation within tasks and manages access to +runtime services on behalf of tasks. + +Typically each domain runs on a separate operating-system @emph{thread}, or +within an isolated operating-system @emph{process}. An easy way to think of a +domain is as an abstraction over either an operating-system thread @emph{or} a +process. + +The key feature of a domain is that it isolates memory references created by +the Rust tasks within it. No Rust task can refer directly to memory outside +its domain. 
+ +Tasks can own sub-domains, which in turn own their own tasks. Every domain +owns one @emph{root task}, which is the root of the tree of tasks owned by the +domain. + +@page +@node Ref.Task.Sched +@subsection Ref.Task.Sched +@c * Ref.Task.Sched:: Task scheduling model. + +Every task is @emph{scheduled} within its domain. @xref{Ref.Task.Dom}. The +currently scheduled task is given a finite @emph{time slice} in which to +execute, after which it is @emph{descheduled} at a loop-edge or similar +preemption point, and another task within the domain is scheduled, +pseudo-randomly. + +An executing task can @code{yield} control at any time, which deschedules it +immediately. Entering any other non-executing state (blocked, dead) similarly +deschedules the task. + +@page +@node Ref.Item +@section Ref.Item +@c * Ref.Item:: The components of a module. + +An @dfn{item} is a component of a module. Items are entirely determined at +compile-time, remain constant during execution, and may reside in read-only +memory. + +There are 5 primary kinds of item: modules, functions, iterators, objects and +types. + +All items form an implicit scope for the declaration of sub-items. In other +words, within a function, object or iterator, declarations of items can (in +many cases) be mixed with the statements, control blocks, and similar +artifacts that otherwise compose the item body. The meaning of these scoped +items is the same as if the item were declared outside the scope, except that +the item's @emph{path name} within the module namespace is qualified by the +name of the enclosing item. The exact locations in which sub-items may be +declared are given by the grammar. @xref{Ref.Gram}. + +Functions, iterators, objects and types may be @emph{parametrized} by +type. Type parameters are given as a comma-separated list of identifiers +enclosed in square brackets (@code{[]}), after the name of the item and before +its definition.
The type parameters of an item are part of the name, not the +type of the item; in order to refer to the type-parametrized item, a +referencing name must in general provide type arguments as a list of +comma-separated types enclosed within square brackets (though the +type-inference system can often infer such argument types from context). There +are no general parametric types. + +@menu +* Ref.Item.Mod:: Items defining modules. +* Ref.Item.Fn:: Items defining functions. +* Ref.Item.Iter:: Items defining iterators. +* Ref.Item.Obj:: Items defining objects. +* Ref.Item.Type:: Items defining the types of values and slots. +@end menu + +@page +@node Ref.Item.Mod +@subsection Ref.Item.Mod +@c * Ref.Item.Mod:: Items defining sub-modules. + +A @dfn{module item} contains declarations of other @emph{items}. The items +within a module may be functions, modules, objects or types. These +declarations have both static and dynamic interpretation. The purpose of a +module is to organize @emph{names} and control @emph{visibility}. Modules are +declared with the keyword @code{mod}. + +An example of a module: +@example +mod math @{ + type complex = (f64,f64); + fn sin(f64) -> f64 @{ + @dots{} + @} + fn cos(f64) -> f64 @{ + @dots{} + @} + fn tan(f64) -> f64 @{ + @dots{} + @} + @dots{} +@} +@end example + +Modules may also include any number of @dfn{import and export +declarations}. These declarations must precede any module item declarations +within the module, and control the visibility of names both within the module +and outside of it. + +@menu +* Ref.Item.Mod.Import:: Declarations for module-local synonyms. +* Ref.Item.Mod.Export:: Declarations for restricting visibility. +@end menu + +@page +@node Ref.Item.Mod.Import +@subsubsection Ref.Item.Mod.Import +@c * Ref.Item.Mod.Import:: Declarations for module-local synonyms. + +An @dfn{import declaration} creates one or more local name bindings synonymous +with some other name. 
Usually an import declaration is used to shorten the +path required to refer to a module item. + +@emph{Note}: unlike many languages, Rust's @code{import} declarations do +@emph{not} declare linkage-dependency with external crates. Linkage +dependencies are independently declared with @code{use} +directives. @xref{Ref.Comp.Crate}. + +An example of an import: +@example +import std.math.sin; +fn main() @{ + // Equivalent to 'log std.math.sin(1.0);' + log sin(1.0); +@} +@end example + +@page +@node Ref.Item.Mod.Export +@subsubsection Ref.Item.Mod.Export +@c * Ref.Item.Mod.Export:: Declarations for restricting visibility. + +An @dfn{export declaration} restricts the set of local declarations within a +module that can be accessed from code outside the module. By default, all +local declarations in a module are exported. If a module contains an export +declaration, this declaration replaces the default export with the export +specified. + +An example of an export: +@example +mod foo @{ + export primary; + + fn primary() @{ + helper(1, 2); + helper(3, 4); + @} + + fn helper(int x, int y) @{ + @dots{} + @} +@} + +fn main() @{ + foo.primary(); // Will compile. + foo.helper(2,3) // ERROR: will not compile. +@} +@end example + + + +@page +@node Ref.Item.Fn +@subsection Ref.Item.Fn +@c * Ref.Item.Fn:: Items defining functions. + +A @dfn{function item} defines a sequence of statements associated with a name +and a set of parameters. Functions are declared with the keyword +@code{fn}. Functions declare a set of @emph{input slots} as parameters, +through which the caller passes arguments into the function, and an +@emph{output slot} through which the function passes results back to the +caller. + +A function may also be copied into a first class @emph{value}, in which case +the value has the corresponding @emph{function type}, and can be used +otherwise exactly as a function item (with a minor additional cost of calling +the function, as such a call is indirect).
@xref{Ref.Type.Fn}. + +Every control path in a function ends with either a @code{ret} or @code{be} +statement. If a control path lacks a @code{ret} statement in source code, an +implicit @code{ret} statement is appended to the end of the control path +during compilation, returning the implicit @code{()} value. + +A function may have an @emph{effect}, which may be one of @code{io}, +@code{state} or @code{unsafe}. If no effect is specified, the function is said +to be @dfn{pure}. + +Any pure boolean function is also called a @emph{predicate}, and may be used +as part of the static typestate system. @xref{Ref.Stmt.Stat.Constr}. + +An example of a function: +@example +fn add(int x, int y) -> int @{ + ret x + y; +@} +@end example + +@page +@node Ref.Item.Iter +@subsection Ref.Item.Iter +@c * Ref.Item.Iter:: Items defining iterators. + +Iterators are function-like items that can @code{put} multiple values during +their execution before returning or tail-calling. + +Putting a value is similar to returning a value -- the argument to @code{put} +is copied into the caller's frame and control transfers back to the caller -- +but the iterator frame is only @emph{suspended} during the put, and will be +@emph{resumed} at the statement after the @code{put}, on the next iteration of +the caller's loop. + +The output type of an iterator is the type of value that the function will +@code{put}, before it eventually executes a @code{ret} or @code{be} statement +of type @code{()} and completes its execution. + +An iterator can only be called in the loop header of a matching @code{for +each} loop or as the argument in a @code{put each} statement. +@xref{Ref.Stmt.Foreach}.
+ +An example of an iterator: +@example +iter range(int lo, int hi) -> int @{ + let int i = lo; + while (i < hi) @{ + put i; + i = i + 1; + @} +@} + +let int sum = 0; +for each (int x = range(0,100)) @{ + sum += x; +@} +@end example + + +@page +@node Ref.Item.Obj +@subsection Ref.Item.Obj +@c * Ref.Item.Obj:: Items defining objects. + +An @dfn{object item} defines the @emph{state} and @emph{methods} of a set of +@emph{object values}. Object values have object types. @xref{Ref.Type.Obj}. + +An @emph{object item} declaration -- in addition to providing a scope for +state and method declarations -- implicitly declares a static function called +the @emph{object constructor}, as well as a named @emph{object type}. The name +given to the object item is resolved to a type when used in type context, or a +constructor function when used in value context (such as a call). + +Example of an object item: +@example +obj counter(int state) @{ + fn incr() @{ + state += 1; + @} + fn get() -> int @{ + ret state; + @} +@} + +let counter c = counter(1); + +c.incr(); +c.incr(); +check (c.get() == 3); +@end example + +@page +@node Ref.Item.Type +@subsection Ref.Item.Type +@c * Ref.Item.Type:: Items defining the types of values and slots. + +A @dfn{type} defines an @emph{interpretation} of a value in +memory. @xref{Ref.Type}. Types are declared with the keyword @code{type}. A +type's interpretation is used for the values held in any slot with that +type. @xref{Ref.Mem.Slot}. The interpretation of a value includes: + +@itemize +@item Whether the value is composed of sub-values or is indivisible. +@item Whether the value represents textual or numerical information. +@item Whether the value represents integral or floating-point information. +@item The sequence of memory operations required to access the value. +@item Whether the value is mutable or immutable. 
+@end itemize + +For example, the type @code{rec(u8 x, u8 y)} defines the +interpretation of values that are composite records, each containing +two unsigned two's complement 8-bit integers accessed through the +components @code{x} and @code{y}, and laid out in memory with the +@code{x} component preceding the @code{y} component. + +Some types are @emph{recursive}. A recursive type is one that includes +its own definition as a component, by named reference. Recursive types +are restricted to occur only within a single crate, and only through a +restricted form of @code{tag} type. @xref{Ref.Type.Tag}. + +@page +@node Ref.Type +@section Ref.Type + +Every slot and value in a Rust program has a type. The @dfn{type} of a +@emph{value} defines the interpretation of the memory holding it. The type of +a @emph{slot} may also include constraints. @xref{Ref.Type.Constr}. + +Built-in types and type-constructors are tightly integrated into the language, +in nontrivial ways that are not possible to emulate in user-defined +types. User-defined types have limited capabilities. In addition, every +built-in type or type-constructor name is reserved as a @emph{keyword} in +Rust; they cannot be used as user-defined identifiers in any context. + +@menu +* Ref.Type.Any:: An open sum of every possible type. +* Ref.Type.Mach:: Machine-level types. +* Ref.Type.Int:: The machine-dependent integer types. +* Ref.Type.Prim:: Primitive types. +* Ref.Type.Big:: The arbitrary-precision integer type. +* Ref.Type.Text:: Strings and characters. +* Ref.Type.Rec:: Labeled products of heterogeneous types. +* Ref.Type.Tup:: Unlabeled products of heterogeneous types. +* Ref.Type.Vec:: Open products of homogeneous types. +* Ref.Type.Tag:: Disjoint sums of heterogeneous types. +* Ref.Type.Fn:: Subroutine types. +* Ref.Type.Iter:: Scoped coroutine types. +* Ref.Type.Port:: Unique inter-task message-receipt endpoints. +* Ref.Type.Chan:: Copyable inter-task message-send capabilities.
+* Ref.Type.Task:: General coroutine-instance types. +* Ref.Type.Obj:: Abstract types. +* Ref.Type.Constr:: Constrained types. +* Ref.Type.Type:: Types describing types. +@end menu + +@page +@node Ref.Type.Any +@subsection Ref.Type.Any + +The type @code{any} is the union of all possible Rust types. A value of type +@code{any} is represented in memory as a pair consisting of an exterior value +of some non-@code{any} type @var{T} and a reflection of the type @var{T}. + +Values of type @code{any} can be used in an @code{alt type} statement, in +which the reflection is used to select a block corresponding to a particular +type extraction. @xref{Ref.Stmt.Alt}. + +@page +@node Ref.Type.Mach +@subsection Ref.Type.Mach + +The machine types are the following: + +@itemize +@item +The unsigned two's complement word types @code{u8}, @code{u16}, @code{u32} and +@code{u64}, with values drawn from the integer intervals +@iftex +@math{[0, 2^8 - 1]}, +@math{[0, 2^{16} - 1]}, +@math{[0, 2^{32} - 1]} and +@math{[0, 2^{64} - 1]} +@end iftex +@ifhtml +@html +[0, 2<sup>8</sup>-1], +[0, 2<sup>16</sup>-1], +[0, 2<sup>32</sup>-1] and +[0, 2<sup>64</sup>-1] +@end html +@end ifhtml + respectively. +@item +The signed two's complement word types @code{i8}, @code{i16}, @code{i32} and +@code{i64}, with values drawn from the integer intervals +@iftex +@math{[-(2^7), 2^7 - 1]}, +@math{[-(2^{15}), 2^{15} - 1]}, +@math{[-(2^{31}), 2^{31} - 1]} and +@math{[-(2^{63}), 2^{63} - 1]} +@end iftex +@ifhtml +@html +[-(2<sup>7</sup>), 2<sup>7</sup>-1], +[-(2<sup>15</sup>), 2<sup>15</sup>-1], +[-(2<sup>31</sup>), 2<sup>31</sup>-1] and +[-(2<sup>63</sup>), 2<sup>63</sup>-1] +@end html +@end ifhtml + respectively. +@item +The IEEE 754 single-precision and double-precision floating point types: +@code{f32} and @code{f64}, respectively. +@end itemize + +@page +@node Ref.Type.Int +@subsection Ref.Type.Int + + +The Rust type @code{uint}@footnote{A Rust @code{uint} is analogous to a C99 +@code{uintptr_t}.} is a two's complement unsigned integer type with +target-machine-dependent size.
Its size, in bits, is equal to the number of +bits required to hold any memory address on the target machine. + +The Rust type @code{int}@footnote{A Rust @code{int} is analogous to a C99 +@code{intptr_t}.} is a two's complement signed integer type with +target-machine-dependent size. Its size, in bits, is equal to the size of the +rust type @code{uint} on the same target machine. + + + +@page +@node Ref.Type.Prim +@subsection Ref.Type.Prim + +The primitive types are the following: + +@itemize +@item +The ``nil'' type @code{()}, having the single ``nil'' value +@code{()}.@footnote{The ``nil'' value @code{()} is @emph{not} a sentinel +``null pointer'' value for alias or exterior slots; the ``nil'' type is the +implicit return type from functions otherwise lacking a return type, and can +be used in other contexts (such as message-sending or type-parametric code) as +a zero-byte type.} +@item +The boolean type @code{bool} with values @code{true} and @code{false}. +@item +The machine types. +@item +The machine-dependent integer types. +@end itemize + + +@page +@node Ref.Type.Big +@subsection Ref.Type.Big + +The Rust type @code{big}@footnote{A Rust @code{big} is analogous to a Lisp +bignum or a Python long integer.} is an arbitrary precision integer type that +fits in a machine word @emph{when possible} and transparently expands to a +boxed ``big integer'' allocated in the run-time heap when it overflows or +underflows outside of the range of a machine word. + +A Rust @code{big} grows to accommodate extra binary digits as they are needed, +by taking extra memory from the memory budget available to each Rust task, and +should only exhaust its range due to memory exhaustion. + +@page +@node Ref.Type.Text +@subsection Ref.Type.Text + +The types @code{char} and @code{str} hold textual data. + +A value of type @code{char} is a Unicode character, represented as a 32-bit +unsigned word holding a UCS-4 codepoint. 
+ +A value of type @code{str} is a Unicode string, represented as a vector of +8-bit unsigned bytes holding a sequence of UTF-8 codepoints. + +@page +@node Ref.Type.Rec +@subsection Ref.Type.Rec + +The record type-constructor @code{rec} forms a new heterogeneous product of +slots.@footnote{The @code{rec} type-constructor is analogous to the +@code{struct} type-constructor in the Algol/C family, the @emph{record} types +of the ML family, or the @emph{structure} types of the Lisp family.} Fields of +a @code{rec} type are accessed by name and are arranged in memory in the order +specified by the @code{rec} type. + +An example of a @code{rec} type and its use: +@example +type point = rec(int x, int y); +let point p = rec(x=10, y=11); +let int px = p.x; +@end example + +@page +@node Ref.Type.Tup +@subsection Ref.Type.Tup + +The tuple type-constructor @code{tup} forms a new heterogeneous product of +slots exactly as the @code{rec} type-constructor does, with the difference +that tuple slots are automatically assigned implicit field names, given by +ascending integers prefixed by the underscore character: @code{_0}, @code{_1}, +@code{_2}, etc. The fields of a tuple are laid out in memory contiguously, +like a record, in the order specified by the tuple type. + +An example of a tuple type and its use: +@example +type pair = tup(int,str); +let pair p = tup(10,"hello"); +check (p._0 == 10); +p._1 = "world"; +check (p._1 == "world"); +@end example + + +@page +@node Ref.Type.Vec +@subsection Ref.Type.Vec + +The vector type-constructor @code{vec} represents a homogeneous array of +slots. A vector has a fixed size, and may or may not have mutable member +slots. If the slots of a vector are mutable, the vector is a @emph{state} +type. + +Vectors can be sliced. A slice expression builds a new vector by copying a +contiguous range -- given by a pair of indices representing a half-open +interval -- out of the sliced vector.
+ +An example of a @code{vec} type and its use: +@example +let vec[int] v = vec(7, 5, 3); +let int i = v.(2); +let vec[int] v2 = v.(0,1); // Form a slice. +@end example + +Vectors always @emph{allocate} a storage region sufficient to store the first +power of two worth of elements greater than or equal to the size of the +largest slice sharing the storage. This behaviour supports idiomatic in-place +``growth'' of a mutable slot holding a vector: + +@example +let mutable vec[int] v = vec(1, 2, 3); +v += vec(4, 5, 6); +@end example + +Normal vector concatenation causes the allocation of a fresh vector to hold +the result; in this case, however, the slot holding the vector recycles the +underlying storage in-place (since the reference-count of the underlying +storage is equal to 1). + +All accessible elements of a vector are always initialized, and access to a +vector is always bounds-checked. + + +@page +@node Ref.Type.Tag +@subsection Ref.Type.Tag + +The @code{tag} type-constructor forms new heterogeneous disjoint sum +types.@footnote{The @code{tag} type is analogous to a @code{data} constructor +declaration in ML or a @emph{pick ADT} in Limbo.} A @code{tag} type consists +of a number of @emph{variants}, each of which is independently named and takes +an optional tuple of arguments. + +The variants of a @code{tag} type may be recursive: that is, the definition of +a @code{tag} type may refer to type definitions that include the defined +@code{tag} type itself. Such recursion has restrictions: +@itemize +@item Recursive types can only be introduced through @code{tag} types. +@item A recursive @code{tag} type must have at least one non-recursive +variant (in order to give the recursion a basis case). +@item The recursive slots of recursive variants must be @emph{exterior} +slots (in order to bound the in-memory size of the variant).
+@item Recursive type definitions can cross module boundaries, but not module +@emph{visibility} boundaries, nor crate boundaries (in order to simplify the +module system). +@end itemize + +An example of a @code{tag} type and its use: +@example +type animal = tag(dog, cat); +let animal a = dog; +a = cat; +@end example + +An example of a @emph{recursive} @code{tag} type and its use: +@example +type list[T] = tag(nil, + cons(T, @@list[T])); +let list[int] a = cons(7, cons(13, nil)); +@end example + + +@page +@node Ref.Type.Fn +@subsection Ref.Type.Fn + +The function type-constructor @code{fn} forms new function types. A function +type consists of a sequence of input slots, an optional set of input +constraints (@pxref{Ref.Stmt.Stat.Constr}), an output slot, and an +@emph{effect}. @xref{Ref.Item.Fn}. + +An example of a @code{fn} type: +@example +fn add(int x, int y) -> int @{ + ret x + y; +@} + +let int x = add(5,7); + +type binop = fn(int,int) -> int; +let binop bo = add; +x = bo(5,7); +@end example + +@page +@node Ref.Type.Iter +@subsection Ref.Type.Iter + +The iterator type-constructor @code{iter} forms new iterator types. An +iterator type consists of a sequence of input slots, an optional set of input +constraints, an output slot, and an @emph{effect}. @xref{Ref.Item.Iter}. + +An example of an @code{iter} type: +@example +iter range(int x, int y) -> int @{ + while (x < y) @{ + put x; + x += 1; + @} +@} + +for each (int i = range(5,7)) @{ + @dots{}; +@} +@end example + + +@page +@node Ref.Type.Port +@subsection Ref.Type.Port + +The port type-constructor @code{port} forms types that describe ports. A port +is the @emph{receiving end} of a typed, asynchronous, simplex inter-task +communication facility. @xref{Ref.Task.Comm}. A @code{port} type takes a +single type parameter, denoting the type of value that can be received from a +@code{port} value of that type. + +Ports are modeled as mutable native types with built-in meaning to the +language.
They cannot be transmitted over channels or otherwise replicated, +and are always local to the task that creates them. + +An example of a @code{port} type: +@example +type svp = port[vec[str]]; +let svp p = get_port(); +let vec[str] v; +v <- p; +@end example + +@page +@node Ref.Type.Chan +@subsection Ref.Type.Chan + +The channel type-constructor @code{chan} forms types that describe channels. A +channel is the @emph{sending end} of a typed, asynchronous, simplex inter-task +communication facility. @xref{Ref.Task.Comm}. A @code{chan} type takes a +single type parameter, denoting the type of value that can be sent to a +channel of that type. + +Channels are immutable, and can be transmitted over channels to other +tasks. They are modeled as immutable native types with built-in meaning to the +language. + +When a task sends a message into a channel, the task forms an outgoing queue +associated with that channel. The per-task queue @emph{associated} with a +channel can be indirectly manipulated by the task, but is @emph{not} otherwise +considered ``part of'' the channel: the queue is ``part of'' the +@emph{sending task}. Sending a channel to another task does not copy the queue +associated with the channel. + +Channels are also @emph{weak}: a channel is directly coupled to a particular +destination port on a particular task, but does not keep that port or task +@emph{alive}. A channel may therefore fail to operate at any moment. If a task +sends to a channel that is connected to a nonexistent port, it receives a +signal. + +An example of a @code{chan} type: +@example +type svc = chan[vec[str]]; +let svc c = get_chan(); +let vec[str] v = vec("hello", "world"); +c <| v; +@end example + +@page +@node Ref.Type.Task +@subsection Ref.Type.Task + +The task type @code{task} describes values that are @emph{live +tasks}. + +Tasks form an @emph{ownership tree} in which each task (except the root task) +is directly owned by exactly one parent task.
The purpose of a variable of +@code{task} type is to manage the lifecycle of the associated +task. Communication is carried out solely using channels and ports. + +Like ports, tasks are modeled as mutable native types with built-in meaning to +the language. They cannot be transmitted over channels or otherwise +replicated, and are always local to the task that spawns them. + +If all references to a task are dropped (due to the release of any slots +holding those references), the released task immediately fails. +@xref{Ref.Task.Life}. + + +@page +@node Ref.Type.Obj +@subsection Ref.Type.Obj +@c * Ref.Type.Obj:: Object types. + +An @dfn{object type} describes values of abstract type that carry some hidden +@emph{fields} and are accessed through a set of un-ordered +@emph{methods}. Every object item (@pxref{Ref.Item.Obj}) implicitly declares +an object type carrying methods with types derived from all the methods of the +object item. + +Object types can also be declared in isolation, independent of any object item +declaration. Such a ``plain'' object type can be used to describe an interface +that a variety of particular objects may conform to, by supporting a superset +of the methods. + +An object type that can contain a state must be declared as a @code{state obj} +like any other state type. And similarly a method type that performs I/O or +makes native calls must be declared @code{io} or @code{unsafe}, like any other +function. + +Moreover, @emph{all} methods of a state object are implicitly state functions -- as +they all bind the same mutable state field(s) -- so implicitly have an effect +lower than @code{io}. It is therefore unnecessary to declare methods within a +state object type (or state object item) as @code{io}.
+ +An example of an object type with two separate object items supporting it, and +a client function using both items via the object type: + +@example + +state type taker = + state obj @{ + fn take(int); + @}; + +state obj adder(mutable int x) @{ + fn take(int y) @{ + x += y; + @} +@} + +obj sender(chan[int] c) @{ + io fn take(int z) @{ + c <| z; + @} +@} + +fn give_ints(taker t) @{ + t.take(1); + t.take(2); + t.take(3); +@} + +let port[int] p = port(); + +let taker t1 = adder(0); +let taker t2 = sender(chan(p)); + +give_ints(t1); +give_ints(t2); + +@end example + + + +@page +@node Ref.Type.Constr +@subsection Ref.Type.Constr +@c * Ref.Type.Constr:: Constrained types. + +A @dfn{constrained type} is a type that carries a @emph{formal constraint} +(@pxref{Ref.Stmt.Stat.Constr}), which is similar to a normal constraint except +that the @emph{base name} of any slots mentioned in the constraint must be the +special @emph{formal symbol} @emph{*}. + +When a constrained type is instantiated in a particular slot declaration, the +formal symbol in the constraint is replaced with the name of the declared slot +and the resulting constraint is checked immediately after the slot is +declared. @xref{Ref.Stmt.Check}. + +An example of a constrained type with two separate instantiations: +@example +type ordered_range = rec(int low, int high) : less_than(*.low, *.high); + +let ordered_range rng1 = rec(low=5, high=7); +// implicit: 'check less_than(rng1.low, rng1.high);' + +let ordered_range rng2 = rec(low=15, high=17); +// implicit: 'check less_than(rng2.low, rng2.high);' +@end example + +@page +@node Ref.Type.Type +@subsection Ref.Type.Type +@c * Ref.Type.Type:: Types describing types. + +@emph{TODO}. + +@page +@node Ref.Expr +@section Ref.Expr +@c * Ref.Expr:: Parsed and primitive expressions. + +Rust has two kinds of expressions: @emph{parsed expressions} and +@emph{primitive expressions}. The former are syntactic sugar and are +eliminated during parsing. 
The latter are very minimal, consisting only of +paths and primitive literals, possibly combined via a single level +(non-recursive) unary or binary machine-level operation (ALU or +FPU). @xref{Ref.Path}. + +For the most part, Rust semantics are defined in terms of @emph{statements}, +which parsed expressions are desugared to. The desugaring is defined in the +grammar. @xref{Ref.Gram}. The residual primitive expressions appear only in the +right hand side of copy statements. @xref{Ref.Stmt.Copy}. + +@page +@node Ref.Stmt +@section Ref.Stmt +@c * Ref.Stmt:: Executable statements. + +A @dfn{statement} is a component of a block, which is in turn a component of +an outer block, a function or an iterator. When a function is spawned into a +task, the task @emph{executes} statements in an order determined by the body +of the enclosing structure. Each statement causes the task to perform certain +actions. + +@menu +* Ref.Stmt.Stat:: The static typestate system of statement analysis. +* Ref.Stmt.Decl:: Statement declaring an item or slot. +* Ref.Stmt.Copy:: Statement for copying a value between two slots. +* Ref.Stmt.Spawn:: Statements for creating new tasks. +* Ref.Stmt.Send:: Statements for sending a value into a channel. +* Ref.Stmt.Flush:: Statement for flushing a channel queue. +* Ref.Stmt.Recv:: Statement for receiving a value from a channel. +* Ref.Stmt.Call:: Statement for calling a function. +* Ref.Stmt.Bind:: Statement for binding arguments to functions. +* Ref.Stmt.Ret:: Statement for stopping and producing a value. +* Ref.Stmt.Be:: Statement for stopping and executing a tail call. +* Ref.Stmt.Put:: Statement for pausing and producing a value. +* Ref.Stmt.Fail:: Statement for causing task failure. +* Ref.Stmt.Log:: Statement for logging values to diagnostic buffers. +* Ref.Stmt.Note:: Statement for logging values during failure. +* Ref.Stmt.While:: Statement for simple conditional looping. +* Ref.Stmt.Break:: Statement for terminating a loop.
+* Ref.Stmt.Cont:: Statement for terminating a single loop iteration. +* Ref.Stmt.For:: Statement for looping over strings and vectors. +* Ref.Stmt.Foreach:: Statement for looping via an iterator. +* Ref.Stmt.If:: Statement for simple conditional branching. +* Ref.Stmt.Alt:: Statement for complex conditional branching. +* Ref.Stmt.Prove:: Statement for static assertion of typestate. +* Ref.Stmt.Check:: Statement for dynamic assertion of typestate. +* Ref.Stmt.IfCheck:: Statement for dynamic testing of typestate. +@end menu + +@page +@node Ref.Stmt.Stat +@subsection Ref.Stmt.Stat +@c * Ref.Stmt.Stat:: The static typestate system of statement analysis. + +Statements have a detailed static semantics. The static semantics determine, +on a statement-by-statement basis, the @emph{effects} the statement has on its +environment, as well as the @emph{legality} of the statement in its environment. + +The legality of a statement is partly governed by syntactic rules, partly by +its conformance to the types of slots it affects, and partly by a +statement-oriented static dataflow analysis. This section describes the +statement-oriented static dataflow analysis, also called the @emph{typestate} +system. + +@menu +* Ref.Stmt.Stat.Point:: Inter-statement positions of logical judgements. +* Ref.Stmt.Stat.CFG:: The control flow graph formed by statements. +* Ref.Stmt.Stat.Constr:: Predicates applied to slots. +* Ref.Stmt.Stat.Cond:: Constraints required and implied by a statement. +* Ref.Stmt.Stat.Typestate:: Constraints that hold at points. +* Ref.Stmt.Stat.Check:: Relating dynamic state to static typestate. +@end menu + +@page +@node Ref.Stmt.Stat.Point +@subsubsection Ref.Stmt.Stat.Point +@c * Ref.Stmt.Stat.Point:: Inter-statement positions of logical judgements. + +A @dfn{point} exists before and after any statement in a Rust program.
+For example, this code: + +@example + s = "hello, world"; + print(s); +@end example + +Consists of two statements and four points: + +@itemize +@item the point before the first statement +@item the point after the first statement +@item the point before the second statement +@item the point after the second statement +@end itemize + +The typestate system reasons over points, rather than statements. This may +seem counter-intuitive, but points are the more primitive concept. Another way +of thinking about a point is as a set of @emph{instants in time} at which the +state of a task is fixed. By contrast, a statement represents a @emph{duration +in time}, during which the state of the task changes. The typestate system is +concerned with constraining the possible states of a task's memory at +@emph{instants}; it is meaningless to speak of the state of a task's memory +``at'' a statement, as each statement is likely to change the contents of +memory. + +@page +@node Ref.Stmt.Stat.CFG +@subsubsection Ref.Stmt.Stat.CFG +@c * Ref.Stmt.Stat.CFG:: The control flow graph formed by statements. + +Each @emph{point} can be considered a vertex in a directed @emph{graph}. Each +kind of statement implies a single edge in this graph between the point before +the statement and the point after it, as well as a set of zero or more edges +from the points of the statement to points before other statements. The edges +between points represent @emph{possible} indivisible control transfers that +might occur during execution. + +This implicit graph is called the @dfn{control flow graph}, or @dfn{CFG}. + +@page +@node Ref.Stmt.Stat.Constr +@subsubsection Ref.Stmt.Stat.Constr +@c * Ref.Stmt.Stat.Constr:: Predicates applied to slots. + +A @dfn{predicate} is any pure boolean function. @xref{Ref.Item.Fn}. + +A @dfn{constraint} is a predicate applied to specific slots. 
+ +For example, consider the following code: + +@example +fn is_less_than(int a, int b) -> bool @{ + ret a < b; +@} + +fn test() @{ + let int x = 10; + let int y = 20; + check is_less_than(x,y); +@} +@end example + +This example defines the predicate @code{is_less_than}, and applies it to the +slots @code{x} and @code{y}. The constraint being checked on the third line of +the function is @code{is_less_than(x,y)}. + +Predicates can only apply to slots holding immutable values. The slots a +predicate applies to can themselves be mutable, but the types of values held +in those slots must be immutable. + +@page +@node Ref.Stmt.Stat.Cond +@subsubsection Ref.Stmt.Stat.Cond +@c * Ref.Stmt.Stat.Cond:: Constraints required and implied by a statement. + +A @dfn{condition} is a set of zero or more constraints. + +Each @emph{point} has an associated @emph{condition}: + +@itemize +@item The @dfn{precondition} of a statement is the condition the statement +requires in the point before the statement. +@item The @dfn{postcondition} of a statement is the condition the statement +enforces in the point after the statement. +@end itemize + +Any constraint present in the precondition and @emph{absent} in the +postcondition is considered to be @emph{dropped} by the statement. + +@page +@node Ref.Stmt.Stat.Typestate +@subsubsection Ref.Stmt.Stat.Typestate +@c * Ref.Stmt.Stat.Typestate:: Constraints that hold at points. + +The typestate checking system @emph{calculates} an additional +condition for each point called its typestate. For a given statement, +we call the two typestates associated with its two points the prestate +and the poststate. + +@itemize +@item The @dfn{prestate} of a statement is the typestate of the point +before the statement. +@item The @dfn{poststate} of a statement is the typestate of the point +after the statement. +@end itemize + +A @dfn{typestate} is a condition that has @emph{been determined by the +typestate algorithm} to hold at a point.
This is a subtle but important point +to understand: preconditions and postconditions are @emph{inputs} to the +typestate algorithm; prestates and poststates are @emph{outputs} from the +typestate algorithm. + +The typestate algorithm analyses the preconditions and postconditions of every +statement in a block, and computes a condition for each +typestate. Specifically: + +@itemize +@item Initially, every typestate is empty. +@item Each statement's poststate is given the union of the statement's +prestate, precondition, and postcondition. +@item Each statement's poststate has the difference between the statement's +precondition and postcondition removed. +@item Each statement's prestate is given the intersection of the poststates +of every parent statement in the CFG. +@item The previous three steps are repeated until no typestates in the +block change. +@end itemize + +The typestate algorithm is a very conventional dataflow calculation, and can +be performed using bit-set operations, with one bit per predicate and one +bit-set per condition. + +After the typestates of a block are computed, the typestate algorithm checks +that every constraint in the precondition of a statement is satisfied by its +prestate. If any preconditions are not satisfied, the mismatch is considered a +static (compile-time) error. + + +@page +@node Ref.Stmt.Stat.Check +@subsubsection Ref.Stmt.Stat.Check +@c * Ref.Stmt.Stat.Check:: Relating dynamic state to static typestate. + +The key mechanism that connects run-time semantics and compile-time analysis +of typestates is the use of @code{check} statements. @xref{Ref.Stmt.Check}. 
A +@code{check} statement guarantees that @emph{if} control were to proceed past +it, the predicate associated with the @code{check} would have succeeded, so +the constraint being checked @emph{statically} holds in subsequent +statements.@footnote{A @code{check} statement is similar to an @code{assert} +call in a C program, with the significant difference that the Rust compiler +@emph{tracks} the constraint that each @code{check} statement +enforces. Naturally, @code{check} statements cannot be omitted from a +``production build'' of a Rust program the same way @code{asserts} are +frequently disabled in deployed C programs.} + +It is important to understand that the typestate system has @emph{no insight} +into the meaning of a particular predicate. Predicates and constraints are not +evaluated in any way at compile time. Predicates are treated as specific (but +unknown) functions applied to specific (also unknown) slots. All the typestate +system does is track which of those predicates -- whatever they calculate -- +@emph{must have been checked already} in order for program control to reach a +particular point in the CFG. The fundamental building block, therefore, is the +@code{check} statement, which tells the typestate system ``if control passes +this statement, the checked predicate holds''. + +From this building block, constraints can be propagated to function signatures +and constrained types, and the responsibility to @code{check} a constraint +pushed further and further away from the site at which the program requires it +to hold in order to execute properly. + +@page +@node Ref.Stmt.Decl +@subsection Ref.Stmt.Decl +@c * Ref.Stmt.Decl:: Statement declaring an item or slot. + +A @dfn{declaration statement} is one that introduces a @emph{name} into the +enclosing statement block. The declared name may denote a new slot or a new +item. The scope of the name extends to the entire containing block, both +before and after the declaration. 
+ +@menu +* Ref.Stmt.Decl.Item:: Statement declaring an item. +* Ref.Stmt.Decl.Slot:: Statement declaring a slot. +@end menu + +@page +@node Ref.Stmt.Decl.Item +@subsubsection Ref.Stmt.Decl.Item +@c * Ref.Stmt.Decl.Item:: Statement declaring an item. + +An @dfn{item declaration statement} has a syntactic form identical to an item +declaration within a module. Declaring an item -- a function, iterator, +object, type or module -- locally within a statement block is simply a way of +restricting its scope to a narrow region containing all of its uses; it is +otherwise identical in meaning to declaring the item outside the statement +block. + +Note: there is no implicit capture of the function's dynamic environment when +declaring a function-local item. + +@page +@node Ref.Stmt.Decl.Slot +@subsubsection Ref.Stmt.Decl.Slot +@c * Ref.Stmt.Decl.Slot:: Statement declaring a slot. + +A @dfn{slot declaration statement} has one of two forms: + +@itemize +@item @code{let} @var{mode-and-type} @var{slot} @var{optional-init}; +@item @code{auto} @var{slot} @var{optional-init}; +@end itemize + +Where @var{mode-and-type} is a slot mode and type expression, @var{slot} is +the name of the slot being declared, and @var{optional-init} is either the +empty string or an equals sign (@code{=}) followed by a primitive expression. + +Both forms introduce a new slot into the containing block scope. The new slot +is visible across the entire scope, but is initialized only at the point +following the declaration statement. + +The latter (@code{auto}) form of slot declaration causes the compiler to infer +the static type of the slot through unification with the types of values +assigned to the slot in the remaining code in the block scope. Inferred +slots always have @emph{interior} mode. @xref{Ref.Mem.Slot}. + + + +@page +@node Ref.Stmt.Copy +@subsection Ref.Stmt.Copy +@c * Ref.Stmt.Copy:: Statement for copying a value between two slots.
+ +A @dfn{copy statement} consists of an @emph{lval} -- a name denoting a slot -- +followed by an equals-sign (@code{=}) and a primitive +expression. @xref{Ref.Expr}. + +Executing a copy statement causes the value denoted by the expression -- +either a value in a slot or a primitive combination of values held in slots -- +to be copied into the slot denoted by the @emph{lval}. + +A copy may entail the formation of references, the adjustment of reference +counts, execution of destructors, or similar adjustments in order to respect +the @code{lval} slot mode and any existing value held in it. All such +adjustment is automatic and implied by the @code{=} operator. + +An example of three different copy statements: +@example +x = y; +x.y = z; +x.y = z + 2; +@end example + +@page +@node Ref.Stmt.Spawn +@subsection Ref.Stmt.Spawn +@c * Ref.Stmt.Spawn:: Statements creating new tasks. + +A @code{spawn} statement consists of the keyword @code{spawn}, followed by a +normal @emph{call} statement (@pxref{Ref.Stmt.Call}). A @code{spawn} +statement causes the runtime to construct a new task executing the called +function. The called function is referred to as the @dfn{entry function} for +the spawned task, and its arguments are copied from the spawning task to the +spawned task before the spawned task begins execution. + +Only arguments of interior or exterior mode are permitted in the function +called by a spawn statement, not arguments with alias mode. + +The result of a @code{spawn} statement is a @code{task} value. + +An example of a @code{spawn} statement: +@example +fn helper(chan[u8] out) @{ + // do some work. + out <| result; +@} + +let port[u8] out; +let task p = spawn helper(chan(out)); +// let task run, do other things. +auto result <- out; + +@end example + +@page +@node Ref.Stmt.Send +@subsection Ref.Stmt.Send +@c * Ref.Stmt.Send:: Statements for sending a value into a channel. + +Sending a value through a channel can be done via two different statements.
+Both statements take an @emph{lval}, denoting a channel, and a value to send +into the channel. The action of @emph{sending} varies depending on the +@dfn{send operator} employed. + +The @emph{asynchronous send} operator @code{<+} adds a value to the channel's +queue, without blocking. If the queue is full, it is extended, taking memory +from the task's domain. If the task memory budget is exhausted, a signal is +sent to the task. + +The @emph{semi-synchronous send} operator @code{<|} adds a value to the +channel's queue @emph{only if} the queue has room; if the queue is full, the +operation @emph{blocks} the sender until the queue has room. + +An example of an asynchronous send: +@example +chan[str] c = @dots{}; +c <+ "hello, world"; +@end example + +An example of a semi-synchronous send: +@example +chan[str] c = @dots{}; +c <| "hello, world"; +@end example + +@page +@node Ref.Stmt.Flush +@subsection Ref.Stmt.Flush +@c * Ref.Stmt.Flush:: Statement for flushing a channel queue. + +A @code{flush} statement takes a channel and blocks the flushing task until +the channel's queue has emptied. It can be used to implement a more precise +form of flow-control than with the send operators alone. + +An example of the @code{flush} statement: +@example +chan[str] c = @dots{}; +c <| "hello, world"; +flush c; +@end example + + +@page +@node Ref.Stmt.Recv +@subsection Ref.Stmt.Recv +@c * Ref.Stmt.Recv:: Statement for receiving a value from a channel. + +The @dfn{receive statement} takes an @var{lval} to receive into and an +expression denoting a port, and applies the @emph{receive operator} +(@code{<-}) to the pair, copying a value out of the port and into the +@var{lval}. 
The statement causes the receiving task to enter the @emph{blocked +reading} state until a task is sending a value to the port, at which point the +runtime pseudo-randomly selects a sending task and copies a value from the +head of one of the task queues to the receiving slot, and un-blocks the +receiving task. @xref{Ref.Run.Comm}. + +An example of a @emph{receive}: +@example +port[str] p = @dots{}; +let str s <- p; +@end example + +@page +@node Ref.Stmt.Call +@subsection Ref.Stmt.Call +@c * Ref.Stmt.Call:: Statement for calling a function. + +A @dfn{call statement} invokes a function, providing a tuple of input slots +and a reference to an output slot. If the function eventually returns, then +the statement completes. + +A call statement statically requires that the precondition declared in the +callee's signature is satisfied by the statement prestate. In this way, +typestates propagate through function boundaries. @xref{Ref.Stmt.Stat}. + +An example of a call statement: +@example +let int x = add(1, 2); +@end example + +@page +@node Ref.Stmt.Bind +@subsection Ref.Stmt.Bind +@c * Ref.Stmt.Bind:: Statement for binding arguments to functions. + +A @dfn{bind statement} constructs a new function from an existing +function.@footnote{The @code{bind} statement is analogous to the @code{bind} +expression in the Sather language.} The new function has zero or more of its +arguments @emph{bound} into a new, hidden exterior tuple that holds the +bindings. For each concrete argument passed in the @code{bind} statement, the +corresponding parameter in the existing function is @emph{omitted} as a +parameter of the new function. For each argument passed the placeholder symbol +@code{_} in the @code{bind} statement, the corresponding parameter of the +existing function is @emph{retained} as a parameter of the new function. 
+ +Any subsequent invocation of the new function with residual arguments causes +invocation of the existing function with the combination of bound arguments +and residual arguments that was specified during the binding. + +An example of a @code{bind} statement: +@example +fn add(int x, int y) -> int @{ + ret x + y; +@} +type single_param_fn = fn(int) -> int; + +let single_param_fn add4 = bind add(4, _); + +let single_param_fn add5 = bind add(_, 5); + +check (add(4,5) == add4(5)); +check (add(4,5) == add5(4)); + +@end example + +A @code{bind} statement generally stores a copy of the bound arguments in the +hidden exterior tuple. For bound interior slots and alias slots in the bound +function signature, an interior slot is allocated in the hidden tuple and +populated with a copy of the bound value. For bound exterior slots in the +bound function signature, an exterior slot is allocated in the hidden tuple +and populated with a copy of the bound value, an exterior (pointer) value. + +The @code{bind} statement is a lightweight mechanism for simulating the more +elaborate construct of @emph{lexical closures} that exist in other +languages. Rust has no support for lexical closures, but many realistic uses +of them can be achieved with @code{bind} statements. + + +@page +@node Ref.Stmt.Ret +@subsection Ref.Stmt.Ret +@c * Ref.Stmt.Ret:: Statement for stopping and producing a value. + +Executing a @code{ret} statement@footnote{A @code{ret} statement is +analogous to a @code{return} statement in the C family.} copies a +value into the return slot of the current function, destroys the +current function activation frame, and transfers control to the caller +frame. + +An example of a @code{ret} statement: +@example +fn max(int a, int b) -> int @{ + if (a > b) @{ + ret a; + @} + ret b; +@} +@end example + +@page +@node Ref.Stmt.Be +@subsection Ref.Stmt.Be +@c * Ref.Stmt.Be:: Statement for stopping and executing a tail call. 
+ +Executing a @code{be} statement @footnote{A @code{be} statement is +analogous to a @code{become} statement in Newsqueak or Alef.} destroys the +current function activation frame and replaces it with an activation frame for +the called function. In other words, @code{be} executes a tail-call. The +syntactic form of a @code{be} statement is therefore limited to @emph{tail +position}: its argument must be a @emph{call expression}, and it must be the +last statement in a block. + +An example of a @code{be} statement: +@example +fn print_loop(int n) @{ + if (n <= 0) @{ + ret; + @} else @{ + print_int(n); + be print_loop(n-1); + @} +@} +@end example + +The above example executes in constant space, replacing each frame with a new +copy of itself. + + + +@page +@node Ref.Stmt.Put +@subsection Ref.Stmt.Put +@c * Ref.Stmt.Put:: Statement for pausing and producing a value. + +Executing a @code{put} statement copies a value into the put slot of the +current iterator, suspends execution of the current iterator, and transfers +control to the current put-recipient frame. + +A @code{put} statement is only valid within an iterator. @footnote{A +@code{put} statement is analogous to a @code{yield} statement in the CLU, +Sather and Objective C 2.0 languages, or in more recent languages providing a +``generator'' facility, such as Python, Javascript or C#. Like the generators +of CLU, Sather and Objective C 2.0, but @emph{unlike} these later languages, +Rust's iterators reside on the stack and obey a strict stack discipline.} The +current put-recipient will eventually resume the suspended iterator containing +the @code{put} statement, either continuing execution after the @code{put} +statement, or terminating its execution and destroying the iterator frame. + + +@page +@node Ref.Stmt.Fail +@subsection Ref.Stmt.Fail +@c * Ref.Stmt.Fail:: Statement for causing task failure. + +Executing a @code{fail} statement causes a task to enter the @emph{failing} +state.
In the @emph{failing} state, a task unwinds its stack, destroying all +frames and freeing all resources until it reaches its entry frame, at which +point it halts execution in the @emph{dead} state. + +@page +@node Ref.Stmt.Log +@subsection Ref.Stmt.Log +@c * Ref.Stmt.Log:: Statement for logging values to diagnostic buffers. + +Executing a @code{log} statement may, depending on runtime configuration, +cause a value to be appended to an internal diagnostic logging buffer provided +by the runtime or emitted to a system console. Log statements are enabled or +disabled dynamically at run-time on a per-task and per-item +basis. @xref{Ref.Run.Log}. + +Executing a @code{log} statement is not considered an @code{io} effect in the +effect system. In other words, a pure function remains pure even if it +contains a log statement. + +@example +@end example + +@page +@node Ref.Stmt.Note +@subsection Ref.Stmt.Note +@c * Ref.Stmt.Note:: Statement for logging values during failure. + +A @code{note} statement has no effect during normal execution. The purpose of +a @code{note} statement is to provide additional diagnostic information to the +logging subsystem during task failure. @xref{Ref.Stmt.Log}. Using @code{note} +statements, normal diagnostic logging can be kept relatively sparse, while +still providing verbose diagnostic ``back-traces'' when a task fails. + +When a task is failing, control frames @emph{unwind} from the innermost frame +to the outermost, and from the innermost lexical block within an unwinding +frame to the outermost. When unwinding a lexical block, the runtime processes +all the @code{note} statements in the block sequentially, from the first +statement of the block to the last. During processing, a @code{note} +statement has equivalent meaning to a @code{log} statement: it causes the +runtime to append the argument of the @code{note} to the internal logging +diagnostic buffer.
+ +An example of a @code{note} statement: +@example +fn read_file_lines(&str path) -> vec[str] @{ + note path; + vec[str] r; + file f = open_read(path); + for* (str &s = lines(f)) @{ + vec.append(r,s); + @} + ret r; +@} +@end example + +In this example, if the task fails while attempting to open or read a file, +the runtime will log the path name that was being read. If the function +completes normally, the runtime will not log the path. + +A slot that is marked by a @code{note} statement does @emph{not} have its +value copied aside when control passes through the @code{note}. In other +words, if a @code{note} statement notes a particular slot, and code after the +@code{note} mutates that slot, and then a subsequent failure occurs, the +@emph{mutated} value will be logged during unwinding, @emph{not} the original +value that was held in the slot at the moment control passed through the +@code{note} statement. + +@page +@node Ref.Stmt.While +@subsection Ref.Stmt.While +@c * Ref.Stmt.While:: Statement for simple conditional looping. + +A @code{while} statement is a loop construct. A @code{while} loop may be +either a simple @code{while} or a @code{do}-@code{while} loop. + +In the case of a simple @code{while}, the loop begins by evaluating the +boolean loop conditional expression. If the loop conditional expression +evaluates to @code{true}, the loop body block executes and control returns to +the loop conditional expression. If the loop conditional expression evaluates +to @code{false}, the @code{while} statement completes. + +In the case of a @code{do}-@code{while}, the loop begins with an execution of +the loop body. After the loop body executes, it evaluates the loop conditional +expression. If it evaluates to @code{true}, control returns to the beginning +of the loop body. If it evaluates to @code{false}, control exits the loop.
+ +An example of a simple @code{while} statement: +@example +while (i < 10) @{ + print("hello\n"); + i = i + 1; +@} +@end example + +An example of a @code{do}-@code{while} statement: +@example +do @{ + print("hello\n"); + i = i + 1; +@} while (i < 10); +@end example + +@page +@node Ref.Stmt.Break +@subsection Ref.Stmt.Break +@c * Ref.Stmt.Break:: Statement for terminating a loop. + +Executing a @code{break} statement immediately terminates the innermost loop +enclosing it. It is only permitted in the body of a loop. + +@page +@node Ref.Stmt.Cont +@subsection Ref.Stmt.Cont +@c * Ref.Stmt.Cont:: Statement for terminating a single loop iteration. + +Executing a @code{cont} statement immediately terminates the current iteration +of the innermost loop enclosing it, returning control to the loop +@emph{head}. In the case of a @code{while} loop, the head is the conditional +expression controlling the loop. In the case of a @code{for} or @code{for +each} loop, the head is the iterator or vector-slice increment controlling the +loop. + +A @code{cont} statement is only permitted in the body of a loop. + + +@page +@node Ref.Stmt.For +@subsection Ref.Stmt.For +@c * Ref.Stmt.For:: Statement for looping over strings and vectors. + +A @dfn{for loop} is controlled by a vector or string. The for loop +bounds-checks the underlying sequence @emph{once} when initiating the loop, +then repeatedly copies each value of the underlying sequence into the element +variable, executing the loop body once per copy. To perform a for loop on a +sub-range of a vector or string, form a temporary slice over the sub-range and +run the loop over the slice. 
+ +Example of four for loops, all equivalent: +@example +let vec[foo] v = vec(a, b, c); + +for (&foo e in v.(0, _vec.len(v))) @{ + bar(e); +@} + +for (&foo e in v.(0,)) @{ + bar(e); +@} + +for (&foo e in v.(,)) @{ + bar(e); +@} + +for (&foo e in v) @{ + bar(e); +@} +@end example + +@page +@node Ref.Stmt.Foreach +@subsection Ref.Stmt.Foreach +@c * Ref.Stmt.Foreach:: Statement for general conditional looping. + +A @dfn{foreach loop} is denoted by the @code{for each} keywords, and is +controlled by an iterator. The loop executes once for each value @code{put} by +the iterator. When the iterator returns or fails, the loop terminates. + +Example of a foreach loop: +@example +let str txt; +let vec[str] lines; +for each (&str s = _str.split(txt, "\n")) @{ + vec.push(lines, s); +@} +@end example + + +@page +@node Ref.Stmt.If +@subsection Ref.Stmt.If +@c * Ref.Stmt.If:: Statement for simple conditional branching. + +An @code{if} statement is a conditional branch in program control. The form of +an @code{if} statement is a parenthesized condition expression, followed by a +consequent block, and an optional trailing @code{else} block. The condition +expression must have type @code{bool}. If the condition expression evaluates +to @code{true}, the consequent block is executed and any @code{else} block is +skipped. If the condition expression evaluates to @code{false}, the consequent +block is skipped and any @code{else} block is executed. + +@page +@node Ref.Stmt.Alt +@subsection Ref.Stmt.Alt +@c * Ref.Stmt.Alt:: Statement for complex conditional branching. + +An @code{alt} statement is a multi-directional branch in program control. +There are three kinds of @code{alt} statement: communication @code{alt} +statements, pattern @code{alt} statements, and @code{alt type} statements.
+ +The form of each kind of @code{alt} is similar: an initial @emph{head} that +describes the criteria for branching, followed by a sequence of zero or more +@emph{arms}, each of which describes a @emph{case} and provides a @emph{block} +of statements associated with the case. When an @code{alt} is executed, +control enters the head, determines which of the cases to branch to, branches +to the block associated with the chosen case, and then proceeds to the +statement following the @code{alt} when the case block completes. + +@menu +* Ref.Stmt.Alt.Comm:: Statement for branching on communication events. +* Ref.Stmt.Alt.Pat:: Statement for branching on pattern matches. +* Ref.Stmt.Alt.Type:: Statement for branching on types. +@end menu + +@page +@node Ref.Stmt.Alt.Comm +@subsubsection Ref.Stmt.Alt.Comm +@c * Ref.Stmt.Alt.Comm:: Statement for branching on communication events. + +The simplest form of @code{alt} statement is the @emph{communication} +@code{alt}. The cases of a communication @code{alt}'s arms are send, receive +and flush statements. @xref{Ref.Task.Comm}. + +To execute a communication @code{alt}, the runtime checks all of the ports and +channels involved in the arms of the statement to see if any @code{case} can +execute without blocking. If no @code{case} can execute, the task blocks, and +the runtime unblocks the task when a @code{case} @emph{can} execute. The +runtime then makes a pseudo-random choice from among the set of @code{case} +statements that can execute, executes the statement of the @code{case} and +branches to the block of that arm. + +An example of a communication @code{alt} statement: +@example +let chan c[int] = foo(); +let port p[str] = bar(); +let int x = 0; +let vec[str] strs; + +alt @{ + case (str s <- p) @{ + vec.append(strs, s); + @} + case (c <| x) @{ + x++; + @} +@} +@end example + +@page +@node Ref.Stmt.Alt.Pat +@subsubsection Ref.Stmt.Alt.Pat +@c * Ref.Stmt.Alt.Pat:: Statement for branching on pattern matches.
+ +A pattern @code{alt} statement branches on a @emph{pattern}. The exact form of +matching that occurs depends on the pattern. Patterns consist of some +combination of literals, tag constructors, variable binding specifications and +placeholders (@code{_}). A pattern @code{alt} has a parenthesized @emph{head +expression}, which is the value to compare to the patterns. The type of the +patterns must equal the type of the head expression. + +To execute a pattern @code{alt} statement, first the head expression is +evaluated, then its value is sequentially compared to the patterns in the arms +until a match is found. The first arm with a matching @code{case} pattern is +chosen as the branch target of the @code{alt}, any variables bound by the +pattern are assigned to local @emph{auto} slots in the arm's block, and +control enters the block. + +An example of a pattern @code{alt} statement: + +@example +type list[X] = tag(nil, cons(X, @@list[X])); + +let list[int] x = cons(10, cons(11, nil)); + +alt (x) @{ + case (cons(a, cons(b, _))) @{ + process_pair(a,b); + @} + case (cons(v=10, _)) @{ + process_ten(v); + @} + case (_) @{ + fail; + @} +@} +@end example + + +@page +@node Ref.Stmt.Alt.Type +@subsubsection Ref.Stmt.Alt.Type +@c * Ref.Stmt.Alt.Type:: Statement for branching on type. + +An @code{alt type} statement is similar to a pattern @code{alt}, but branches +on the @emph{type} of its head expression, rather than the value. The head +expression of an @code{alt type} statement must be of type @code{any}, and the +arms of the statement are slot patterns rather than value patterns. Control +branches to the arm with a @code{case} that matches the @emph{actual type} of +the value in the @code{any}. 
+ +An example of an @code{alt type} statement: + +@example +let any x = foo(); + +alt type (x) @{ + case (int i) @{ + ret i; + @} + case (list[int] li) @{ + ret int_list_sum(li); + @} + case (list[X] lx) @{ + ret list_len(lx); + @} + case (_) @{ + ret 0; + @} +@} +@end example + + +@page +@node Ref.Stmt.Prove +@subsection Ref.Stmt.Prove +@c * Ref.Stmt.Prove:: Statement for static assertion of typestate. + +A @code{prove} statement has no run-time effect. Its purpose is to statically +check (and document) that its argument constraint holds at its statement entry +point. If its argument typestate does not hold, under the typestate algorithm, +the program containing it will fail to compile. + +@page +@node Ref.Stmt.Check +@subsection Ref.Stmt.Check +@c * Ref.Stmt.Check:: Statement for dynamic assertion of typestate. + +A @code{check} statement connects dynamic assertions made at run-time to the +static typestate system. A @code{check} statement takes a constraint to check +at run-time. If the constraint holds at run-time, control passes through the +@code{check} and on to the next statement in the enclosing block. If the +condition fails to hold at run-time, the @code{check} statement behaves as a +@code{fail} statement. + +The typestate algorithm is built around @code{check} statements, and in +particular the fact that control @emph{will not pass} a check statement with a +condition that fails to hold. The typestate algorithm can therefore assume +that the (static) postcondition of a @code{check} statement includes the +checked constraint itself. From there, the typestate algorithm can perform +dataflow calculations on subsequent statements, propagating conditions forward +and statically comparing implied states and their +specifications. @xref{Ref.Stmt.Stat}. + +@example +fn even(&int x) -> bool @{ + ret x & 1 == 0; +@} + +fn print_even(int x) : even(x) @{ + print(x); +@} + +fn test() @{ + let int y = 8; + + // Cannot call print_even(y) here. 
+ + check even(y); + + // Can call print_even(y) here, since even(y) now holds. + print_even(y); +@} +@end example + +@page +@node Ref.Stmt.IfCheck +@subsection Ref.Stmt.IfCheck +@c * Ref.Stmt.IfCheck:: Statement for dynamic testing of typestate. + +An @code{if check} statement combines an @code{if} statement and a @code{check} +statement in an indivisible unit that can be used to build more complex +conditional control flow than the @code{check} statement affords. + +In fact, @code{if check} is a ``more primitive'' statement than @code{check}; +instances of the latter can be rewritten as instances of the former. The +following two examples are equivalent: + +@sp 1 +Example using @code{check}: +@example +check even(x); +print_even(x); +@end example + +@sp 1 +Equivalent example using @code{if check}: +@example +if check even(x) @{ + print_even(x); +@} else @{ + fail; +@} +@end example + +@page +@node Ref.Run +@section Ref.Run +@c * Ref.Run:: Organization of runtime services. + +The Rust @dfn{runtime} is a relatively compact collection of C and Rust code +that provides fundamental services and datatypes to all Rust tasks at +run-time. It is smaller and simpler than many modern language runtimes. It is +tightly integrated into the language's execution model of slots, tasks, +communication, reflection, logging and signal handling. + +@menu +* Ref.Run.Mem:: Runtime memory management service. +* Ref.Run.Type:: Runtime built-in type services. +* Ref.Run.Comm:: Runtime communication service. +* Ref.Run.Refl:: Runtime reflection system. +* Ref.Run.Log:: Runtime logging system. +* Ref.Run.Sig:: Runtime signal handler. +@end menu + +@page +@node Ref.Run.Mem +@subsection Ref.Run.Mem +@c * Ref.Run.Mem:: Runtime memory management service. + +The runtime memory-management system is based on a @emph{service-provider +interface}, through which the runtime requests blocks of memory from its +environment and releases them back to its environment when they are no longer +in use.
The default implementation of the service-provider interface consists +of the C runtime functions @code{malloc} and @code{free}. + +The runtime memory-management system in turn supplies Rust tasks with +facilities for allocating, extending and releasing stacks, as well as +allocating and freeing exterior values. + +@page +@node Ref.Run.Type +@subsection Ref.Run.Type +@c * Ref.Run.Type:: Runtime built-in type services. + +The runtime provides C and Rust code to manage several built-in types: +@itemize +@item @code{vec}, the type of vectors. +@item @code{str}, the type of UTF-8 strings. +@item @code{big}, the type of arbitrary-precision integers. +@item @code{chan}, the type of communication channels. +@item @code{port}, the type of communication ports. +@item @code{task}, the type of tasks. +@end itemize +Support for other built-in types such as simple types, tuples, +records, and tags is open-coded by the Rust compiler. + +@page +@node Ref.Run.Comm +@subsection Ref.Run.Comm +@c * Ref.Run.Comm:: Runtime communication service. + +The runtime provides code to manage inter-task communication. This includes +the system of task-lifecycle state transitions depending on the contents of +queues, as well as code to copy values between queues and their recipients and +to serialize values for transmission over operating-system inter-process +communication facilities. + +@page +@node Ref.Run.Refl +@subsection Ref.Run.Refl +@c * Ref.Run.Refl:: Runtime reflection system. + +The runtime reflection system is driven by the DWARF tables emitted into a +crate at compile-time. Reflecting on a slot or item allocates a Rust data +structure corresponding to the DWARF DIE for that slot or item. + +@page +@node Ref.Run.Log +@subsection Ref.Run.Log +@c * Ref.Run.Log:: Runtime logging system. + +The runtime contains a system for directing logging statements to a logging +console and/or internal logging buffers. @xref{Ref.Stmt.Log}.
Logging +statements can be enabled or disabled via a two-dimensional filtering process: + +@itemize + +@sp 1 +@item +By Item + +Each @emph{item} (module, function, iterator, object, type) in Rust has a +static name-path within its crate module, and can have logging enabled or +disabled on a name-path-prefix basis. + +@sp 1 +@item +By Task + +Each @emph{task} in a running Rust program has a unique ownership-path through +the task ownership tree, and can have logging enabled or disabled on an +ownership-path-prefix basis. +@end itemize + +Logging is integrated into the language for efficiency reasons, as well as the +need to filter logs based on these two built-in dimensions. + +@page +@node Ref.Run.Sig +@subsection Ref.Run.Sig +@c * Ref.Run.Sig:: Runtime signal handler. + +The runtime signal-handling system is driven by a signal-dispatch table and a +signal queue associated with each task. Sending a signal to a task inserts the +signal into the task's signal queue and marks the task as having a pending +signal. At the next scheduling opportunity, the runtime processes signals in +the task's queue using its dispatch table. The signal queue memory is charged +to the task's domain; if the queue grows too big, the task will fail. 
+ +@c ############################################################ +@c end main body of nodes +@c ############################################################ + +@page +@node Index +@chapter Index + +@printindex cp + +@bye diff --git a/src/Makefile b/src/Makefile index 95d530dfa81..5d4e6aa0fd3 100644 --- a/src/Makefile +++ b/src/Makefile @@ -19,27 +19,29 @@ endif CFG_INFO := $(info cfg: building on $(CFG_OSTYPE) $(CFG_CPUTYPE)) -CFG_GCC_COMPILE_FLAGS := +CFG_GCC_CFLAGS := CFG_GCC_LINK_FLAGS := CFG_VALGRIND := CFG_LLVM_CONFIG := llvm-config -CFG_BOOT_FLAGS := +CFG_BOOT_FLAGS := $(FLAGS) ifeq ($(CFG_OSTYPE), Linux) CFG_RUNTIME := librustrt.so CFG_STDLIB := libstd.so - CFG_GCC_COMPILE_FLAGS += -fPIC + CFG_GCC_CFLAGS += -fPIC CFG_GCC_LINK_FLAGS += -shared -fPIC -ldl -lpthread ifeq ($(CFG_CPUTYPE), x86_64) - CFG_GCC_COMPILE_FLAGS += -m32 + CFG_GCC_CFLAGS += -m32 CFG_GCC_LINK_FLAGS += -m32 endif CFG_NATIVE := 1 CFG_UNIXY := 1 CFG_VALGRIND := $(shell which valgrind) ifdef CFG_VALGRIND - CFG_VALGRIND += --run-libc-freeres=no --leak-check=full --quiet --vex-iropt-level=0 + CFG_VALGRIND += --leak-check=full \ + --quiet --vex-iropt-level=0 \ + --suppressions=etc/x86.supp endif endif @@ -52,7 +54,7 @@ ifeq ($(CFG_OSTYPE), Darwin) # "on an i386" when the whole userspace is 64-bit and the compiler # emits 64-bit binaries by default. So we just force -m32 here. Smarter # approaches welcome! 
- CFG_GCC_COMPILE_FLAGS += -m32 + CFG_GCC_CFLAGS += -m32 CFG_GCC_LINK_FLAGS += -m32 endif @@ -73,7 +75,7 @@ ifdef CFG_WINDOWSY CFG_EXE_SUFFIX := .exe CFG_BOOT := ./rustboot.exe CFG_COMPILER := ./rustc.exe - CFG_GCC_COMPILE_FLAGS += -march=i686 + CFG_GCC_CFLAGS += -march=i686 CFG_GCC_LINK_FLAGS += -shared -fPIC CFG_RUN_TARG = $(1) # FIXME: support msvc at some point @@ -99,10 +101,10 @@ ifdef CFG_UNIXY endif CFG_OBJ_SUFFIX := .o CFG_EXE_SUFFIX := .exe - CFG_GCC_COMPILE_FLAGS := + CFG_GCC_CFLAGS := CFG_GCC_LINK_FLAGS := -shared ifeq ($(CFG_CPUTYPE), x86_64) - CFG_GCC_COMPILE_FLAGS += -m32 + CFG_GCC_CFLAGS += -m32 CFG_GCC_LINK_FLAGS += -m32 endif endif @@ -110,11 +112,11 @@ endif ifdef CFG_GCC CFG_INFO := $(info cfg: using gcc) - CFG_GCC_COMPILE_FLAGS += -Wall -Werror -fno-rtti -fno-exceptions -g + CFG_GCC_CFLAGS += -Wall -Werror -fno-rtti -fno-exceptions -g CFG_GCC_LINK_FLAGS += -g - CFG_COMPILE_C = $(CFG_GCC_CROSS)g++ $(CFG_GCC_COMPILE_FLAGS) -c -o $(1) $(2) + CFG_COMPILE_C = $(CFG_GCC_CROSS)g++ $(CFG_GCC_CFLAGS) -c -o $(1) $(2) CFG_LINK_C = $(CFG_GCC_CROSS)g++ $(CFG_GCC_LINK_FLAGS) -o $(1) - CFG_DEPEND_C = $(CFG_GCC_CROSS)g++ $(CFG_GCC_COMPILE_FLAGS) -MT "$(1)" -MM $(2) + CFG_DEPEND_C = $(CFG_GCC_CROSS)g++ $(CFG_GCC_CFLAGS) -MT "$(1)" -MM $(2) else CFG_ERR := $(error please try on a system with gcc) endif @@ -153,7 +155,8 @@ ifneq ($(CFG_LLVM_CONFIG),) $(info cfg: using LLVM version 2.8svn) else CFG_LLVM_CONFIG := - $(info cfg: incompatible LLVM version $(CFG_LLVM_VERSION), expected 2.8svn) + $(info cfg: incompatible LLVM version $(CFG_LLVM_VERSION), \ + expected 2.8svn) endif endif ifdef CFG_LLVM_CONFIG @@ -161,11 +164,12 @@ ifdef CFG_LLVM_CONFIG WHERE := $(shell ocamlc -where) LLVM_LIBS := llvm.cma llvm_bitwriter.cma LLVM_NATIVE_LIBS := llvm.cmxa llvm_bitwiter.cmxa - LLVM_CLIBS := $(shell for c in `$(CFG_LLVM_CONFIG) --ldflags --libs` -lllvm -lllvm_bitwriter; do echo -cclib && echo $$c; done | xargs echo) + LLVM_CLIBS := $(shell for c in `$(CFG_LLVM_CONFIG) 
--ldflags --libs` \ + -lllvm -lllvm_bitwriter; do echo -cclib && echo $$c; done | xargs echo) LLVM_INCS := -I boot/llvm -I $(WHERE) - LLVM_MLS := $(addprefix boot/llvm/, llabi.ml llasm.ml llfinal.ml lltrans.ml \ - llemit.ml) - CFG_LLC_COMPILE_FLAGS := -march=x86 + LLVM_MLS := $(addprefix boot/llvm/, llabi.ml llasm.ml llfinal.ml \ + lltrans.ml llemit.ml) + CFG_LLC_CFLAGS := -march=x86 $(info cfg: found llvm-config at $(CFG_LLVM_CONFIG)) else VARIANT=x86 @@ -190,7 +194,8 @@ ML_INCS := -I boot/fe -I boot/me -I boot/be -I boot/driver/$(VARIANT) \ ML_LIBS := unix.cma nums.cma bigarray.cma ML_NATIVE_LIBS := unix.cmxa nums.cmxa bigarray.cmxa OCAMLC_FLAGS := -g $(ML_INCS) -w Ael -warn-error Ael -OCAMLOPT_FLAGS := $(ML_INCS) -w Ael -warn-error Ael $(CFG_OCAMLOPT_PROFILE_FLAGS) +OCAMLOPT_FLAGS := $(ML_INCS) -w Ael -warn-error Ael \ + $(CFG_OCAMLOPT_PROFILE_FLAGS) ifdef CFG_LLVM_CONFIG ML_LIBS += $(LLVM_LIBS) -custom -cclib -lstdc++ $(LLVM_CLIBS) @@ -205,11 +210,12 @@ DRIVER_BOT_MLS := $(addprefix boot/driver/, session.ml) BE_MLS := $(addprefix boot/be/, x86.ml ra.ml pe.ml elf.ml \ macho.ml) IL_MLS := $(addprefix boot/be/, asm.ml il.ml abi.ml) -ME_MLS := $(addprefix boot/me/, walk.ml semant.ml resolve.ml alias.ml type.ml dead.ml \ - typestate.ml mode.ml mutable.ml gctype.ml loop.ml layout.ml transutil.ml \ - trans.ml dwarf.ml) -FE_MLS := $(addprefix boot/fe/, ast.ml token.ml lexer.ml parser.ml pexp.ml item.ml cexp.ml) -DRIVER_TOP_MLS := $(addprefix boot/driver/, $(VARIANT)/glue.ml lib.ml main.ml) +ME_MLS := $(addprefix boot/me/, walk.ml semant.ml resolve.ml alias.ml \ + type.ml dead.ml effect.ml typestate.ml loop.ml layout.ml \ + transutil.ml trans.ml dwarf.ml) +FE_MLS := $(addprefix boot/fe/, ast.ml token.ml lexer.ml parser.ml pexp.ml \ + item.ml cexp.ml) +DRIVER_TOP_MLS := $(addprefix boot/driver/, lib.ml $(VARIANT)/glue.ml main.ml) BOOT_MLS := $(UTIL_BOT_MLS) $(DRIVER_BOT_MLS) $(FE_MLS) $(IL_MLS) $(ME_MLS) \ $(BE_MLS) $(LLVM_MLS) $(DRIVER_TOP_MLS) @@ -226,8 +232,12 @@ 
RUNTIME_CS := rt/rust.cpp \ rt/rust_comm.cpp \ rt/rust_dom.cpp \ rt/rust_task.cpp \ + rt/rust_chan.cpp \ rt/rust_upcall.cpp \ + rt/rust_log.cpp \ + rt/rust_timer.cpp \ rt/isaac/randport.cpp + RUNTIME_HDR := rt/rust.h \ rt/rust_dwarf.h \ rt/rust_internal.h \ @@ -253,7 +263,8 @@ $(CFG_RUNTIME): $(RUNTIME_OBJS) $(MKFILES) $(RUNTIME_HDR) $(CFG_STDLIB): $(STDLIB_CRATE) $(CFG_BOOT) $(MKFILES) @$(call CFG_ECHO, compile: $<) - $(CFG_QUIET)OCAMLRUNPARAM="b1" $(CFG_BOOT) $(CFG_BOOT_FLAGS) -shared -o $@ $(STDLIB_CRATE) + $(CFG_QUIET)OCAMLRUNPARAM="b1" $(CFG_BOOT) $(CFG_BOOT_FLAGS) \ + -shared -o $@ $(STDLIB_CRATE) %$(CFG_OBJ_SUFFIX): %.cpp $(MKFILES) @$(call CFG_ECHO, compile: $<) @@ -262,7 +273,8 @@ $(CFG_STDLIB): $(STDLIB_CRATE) $(CFG_BOOT) $(MKFILES) ifdef CFG_NATIVE $(CFG_BOOT): $(BOOT_CMXS) $(MKFILES) @$(call CFG_ECHO, compile: $<) - $(CFG_QUIET)ocamlopt$(OPT) -o $@ $(OCAMLOPT_FLAGS) $(ML_NATIVE_LIBS) $(BOOT_CMXS) + $(CFG_QUIET)ocamlopt$(OPT) -o $@ $(OCAMLOPT_FLAGS) $(ML_NATIVE_LIBS) \ + $(BOOT_CMXS) else $(CFG_BOOT): $(BOOT_CMOS) $(MKFILES) @$(call CFG_ECHO, compile: $<) @@ -288,7 +300,7 @@ endif # Main compiler targets and rules ###################################################################### -$(CFG_COMPILER): $(COMPILER_CRATE) $(CFG_BOOT) $(CFG_RUNTIME) $(CFG_STDLIB) +$(CFG_COMPILER): $(COMPILER_INPUTS) $(CFG_BOOT) $(CFG_RUNTIME) $(CFG_STDLIB) @$(call CFG_ECHO, compile: $<) $(CFG_QUIET)OCAMLRUNPARAM="b1" $(CFG_BOOT) $(CFG_BOOT_FLAGS) -o $@ $< $(CFG_QUIET)chmod 0755 $@ @@ -302,13 +314,17 @@ self: $(CFG_COMPILER) # Testing ###################################################################### -TEST_XFAILS_X86 := test/run-pass/mlist_cycle.rs \ +TEST_XFAILS_X86 := test/run-pass/mlist-cycle.rs \ test/run-pass/clone-with-exterior.rs \ + test/run-pass/obj-as.rs \ test/run-pass/rec-auto.rs \ test/run-pass/vec-slice.rs \ test/run-pass/generic-fn-infer.rs \ + test/run-pass/generic-recursive-tag.rs \ test/run-pass/generic-tag.rs \ + test/run-pass/generic-tag-alt.rs \ 
test/run-pass/bind-obj-ctor.rs \ + test/run-pass/task-comm.rs \ test/compile-fail/rec-missing-fields.rs \ test/compile-fail/infinite-tag-type-recursion.rs \ test/compile-fail/infinite-vec-type-recursion.rs @@ -316,61 +332,74 @@ TEST_XFAILS_X86 := test/run-pass/mlist_cycle.rs \ TEST_XFAILS_LLVM := $(addprefix test/run-pass/, \ acyclic-unwind.rs \ alt-tag.rs \ + argv.rs \ basic.rs \ bind-obj-ctor.rs \ bind-thunk.rs \ bind-trivial.rs \ + bitwise.rs \ + box-unbox.rs \ cast.rs \ char.rs \ clone-with-exterior.rs \ comm.rs \ + command-line-args.rs \ complex.rs \ dead-code-one-arm-if.rs \ deep.rs \ div-mod.rs \ drop-on-ret.rs \ + else-if.rs \ + export-non-interference.rs \ exterior.rs \ - foreach-simple.rs \ - foreach-simple-outer-slot.rs \ foreach-put-structured.rs \ - vec-slice.rs \ - simple-obj.rs \ - import.rs \ + foreach-simple-outer-slot.rs \ + foreach-simple.rs \ fun-call-variants.rs \ fun-indirect-call.rs \ generic-derived-type.rs \ generic-drop-glue.rs \ - generic-fn.rs \ - generic-obj.rs \ - generic-obj-with-derived-type.rs \ - generic-tag.rs \ - generic-type.rs \ + generic-exterior-box.rs \ generic-fn-infer.rs \ - vec-append.rs \ - vec-concat.rs \ - vec-drop.rs \ - mutable-vec-drop.rs \ + generic-fn.rs \ + generic-obj-with-derived-type.rs \ + generic-obj.rs \ + generic-recursive-tag.rs \ + generic-tag-alt.rs \ + generic-tag.rs \ + generic-type-synonym.rs \ + generic-type.rs \ + i32-sub.rs \ + i8-incr.rs \ + import.rs \ inner-module.rs \ large-records.rs \ + lazy-and-or.rs \ lazychan.rs \ linear-for-loop.rs \ + list.rs \ many.rs \ + mlist-cycle.rs \ mlist.rs \ - mlist_cycle.rs \ + mutable-vec-drop.rs \ mutual-recursion-group.rs \ + native-mod.rc \ + native-opaque-type.rs \ native.rc \ - command-line-args.rs \ - native_mod.rc \ + obj-as.rs \ + obj-drop.rs \ + obj-dtor.rs \ + obj-with-vec.rs \ opeq.rs \ + preempt.rs \ pred.rs \ readalias.rs \ rec-auto.rs \ rec-extend.rs \ + rec-tup.rs \ rec.rs \ - rec_tup.rs \ return-nil.rs \ - i32-sub.rs \ - i8-incr.rs \ + 
simple-obj.rs \ spawn-fn.rs \ spawn.rs \ stateful-obj.rs \ @@ -383,31 +412,31 @@ TEST_XFAILS_LLVM := $(addprefix test/run-pass/, \ tail-direct.rs \ threads.rs \ tup.rs \ + type-sizes.rs \ u32-decr.rs \ u8-incr-decr.rs \ u8-incr.rs \ unit.rs \ user.rs \ + utf8.rs \ + vec-append.rs \ + vec-concat.rs \ + vec-drop.rs \ + vec-slice.rs \ vec.rs \ writealias.rs \ yield.rs \ yield2.rs \ - native-opaque-type.rs \ - type-sizes.rs \ - obj-drop.rs \ - obj-dtor.rs \ - obj-with-vec.rs \ - else-if.rs \ - lazy-and-or.rs \ + task-comm.rs \ ) \ $(addprefix test/run-fail/, \ explicit-fail.rs \ fail.rs \ linked-failure.rs \ pred.rs \ - vec_overrun.rs \ - str_overrun.rs \ - vec_underrun.rs \ + vec-overrun.rs \ + str-overrun.rs \ + vec-underrun.rs \ ) \ $(addprefix test/compile-fail/, \ rec-missing-fields.rs \ @@ -416,93 +445,109 @@ TEST_XFAILS_LLVM := $(addprefix test/run-pass/, \ ) ifdef CFG_WINDOWSY -TEST_XFAILS_X86 += test/run-pass/native_mod.rc -TEST_XFAILS_LLVM += test/run-pass/native_mod.rc +TEST_XFAILS_X86 += test/run-pass/native-mod.rc +TEST_XFAILS_LLVM += test/run-pass/native-mod.rc +else +TEST_XFAILS_X86 += test/run-pass/preempt.rs +TEST_XFAILS_LLVM += test/run-pass/preempt.rs endif -TEST_RUN_PASS_CRATES_X86 := $(filter-out $(TEST_XFAILS_X86), $(wildcard test/run-pass/*.rc)) -TEST_RUN_PASS_CRATES_LLVM := $(filter-out $(TEST_XFAILS_LLVM), $(wildcard test/run-pass/*.rc)) -TEST_RUN_PASS_SOURCES_X86 := $(filter-out $(TEST_XFAILS_X86), $(wildcard test/run-pass/*.rs)) -TEST_RUN_PASS_SOURCES_LLVM := $(filter-out $(TEST_XFAILS_LLVM), $(wildcard test/run-pass/*.rs)) -TEST_RUN_PASS_EXTRAS := $(wildcard test/run-pass/*/*.rs) -TEST_RUN_PASS_EXES_X86 := \ - $(TEST_RUN_PASS_CRATES_X86:.rc=.x86$(CFG_EXE_SUFFIX)) \ - $(TEST_RUN_PASS_SOURCES_X86:.rs=.x86$(CFG_EXE_SUFFIX)) -TEST_RUN_PASS_EXES_LLVM := \ - $(TEST_RUN_PASS_CRATES_LLVM:.rc=.llvm$(CFG_EXE_SUFFIX)) \ - $(TEST_RUN_PASS_SOURCES_LLVM:.rs=.llvm$(CFG_EXE_SUFFIX)) -TEST_RUN_PASS_OUTS_X86 := \ - 
$(TEST_RUN_PASS_EXES_X86:.x86$(CFG_EXE_SUFFIX)=.x86.out) -TEST_RUN_PASS_OUTS_LLVM := \ - $(TEST_RUN_PASS_EXES_LLVM:.llvm$(CFG_EXE_SUFFIX)=.llvm.out) +RPASS_RC := $(wildcard test/run-pass/*.rc) +RPASS_RS := $(wildcard test/run-pass/*.rs) +RFAIL_RC := $(wildcard test/run-fail/*.rc) +RFAIL_RS := $(wildcard test/run-fail/*.rs) +CFAIL_RC := $(wildcard test/compile-fail/*.rc) +CFAIL_RS := $(wildcard test/compile-fail/*.rs) + +TEST_RPASS_CRATES_X86 := $(filter-out $(TEST_XFAILS_X86), $(RPASS_RC)) +TEST_RPASS_CRATES_LLVM := $(filter-out $(TEST_XFAILS_LLVM), $(RPASS_RC)) +TEST_RPASS_SOURCES_X86 := $(filter-out $(TEST_XFAILS_X86), $(RPASS_RS)) +TEST_RPASS_SOURCES_LLVM := $(filter-out $(TEST_XFAILS_LLVM), $(RPASS_RS)) +TEST_RPASS_EXTRAS := $(wildcard test/run-pass/*/*.rs) +TEST_RPASS_EXES_X86 := \ + $(TEST_RPASS_CRATES_X86:.rc=.x86$(CFG_EXE_SUFFIX)) \ + $(TEST_RPASS_SOURCES_X86:.rs=.x86$(CFG_EXE_SUFFIX)) +TEST_RPASS_EXES_LLVM := \ + $(TEST_RPASS_CRATES_LLVM:.rc=.llvm$(CFG_EXE_SUFFIX)) \ + $(TEST_RPASS_SOURCES_LLVM:.rs=.llvm$(CFG_EXE_SUFFIX)) +TEST_RPASS_OUTS_X86 := \ + $(TEST_RPASS_EXES_X86:.x86$(CFG_EXE_SUFFIX)=.x86.out) +TEST_RPASS_OUTS_LLVM := \ + $(TEST_RPASS_EXES_LLVM:.llvm$(CFG_EXE_SUFFIX)=.llvm.out) -TEST_RUN_FAIL_CRATES_X86 := $(filter-out $(TEST_XFAILS_X86), $(wildcard test/run-fail/*.rc)) -TEST_RUN_FAIL_CRATES_LLVM := $(filter-out $(TEST_XFAILS_LLVM), $(wildcard test/run-fail/*.rc)) -TEST_RUN_FAIL_SOURCES_X86 := $(filter-out $(TEST_XFAILS_X86), $(wildcard test/run-fail/*.rs)) -TEST_RUN_FAIL_SOURCES_LLVM := $(filter-out $(TEST_XFAILS_LLVM), $(wildcard test/run-fail/*.rs)) -TEST_RUN_FAIL_EXTRAS := $(wildcard test/run-fail/*/*.rs) -TEST_RUN_FAIL_EXES_X86 := \ - $(TEST_RUN_FAIL_CRATES_X86:.rc=.x86$(CFG_EXE_SUFFIX)) \ - $(TEST_RUN_FAIL_SOURCES_X86:.rs=.x86$(CFG_EXE_SUFFIX)) -TEST_RUN_FAIL_EXES_LLVM := \ - $(TEST_RUN_FAIL_CRATES_LLVM:.rc=.llvm$(CFG_EXE_SUFFIX)) \ - $(TEST_RUN_FAIL_SOURCES_LLVM:.rs=.llvm$(CFG_EXE_SUFFIX)) -TEST_RUN_FAIL_OUTS_X86 := \ - 
$(TEST_RUN_FAIL_EXES_X86:.x86$(CFG_EXE_SUFFIX)=.x86.out) -TEST_RUN_FAIL_OUTS_LLVM := \ - $(TEST_RUN_FAIL_EXES_LLVM:.llvm$(CFG_EXE_SUFFIX)=.llvm.out) +TEST_RFAIL_CRATES_X86 := $(filter-out $(TEST_XFAILS_X86), $(RFAIL_RC)) +TEST_RFAIL_CRATES_LLVM := $(filter-out $(TEST_XFAILS_LLVM), $(RFAIL_RC)) +TEST_RFAIL_SOURCES_X86 := $(filter-out $(TEST_XFAILS_X86), $(RFAIL_RS)) +TEST_RFAIL_SOURCES_LLVM := $(filter-out $(TEST_XFAILS_LLVM), $(RFAIL_RS)) +TEST_RFAIL_EXTRAS := $(wildcard test/run-fail/*/*.rs) +TEST_RFAIL_EXES_X86 := \ + $(TEST_RFAIL_CRATES_X86:.rc=.x86$(CFG_EXE_SUFFIX)) \ + $(TEST_RFAIL_SOURCES_X86:.rs=.x86$(CFG_EXE_SUFFIX)) +TEST_RFAIL_EXES_LLVM := \ + $(TEST_RFAIL_CRATES_LLVM:.rc=.llvm$(CFG_EXE_SUFFIX)) \ + $(TEST_RFAIL_SOURCES_LLVM:.rs=.llvm$(CFG_EXE_SUFFIX)) +TEST_RFAIL_OUTS_X86 := \ + $(TEST_RFAIL_EXES_X86:.x86$(CFG_EXE_SUFFIX)=.x86.out) +TEST_RFAIL_OUTS_LLVM := \ + $(TEST_RFAIL_EXES_LLVM:.llvm$(CFG_EXE_SUFFIX)=.llvm.out) -TEST_COMPILE_FAIL_CRATES_X86 := $(filter-out $(TEST_XFAILS_X86), $(wildcard test/compile-fail/*.rc)) -TEST_COMPILE_FAIL_CRATES_LLVM := $(filter-out $(TEST_XFAILS_LLVM), $(wildcard test/compile-fail/*.rc)) -TEST_COMPILE_FAIL_SOURCES_X86 := $(filter-out $(TEST_XFAILS_X86), $(wildcard test/compile-fail/*.rs)) -TEST_COMPILE_FAIL_SOURCES_LLVM := $(filter-out $(TEST_XFAILS_LLVM), $(wildcard test/compile-fail/*.rs)) -TEST_COMPILE_FAIL_EXTRAS := $(wildcard test/compile-fail/*/*.rs) -TEST_COMPILE_FAIL_EXES_X86 := \ - $(TEST_COMPILE_FAIL_CRATES_X86:.rc=.x86$(CFG_EXE_SUFFIX)) \ - $(TEST_COMPILE_FAIL_SOURCES_X86:.rs=.x86$(CFG_EXE_SUFFIX)) -TEST_COMPILE_FAIL_EXES_LLVM := \ - $(TEST_COMPILE_FAIL_CRATES_LLVM:.rc=.llvm$(CFG_EXE_SUFFIX)) \ - $(TEST_COMPILE_FAIL_SOURCES_LLVM:.rs=.llvm$(CFG_EXE_SUFFIX)) -TEST_COMPILE_FAIL_OUTS_X86 := \ - $(TEST_COMPILE_FAIL_EXES_X86:.x86$(CFG_EXE_SUFFIX)=.x86.out) -TEST_COMPILE_FAIL_OUTS_LLVM := \ - $(TEST_COMPILE_FAIL_EXES_LLVM:.llvm$(CFG_EXE_SUFFIX)=.llvm.out) +TEST_CFAIL_CRATES_X86 := $(filter-out $(TEST_XFAILS_X86), 
$(CFAIL_RC)) +TEST_CFAIL_CRATES_LLVM := $(filter-out $(TEST_XFAILS_LLVM), $(CFAIL_RC)) +TEST_CFAIL_SOURCES_X86 := $(filter-out $(TEST_XFAILS_X86), $(CFAIL_RS)) +TEST_CFAIL_SOURCES_LLVM := $(filter-out $(TEST_XFAILS_LLVM), $(CFAIL_RS)) +TEST_CFAIL_EXTRAS := $(wildcard test/compile-fail/*/*.rs) +TEST_CFAIL_EXES_X86 := \ + $(TEST_CFAIL_CRATES_X86:.rc=.x86$(CFG_EXE_SUFFIX)) \ + $(TEST_CFAIL_SOURCES_X86:.rs=.x86$(CFG_EXE_SUFFIX)) +TEST_CFAIL_EXES_LLVM := \ + $(TEST_CFAIL_CRATES_LLVM:.rc=.llvm$(CFG_EXE_SUFFIX)) \ + $(TEST_CFAIL_SOURCES_LLVM:.rs=.llvm$(CFG_EXE_SUFFIX)) +TEST_CFAIL_OUTS_X86 := \ + $(TEST_CFAIL_EXES_X86:.x86$(CFG_EXE_SUFFIX)=.x86.out) +TEST_CFAIL_OUTS_LLVM := \ + $(TEST_CFAIL_EXES_LLVM:.llvm$(CFG_EXE_SUFFIX)=.llvm.out) -ALL_TEST_CRATES := $(TEST_COMPILE_FAIL_CRATES_X86) \ - $(TEST_RUN_FAIL_CRATES_X86) \ - $(TEST_RUN_PASS_CRATES_X86) +ALL_TEST_CRATES := $(TEST_CFAIL_CRATES_X86) \ + $(TEST_RFAIL_CRATES_X86) \ + $(TEST_RPASS_CRATES_X86) -ALL_TEST_SOURCES := $(TEST_COMPILE_FAIL_SOURCES_X86) \ - $(TEST_RUN_FAIL_SOURCES_X86) \ - $(TEST_RUN_PASS_SOURCES_X86) +ALL_TEST_SOURCES := $(TEST_CFAIL_SOURCES_X86) \ + $(TEST_RFAIL_SOURCES_X86) \ + $(TEST_RPASS_SOURCES_X86) ALL_TEST_INPUTS := $(wildcard test/*/*.rs test/*/*/*.rs test/*/*.rc) -check_nocompile: $(TEST_COMPILE_FAIL_OUTS_X86) +check_nocompile: $(TEST_CFAIL_OUTS_X86) + +check: tidy \ + $(TEST_RPASS_EXES_X86) $(TEST_RFAIL_EXES_X86) \ + $(TEST_RPASS_OUTS_X86) $(TEST_RFAIL_OUTS_X86) \ + $(TEST_CFAIL_OUTS_X86) -check: $(TEST_RUN_PASS_EXES_X86) $(TEST_RUN_FAIL_EXES_X86) \ - $(TEST_RUN_PASS_OUTS_X86) $(TEST_RUN_FAIL_OUTS_X86) \ - $(TEST_COMPILE_FAIL_OUTS_X86) ifeq ($(VARIANT),llvm) -ALL_TEST_CRATES += $(TEST_COMPILE_FAIL_CRATES_LLVM) \ - $(TEST_RUN_FAIL_CRATES_LLVM) \ - $(TEST_RUN_PASS_CRATES_LLVM) +ALL_TEST_CRATES += $(TEST_CFAIL_CRATES_LLVM) \ + $(TEST_RFAIL_CRATES_LLVM) \ + $(TEST_RPASS_CRATES_LLVM) -ALL_TEST_SOURCES += $(TEST_COMPILE_FAIL_SOURCES_LLVM) \ - $(TEST_RUN_FAIL_SOURCES_LLVM) \ - 
$(TEST_RUN_PASS_SOURCES_LLVM) +ALL_TEST_SOURCES += $(TEST_CFAIL_SOURCES_LLVM) \ + $(TEST_RFAIL_SOURCES_LLVM) \ + $(TEST_RPASS_SOURCES_LLVM) -check_nocompile: $(TEST_COMPILE_FAIL_OUTS_LLVM) +check_nocompile: $(TEST_CFAIL_OUTS_LLVM) -check: $(TEST_RUN_PASS_EXES_LLVM) $(TEST_RUN_FAIL_EXES_LLVM) \ - $(TEST_RUN_PASS_OUTS_LLVM) $(TEST_RUN_FAIL_OUTS_LLVM) \ - $(TEST_COMPILE_FAIL_OUTS_LLVM) +check: tidy \ + $(TEST_RPASS_EXES_LLVM) $(TEST_RFAIL_EXES_LLVM) \ + $(TEST_RPASS_OUTS_LLVM) $(TEST_RFAIL_OUTS_LLVM) \ + $(TEST_CFAIL_OUTS_LLVM) endif +REQ := $(CFG_BOOT) $(CFG_RUNTIME) $(CFG_STDLIB) +BOOT := $(CFG_QUIET)OCAMLRUNPARAM="b1" $(CFG_BOOT) $(CFG_BOOT_FLAGS) + test/run-pass/%.out: test/run-pass/%$(CFG_EXE_SUFFIX) $(CFG_RUNTIME) @$(call CFG_ECHO, run: $<) $(CFG_QUIET)$(call CFG_RUN_TARG, $<) > $@ @@ -510,55 +555,57 @@ test/run-pass/%.out: test/run-pass/%$(CFG_EXE_SUFFIX) $(CFG_RUNTIME) test/run-fail/%.out: test/run-fail/%$(CFG_EXE_SUFFIX) $(CFG_RUNTIME) @$(call CFG_ECHO, run: $<) $(CFG_QUIET)rm -f $@ - $(CFG_QUIET)$(call CFG_RUN_TARG, $<) >$@ 2>&1 ; X=$$? ; if [ $$X -eq 0 ] ; then exit 1 ; else exit 0 ; fi - $(CFG_QUIET)grep --text --quiet "`awk -F: '/error-pattern/ { print $$2 }' $(basename $(basename $@)).rs | tr -d '\n\r'`" $@ + $(CFG_QUIET)$(call CFG_RUN_TARG, $<) >$@ 2>&1 ; X=$$? 
; \ + if [ $$X -eq 0 ] ; then exit 1 ; else exit 0 ; fi + $(CFG_QUIET)grep --text --quiet \ + "`awk -F: '/error-pattern/ { print $$2 }' \ + $(basename $(basename $@)).rs | tr -d '\n\r'`" $@ -test/compile-fail/%.x86.out: test/compile-fail/%.rs $(CFG_BOOT) $(CFG_RUNTIME) +test/compile-fail/%.x86.out: test/compile-fail/%.rs $(REQ) @$(call CFG_ECHO, compile [x86]: $<) $(CFG_QUIET)rm -f $@ - $(CFG_QUIET)OCAMLRUNPARAM="b1" $(CFG_BOOT) -o $(@:.out=$(CFG_EXE_SUFFIX)) $< >$@ 2>&1 || true - $(CFG_QUIET)grep --text --quiet "`awk -F: '/error-pattern/ { print $$2 }' $< | tr -d '\n\r'`" $@ + $(BOOT) -o $(@:.out=$(CFG_EXE_SUFFIX)) $< >$@ 2>&1 || true + $(CFG_QUIET)grep --text --quiet \ + "`awk -F: '/error-pattern/ { print $$2 }' $< | tr -d '\n\r'`" $@ -test/compile-fail/%.llvm.out: test/compile-fail/%.rs $(CFG_BOOT) $(CFG_RUNTIME) +test/compile-fail/%.llvm.out: test/compile-fail/%.rs $(REQ) @$(call CFG_ECHO, compile [llvm]: $<) $(CFG_QUIET)rm -f $@ - $(CFG_QUIET)OCAMLRUNPARAM="b1" $(CFG_BOOT) $(CFG_BOOT_FLAGS) -o $(@:.out=$(CFG_EXE_SUFFIX)) $< >$@ 2>&1 || true - $(CFG_QUIET)grep --text --quiet "`awk -F: '/error-pattern/ { print $$2 }' $< | tr -d '\n\r'`" $@ + $(BOOT) -o $(@:.out=$(CFG_EXE_SUFFIX)) $< >$@ 2>&1 || true + $(CFG_QUIET)grep --text --quiet \ + "`awk -F: '/error-pattern/ { print $$2 }' $< | tr -d '\n\r'`" $@ -test/run-pass/%.x86$(CFG_EXE_SUFFIX): test/run-pass/%.rc $(CFG_BOOT) $(CFG_RUNTIME) $(CFG_STDLIB) +test/run-pass/%.x86$(CFG_EXE_SUFFIX): test/run-pass/%.rc $(REQ) @$(call CFG_ECHO, compile [x86]: $<) - $(CFG_QUIET)OCAMLRUNPARAM="b1" $(CFG_BOOT) $(CFG_BOOT_FLAGS) -o $@ $< - $(CFG_QUIET)chmod 0755 $@ + $(BOOT) -o $@ $< %.s: %.bc @$(call CFG_ECHO, compile [llvm]: $<) - $(CFG_QUIET)llc $(CFG_LLC_COMPILE_FLAGS) -o $@ $< + $(CFG_QUIET)llc $(CFG_LLC_CFLAGS) -o $@ $< %.llvm$(CFG_EXE_SUFFIX): %.s $(CFG_RUNTIME) @$(call CFG_ECHO, compile [llvm]: $<) - $(CFG_QUIET)gcc $(CFG_GCC_COMPILE_FLAGS) -o $@ $< -L. -lrustrt + $(CFG_QUIET)gcc $(CFG_GCC_CFLAGS) -o $@ $< -L. 
-lrustrt -test/run-pass/%.bc: test/run-pass/%.rc $(CFG_BOOT) $(CFG_STDLIB) +test/run-pass/%.bc: test/run-pass/%.rc $(REQ) @$(call CFG_ECHO, compile [llvm]: $<) - $(CFG_QUIET)OCAMLRUNPARAM="b1" $(CFG_BOOT) $(CFG_BOOT_FLAGS) -o $@ -llvm $< + $(BOOT) -o $@ -llvm $< -test/run-pass/%.x86$(CFG_EXE_SUFFIX): test/run-pass/%.rs $(CFG_BOOT) $(CFG_RUNTIME) $(CFG_STDLIB) +test/run-pass/%.x86$(CFG_EXE_SUFFIX): test/run-pass/%.rs $(REQ) @$(call CFG_ECHO, compile [x86]: $<) - $(CFG_QUIET)OCAMLRUNPARAM="b1" $(CFG_BOOT) $(CFG_BOOT_FLAGS) -o $@ $< - $(CFG_QUIET)chmod 0755 $@ + $(BOOT) -o $@ $< -test/run-pass/%.bc: test/run-pass/%.rs $(CFG_BOOT) $(CFG_STDLIB) +test/run-pass/%.bc: test/run-pass/%.rs $(REQ) @$(call CFG_ECHO, compile [llvm]: $<) - $(CFG_QUIET)OCAMLRUNPARAM="b1" $(CFG_BOOT) $(CFG_BOOT_FLAGS) -o $@ -llvm $< + $(BOOT) -o $@ -llvm $< -test/run-fail/%.x86$(CFG_EXE_SUFFIX): test/run-fail/%.rs $(CFG_BOOT) $(CFG_RUNTIME) $(CFG_STDLIB) +test/run-fail/%.x86$(CFG_EXE_SUFFIX): test/run-fail/%.rs $(REQ) @$(call CFG_ECHO, compile [x86]: $<) - $(CFG_QUIET)OCAMLRUNPARAM="b1" $(CFG_BOOT) $(CFG_BOOT_FLAGS) -o $@ $< - $(CFG_QUIET)chmod 0755 $@ + $(BOOT) -o $@ $< -test/run-fail/%.bc: test/run-fail/%.rs $(CFG_BOOT) $(CFG_STDLIB) +test/run-fail/%.bc: test/run-fail/%.rs $(REQ) @$(call CFG_ECHO, compile [llvm]: $<) - $(CFG_QUIET)OCAMLRUNPARAM="b1" $(CFG_BOOT) $(CFG_BOOT_FLAGS) -o $@ -llvm $< + $(BOOT) -o $@ -llvm $< ###################################################################### @@ -570,7 +617,9 @@ C_DEPFILES := $(RUNTIME_CS:%.cpp=%.d) %.d: %.cpp $(MKFILES) @$(call CFG_ECHO, dep: $<) - $(CFG_QUIET)$(call CFG_DEPEND_C, $@ $(patsubst %.cpp, %$(CFG_OBJ_SUFFIX), $<), $(RUNTIME_INCS)) $< $(CFG_PATH_MUNGE) >$@ + $(CFG_QUIET)$(call CFG_DEPEND_C, $@ \ + $(patsubst %.cpp, %$(CFG_OBJ_SUFFIX), $<), \ + $(RUNTIME_INCS)) $< $(CFG_PATH_MUNGE) >$@ %.d: %.ml $(MKFILES) @$(call CFG_ECHO, dep: $<) @@ -593,15 +642,15 @@ CRATE_DEPFILES := $(ALL_TEST_CRATES:%.rc=%.d) $(STDLIB_DEPFILE) $(STDLIB_DEPFILE): 
$(STDLIB_CRATE) $(MKFILES) $(CFG_BOOT) @$(call CFG_ECHO, dep: $<) - $(CFG_QUIET)$(CFG_BOOT) $(CFG_BOOT_FLAGS) -shared -rdeps $< $(CFG_PATH_MUNGE) >$@ + $(BOOT) -shared -rdeps $< $(CFG_PATH_MUNGE) >$@ %.d: %.rc $(MKFILES) $(CFG_BOOT) @$(call CFG_ECHO, dep: $<) - $(CFG_QUIET)$(CFG_BOOT) $(CFG_BOOT_FLAGS) -rdeps $< $(CFG_PATH_MUNGE) >$@ + $(BOOT) -rdeps $< $(CFG_PATH_MUNGE) >$@ %.d: %.rs $(MKFILES) $(CFG_BOOT) @$(call CFG_ECHO, dep: $<) - $(CFG_QUIET)$(CFG_BOOT) $(CFG_BOOT_FLAGS) -rdeps $< $(CFG_PATH_MUNGE) >$@ + $(BOOT) -rdeps $< $(CFG_PATH_MUNGE) >$@ ifneq ($(MAKECMDGOALS),clean) -include $(CRATE_DEPFILES) @@ -622,8 +671,9 @@ PKG_3RDPARTY := rt/valgrind.h rt/memcheck.h \ rt/bigint/bigint.h rt/bigint/bigint_int.cpp \ rt/bigint/bigint_ext.cpp rt/bigint/low_primes.h PKG_FILES := README \ + $(wildcard etc/*.*) \ $(MKFILES) $(BOOT_MLS) boot/fe/lexer.mll \ - $(COMPILER_CRATE) $(COMPILER_INPUTS) \ + $(COMPILER_INPUTS) \ $(STDLIB_CRATE) $(STDLIB_INPUTS) \ $(RUNTIME_CS) $(RUNTIME_HDR) $(PKG_3RDPARTY) \ $(ALL_TEST_INPUTS) @@ -658,20 +708,29 @@ distcheck: # Cleanup ###################################################################### -.PHONY: clean +.PHONY: clean tidy + +tidy: + @$(call CFG_ECHO, check: formatting) + $(CFG_QUIET) python etc/tidy.py \ + $(wildcard ../*.txt) \ + ../README \ + $(filter-out boot/fe/lexer.ml $(PKG_3RDPARTY), $(PKG_FILES)) clean: @$(call CFG_ECHO, cleaning) - $(CFG_QUIET)rm -f $(RUNTIME_OBJS) $(BOOT_CMOS) $(BOOT_CMIS) $(BOOT_CMXS) $(BOOT_OBJS) + $(CFG_QUIET)rm -f $(RUNTIME_OBJS) + $(CFG_QUIET)rm -f $(BOOT_CMOS) $(BOOT_CMIS) $(BOOT_CMXS) $(BOOT_OBJS) $(CFG_QUIET)rm -f $(CFG_COMPILER) $(CFG_QUIET)rm -f $(ML_DEPFILES) $(C_DEPFILES) $(CRATE_DEPFILES) $(CFG_QUIET)rm -f boot/fe/lexer.ml $(CFG_QUIET)rm -f $(CFG_BOOT) $(CFG_RUNTIME) $(CFG_STDLIB) - $(CFG_QUIET)rm -f $(TEST_RUN_PASS_EXES_X86) $(TEST_RUN_PASS_OUTS_X86) - $(CFG_QUIET)rm -f $(TEST_RUN_PASS_EXES_LLVM) $(TEST_RUN_PASS_OUTS_LLVM) - $(CFG_QUIET)rm -f $(TEST_RUN_FAIL_EXES_X86) 
$(TEST_RUN_FAIL_OUTS_X86) - $(CFG_QUIET)rm -f $(TEST_RUN_FAIL_EXES_LLVM) $(TEST_RUN_FAIL_OUTS_LLVM) - $(CFG_QUIET)rm -f $(TEST_COMPILE_FAIL_EXES_X86) $(TEST_COMPILE_FAIL_OUTS_X86) - $(CFG_QUIET)rm -f $(TEST_COMPILE_FAIL_EXES_LLVM) $(TEST_COMPILE_FAIL_OUTS_LLVM) + $(CFG_QUIET)rm -f $(TEST_RPASS_EXES_X86) $(TEST_RPASS_OUTS_X86) + $(CFG_QUIET)rm -f $(TEST_RPASS_EXES_LLVM) $(TEST_RPASS_OUTS_LLVM) + $(CFG_QUIET)rm -f $(TEST_RFAIL_EXES_X86) $(TEST_RFAIL_OUTS_X86) + $(CFG_QUIET)rm -f $(TEST_RFAIL_EXES_LLVM) $(TEST_RFAIL_OUTS_LLVM) + $(CFG_QUIET)rm -f $(TEST_CFAIL_EXES_X86) $(TEST_CFAIL_OUTS_X86) + $(CFG_QUIET)rm -f $(TEST_CFAIL_EXES_LLVM) $(TEST_CFAIL_OUTS_LLVM) $(CFG_QUIET)rm -Rf $(PKG_NAME)-*.tar.gz dist - $(CFG_QUIET)rm -f $(foreach ext,cmx cmi cmo cma o a d exe,$(wildcard boot/*/*.$(ext) boot/*/*/*.$(ext))) + $(CFG_QUIET)rm -f $(foreach ext,cmx cmi cmo cma o a d exe,\ + $(wildcard boot/*/*.$(ext) boot/*/*/*.$(ext))) diff --git a/src/README b/src/README new file mode 100644 index 00000000000..c51709d0556 --- /dev/null +++ b/src/README @@ -0,0 +1,28 @@ +This is a preliminary version of the Rust compiler. + +Source layout: + +boot/ The bootstrap compiler +boot/fe - Front end (lexer, parser, AST) +boot/me - Middle end (resolve, check, layout, trans) +boot/be - Back end (IL, RA, insns, asm, objfiles) +boot/util - Ubiquitous helpers +boot/llvm - LLVM-based alternative back end +boot/driver - Compiler driver + +comp/ The self-hosted compiler (doesn't exist yet) +comp/* - Same structure as in boot/ + +rt/ The runtime system +rt/rust_*.cpp - The majority of the runtime services +rt/isaac - The PRNG used for pseudo-random choices in the runtime +rt/bigint - The bigint library used for the 'big' type +rt/uthash - Small hashtable-and-list library for C, used in runtime +rt/{sync,util} - Small utility classes for the runtime.
+ +test/ Testsuite (for both bootstrap and self-hosted) +test/compile-fail - Tests that should fail to compile +test/run-fail - Tests that should compile, run and fail +test/run-pass - Tests that should compile, run and succeed + +Please be gentle, it's a work in progress. diff --git a/src/boot/be/abi.ml b/src/boot/be/abi.ml new file mode 100644 index 00000000000..fd9ca750e91 --- /dev/null +++ b/src/boot/be/abi.ml @@ -0,0 +1,207 @@ + +(* + * The 'abi' structure is pretty much just a grab-bag of machine + * dependencies and structure-layout information. Part of the latter + * is shared with trans and semant. + * + * Make some attempt to factor it as time goes by. + *) + +(* Word offsets for structure fields in rust_internal.h, and elsewhere in + the compiler. *) + +let rc_base_field_refcnt = 0;; + +let task_field_refcnt = rc_base_field_refcnt;; +let task_field_stk = task_field_refcnt + 1;; +let task_field_runtime_sp = task_field_stk + 1;; +let task_field_rust_sp = task_field_runtime_sp + 1;; +let task_field_gc_alloc_chain = task_field_rust_sp + 1;; +let task_field_dom = task_field_gc_alloc_chain + 1;; +let n_visible_task_fields = task_field_dom + 1;; + +let dom_field_interrupt_flag = 0;; + +let frame_glue_fns_field_mark = 0;; +let frame_glue_fns_field_drop = 1;; +let frame_glue_fns_field_reloc = 2;; + +let exterior_rc_slot_field_refcnt = 0;; +let exterior_rc_slot_field_body = 1;; + +let exterior_gc_slot_field_next = (-2);; +let exterior_gc_slot_field_ctrl = (-1);; +let exterior_gc_slot_field_refcnt = 0;; +let exterior_gc_slot_field_body = 1;; + +let exterior_rc_header_size = 1;; +let exterior_gc_header_size = 3;; + +let exterior_gc_malloc_return_adjustment = 2;; + +let stk_field_valgrind_id = 0 + 1;; +let stk_field_limit = stk_field_valgrind_id + 1;; +let stk_field_data = stk_field_limit + 1;; + +let binding_size = 2;; +let binding_field_item = 0;; +let binding_field_binding = 1;; + +let general_code_alignment = 16;; + +let tydesc_field_first_param = 0;; +let
tydesc_field_size = 1;; +let tydesc_field_align = 2;; +let tydesc_field_copy_glue = 3;; +let tydesc_field_drop_glue = 4;; +let tydesc_field_free_glue = 5;; +let tydesc_field_mark_glue = 6;; +let tydesc_field_obj_drop_glue = 7;; + +let vec_elt_rc = 0;; +let vec_elt_alloc = 1;; +let vec_elt_fill = 2;; +let vec_elt_data = 3;; + +let calltup_elt_out_ptr = 0;; +let calltup_elt_task_ptr = 1;; +let calltup_elt_ty_params = 2;; +let calltup_elt_args = 3;; +let calltup_elt_iterator_args = 4;; +let calltup_elt_indirect_args = 5;; + +let iterator_args_elt_block_fn = 0;; +let iterator_args_elt_outer_frame_ptr = 1;; + +let indirect_args_elt_closure = 0;; + +(* ty_params, src, dst, tydesc, taskptr. *) +let worst_case_glue_call_args = 5;; + +type abi = + { + abi_word_sz: int64; + abi_word_bits: Il.bits; + abi_word_ty: Common.ty_mach; + + abi_is_2addr_machine: bool; + abi_has_pcrel_data: bool; + abi_has_pcrel_code: bool; + + abi_n_hardregs: int; + abi_str_of_hardreg: (int -> string); + + abi_prealloc_quad: (Il.quad' -> Il.quad'); + abi_constrain_vregs: (Il.quad -> Bits.t array -> unit); + + abi_emit_fn_prologue: (Il.emitter + -> Common.size (* framesz *) + -> Common.size (* callsz *) + -> Common.nabi + -> Common.fixup (* grow_task *) + -> unit); + + abi_emit_fn_epilogue: (Il.emitter -> unit); + + abi_emit_fn_tail_call: (Il.emitter + -> int64 (* caller_callsz *) + -> int64 (* caller_argsz *) + -> Il.code (* callee_code *) + -> int64 (* callee_argsz *) + -> unit); + + abi_clobbers: (Il.quad -> Il.hreg list); + + abi_emit_native_call: (Il.emitter + -> Il.cell (* ret *) + -> Common.nabi + -> Common.fixup (* callee *) + -> Il.operand array (* args *) + -> unit); + + abi_emit_native_void_call: (Il.emitter + -> Common.nabi + -> Common.fixup (* callee *) + -> Il.operand array (* args *) + -> unit); + + abi_emit_native_call_in_thunk: (Il.emitter + -> Il.cell (* ret *) + -> Common.nabi + -> Il.operand (* callee *) + -> Il.operand array (* args *) + -> unit); + abi_emit_inline_memcpy: 
(Il.emitter + -> int64 (* n_bytes *) + -> Il.reg (* dst_ptr *) + -> Il.reg (* src_ptr *) + -> Il.reg (* tmp_reg *) + -> bool (* ascending *) + -> unit); + + (* Global glue. *) + abi_activate: (Il.emitter -> unit); + abi_yield: (Il.emitter -> unit); + abi_unwind: (Il.emitter -> Common.nabi -> Common.fixup -> unit); + abi_get_next_pc_thunk: + ((Il.reg (* output *) + * Common.fixup (* thunk in objfile *) + * (Il.emitter -> unit)) (* fn to make thunk *) + option); + + abi_sp_reg: Il.reg; + abi_fp_reg: Il.reg; + abi_dwarf_fp_reg: int; + abi_tp_cell: Il.cell; + abi_implicit_args_sz: int64; + abi_frame_base_sz: int64; + abi_frame_info_sz: int64; + abi_spill_slot: (Il.spill -> Il.mem); + } +;; + +let load_fixup_addr + (e:Il.emitter) + (out_reg:Il.reg) + (fix:Common.fixup) + (rty:Il.referent_ty) + : unit = + + let cell = Il.Reg (out_reg, Il.AddrTy rty) in + let op = Il.ImmPtr (fix, rty) in + Il.emit e (Il.lea cell op); +;; + +let load_fixup_codeptr + (e:Il.emitter) + (out_reg:Il.reg) + (fixup:Common.fixup) + (has_pcrel_code:bool) + (indirect:bool) + : Il.code = + if indirect + then + begin + load_fixup_addr e out_reg fixup (Il.ScalarTy (Il.AddrTy Il.CodeTy)); + Il.CodePtr (Il.Cell (Il.Mem (Il.RegIn (out_reg, None), + Il.ScalarTy (Il.AddrTy Il.CodeTy)))) + end + else + if has_pcrel_code + then (Il.CodePtr (Il.ImmPtr (fixup, Il.CodeTy))) + else + begin + load_fixup_addr e out_reg fixup Il.CodeTy; + Il.CodePtr (Il.Cell (Il.Reg (out_reg, Il.AddrTy Il.CodeTy))) + end +;; + + +(* + * Local Variables: + * fill-column: 78; + * indent-tabs-mode: nil + * buffer-file-coding-system: utf-8-unix + * compile-command: "make -k -C ../.. 2>&1 | sed -e 's/\\/x\\//x:\\//g'"; + * End: + *) diff --git a/src/boot/be/asm.ml b/src/boot/be/asm.ml new file mode 100644 index 00000000000..10b2142aad6 --- /dev/null +++ b/src/boot/be/asm.ml @@ -0,0 +1,755 @@ +(* + + Our assembler is an all-at-once, buffer-in-memory job, very simple + minded. 
I have 1gb of memory on my laptop: I don't expect to ever + emit a program that large with this code. + + It is based on the 'frag' type, which has a variant for every major + type of machine-blob we know how to write (bytes, zstrings, BSS + blocks, words of various sorts). + + A frag can contain symbolic references between the sub-parts of + it. These are accomplished through ref cells we call fixups, and a + 2-pass (resolution and writing) process defined recursively over + the frag structure. + + Fixups are defined by wrapping a frag in a DEF pseudo-frag with + a fixup attached. This will record information about the wrapped + frag -- positions and sizes -- in the fixup during resolution. + + We say "positions" and "sizes" there, in plural, because both a + file number and a memory number are recorded for each concept. + + File numbers refer to positions and sizes in the file we're + generating, and are based on the native int type for the host + platform -- usually 31 or 62 bits -- whereas the expressions that + *use* position fixups tend to promote them up to 32 or 64 bits + somehow. On a 32 bit platform, you can't generate output buffers + with 64-bit positions (OCaml limitation!) + + Memory numbers are 64 bit, always, and refer to sizes and positions + of frags when they are loaded into memory in the target. When + you're generating code for a 32-bit target, or using a memory + number in a context that's less than 64 bits, the value is + range-checked and truncated. But in all other respects, we imagine + a 32-bit address space is just the prefix of the continuing 64-bit + address space. If you need to pin an object at a particular place + from the point 2^32-1, say, you will need to do arithmetic and use + the MEMPOS pseudo-frag, which sets the current memory position as + it's being processed. + + Fixups can be *used* anywhere else in the frag tree, as many times + as you like. If you try to write an unresolved fixup, the emitter + faults.
When you specify the use of a fixup, you need to specify + whether you want to use its file size, file position, memory size, + or memory position. + + Positions, addresses, sizes and such, of course, are in bytes. + + Expressions are evaluated to an int64 (signed), even if the + expression is an int32 or less. Depending on how you use the result + of the expression, a range check error may fire (for example, if + the expression evaluates to -2^24 and you're emitting a word16). + + Word endianness is per-file. At the moment this seems acceptable. + + Because we want to be *very specific* about the time and place + arithmetic promotions occur, we define two separate expression-tree + types (with the same polymorphic constructors) and two separate + evaluation functions, with an explicit operator for marking the + promotion-points. + +*) + +open Common;; + + +let log (sess:Session.sess) = + Session.log "asm" + sess.Session.sess_log_asm + sess.Session.sess_log_out +;; + +let iflog (sess:Session.sess) (thunk:(unit -> unit)) : unit = + if sess.Session.sess_log_asm + then thunk () + else () +;; + +exception Bad_fit of string;; +exception Undef_sym of string;; + +type ('a, 'b) expr = + IMM of 'a + | ADD of (('a, 'b) expr) * (('a, 'b) expr) + | SUB of (('a, 'b) expr) * (('a, 'b) expr) + | MUL of (('a, 'b) expr) * (('a, 'b) expr) + | DIV of (('a, 'b) expr) * (('a, 'b) expr) + | REM of (('a, 'b) expr) * (('a, 'b) expr) + | MAX of (('a, 'b) expr) * (('a, 'b) expr) + | ALIGN of (('a, 'b) expr) * (('a, 'b) expr) + | SLL of (('a, 'b) expr) * int + | SLR of (('a, 'b) expr) * int + | SAR of (('a, 'b) expr) * int + | AND of (('a, 'b) expr) * (('a, 'b) expr) + | XOR of (('a, 'b) expr) * (('a, 'b) expr) + | OR of (('a, 'b) expr) * (('a, 'b) expr) + | NOT of (('a, 'b) expr) + | NEG of (('a, 'b) expr) + | F_POS of fixup + | F_SZ of fixup + | M_POS of fixup + | M_SZ of fixup + | EXT of 'b + +type expr32 = (int32, int) expr +;; + +type expr64 = (int64, expr32) expr +;; + + +let rec 
eval32 (e:expr32) + : int32 = + let chop64 kind name v = + let x = Int64.to_int32 v in + if (Int64.compare v (Int64.of_int32 x)) = 0 then + x + else raise (Bad_fit (kind + ^ " fixup " + ^ name + ^ " overflowed 32 bits in eval32: " + ^ Int64.to_string v)) + in + let expandInt _ _ v = Int32.of_int v in + let checkdef kind name v inj = + match v with + None -> + raise (Undef_sym (kind ^ " fixup " ^ name + ^ " undefined in eval32")) + | Some x -> inj kind name x + in + match e with + IMM i -> i + | ADD (a, b) -> Int32.add (eval32 a) (eval32 b) + | SUB (a, b) -> Int32.sub (eval32 a) (eval32 b) + | MUL (a, b) -> Int32.mul (eval32 a) (eval32 b) + | DIV (a, b) -> Int32.div (eval32 a) (eval32 b) + | REM (a, b) -> Int32.rem (eval32 a) (eval32 b) + | MAX (a, b) -> i32_max (eval32 a) (eval32 b) + | ALIGN (a, b) -> i32_align (eval32 a) (eval32 b) + | SLL (a, b) -> Int32.shift_left (eval32 a) b + | SLR (a, b) -> Int32.shift_right_logical (eval32 a) b + | SAR (a, b) -> Int32.shift_right (eval32 a) b + | AND (a, b) -> Int32.logand (eval32 a) (eval32 b) + | XOR (a, b) -> Int32.logxor (eval32 a) (eval32 b) + | OR (a, b) -> Int32.logor (eval32 a) (eval32 b) + | NOT a -> Int32.lognot (eval32 a) + | NEG a -> Int32.neg (eval32 a) + | F_POS f -> + checkdef "file position" + f.fixup_name f.fixup_file_pos expandInt + | F_SZ f -> + checkdef "file size" + f.fixup_name f.fixup_file_sz expandInt + | M_POS f -> + checkdef "mem position" + f.fixup_name f.fixup_mem_pos chop64 + | M_SZ f -> + checkdef "mem size" f.fixup_name f.fixup_mem_sz chop64 + | EXT i -> Int32.of_int i +;; + +let rec eval64 (e:expr64) + : int64 = + let checkdef kind name v inj = + match v with + None -> + raise (Undef_sym (kind ^ " fixup '" + ^ name ^ "' undefined in eval64")) + | Some x -> inj x + in + match e with + IMM i -> i + | ADD (a, b) -> Int64.add (eval64 a) (eval64 b) + | SUB (a, b) -> Int64.sub (eval64 a) (eval64 b) + | MUL (a, b) -> Int64.mul (eval64 a) (eval64 b) + | DIV (a, b) -> Int64.div (eval64 a) (eval64 b) 
+ | REM (a, b) -> Int64.rem (eval64 a) (eval64 b) + | MAX (a, b) -> i64_max (eval64 a) (eval64 b) + | ALIGN (a, b) -> i64_align (eval64 a) (eval64 b) + | SLL (a, b) -> Int64.shift_left (eval64 a) b + | SLR (a, b) -> Int64.shift_right_logical (eval64 a) b + | SAR (a, b) -> Int64.shift_right (eval64 a) b + | AND (a, b) -> Int64.logand (eval64 a) (eval64 b) + | XOR (a, b) -> Int64.logxor (eval64 a) (eval64 b) + | OR (a, b) -> Int64.logor (eval64 a) (eval64 b) + | NOT a -> Int64.lognot (eval64 a) + | NEG a -> Int64.neg (eval64 a) + | F_POS f -> + checkdef "file position" + f.fixup_name f.fixup_file_pos Int64.of_int + | F_SZ f -> + checkdef "file size" + f.fixup_name f.fixup_file_sz Int64.of_int + | M_POS f -> + checkdef "mem position" + f.fixup_name f.fixup_mem_pos (fun x -> x) + | M_SZ f -> + checkdef "mem size" + f.fixup_name f.fixup_mem_sz (fun x -> x) + | EXT e -> Int64.of_int32 (eval32 e) +;; + + +type frag = + MARK (* MARK == 'PAD (IMM 0L)' *) + | SEQ of frag array + | PAD of int + | BSS of int64 + | MEMPOS of int64 + | BYTE of int + | BYTES of int array + | CHAR of char + | STRING of string + | ZSTRING of string + | ULEB128 of expr64 + | SLEB128 of expr64 + | WORD of (ty_mach * expr64) + | ALIGN_FILE of (int * frag) + | ALIGN_MEM of (int * frag) + | DEF of (fixup * frag) + | RELAX of relaxation + +and relaxation = + { relax_options: frag array; + relax_choice: int ref; } +;; + +exception Relax_more of relaxation;; + +let new_relaxation (frags:frag array) = + RELAX { relax_options = frags; + relax_choice = ref ((Array.length frags) - 1); } +;; + + +let rec write_frag + ~(sess:Session.sess) + ~(lsb0:bool) + ~(buf:Buffer.t) + ~(frag:frag) + : unit = + let relax = Queue.create () in + let bump_relax r = + iflog sess (fun _ -> + log sess "bumping relaxation to position %d" + ((!(r.relax_choice)) - 1)); + r.relax_choice := (!(r.relax_choice)) - 1; + if !(r.relax_choice) < 0 + then bug () "relaxation ran out of options" + in + let rec loop _ = + Queue.clear relax; + 
Buffer.clear buf; + resolve_frag_full relax frag; + lower_frag ~sess ~lsb0 ~buf ~relax ~frag; + if Queue.is_empty relax + then () + else + begin + iflog sess (fun _ -> log sess "relaxing"); + Queue.iter bump_relax relax; + loop () + end + in + loop () + + +(* Resolution pass: walk the frag tree, advancing a file position and + a memory position, recording both in each DEF's fixup, and queueing + any RELAX whose current choice raises Bad_fit. *) +and resolve_frag_full (relax:relaxation Queue.t) (frag:frag) + : unit = + let file_pos = ref 0 in + let mem_pos = ref 0L in + let bump i = + mem_pos := Int64.add (!mem_pos) (Int64.of_int i); + file_pos := (!file_pos) + i + in + + (* Size pass for ULEB128: bump once per 7-bit group. *) + let uleb (e:expr64) : unit = + let rec loop value = + let value = Int64.shift_right_logical value 7 in + if value = 0L + then bump 1 + else + begin + bump 1; + loop value + end + in + loop (eval64 e) + in + + (* Size pass for SLEB128: bump once per byte the encoder will emit. + The 0x7f payload mask mirrors the sleb writer in lower_frag; only + bit 0x40 of the masked byte is inspected here. *) + let sleb (e:expr64) : unit = + let rec loop value = + let byte = Int64.logand value 0x7fL in + let value = Int64.shift_right value 7 in + let signbit = Int64.logand byte 0x40L in + if (((value = 0L) && (signbit = 0L)) || + ((value = -1L) && (signbit = 0x40L))) + then bump 1 + else + begin + bump 1; + loop value + end + in + loop (eval64 e) + in + let rec resolve_frag it = + match it with + | MARK -> () + | SEQ frags -> Array.iter resolve_frag frags + | PAD i -> bump i + | BSS i -> mem_pos := Int64.add (!mem_pos) i + | MEMPOS i -> mem_pos := i + | BYTE _ -> bump 1 + | BYTES ia -> bump (Array.length ia) + | CHAR _ -> bump 1 + | STRING s -> bump (String.length s) + | ZSTRING s -> bump ((String.length s) + 1) + | ULEB128 e -> uleb e + | SLEB128 e -> sleb e + | WORD (mach,_) -> bump (bytes_of_ty_mach mach) + | ALIGN_FILE (n, frag) -> + let spill = (!file_pos) mod n in + let pad = (n - spill) mod n in + file_pos := (!file_pos) + pad; + (* + * NB: aligning the file *causes* likewise alignment of + * memory, since we implement "file alignment" by + * padding! 
+ *) + mem_pos := Int64.add (!mem_pos) (Int64.of_int pad); + resolve_frag frag + + | ALIGN_MEM (n, frag) -> + let n64 = Int64.of_int n in + let spill = Int64.rem (!mem_pos) n64 in + let pad = Int64.rem (Int64.sub n64 spill) n64 in + mem_pos := Int64.add (!mem_pos) pad; + resolve_frag frag + + | DEF (f, i) -> + let fpos1 = !file_pos in + let mpos1 = !mem_pos in + resolve_frag i; + f.fixup_file_pos <- Some fpos1; + f.fixup_mem_pos <- Some mpos1; + f.fixup_file_sz <- Some ((!file_pos) - fpos1); + f.fixup_mem_sz <- Some (Int64.sub (!mem_pos) mpos1) + + | RELAX rel -> + begin + try + resolve_frag rel.relax_options.(!(rel.relax_choice)) + with + Bad_fit _ -> Queue.add rel relax + end + in + resolve_frag frag + +and lower_frag + ~(sess:Session.sess) + ~(lsb0:bool) + ~(buf:Buffer.t) + ~(relax:relaxation Queue.t) + ~(frag:frag) + : unit = + let byte (i:int) = + if i < 0 + then raise (Bad_fit "byte underflow") + else + if i > 255 + then raise (Bad_fit "byte overflow") + else Buffer.add_char buf (Char.chr i) + in + + let uleb (e:expr64) : unit = + let emit1 k = Buffer.add_char buf (Char.chr (Int64.to_int k)) in + let rec loop value = + let byte = Int64.logand value 0x7fL in + let value = Int64.shift_right_logical value 7 in + if value = 0L + then emit1 byte + else + begin + emit1 (Int64.logor byte 0x80L); + loop value + end + in + loop (eval64 e) + in + + let sleb (e:expr64) : unit = + let emit1 k = Buffer.add_char buf (Char.chr (Int64.to_int k)) in + let rec loop value = + let byte = Int64.logand value 0x7fL in + let value = Int64.shift_right value 7 in + let signbit = Int64.logand byte 0x40L in + if (((value = 0L) && (signbit = 0L)) || + ((value = -1L) && (signbit = 0x40L))) + then emit1 byte + else + begin + emit1 (Int64.logor byte 0x80L); + loop value + end + in + loop (eval64 e) + in + + let word (nbytes:int) (signed:bool) (e:expr64) = + let i = eval64 e in + + (* + FIXME: + + We should really base the entire assembler and memory-position + system on Big_int.big_int, but 
in ocaml the big_int type lacks, + oh, just about every useful function (no format string spec, no + bitwise ops, blah blah) so it's useless; we're stuck on int64 + for bootstrapping. + + For the time being we're just going to require you to represent + those few unsigned 64 bit terms you have in mind via their + signed bit pattern. Suboptimal but it's the best we can do. + *) + + let (top,bot) = + if nbytes >= 8 + then + if signed + then (Int64.max_int,Int64.min_int) + else (Int64.max_int,0L) + else + if signed + then + let bound = (Int64.shift_left 1L ((8 * nbytes) - 1)) in + (Int64.sub bound 1L, Int64.neg bound) + else + let bound = (Int64.shift_left 1L (8 * nbytes)) in + (Int64.sub bound 1L, 0L) + in + + let mask1 = Int64.logand 0xffL in + let shift = Int64.shift_right_logical in + let emit1 k = Buffer.add_char buf (Char.chr (Int64.to_int k)) in + if Int64.compare i bot = (-1) + then raise (Bad_fit ("word underflow: " + ^ (Int64.to_string i) + ^ " into " + ^ (string_of_int nbytes) + ^ (if signed then " signed" else " unsigned") + ^ " bytes")) + else + if Int64.compare i top = 1 + then raise (Bad_fit ("word overflow: " + ^ (Int64.to_string i) + ^ " into " + ^ (string_of_int nbytes) + ^ (if signed then " signed" else " unsigned") + ^ " bytes")) + else + if lsb0 + then + for n = 0 to (nbytes - 1) do + emit1 (mask1 (shift i (8*n))) + done + else + for n = (nbytes - 1) downto 0 do + emit1 (mask1 (shift i (8*n))) + done + in + match frag with + MARK -> () + + | SEQ frags -> + Array.iter + begin + fun frag -> + lower_frag ~sess ~lsb0 ~buf ~relax ~frag + end frags + + | PAD c -> + for i = 1 to c do + Buffer.add_char buf '\x00' + done + + | BSS _ -> () + + | MEMPOS _ -> () + + | BYTE i -> byte i + + | BYTES bs -> + iflog sess (fun _ -> log sess "lowering %d bytes" + (Array.length bs)); + Array.iter byte bs + + | CHAR c -> + iflog sess (fun _ -> log sess "lowering char: %c" c); + Buffer.add_char buf c + + | STRING s -> + iflog sess (fun _ -> log sess "lowering string: 
%s" s); + Buffer.add_string buf s + + | ZSTRING s -> + iflog sess (fun _ -> log sess "lowering zstring: %s" s); + Buffer.add_string buf s; + byte 0 + + | ULEB128 e -> uleb e + | SLEB128 e -> sleb e + + | WORD (m,e) -> + iflog sess + (fun _ -> + log sess "lowering word %s" + (string_of_ty_mach m)); + word (bytes_of_ty_mach m) (mach_is_signed m) e + + | ALIGN_FILE (n, frag) -> + let spill = (Buffer.length buf) mod n in + let pad = (n - spill) mod n in + for i = 1 to pad do + Buffer.add_char buf '\x00' + done; + lower_frag sess lsb0 buf relax frag + + | ALIGN_MEM (_, i) -> lower_frag sess lsb0 buf relax i + | DEF (f, i) -> + iflog sess (fun _ -> log sess "lowering fixup: %s" f.fixup_name); + lower_frag sess lsb0 buf relax i; + + | RELAX rel -> + begin + try + lower_frag sess lsb0 buf relax + rel.relax_options.(!(rel.relax_choice)) + with + Bad_fit _ -> Queue.add rel relax + end +;; + +let fold_flags (f:'a -> int64) (flags:'a list) : int64 = + List.fold_left (Int64.logor) 0x0L (List.map f flags) +;; + +let write_out_frag sess lsb0 frag = + let buf = Buffer.create 0xffff in + let file = Session.filename_of sess.Session.sess_out in + let out = open_out_bin file in + write_frag ~sess ~lsb0 ~buf ~frag; + Buffer.output_buffer out buf; + flush out; + close_out out; + Unix.chmod file 0o755 +;; + +(* Asm-reader stuff for loading info back from mapped files. *) +(* + * Unfortunately the ocaml Bigarray interface takes 'int' indices, so + * f.e. can't do 64-bit offsets / files when running on a 32bit platform. + * Despite the fact that we can possibly produce them. Sigh. Yet another + * "bootstrap compiler limitation". 
+ *) +type asm_reader = + { + asm_seek: int -> unit; + asm_get_u32: unit -> int; + asm_get_u16: unit -> int; + asm_get_u8: unit -> int; + asm_get_uleb: unit -> int; + asm_get_zstr: unit -> string; + asm_get_zstr_padded: int -> string; + asm_get_off: unit -> int; + asm_adv: int -> unit; + asm_adv_u32: unit -> unit; + asm_adv_u16: unit -> unit; + asm_adv_u8: unit -> unit; + asm_adv_zstr: unit -> unit; + asm_close: unit -> unit; + } +;; + +type mmap_arr = + (int, Bigarray.int8_unsigned_elt, Bigarray.c_layout) + Bigarray.Array1.t +;; + +let new_asm_reader (sess:Session.sess) (s:filename) : asm_reader = + iflog sess (fun _ -> log sess "opening file %s" s); + let fd = Unix.openfile s [ Unix.O_RDONLY ] 0 in + let arr = (Bigarray.Array1.map_file + fd ~pos:0L + Bigarray.int8_unsigned + Bigarray.c_layout + false (-1)) + in + let tmp = ref Nativeint.zero in + let buf = Buffer.create 16 in + let off = ref 0 in + let is_open = ref true in + let get_word_as_int (nbytes:int) : int = + assert (!is_open); + let lsb0 = true in + tmp := Nativeint.zero; + if lsb0 + then + for j = nbytes-1 downto 0 do + tmp := Nativeint.shift_left (!tmp) 8; + tmp := Nativeint.logor (!tmp) (Nativeint.of_int arr.{(!off) + j}) + done + else + for j = 0 to nbytes-1 do + tmp := Nativeint.shift_left (!tmp) 8; + tmp := Nativeint.logor (!tmp) (Nativeint.of_int arr.{(!off) + j}) + done; + off := (!off) + nbytes; + Nativeint.to_int (!tmp) + in + let get_zstr_padded pad_opt = + assert (!is_open); + let i = ref (!off) in + Buffer.clear buf; + let buflen_ok _ = + match pad_opt with + None -> true + | Some pad -> (Buffer.length buf) < pad + in + while arr.{!i} != 0 && (buflen_ok()) do + Buffer.add_char buf (Char.chr arr.{!i}); + incr i + done; + begin + match pad_opt with + None -> off := (!off) + (Buffer.length buf) + 1 + | Some pad -> + begin + assert ((Buffer.length buf) <= pad); + off := (!off) + pad + end + end; + Buffer.contents buf + in + let bump i = + assert (!is_open); + off := (!off) + i + in + { + 
asm_seek = (fun i -> off := i); + asm_get_u32 = (fun _ -> get_word_as_int 4); + asm_get_u16 = (fun _ -> get_word_as_int 2); + asm_get_u8 = (fun _ -> get_word_as_int 1); + asm_get_uleb = + begin + fun _ -> + let rec loop result shift = + let byte = arr.{!off} in + incr off; + let result = result lor ((byte land 0x7f) lsl shift) in + if (byte land 0x80) = 0 + then result + else loop result (shift+7) + in + loop 0 0 + end; + asm_get_zstr = (fun _ -> get_zstr_padded None); + asm_get_zstr_padded = (fun pad -> get_zstr_padded (Some pad)); + asm_get_off = (fun _ -> !off); + asm_adv = bump; + asm_adv_u32 = (fun _ -> bump 4); + asm_adv_u16 = (fun _ -> bump 2); + asm_adv_u8 = (fun _ -> bump 1); + asm_adv_zstr = (fun _ -> while arr.{!off} != 0 + do incr off done); + asm_close = (fun _ -> + assert (!is_open); + Unix.close fd; + is_open := false) + } +;; + + +(* + * Metadata note-section encoding / decoding. + * + * Since the only object format that defines a "note" section at all is + * ELF, we model the contents of the metadata section on ELF's + * notes. But the same blob of data is stuck into PE and Mach-O files + * too. + * + * The format is essentially just the ELF note format: + * + * + * + * + * + * <0-pad to 4-byte boundary> + * + * + * ... 
+ * + * <0-pad to 4-byte boundary> + * + *) +let note_rust_frags (meta:(Ast.ident * string) array) : frag = + let desc_fixup = new_fixup ".rust.note metadata" in + let desc = + DEF (desc_fixup, + SEQ [| + WORD (TY_u32, IMM (Int64.of_int (Array.length meta))); + SEQ (Array.map + (fun (k,v) -> SEQ [| ZSTRING k; ZSTRING v; |]) + meta); + ALIGN_FILE (4, MARK) |]) + in + let name = "rust" in + let ty = 0L in + let padded_name = SEQ [| ZSTRING name; + ALIGN_FILE (4, MARK) |] + in + let name_sz = IMM (Int64.of_int ((String.length name) + 1)) in + SEQ [| WORD (TY_u32, name_sz); + WORD (TY_u32, F_SZ desc_fixup); + WORD (TY_u32, IMM ty); + padded_name; + desc;|] +;; + +let read_rust_note (ar:asm_reader) : (Ast.ident * string) array = + ar.asm_adv_u32 (); + ar.asm_adv_u32 (); + assert ((ar.asm_get_u32 ()) = 0); + let rust_name = ar.asm_get_zstr_padded 8 in + assert (rust_name = "rust"); + let n = ar.asm_get_u32() in + let meta = Queue.create () in + for i = 1 to n + do + let k = ar.asm_get_zstr() in + let v = ar.asm_get_zstr() in + Queue.add (k,v) meta + done; + queue_to_arr meta +;; + +(* + * Local Variables: + * fill-column: 78; + * indent-tabs-mode: nil + * buffer-file-coding-system: utf-8-unix + * compile-command: "make -k -C ../.. 2>&1 | sed -e 's/\\/x\\//x:\\//g'"; + * End: + *) diff --git a/src/boot/be/elf.ml b/src/boot/be/elf.ml new file mode 100644 index 00000000000..56905b2a4d4 --- /dev/null +++ b/src/boot/be/elf.ml @@ -0,0 +1,1760 @@ +(* + * Module for writing System V ELF files. + * + * FIXME: Presently heavily infected with x86 and elf32 specificities, + * though they are reasonably well marked. Needs to be refactored to + * depend on abi fields if it's to be usable for other elf + * configurations. 
+ *) + +open Asm;; +open Common;; + +let log (sess:Session.sess) = + Session.log "obj (elf)" + sess.Session.sess_log_obj + sess.Session.sess_log_out +;; + +let iflog (sess:Session.sess) (thunk:(unit -> unit)) : unit = + if sess.Session.sess_log_obj + then thunk () + else () +;; + + +(* Fixed sizes of structs involved in elf32 spec. *) +let elf32_ehsize = 52L;; +let elf32_phentsize = 32L;; +let elf32_shentsize = 40L;; +let elf32_symsize = 16L;; +let elf32_rela_entsz = 0xcL;; + +type ei_class = + ELFCLASSNONE + | ELFCLASS32 + | ELFCLASS64 +;; + + +type ei_data = + ELFDATANONE + | ELFDATA2LSB + | ELFDATA2MSB +;; + + +let elf_identification ei_class ei_data = + SEQ + [| + STRING "\x7fELF"; + BYTES + [| + (match ei_class with (* EI_CLASS *) + ELFCLASSNONE -> 0 + | ELFCLASS32 -> 1 + | ELFCLASS64 -> 2); + (match ei_data with (* EI_DATA *) + ELFDATANONE -> 0 + | ELFDATA2LSB -> 1 + | ELFDATA2MSB -> 2); + 1; (* EI_VERSION = EV_CURRENT *) + 0; (* EI_PAD #7 *) + 0; (* EI_PAD #8 *) + 0; (* EI_PAD #9 *) + 0; (* EI_PAD #A *) + 0; (* EI_PAD #B *) + 0; (* EI_PAD #C *) + 0; (* EI_PAD #D *) + 0; (* EI_PAD #E *) + 0; (* EI_PAD #F *) + |] + |] +;; + + +type e_type = + ET_NONE + | ET_REL + | ET_EXEC + | ET_DYN + | ET_CORE +;; + + +type e_machine = + (* Maybe support more later. 
*) + EM_NONE + | EM_386 + | EM_X86_64 +;; + + +type e_version = + EV_NONE + | EV_CURRENT +;; + + +let elf32_header + ~(sess:Session.sess) + ~(ei_data:ei_data) + ~(e_type:e_type) + ~(e_machine:e_machine) + ~(e_version:e_version) + ~(e_entry_fixup:fixup) + ~(e_phoff_fixup:fixup) + ~(e_shoff_fixup:fixup) + ~(e_phnum:int64) + ~(e_shnum:int64) + ~(e_shstrndx:int64) + : frag = + let elf_header_fixup = new_fixup "elf header" in + let entry_pos = + if sess.Session.sess_library_mode + then (IMM 0L) + else (M_POS e_entry_fixup) + in + DEF + (elf_header_fixup, + SEQ [| elf_identification ELFCLASS32 ei_data; + WORD (TY_u16, (IMM (match e_type with + ET_NONE -> 0L + | ET_REL -> 1L + | ET_EXEC -> 2L + | ET_DYN -> 3L + | ET_CORE -> 4L))); + WORD (TY_u16, (IMM (match e_machine with + EM_NONE -> 0L + | EM_386 -> 3L + | EM_X86_64 -> 62L))); + WORD (TY_u32, (IMM (match e_version with + EV_NONE -> 0L + | EV_CURRENT -> 1L))); + WORD (TY_u32, entry_pos); + WORD (TY_u32, (F_POS e_phoff_fixup)); + WORD (TY_u32, (F_POS e_shoff_fixup)); + WORD (TY_u32, (IMM 0L)); (* e_flags *) + WORD (TY_u16, (IMM elf32_ehsize)); + WORD (TY_u16, (IMM elf32_phentsize)); + WORD (TY_u16, (IMM e_phnum)); + WORD (TY_u16, (IMM elf32_shentsize)); + WORD (TY_u16, (IMM e_shnum)); + WORD (TY_u16, (IMM e_shstrndx)); + |]) +;; + + +type sh_type = + SHT_NULL + | SHT_PROGBITS + | SHT_SYMTAB + | SHT_STRTAB + | SHT_RELA + | SHT_HASH + | SHT_DYNAMIC + | SHT_NOTE + | SHT_NOBITS + | SHT_REL + | SHT_SHLIB + | SHT_DYNSYM +;; + + +type sh_flags = + SHF_WRITE + | SHF_ALLOC + | SHF_EXECINSTR +;; + + +let section_header + ~(shstring_table_fixup:fixup) + ~(shname_string_fixup:fixup) + ~(sh_type:sh_type) + ~(sh_flags:sh_flags list) + ~(section_fixup:fixup option) + ~(sh_addralign:int64) + ~(sh_entsize:int64) + ~(sh_link:int64 option) + : frag = + SEQ + [| + WORD (TY_i32, (SUB + ((F_POS shname_string_fixup), + (F_POS shstring_table_fixup)))); + WORD (TY_u32, (IMM (match sh_type with + SHT_NULL -> 0L + | SHT_PROGBITS -> 1L + | 
SHT_SYMTAB -> 2L + | SHT_STRTAB -> 3L + | SHT_RELA -> 4L + | SHT_HASH -> 5L + | SHT_DYNAMIC -> 6L + | SHT_NOTE -> 7L + | SHT_NOBITS -> 8L + | SHT_REL -> 9L + | SHT_SHLIB -> 10L + | SHT_DYNSYM -> 11L))); + WORD (TY_u32, (IMM (fold_flags + (fun f -> match f with + SHF_WRITE -> 0x1L + | SHF_ALLOC -> 0x2L + | SHF_EXECINSTR -> 0x4L) sh_flags))); (* sh_flags *) + WORD (TY_u32, (match section_fixup with + None -> (IMM 0L) + | Some s -> (M_POS s))); (* sh_addr *) + WORD (TY_u32, (match section_fixup with + None -> (IMM 0L) + | Some s -> (F_POS s))); (* sh_offset *) + WORD (TY_u32, (match section_fixup with + None -> (IMM 0L) + | Some s -> (F_SZ s))); (* sh_size *) + WORD (TY_u32, (IMM (match sh_link with + None -> 0L + | Some i -> i))); (* sh_link *) + WORD (TY_u32, (IMM 0L)); (* sh_info *) + WORD (TY_u32, (IMM sh_addralign)); + WORD (TY_u32, (IMM sh_entsize)); + |] +;; + + +type p_type = + PT_NULL + | PT_LOAD + | PT_DYNAMIC + | PT_INTERP + | PT_NOTE + | PT_SHLIB + | PT_PHDR +;; + + +type p_flag = + PF_X + | PF_W + | PF_R +;; + + +(* Emits one elf32 program header describing the segment marked by + segment_fixup. The same memory position is written for both the + virtual and the physical address. *) +let program_header + ~(p_type:p_type) + ~(segment_fixup:fixup) + ~(p_flags:p_flag list) + ~(p_align:int64) + : frag = + SEQ + [| + WORD (TY_u32, (IMM (match p_type with + PT_NULL -> 0L + | PT_LOAD -> 1L + | PT_DYNAMIC -> 2L + | PT_INTERP -> 3L + | PT_NOTE -> 4L + | PT_SHLIB -> 5L + | PT_PHDR -> 6L))); + WORD (TY_u32, (F_POS segment_fixup)); (* p_offset *) + WORD (TY_u32, (M_POS segment_fixup)); (* p_vaddr *) + WORD (TY_u32, (M_POS segment_fixup)); (* p_paddr *) + WORD (TY_u32, (F_SZ segment_fixup)); (* p_filesz *) + WORD (TY_u32, (M_SZ segment_fixup)); (* p_memsz *) + WORD (TY_u32, (IMM (fold_flags + (fun f -> + match f with + PF_X -> 0x1L + | PF_W -> 0x2L + | PF_R -> 0x4L) + p_flags))); (* p_flags *) + WORD (TY_u32, (IMM p_align)); + |] +;; + + +type st_bind = + STB_LOCAL + | STB_GLOBAL + | STB_WEAK +;; + + +type st_type = + STT_NOTYPE + | STT_OBJECT + | STT_FUNC + | STT_SECTION + | STT_FILE +;; + + +(* Special symbol-section indices. 0xfff2 is SHN_COMMON in the ELF + spec; it was previously bound to shn_ABS a second time, which + shadowed the correct 0xfff1 value. *) +let shn_UNDEF = 0L;; +let shn_ABS = 0xfff1L;; +let shn_COMMON = 0xfff2L;; + + +let symbol + ~(string_table_fixup:fixup) + ~(name_string_fixup:fixup) + 
~(sym_target_fixup:fixup option) + ~(st_bind:st_bind) + ~(st_type:st_type) + ~(st_shndx:int64) + : frag = + let st_bind_num = + match st_bind with + STB_LOCAL -> 0L + | STB_GLOBAL -> 1L + | STB_WEAK -> 2L + in + let st_type_num = + match st_type with + STT_NOTYPE -> 0L + | STT_OBJECT -> 1L + | STT_FUNC -> 2L + | STT_SECTION -> 3L + | STT_FILE -> 4L + in + SEQ + [| + WORD (TY_u32, (SUB + ((F_POS name_string_fixup), + (F_POS string_table_fixup)))); + WORD (TY_u32, (match sym_target_fixup with + None -> (IMM 0L) + | Some f -> (M_POS f))); + WORD (TY_u32, (match sym_target_fixup with + None -> (IMM 0L) + | Some f -> (M_SZ f))); + WORD (TY_u8, (* st_info *) + (OR + ((SLL ((IMM st_bind_num), 4)), + (AND ((IMM st_type_num), (IMM 0xfL)))))); + WORD (TY_u8, (IMM 0L)); (* st_other *) + WORD (TY_u16, (IMM st_shndx)); + |] +;; + +type d_tag = + DT_NULL + | DT_NEEDED + | DT_PLTRELSZ + | DT_PLTGOT + | DT_HASH + | DT_STRTAB + | DT_SYMTAB + | DT_RELA + | DT_RELASZ + | DT_RELAENT + | DT_STRSZ + | DT_SYMENT + | DT_INIT + | DT_FINI + | DT_SONAME + | DT_RPATH + | DT_SYMBOLIC + | DT_REL + | DT_RELSZ + | DT_RELENT + | DT_PLTREL + | DT_DEBUG + | DT_TEXTREL + | DT_JMPREL + | DT_BIND_NOW + | DT_INIT_ARRAY + | DT_FINI_ARRAY + | DT_INIT_ARRAYSZ + | DT_FINI_ARRAYSZ + | DT_RUNPATH + | DT_FLAGS + | DT_ENCODING + | DT_PREINIT_ARRAY + | DT_PREINIT_ARRAYSZ +;; + +type elf32_dyn = (d_tag * expr64);; + +let elf32_num_of_dyn_tag tag = + match tag with + DT_NULL -> 0L + | DT_NEEDED -> 1L + | DT_PLTRELSZ -> 2L + | DT_PLTGOT -> 3L + | DT_HASH -> 4L + | DT_STRTAB -> 5L + | DT_SYMTAB -> 6L + | DT_RELA -> 7L + | DT_RELASZ -> 8L + | DT_RELAENT -> 9L + | DT_STRSZ -> 10L + | DT_SYMENT -> 11L + | DT_INIT -> 12L + | DT_FINI -> 13L + | DT_SONAME -> 14L + | DT_RPATH -> 15L + | DT_SYMBOLIC -> 16L + | DT_REL -> 17L + | DT_RELSZ -> 18L + | DT_RELENT -> 19L + | DT_PLTREL -> 20L + | DT_DEBUG -> 21L + | DT_TEXTREL -> 22L + | DT_JMPREL -> 23L + | DT_BIND_NOW -> 24L + | DT_INIT_ARRAY -> 25L + | DT_FINI_ARRAY -> 26L + | 
DT_INIT_ARRAYSZ -> 27L + | DT_FINI_ARRAYSZ -> 28L + | DT_RUNPATH -> 29L + | DT_FLAGS -> 30L + | DT_ENCODING -> 31L + | DT_PREINIT_ARRAY -> 32L + | DT_PREINIT_ARRAYSZ -> 33L +;; + +let elf32_dyn_frag d = + let (tag, expr) = d in + let tagval = elf32_num_of_dyn_tag tag in + SEQ [| WORD (TY_u32, (IMM tagval)); WORD (TY_u32, expr) |] +;; + +type elf32_386_reloc_type = + R_386_NONE + | R_386_32 + | R_386_PC32 + | R_386_GOT32 + | R_386_PLT32 + | R_386_COPY + | R_386_GLOB_DAT + | R_386_JMP_SLOT + | R_386_RELATIVE + | R_386_GOTOFF + | R_386_GOTPC +;; + + +type elf32_386_rela = + { elf32_386_rela_type: elf32_386_reloc_type; + elf32_386_rela_offset: expr64; + elf32_386_rela_sym: expr64; + elf32_386_rela_addend: expr64 } +;; + +let elf32_386_rela_frag r = + let type_val = + match r.elf32_386_rela_type with + R_386_NONE -> 0L + | R_386_32 -> 1L + | R_386_PC32 -> 2L + | R_386_GOT32 -> 3L + | R_386_PLT32 -> 4L + | R_386_COPY -> 5L + | R_386_GLOB_DAT -> 6L + | R_386_JMP_SLOT -> 7L + | R_386_RELATIVE -> 8L + | R_386_GOTOFF -> 9L + | R_386_GOTPC -> 10L + in + let info_expr = + WORD (TY_u32, + (OR + (SLL ((r.elf32_386_rela_sym), 8), + AND ((IMM 0xffL), (IMM type_val))))) + in + SEQ [| WORD (TY_u32, r.elf32_386_rela_offset); + info_expr; + WORD (TY_u32, r.elf32_386_rela_addend) |] +;; + + +let elf32_linux_x86_file + ~(sess:Session.sess) + ~(crate:Ast.crate) + ~(entry_name:string) + ~(text_frags:(string option, frag) Hashtbl.t) + ~(data_frags:(string option, frag) Hashtbl.t) + ~(rodata_frags:(string option, frag) Hashtbl.t) + ~(required_fixups:(string, fixup) Hashtbl.t) + ~(dwarf:Dwarf.debug_records) + ~(sem:Semant.ctxt) + ~(needed_libs:string array) + : frag = + + (* Procedure Linkage Tables (PLTs), Global Offset Tables + * (GOTs), and the relocations that set them up: + * + * The PLT goes in a section called .plt and GOT in a section called + * .got. The portion of the GOT that holds PLT jump slots goes in a + * section called .got.plt. 
Dynamic relocations for these jump slots go in + * section .rela.plt. + * + * The easiest way to understand the PLT/GOT system is to draw it: + * + * PLT GOT + * +----------------------+ +----------------------+ + * 0| push GOT[1] 0| <reserved for ld.so> + * | jmp *GOT[2] 1| <reserved for ld.so> + * | 2| & <ld.so symbol resolver> + * 1| jmp *GOT[3] 3| & <'push 0' in PLT[1]> + * | push 0 4| & <'push 1' in PLT[2]> + * | jmp *PLT[0] 5| & <'push 2' in PLT[3]> + * | + * 2| jmp *GOT[4] + * | push 1 + * | jmp *PLT[0] + * | + * 3| jmp *GOT[5] + * | push 2 + * | jmp *PLT[0] + * + * + * In normal user code, we call PLT entries with a call to a + * PC-relative address, the PLT entry, which itself does an indirect + * jump through a slot in the GOT that it also addresses + * PC-relative. This makes the whole scheme PIC. + * + * The linker fills in the GOT on startup. For the first 3 slots, it + * uses its own thinking. For the remainder it needs to be instructed to + * fill them in with "jump slot relocs", type R_386_JMP_SLOT, each + * of which says in effect which PLT entry it's to point back to and + * which symbol it's to be resolved to later. These relocs go in the + * section .rela.plt. 
+ *) + + let plt0_fixup = new_fixup "PLT[0]" in + let got_prefix = SEQ [| WORD (TY_u32, (IMM 0L)); + WORD (TY_u32, (IMM 0L)); + WORD (TY_u32, (IMM 0L)); |] + in + + let got_cell reg i = + let got_entry_off = Int64.of_int (i*4) in + let got_entry_mem = Il.RegIn (reg, (Some (Asm.IMM got_entry_off))) in + Il.Mem (got_entry_mem, Il.ScalarTy (Il.AddrTy Il.CodeTy)) + in + + let got_code_cell reg i = + Il.CodePtr (Il.Cell (got_cell reg i)) + in + + let plt0_frag = + let reg = Il.Hreg X86.eax in + let e = X86.new_emitter_without_vregs () in + Il.emit e (Il.Push (Il.Cell (got_cell reg 1))); + Il.emit e (Il.jmp Il.JMP (got_code_cell reg 2)); + Il.emit e Il.Nop; + Il.emit e Il.Nop; + Il.emit e Il.Nop; + Il.emit e Il.Nop; + DEF (plt0_fixup, (X86.frags_of_emitted_quads sess e)) + in + + (* + * The existence of the GOT/PLT mish-mash causes, therefore, the + * following new sections: + * + * .plt - the PLT itself, in the r/x text segment + * .got.plt - the PLT-used portion of the GOT, in the r/w segment + * .rela.plt - the dynamic relocs for the GOT-PLT, in the r/x segment + * + * In addition, because we're starting up a dynamically linked executable, + * we have to have several more sections! + * + * .interp - the read-only section that names ld.so + * .dynsym - symbols named by the PLT/GOT entries, r/x segment + * .dynstr - string-names used in those symbols, r/x segment + * .hash - hashtable in which to look these up, r/x segment + * .dynamic - the machine-readable description of the dynamic + * linkage requirements of this elf file, in the + * r/w _DYNAMIC segment + * + * The Dynamic section contains a sequence of 2-word records of type + * d_tag. + * + *) + + (* There are 17 official section headers in the file we're making: *) + (* *) + (* section 0: *) + (* *) + (* section 1: .interp (segment 1: R+X, INTERP) *) + (* *) + (* section 2: .text (segment 2: R+X, LOAD) *) + (* section 3: .rodata ... *) + (* section 4: .dynsym ... *) + (* section 5: .dynstr ... 
*) + (* section 6: .hash ... *) + (* section 7: .plt ... *) + (* section 8: .got.plt ... *) + (* section 9: .rela.plt ... *) + (* *) + (* section 10: .data (segment 3: R+W, LOAD) *) + (* section 11: .bss ... *) + (* *) + (* section 12: .dynamic (segment 4: R+W, DYNAMIC) *) + (* *) + (* section 13: .shstrtab (not in a segment) *) + (* section 14: .debug_aranges (segment 2: cont'd) *) + (* section 15: .debug_pubnames ... *) + (* section 16: .debug_info ... *) + (* section 17: .debug_abbrev ... *) + (* section 18: .debug_line ... *) + (* section 19: .debug_frame ... *) + (* section 20: .note.rust (segment 5: NOTE) *) + (* NOTE(review): indices 14-20 assume the header table keeps this *) + (* listing order -- confirm against section_headers. *) + + (* One fixup per section-name string; each is DEF'd inside the + .shstrtab contents so headers can refer to their name's offset. *) + let sname s = + new_fixup (Printf.sprintf "string name of '%s' section" s) + in + let null_section_name_fixup = sname "" in + let interp_section_name_fixup = sname ".interp"in + let text_section_name_fixup = sname ".text" in + let rodata_section_name_fixup = sname ".rodata" in + let dynsym_section_name_fixup = sname ".dynsym" in + let dynstr_section_name_fixup = sname ".dynstr" in + let hash_section_name_fixup = sname ".hash" in + let plt_section_name_fixup = sname ".plt" in + let got_plt_section_name_fixup = sname ".got.plt" in + let rela_plt_section_name_fixup = sname ".rela.plt" in + let data_section_name_fixup = sname ".data" in + let bss_section_name_fixup = sname ".bss" in + let dynamic_section_name_fixup = sname ".dynamic" in + let shstrtab_section_name_fixup = sname ".shstrtab" in + let debug_aranges_section_name_fixup = sname ".debug_aranges" in + let debug_pubnames_section_name_fixup = sname ".debug_pubnames" in + let debug_info_section_name_fixup = sname ".debug_info" in + let debug_abbrev_section_name_fixup = sname ".debug_abbrev" in + let debug_line_section_name_fixup = sname ".debug_line" in + let debug_frame_section_name_fixup = sname ".debug_frame" in + let note_rust_section_name_fixup = sname ".note.rust" in + + (* let interpndx = 1L in *) (* Section index of .interp *) + let textndx = 2L in (* Section index of .text *) + let rodatandx = 
3L in (* Section index of .rodata *) + let dynsymndx = 4L in (* Section index of .dynsym *) + let dynstrndx = 5L in (* Section index of .dynstr *) + (* let hashndx = 6L in *) (* Section index of .hash *) + (* let pltndx = 7L in *) (* Section index of .plt *) + (* let gotpltndx = 8L in *) (* Section index of .got.plt *) + (* let relapltndx = 9L in *) (* Section index of .rela.plt *) + let datandx = 10L in (* Section index of .data *) + (* let bssndx = 11L in *) (* Section index of .bss *) + (* let dynamicndx = 12L in *) (* Section index of .dynamic *) + let shstrtabndx = 13L in (* Section index of .shstrtab *) + + (* One fixup per section body. The string is only a label + (presumably what fixup diagnostics print), so each must name + its own section. *) + let section_header_table_fixup = new_fixup ".section header table" in + let interp_section_fixup = new_fixup ".interp section" in + let text_section_fixup = new_fixup ".text section" in + let rodata_section_fixup = new_fixup ".rodata section" in + let dynsym_section_fixup = new_fixup ".dynsym section" in + let dynstr_section_fixup = new_fixup ".dynstr section" in + let hash_section_fixup = new_fixup ".hash section" in + let plt_section_fixup = new_fixup ".plt section" in + let got_plt_section_fixup = new_fixup ".got.plt section" in + let rela_plt_section_fixup = new_fixup ".rela.plt section" in + let data_section_fixup = new_fixup ".data section" in + let bss_section_fixup = new_fixup ".bss section" in + let dynamic_section_fixup = new_fixup ".dynamic section" in + let shstrtab_section_fixup = new_fixup ".shstrtab section" in + let note_rust_section_fixup = new_fixup ".note.rust section" in + + let shstrtab_section = + SEQ + [| + DEF (null_section_name_fixup, ZSTRING ""); + DEF (interp_section_name_fixup, ZSTRING ".interp"); + DEF (text_section_name_fixup, ZSTRING ".text"); + DEF (rodata_section_name_fixup, ZSTRING ".rodata"); + DEF (dynsym_section_name_fixup, ZSTRING ".dynsym"); + DEF (dynstr_section_name_fixup, ZSTRING ".dynstr"); + DEF (hash_section_name_fixup, ZSTRING ".hash"); + DEF (plt_section_name_fixup, ZSTRING ".plt"); + DEF 
(got_plt_section_name_fixup, ZSTRING ".got.plt"); + DEF (rela_plt_section_name_fixup, ZSTRING ".rela.plt"); + DEF (data_section_name_fixup, ZSTRING ".data"); + DEF (bss_section_name_fixup, ZSTRING ".bss"); + DEF (dynamic_section_name_fixup, ZSTRING ".dynamic"); + DEF (shstrtab_section_name_fixup, ZSTRING ".shstrtab"); + DEF (debug_aranges_section_name_fixup, ZSTRING ".debug_aranges"); + DEF (debug_pubnames_section_name_fixup, ZSTRING ".debug_pubnames"); + DEF (debug_info_section_name_fixup, ZSTRING ".debug_info"); + DEF (debug_abbrev_section_name_fixup, ZSTRING ".debug_abbrev"); + DEF (debug_line_section_name_fixup, ZSTRING ".debug_line"); + DEF (debug_frame_section_name_fixup, ZSTRING ".debug_frame"); + DEF (note_rust_section_name_fixup, ZSTRING ".note.rust"); + |] + in + + let section_headers = + [| + (* *) + (section_header + ~shstring_table_fixup: shstrtab_section_fixup + ~shname_string_fixup: null_section_name_fixup + ~sh_type: SHT_NULL + ~sh_flags: [] + ~section_fixup: None + ~sh_addralign: 0L + ~sh_entsize: 0L + ~sh_link: None); + + (* .interp *) + (section_header + ~shstring_table_fixup: shstrtab_section_fixup + ~shname_string_fixup: interp_section_name_fixup + ~sh_type: SHT_PROGBITS + ~sh_flags: [ SHF_ALLOC ] + ~section_fixup: (Some interp_section_fixup) + ~sh_addralign: 1L + ~sh_entsize: 0L + ~sh_link: None); + + (* .text *) + (section_header + ~shstring_table_fixup: shstrtab_section_fixup + ~shname_string_fixup: text_section_name_fixup + ~sh_type: SHT_PROGBITS + ~sh_flags: [ SHF_ALLOC; SHF_EXECINSTR ] + ~section_fixup: (Some text_section_fixup) + ~sh_addralign: 32L + ~sh_entsize: 0L + ~sh_link: None); + + (* .rodata *) + (section_header + ~shstring_table_fixup: shstrtab_section_fixup + ~shname_string_fixup: rodata_section_name_fixup + ~sh_type: SHT_PROGBITS + ~sh_flags: [ SHF_ALLOC ] + ~section_fixup: (Some rodata_section_fixup) + ~sh_addralign: 32L + ~sh_entsize: 0L + ~sh_link: None); + + (* .dynsym *) + (section_header + ~shstring_table_fixup: 
shstrtab_section_fixup + ~shname_string_fixup: dynsym_section_name_fixup + ~sh_type: SHT_DYNSYM + ~sh_flags: [ SHF_ALLOC ] + ~section_fixup: (Some dynsym_section_fixup) + ~sh_addralign: 8L + ~sh_entsize: elf32_symsize + ~sh_link: (Some dynstrndx) ); + + (* .dynstr *) + (section_header + ~shstring_table_fixup: shstrtab_section_fixup + ~shname_string_fixup: dynstr_section_name_fixup + ~sh_type: SHT_STRTAB + ~sh_flags: [ SHF_ALLOC ] + ~section_fixup: (Some dynstr_section_fixup) + ~sh_addralign: 1L + ~sh_entsize: 0L + ~sh_link: None); + + (* .hash *) + (section_header + ~shstring_table_fixup: shstrtab_section_fixup + ~shname_string_fixup: hash_section_name_fixup + ~sh_type: SHT_PROGBITS + ~sh_flags: [ SHF_ALLOC ] + ~section_fixup: (Some hash_section_fixup) + ~sh_addralign: 4L + ~sh_entsize: 4L + ~sh_link: (Some dynsymndx)); + + (* .plt *) + (section_header + ~shstring_table_fixup: shstrtab_section_fixup + ~shname_string_fixup: plt_section_name_fixup + ~sh_type: SHT_PROGBITS + ~sh_flags: [ SHF_ALLOC; SHF_EXECINSTR ] + ~section_fixup: (Some plt_section_fixup) + ~sh_addralign: 4L + ~sh_entsize: 0L + ~sh_link: None); + + (* .got.plt *) + (section_header + ~shstring_table_fixup: shstrtab_section_fixup + ~shname_string_fixup: got_plt_section_name_fixup + ~sh_type: SHT_PROGBITS + ~sh_flags: [ SHF_ALLOC; SHF_WRITE ] + ~section_fixup: (Some got_plt_section_fixup) + ~sh_addralign: 4L + ~sh_entsize: 0L + ~sh_link: None); + + (* .rela.plt *) + (section_header + ~shstring_table_fixup: shstrtab_section_fixup + ~shname_string_fixup: rela_plt_section_name_fixup + ~sh_type: SHT_RELA + ~sh_flags: [ SHF_ALLOC ] + ~section_fixup: (Some rela_plt_section_fixup) + ~sh_addralign: 4L + ~sh_entsize: elf32_rela_entsz + ~sh_link: (Some dynsymndx)); + + (* .data *) + (section_header + ~shstring_table_fixup: shstrtab_section_fixup + ~shname_string_fixup: data_section_name_fixup + ~sh_type: SHT_PROGBITS + ~sh_flags: [ SHF_ALLOC; SHF_WRITE ] + ~section_fixup: (Some data_section_fixup) + 
~sh_addralign: 32L + ~sh_entsize: 0L + ~sh_link: None); + + (* .bss *) + (section_header + ~shstring_table_fixup: shstrtab_section_fixup + ~shname_string_fixup: bss_section_name_fixup + ~sh_type: SHT_NOBITS + ~sh_flags: [ SHF_ALLOC; SHF_WRITE ] + ~section_fixup: (Some bss_section_fixup) + ~sh_addralign: 32L + ~sh_entsize: 0L + ~sh_link: None); + + (* .dynamic *) + (section_header + ~shstring_table_fixup: shstrtab_section_fixup + ~shname_string_fixup: dynamic_section_name_fixup + ~sh_type: SHT_DYNAMIC + ~sh_flags: [ SHF_ALLOC; SHF_WRITE ] + ~section_fixup: (Some dynamic_section_fixup) + ~sh_addralign: 8L + ~sh_entsize: 0L + ~sh_link: None); + + (* .shstrtab *) + (section_header + ~shstring_table_fixup: shstrtab_section_fixup + ~shname_string_fixup: shstrtab_section_name_fixup + ~sh_type: SHT_STRTAB + ~sh_flags: [] + ~section_fixup: (Some shstrtab_section_fixup) + ~sh_addralign: 1L + ~sh_entsize: 0L + ~sh_link: None); + +(* + FIXME: uncomment the dwarf section headers as you make use of them; + recent gdb versions have got fussier about parsing dwarf and don't + like seeing junk there. 
+*) + + (* .debug_aranges *) +(* + + (section_header + ~shstring_table_fixup: shstrtab_section_fixup + ~shname_string_fixup: debug_aranges_section_name_fixup + ~sh_type: SHT_PROGBITS + ~sh_flags: [] + ~section_fixup: (Some sem.Semant.ctxt_debug_aranges_fixup) + ~sh_addralign: 8L + ~sh_entsize: 0L + ~sh_link: None); +*) + (* .debug_pubnames *) +(* + (section_header + ~shstring_table_fixup: shstrtab_section_fixup + ~shname_string_fixup: debug_pubnames_section_name_fixup + ~sh_type: SHT_PROGBITS + ~sh_flags: [] + ~section_fixup: (Some sem.Semant.ctxt_debug_pubnames_fixup) + ~sh_addralign: 1L + ~sh_entsize: 0L + ~sh_link: None); +*) + + (* .debug_info *) + (section_header + ~shstring_table_fixup: shstrtab_section_fixup + ~shname_string_fixup: debug_info_section_name_fixup + ~sh_type: SHT_PROGBITS + ~sh_flags: [] + ~section_fixup: (Some sem.Semant.ctxt_debug_info_fixup) + ~sh_addralign: 1L + ~sh_entsize: 0L + ~sh_link: None); + + (* .debug_abbrev *) + (section_header + ~shstring_table_fixup: shstrtab_section_fixup + ~shname_string_fixup: debug_abbrev_section_name_fixup + ~sh_type: SHT_PROGBITS + ~sh_flags: [] + ~section_fixup: (Some sem.Semant.ctxt_debug_abbrev_fixup) + ~sh_addralign: 1L + ~sh_entsize: 0L + ~sh_link: None); + (* .debug_line *) +(* + (section_header + ~shstring_table_fixup: shstrtab_section_fixup + ~shname_string_fixup: debug_line_section_name_fixup + ~sh_type: SHT_PROGBITS + ~sh_flags: [] + ~section_fixup: (Some sem.Semant.ctxt_debug_line_fixup) + ~sh_addralign: 1L + ~sh_entsize: 0L + ~sh_link: None); +*) + + (* .debug_frame *) +(* + (section_header + ~shstring_table_fixup: shstrtab_section_fixup + ~shname_string_fixup: debug_frame_section_name_fixup + ~sh_type: SHT_PROGBITS + ~sh_flags: [] + ~section_fixup: (Some sem.Semant.ctxt_debug_frame_fixup) + ~sh_addralign: 4L + ~sh_entsize: 0L + ~sh_link: None); +*) + + (* .note.rust *) + (section_header + ~shstring_table_fixup: shstrtab_section_fixup + ~shname_string_fixup: note_rust_section_name_fixup + 
~sh_type: SHT_NOTE + ~sh_flags: [] + ~section_fixup: (Some note_rust_section_fixup) + ~sh_addralign: 1L + ~sh_entsize: 0L + ~sh_link: None); + + |] + in + let section_header_table = SEQ section_headers in + + + (* There are 6 official program headers in the file we're making: *) + (* segment 0: RX / PHDR *) + (* segment 1: R / INTERP *) + (* segment 2: RX / LOAD *) + (* segment 3: RW / LOAD *) + (* segment 4: RW / DYNAMIC *) + (* segment 5: R *) + + let program_header_table_fixup = new_fixup "program header table" in + let segment_0_fixup = new_fixup "segment 0" in + let segment_1_fixup = new_fixup "segment 1" in + let segment_2_fixup = new_fixup "segment 2" in + let segment_3_fixup = new_fixup "segment 3" in + let segment_4_fixup = new_fixup "segment 4" in + let segment_5_fixup = new_fixup "segment 5" in + + let segment_0_align = 4 in + let segment_1_align = 1 in + let segment_2_align = 0x1000 in + let segment_3_align = 0x1000 in + let segment_4_align = 0x1000 in + let segment_5_align = 1 in + + let program_headers = [| + (program_header + ~p_type: PT_PHDR + ~segment_fixup: segment_0_fixup + ~p_flags: [ PF_R; PF_X ] + ~p_align: (Int64.of_int segment_0_align)); + (program_header + ~p_type: PT_INTERP + ~segment_fixup: segment_1_fixup + ~p_flags: [ PF_R ] + ~p_align: (Int64.of_int segment_1_align)); + (program_header + ~p_type: PT_LOAD + ~segment_fixup: segment_2_fixup + ~p_flags: [ PF_R; PF_X ] + ~p_align: (Int64.of_int segment_2_align)); + (program_header + ~p_type: PT_LOAD + ~segment_fixup: segment_3_fixup + ~p_flags: [ PF_R; PF_W ] + ~p_align: (Int64.of_int segment_3_align)); + (program_header + ~p_type: PT_DYNAMIC + ~segment_fixup: segment_4_fixup + ~p_flags: [ PF_R; PF_W ] + ~p_align: (Int64.of_int segment_4_align)); + (program_header + ~p_type: PT_NOTE + ~segment_fixup: segment_5_fixup + ~p_flags: [ PF_R;] + ~p_align: (Int64.of_int segment_5_align)); + |] + in + let program_header_table = SEQ program_headers in + + let e_entry_fixup = new_fixup "entry symbol" 
in + + let elf_header = + elf32_header + ~sess + ~ei_data: ELFDATA2LSB + ~e_type: ET_DYN + ~e_machine: EM_386 + ~e_version: EV_CURRENT + + ~e_entry_fixup: e_entry_fixup + ~e_phoff_fixup: program_header_table_fixup + ~e_shoff_fixup: section_header_table_fixup + ~e_phnum: (Int64.of_int (Array.length program_headers)) + ~e_shnum: (Int64.of_int (Array.length section_headers)) + ~e_shstrndx: shstrtabndx + in + + let n_syms = ref 1 in (* The empty symbol, implicit. *) + + let data_sym name st_bind fixup = + let name_fixup = new_fixup ("data symbol name fixup: '" ^ name ^ "'") in + let strtab_entry = DEF (name_fixup, ZSTRING name) in + let symtab_entry = + symbol + ~string_table_fixup: dynstr_section_fixup + ~name_string_fixup: name_fixup + ~sym_target_fixup: (Some fixup) + ~st_bind + ~st_type: STT_OBJECT + ~st_shndx: datandx + in + incr n_syms; + (strtab_entry, symtab_entry) + in + + let rodata_sym name st_bind fixup = + let name_fixup = new_fixup ("rodata symbol name fixup: '" ^ name ^ "'") in + let strtab_entry = DEF (name_fixup, ZSTRING name) in + let symtab_entry = + symbol + ~string_table_fixup: dynstr_section_fixup + ~name_string_fixup: name_fixup + ~sym_target_fixup: (Some fixup) + ~st_bind + ~st_type: STT_OBJECT + ~st_shndx: rodatandx + in + incr n_syms; + (strtab_entry, symtab_entry) + in + + let text_sym name st_bind fixup = + let name_fixup = new_fixup ("text symbol name fixup: '" ^ name ^ "'") in + let strtab_frag = DEF (name_fixup, ZSTRING name) in + let symtab_frag = + symbol + ~string_table_fixup: dynstr_section_fixup + ~name_string_fixup: name_fixup + ~sym_target_fixup: (Some fixup) + ~st_bind: st_bind + ~st_type: STT_FUNC + ~st_shndx: textndx + in + incr n_syms; + (strtab_frag, symtab_frag) + in + + let require_sym name st_bind _(*fixup*) = + let name_fixup = + new_fixup ("require symbol name fixup: '" ^ name ^ "'") + in + let strtab_frag = DEF (name_fixup, ZSTRING name) in + let symtab_frag = + symbol + ~string_table_fixup: dynstr_section_fixup + 
~name_string_fixup: name_fixup + ~sym_target_fixup: None + ~st_bind + ~st_type: STT_FUNC + ~st_shndx: shn_UNDEF + in + incr n_syms; + (strtab_frag, symtab_frag) + in + + let frags_of_symbol sym_emitter st_bind symname_opt symbody x = + let (strtab_frags, symtab_frags, body_frags) = x in + let (strtab_frag, symtab_frag, body_frag) = + match symname_opt with + None -> (MARK, MARK, symbody) + | Some symname -> + let body_fixup = + new_fixup ("symbol body fixup: '" ^ symname ^ "'") + in + let body = + if symname = entry_name + then DEF (e_entry_fixup, DEF (body_fixup, symbody)) + else DEF (body_fixup, symbody) + in + let (str, sym) = sym_emitter symname st_bind body_fixup in + (str, sym, body) + in + ((strtab_frag :: strtab_frags), + (symtab_frag :: symtab_frags), + (body_frag :: body_frags)) + in + + let frags_of_require_symbol sym_emitter st_bind symname plt_entry_fixup x = + let (i, strtab_frags, symtab_frags, + plt_frags, got_plt_frags, rela_plt_frags) = x in + let (strtab_frag, symtab_frag) = sym_emitter symname st_bind None in + let e = X86.new_emitter_without_vregs () in + let jump_slot_fixup = new_fixup ("jump slot #" ^ string_of_int i) in + let jump_slot_initial_target_fixup = + new_fixup ("jump slot #" ^ string_of_int i ^ " initial target") in + + (* You may notice this PLT entry doesn't look like either of the + * types of "normal" PLT entries outlined in the ELF manual. It is, + * however, just what you get when you combine a PIC PLT entry with + * inline calls to the horrible __i686.get_pc_thunk.ax kludge used + * on x86 to support entering PIC PLTs. We're just doing it *in* + * the PLT entries rather than infecting all the callers with the + * obligation of having the GOT address in a register on + * PLT-entry. 
+ *) + + let plt_frag = + let (reg, _, _) = X86.get_next_pc_thunk in + + Il.emit_full e (Some plt_entry_fixup) [] Il.Dead; + + Abi.load_fixup_addr e reg got_plt_section_fixup Il.CodeTy; + + Il.emit e (Il.jmp Il.JMP (got_code_cell reg (2+i))); + + Il.emit_full e (Some jump_slot_initial_target_fixup) + [] (Il.Push (X86.immi (Int64.of_int i))); + + Il.emit e (Il.jmp Il.JMP (Il.direct_code_ptr plt0_fixup)); + X86.frags_of_emitted_quads sess e + in + let got_plt_frag = + DEF (jump_slot_fixup, + WORD (TY_u32, (M_POS jump_slot_initial_target_fixup))) + in + let rela_plt = + { elf32_386_rela_type = R_386_JMP_SLOT; + elf32_386_rela_offset = (M_POS jump_slot_fixup); + elf32_386_rela_sym = (IMM (Int64.of_int i)); + elf32_386_rela_addend = (IMM 0L) } + in + let rela_plt_frag = elf32_386_rela_frag rela_plt in + (i+1, + (strtab_frag :: strtab_frags), + (symtab_frag :: symtab_frags), + (plt_frag :: plt_frags), + (got_plt_frag :: got_plt_frags), + (rela_plt_frag :: rela_plt_frags)) + in + + (* Emit text export symbols. *) + let (global_text_strtab_frags, global_text_symtab_frags) = + match htab_search sem.Semant.ctxt_native_provided SEG_text with + None -> ([], []) + | Some etab -> + Hashtbl.fold + begin + fun name fix x -> + let (strtab_frags, symtab_frags) = x in + let (str, sym) = text_sym name STB_GLOBAL fix in + (str :: strtab_frags, + sym :: symtab_frags) + end + etab + ([],[]) + in + + (* Emit text fragments (possibly named). 
*) + let (global_text_strtab_frags, + global_text_symtab_frags, + text_body_frags) = + Hashtbl.fold + (frags_of_symbol text_sym STB_GLOBAL) + text_frags + (global_text_strtab_frags, global_text_symtab_frags, []) + in + + let (local_text_strtab_frags, + local_text_symtab_frags) = + + let symbol_frags_of_code _ code accum = + let (strtab_frags, symtab_frags) = accum in + let fix = code.Semant.code_fixup in + let (strtab_frag, symtab_frag) = + text_sym fix.fixup_name STB_LOCAL fix + in + (strtab_frag :: strtab_frags, + symtab_frag :: symtab_frags) + in + + let symbol_frags_of_glue_code g code accum = + let (strtab_frags, symtab_frags) = accum in + let fix = code.Semant.code_fixup in + let (strtab_frag, symtab_frag) = + text_sym (Semant.glue_str sem g) STB_LOCAL fix + in + (strtab_frag :: strtab_frags, + symtab_frag :: symtab_frags) + in + + let item_str_frags, item_sym_frags = + Hashtbl.fold symbol_frags_of_code + sem.Semant.ctxt_all_item_code ([], []) + in + let glue_str_frags, glue_sym_frags = + Hashtbl.fold symbol_frags_of_glue_code + sem.Semant.ctxt_glue_code ([], []) + in + (item_str_frags @ glue_str_frags, + item_sym_frags @ glue_sym_frags) + in + + (* Emit rodata export symbols. *) + let (rodata_strtab_frags, rodata_symtab_frags) = + match htab_search sem.Semant.ctxt_native_provided SEG_data with + None -> ([], []) + | Some etab -> + Hashtbl.fold + begin + fun name fix x -> + let (strtab_frags, symtab_frags) = x in + let (str, sym) = rodata_sym name STB_GLOBAL fix in + (str :: strtab_frags, + sym :: symtab_frags) + end + etab + ([],[]) + in + + (* Emit rodata fragments (possibly named). 
*) + let (rodata_strtab_frags, + rodata_symtab_frags, + rodata_body_frags) = + Hashtbl.fold + (frags_of_symbol rodata_sym STB_GLOBAL) + rodata_frags + (rodata_strtab_frags, rodata_symtab_frags, []) + in + + + let (data_strtab_frags, + data_symtab_frags, + data_body_frags) = + Hashtbl.fold (frags_of_symbol data_sym STB_GLOBAL) data_frags ([],[],[]) + in + + let (_, + require_strtab_frags, + require_symtab_frags, + plt_frags, + got_plt_frags, + rela_plt_frags) = + Hashtbl.fold (frags_of_require_symbol require_sym STB_GLOBAL) + required_fixups + (1,[],[],[plt0_frag],[got_prefix],[]) + in + let require_symtab_frags = List.rev require_symtab_frags in + let plt_frags = List.rev plt_frags in + let got_plt_frags = List.rev got_plt_frags in + let rela_plt_frags = List.rev rela_plt_frags in + + let dynamic_needed_strtab_frags = + Array.make (Array.length needed_libs) MARK + in + + let dynamic_frags = + let dynamic_needed_frags = Array.make (Array.length needed_libs) MARK in + for i = 0 to (Array.length needed_libs) - 1 do + let fixup = + new_fixup ("needed library name fixup: " ^ needed_libs.(i)) + in + dynamic_needed_frags.(i) <- + elf32_dyn_frag (DT_NEEDED, SUB (M_POS fixup, + M_POS dynstr_section_fixup)); + dynamic_needed_strtab_frags.(i) <- + DEF (fixup, ZSTRING needed_libs.(i)) + done; + (SEQ [| + SEQ dynamic_needed_frags; + elf32_dyn_frag (DT_STRTAB, M_POS dynstr_section_fixup); + elf32_dyn_frag (DT_STRSZ, M_SZ dynstr_section_fixup); + + elf32_dyn_frag (DT_SYMTAB, M_POS dynsym_section_fixup); + elf32_dyn_frag (DT_SYMENT, IMM elf32_symsize); + + elf32_dyn_frag (DT_HASH, M_POS hash_section_fixup); + elf32_dyn_frag (DT_PLTGOT, M_POS got_plt_section_fixup); + + elf32_dyn_frag (DT_PLTREL, IMM (elf32_num_of_dyn_tag DT_RELA)); + elf32_dyn_frag (DT_PLTRELSZ, M_SZ rela_plt_section_fixup); + elf32_dyn_frag (DT_JMPREL, M_POS rela_plt_section_fixup); + + elf32_dyn_frag (DT_NULL, IMM 0L) + |]) + in + + let null_strtab_fixup = new_fixup "null dynstrtab entry" in + let 
null_strtab_frag = DEF (null_strtab_fixup, ZSTRING "") in + let null_symtab_frag = (symbol + ~string_table_fixup: dynstr_section_fixup + ~name_string_fixup: null_strtab_fixup + ~sym_target_fixup: None + ~st_bind: STB_LOCAL + ~st_type: STT_NOTYPE + ~st_shndx: 0L) in + + let dynsym_frags = (null_symtab_frag :: + (require_symtab_frags @ + global_text_symtab_frags @ + local_text_symtab_frags @ + rodata_symtab_frags @ + data_symtab_frags)) + in + + let dynstr_frags = (null_strtab_frag :: + (require_strtab_frags @ + global_text_strtab_frags @ + local_text_strtab_frags @ + rodata_strtab_frags @ + data_strtab_frags @ + (Array.to_list dynamic_needed_strtab_frags))) + in + + let interp_section = + DEF (interp_section_fixup, ZSTRING "/lib/ld-linux.so.2") + in + + let text_section = + DEF (text_section_fixup, + SEQ (Array.of_list text_body_frags)) + in + let rodata_section = + DEF (rodata_section_fixup, + SEQ (Array.of_list rodata_body_frags)) + in + let data_section = + DEF (data_section_fixup, + SEQ (Array.of_list data_body_frags)) + in + let bss_section = + DEF (bss_section_fixup, + SEQ [| |]) + in + let dynsym_section = + DEF (dynsym_section_fixup, + SEQ (Array.of_list dynsym_frags)) + in + let dynstr_section = + DEF (dynstr_section_fixup, + SEQ (Array.of_list dynstr_frags)) + in + + let hash_section = + let n_syms = !n_syms in + + DEF (hash_section_fixup, + (* Worst hashtable ever: one chain. *) + SEQ [| + WORD (TY_u32, IMM 1L); (* nbucket *) + WORD (TY_u32, (* nchain *) + IMM (Int64.of_int n_syms)); + WORD (TY_u32, IMM 1L); (* bucket 0 => symbol 1. 
*) + SEQ + begin + Array.init + n_syms + (fun i -> + let next = (* chain[i] => if last then 0 else i+1 *) + if i > 0 && i < (n_syms-1) + then Int64.of_int (i+1) + else 0L + in + WORD (TY_u32, IMM next)) + end; + |]) + in + + let plt_section = + DEF (plt_section_fixup, + SEQ (Array.of_list plt_frags)) + in + + let got_plt_section = + DEF (got_plt_section_fixup, + SEQ (Array.of_list got_plt_frags)) + in + + let rela_plt_section = + DEF (rela_plt_section_fixup, + SEQ (Array.of_list rela_plt_frags)) + in + + let dynamic_section = + DEF (dynamic_section_fixup, dynamic_frags) + in + + let note_rust_section = + DEF (note_rust_section_fixup, + (Asm.note_rust_frags crate.node.Ast.crate_meta)) + in + + + let page_alignment = 0x1000 in + + let align_both i = + ALIGN_FILE (page_alignment, + (ALIGN_MEM (page_alignment, i))) + in + + let def_aligned f i = + align_both + (SEQ [| DEF(f,i); + (align_both MARK)|]) + in + + let debug_aranges_section = + def_aligned + sem.Semant.ctxt_debug_aranges_fixup + dwarf.Dwarf.debug_aranges + in + let debug_pubnames_section = + def_aligned + sem.Semant.ctxt_debug_pubnames_fixup + dwarf.Dwarf.debug_pubnames + in + let debug_info_section = + def_aligned + sem.Semant.ctxt_debug_info_fixup + dwarf.Dwarf.debug_info + in + let debug_abbrev_section = + def_aligned + sem.Semant.ctxt_debug_abbrev_fixup + dwarf.Dwarf.debug_abbrev + in + let debug_line_section = + def_aligned + sem.Semant.ctxt_debug_line_fixup + dwarf.Dwarf.debug_line + in + let debug_frame_section = + def_aligned sem.Semant.ctxt_debug_frame_fixup dwarf.Dwarf.debug_frame + in + + let load_address = 0x0804_8000L in + + SEQ + [| + MEMPOS load_address; + ALIGN_FILE + (segment_2_align, + DEF + (segment_2_fixup, + SEQ + [| + DEF (sem.Semant.ctxt_image_base_fixup, MARK); + elf_header; + ALIGN_FILE + (segment_0_align, + DEF + (segment_0_fixup, + SEQ + [| + DEF (program_header_table_fixup, + program_header_table); + |])); + ALIGN_FILE + (segment_1_align, + DEF (segment_1_fixup, interp_section)); 
+ text_section; + rodata_section; + dynsym_section; + dynstr_section; + hash_section; + plt_section; + rela_plt_section; + debug_aranges_section; + debug_pubnames_section; + debug_info_section; + debug_abbrev_section; + debug_line_section; + debug_frame_section; + |])); + ALIGN_FILE + (segment_3_align, + DEF + (segment_3_fixup, + SEQ + [| + data_section; + got_plt_section; + bss_section; + ALIGN_FILE + (segment_4_align, + DEF (segment_4_fixup, + dynamic_section)); + ALIGN_FILE + (segment_5_align, + DEF (segment_5_fixup, + note_rust_section)); + |])); + DEF (shstrtab_section_fixup, + shstrtab_section); + DEF (section_header_table_fixup, + section_header_table); + |] +;; + (* Emit the ELF file for [crate]: gather the code and data fragments, add the _start/_init/_fini/main startup stubs unless the session is in library mode, collect the required native symbols, lay everything out with elf32_linux_x86_file and write the result with write_out_frag. *) +let emit_file + (sess:Session.sess) + (crate:Ast.crate) + (code:Asm.frag) + (data:Asm.frag) + (sem:Semant.ctxt) + (dwarf:Dwarf.debug_records) + : unit = + + let text_frags = Hashtbl.create 4 in + let rodata_frags = Hashtbl.create 4 in + let data_frags = Hashtbl.create 4 in + let required_fixups = Hashtbl.create 4 in + + (* + * Startup on elf-linux is more complex than in win32. It's + * thankfully documented in some detail around the net. + * + * - The elf entry address is for _start. + * + * - _start pushes: + * + * eax (should be zero) + * esp (holding the kernel-provided stack end) + * edx (address of _rtld_fini) + * address of _fini + * address of _init + * ecx (argv) + * esi (argc) + * address of main + * + * and then calls __libc_start_main@plt. + * + * - This means any sensible binary has a PLT. Fun. So + * we call into the PLT, which itself is just a bunch + * of indirect jumps through slots in the GOT, and wind + * up in __libc_start_main. Which calls _init, then + * essentially exit(main(argc,argv)). 
+ *) + + + let init_fixup = new_fixup "_init function entry" in + let fini_fixup = new_fixup "_fini function entry" in + let (start_fixup, rust_start_fixup) = + if sess.Session.sess_library_mode + then (None, None) + else (Some (new_fixup "start function entry"), + Some (Semant.require_native sem REQUIRED_LIB_rustrt "rust_start")) + in + let libc_start_main_fixup = new_fixup "__libc_start_main@plt stub" in + (* Body of _start: sets up the register and stack state described in the notes above, then calls the __libc_start_main PLT stub. Only valid when start_fixup exists, i.e. outside library mode. *) + let start_fn _ = + let start_fixup = + match start_fixup with + None -> bug () "missing start fixup in non-library mode" + | Some s -> s + in + let e = X86.new_emitter_without_vregs () in + let push_r32 r = Il.emit e + (Il.Push (Il.Cell (Il.Reg (Il.Hreg r, Il.ValTy Il.Bits32)))) + in + let push_pos32 = X86.push_pos32 e in + + Il.emit e (Il.unary Il.UMOV (X86.rc X86.ebp) (X86.immi 0L)); + Il.emit e (Il.Pop (X86.rc X86.esi)); + Il.emit e (Il.unary Il.UMOV (X86.rc X86.ecx) (X86.ro X86.esp)); + Il.emit e (Il.binary Il.AND + (X86.rc X86.esp) (X86.ro X86.esp) + (X86.immi 0xfffffffffffffff0L)); + + push_r32 X86.eax; + push_r32 X86.esp; + push_r32 X86.edx; + push_pos32 fini_fixup; + push_pos32 init_fixup; + push_r32 X86.ecx; + push_r32 X86.esi; + push_pos32 start_fixup; + Il.emit e (Il.call + (Il.Reg (Il.Hreg X86.eax, Il.ValTy Il.Bits32)) + (Il.direct_code_ptr libc_start_main_fixup)); + X86.frags_of_emitted_quads sess e + in + (* A function body that is a single Ret; used below for both _init and _fini. *) + let do_nothing_fn _ = + let e = X86.new_emitter_without_vregs () in + Il.emit e Il.Ret; + X86.frags_of_emitted_quads sess e + in + (* Body of main: the X86.objfile_start glue, or an empty MARK when any of the three fixups it needs is missing. *) + let main_fn _ = + match (start_fixup, rust_start_fixup, sem.Semant.ctxt_main_fn_fixup) with + (None, _, _) + | (_, None, _) + | (_, _, None) -> MARK + | (Some start_fixup, + Some rust_start_fixup, + Some main_fn_fixup) -> + let e = X86.new_emitter_without_vregs () in + X86.objfile_start e + ~start_fixup + ~rust_start_fixup + ~main_fn_fixup + ~crate_fixup: sem.Semant.ctxt_crate_fixup + ~indirect_start: false; + X86.frags_of_emitted_quads sess e + in + + let needed_libs = + [| + "libc.so.6"; + "librustrt.so" + |] 
+ in + (* Register the startup stubs (executables only), the crate code and data, and every required native symbol. *) + let _ = + if not sess.Session.sess_library_mode + then + begin + htab_put text_frags (Some "_start") (start_fn()); + htab_put text_frags (Some "_init") + (DEF (init_fixup, do_nothing_fn())); + htab_put text_frags (Some "_fini") + (DEF (fini_fixup, do_nothing_fn())); + htab_put text_frags (Some "main") (main_fn ()); + htab_put required_fixups "__libc_start_main" libc_start_main_fixup; + end; + htab_put text_frags None code; + htab_put rodata_frags None data; + + Hashtbl.iter + begin + fun _ tab -> + Hashtbl.iter + begin + fun name fixup -> + htab_put required_fixups name fixup + end + tab + end + sem.Semant.ctxt_native_required + in + let all_frags = + elf32_linux_x86_file + ~sess + ~crate + ~entry_name: "_start" + ~text_frags + ~data_frags + ~dwarf + ~sem + ~rodata_frags + ~required_fixups + ~needed_libs + in + write_out_frag sess true all_frags +;; + (* The four identification bytes at the start of an ELF file. *) +let elf_magic = "\x7fELF";; + (* Return a reader rewound to offset 0 when [filename] is a regular file of more than 4 bytes that begins with elf_magic; return None otherwise, including when any step raises. *) +let sniff + (sess:Session.sess) + (filename:filename) + : asm_reader option = + try + let stat = Unix.stat filename in + if (stat.Unix.st_kind = Unix.S_REG) && + (stat.Unix.st_size > 4) + then + let ar = new_asm_reader sess filename in + let _ = log sess "sniffing ELF file" in + if (ar.asm_get_zstr_padded 4) = elf_magic + then (ar.asm_seek 0; Some ar) + else None + else + None + with + _ -> None +;; + (* Read the ELF32 header and section header table through [ar] and return a table from section name to (file offset, size). Asserts that the reader is positioned on the ELF magic when called. *) +let get_sections + (sess:Session.sess) + (ar:asm_reader) + : (string,(int*int)) Hashtbl.t = + let sects = Hashtbl.create 0 in + let _ = log sess "reading sections" in + let elf_id = ar.asm_get_zstr_padded 4 in + let _ = assert (elf_id = elf_magic) in + + let _ = ar.asm_seek 0x10 in + let _ = ar.asm_adv_u16 () in (* e_type *) + let _ = ar.asm_adv_u16 () in (* e_machine *) + let _ = ar.asm_adv_u32 () in (* e_version *) + let _ = ar.asm_adv_u32 () in (* e_entry *) + let _ = ar.asm_adv_u32 () in (* e_phoff *) + let e_shoff = ar.asm_get_u32 () in (* e_shoff *) + let _ = ar.asm_adv_u32 () in (* e_flags *) + let _ = ar.asm_adv_u16 () in (* e_ehsize *) + let _ = ar.asm_adv_u16 () in (* 
e_phentsize *) + let _ = ar.asm_adv_u16 () in (* e_phnum *) + let e_shentsize = ar.asm_get_u16 () in (* e_shentsize *) + let e_shnum = ar.asm_get_u16 () in (* e_shnum *) + let e_shstrndx = ar.asm_get_u16 () in (* e_shstrndx *) + let _ = log sess + "%d ELF section headers, %d bytes each, starting at 0x%x" + e_shnum e_shentsize e_shoff + in + let _ = log sess "section %d is .shstrtab" e_shstrndx in + (* Seek to section header n and return its (name offset, file offset, size). *) + let read_section_hdr n = + let _ = ar.asm_seek (e_shoff + n * e_shentsize) in + let str_off = ar.asm_get_u32() in (* sh_name *) + let _ = ar.asm_adv_u32() in (* sh_type *) + let _ = ar.asm_adv_u32() in (* sh_flags *) + let _ = ar.asm_adv_u32() in (* sh_addr *) + let off = ar.asm_get_u32() in (* sh_offset *) + let size = ar.asm_get_u32() in (* sh_size *) + let _ = ar.asm_adv_u32() in (* sh_link *) + let _ = ar.asm_adv_u32() in (* sh_info *) + let _ = ar.asm_adv_u32() in (* sh_addralign *) + let _ = ar.asm_adv_u32() in (* sh_entsize *) + (str_off, off, size) + in + (* str_base is the file offset of the section-name string table. *) + let (_, str_base, _) = read_section_hdr e_shstrndx in + (* NOTE(review): read_section_hdr seeks on every call, so the seek below looks redundant. *) + let _ = ar.asm_seek e_shoff in + for i = 0 to (e_shnum - 1) do + let (str_off, off, size) = read_section_hdr i in + let _ = ar.asm_seek (str_base + str_off) in + let name = ar.asm_get_zstr() in + log sess "section %d: %s, size %d, offset 0x%x" i name size off; + Hashtbl.add sects name (off, size); + done; + sects +;; + + +(* + * Local Variables: + * fill-column: 78; + * indent-tabs-mode: nil + * buffer-file-coding-system: utf-8-unix + * compile-command: "make -k -C ../.. 2>&1 | sed -e 's/\\/x\\//x:\\//g'"; + * End: + *) diff --git a/src/boot/be/il.ml b/src/boot/be/il.ml new file mode 100644 index 00000000000..e095e627b71 --- /dev/null +++ b/src/boot/be/il.ml @@ -0,0 +1,1135 @@ +open Common;; + +(* FIXME (issue #1): thread a session object through this eventually. *) +let log_iltypes = ref false;; + +(* IL type system, very rudimentary. 
*) + +type bits = + Bits8 + | Bits16 + | Bits32 + | Bits64 +;; + (* A scalar is either a raw value of a given width or the address of some referent. *) +type scalar_ty = + ValTy of bits + | AddrTy of referent_ty + +and referent_ty = + ScalarTy of scalar_ty + | StructTy of referent_ty array + | UnionTy of referent_ty array + | ParamTy of ty_param_idx (* Thing of current-frame type-param #n *) + | OpaqueTy (* Unknown memory-resident thing. *) + | CodeTy (* Executable machine code. *) + | NilTy (* 0 bits of space. *) +;; + +let (voidptr_t:scalar_ty) = AddrTy OpaqueTy;; +let (codeptr_t:scalar_ty) = AddrTy CodeTy;; + +(* Operands. *) + +type vreg = int ;; +type hreg = int ;; +type label = int ;; +type spill = int ;; + +type reg = + Vreg of vreg + | Hreg of hreg +;; + (* Memory operand: an address expression (Abs), a register plus an optional offset expression (RegIn), or a spill slot (Spill). *) +type mem = + Abs of Asm.expr64 + | RegIn of (reg * (Asm.expr64 option)) + | Spill of spill +;; + +type typed_reg = (reg * scalar_ty);; +type typed_mem = (mem * referent_ty);; +type typed_imm = (Asm.expr64 * ty_mach);; +type typed_imm_ptr = (fixup * referent_ty);; + (* A cell is a typed register or a typed memory location. *) +type cell = + Reg of typed_reg + | Mem of typed_mem +;; + +(* + * ImmPtr (a, rty) can be assigned to anything of scalar_ty + * AddrTy rty; the difference is that ImmPtr carries its value + * so can be used in cases where we want to have an immediate + * address constant-propagated through the code to the backend. + *) +type operand = + Cell of cell + | Imm of typed_imm + | ImmPtr of typed_imm_ptr +;; + + +type code = + CodeLabel of label (* Index into current quad block. *) + | CodePtr of operand + | CodeNone +;; + +(* NB: for the most part, we let the register allocator assign spills + * from vregs, and we permanently allocate aliased slots to stack + * locations by static aliasing information early, in layout. + * + * The one awkward case this doesn't handle is when someone tries to + * pass a literal-atom to an alias-slot. This *requires* a memory slot + * but we only realize it rather late, much later than we'd normally + * have thought to desugar the literal into a temporary. 
+ * + * So in these cases, we let the trans module explicitly demand a + * "Spill n" operand, which the register allocator mops up before it + * gets started on the vregs. + * + * NOTE: if we were more clever we'd integrate vregs and spills like + * this together along with the general notion of a temporary way back + * at the desugaring stage, and use some kind of size-class + * consolidation so that spills with non-overlapping lifetimes could + * share memory. But we're not that clever yet. + *) + + +(* Helpers. *) + (* Code target that is the immediate address of fixup [fix], typed as code. *) +let direct_code_ptr fix = + (CodePtr (ImmPtr (fix, CodeTy))) +;; + (* Referent type of a cell: a register's scalar type wrapped in ScalarTy, or a memory cell's own referent type. *) +let cell_referent_ty c = + match c with + Reg (_, st) -> ScalarTy st + | Mem (_, rt) -> rt +;; + (* True for a memory cell of NilTy or a register holding the address of a NilTy; false otherwise. *) +let cell_is_nil c = + match c with + Mem (_, NilTy) -> true + | Reg (_, AddrTy NilTy) -> true + | _ -> false +;; + (* Only Cell operands can be nil; Imm and ImmPtr never are. *) +let operand_is_nil o = + match o with + Cell c -> cell_is_nil c + | _ -> false +;; + (* Add expression [off] to the address of a memory operand. Spill slots cannot be offset: that case calls bug. *) +let mem_off (mem:mem) (off:Asm.expr64) : mem = + let addto e = Asm.ADD (off, e) in + match mem with + Abs e -> Abs (addto e) + | RegIn (r, None) -> RegIn (r, Some off) + | RegIn (r, Some e) -> RegIn (r, Some (addto e)) + | Spill _ -> bug () "Adding offset to spill slot" +;; + (* mem_off with a literal int64 offset. *) +let mem_off_imm (mem:mem) (imm:int64) : mem = + mem_off mem (Asm.IMM imm) +;; + + +(* Quads. *) + +type binop = + ADD | SUB + | IMUL | UMUL + | IDIV | UDIV + | IMOD | UMOD + | AND | OR | XOR + | LSL | LSR | ASR +;; + +type unop = + NEG | NOT + | UMOV | IMOV + | ZERO +;; + +type jmpop = + JE | JNE + | JZ | JNZ (* FIXME: Synonyms with JE/JNE in x86, others? *) + | JL | JLE | JG | JGE (* Signed. *) + | JB | JBE | JA | JAE (* Unsigned. 
*) + | JC | JNC | JO | JNO + | JMP +;; + +(* Two-source operation: an operator, a destination cell, two operands. *) +type binary = + { + binary_op: binop; + binary_dst: cell; + binary_lhs: operand; + binary_rhs: operand + } +;; + +(* One-source operation: an operator, a destination cell, one operand. *) +type unary = + { + unary_op: unop; + unary_dst: cell; + unary_src: operand + } +;; + +(* Comparison of two operands; carries no destination cell. *) +type cmp = + { + cmp_lhs: operand; + cmp_rhs: operand + } +;; + +(* Lea (presumably load-effective-address): destination cell and source. *) +type lea = + { + lea_dst: cell; + lea_src: operand + } +;; + +(* Jump: the jump operator and its code target. *) +type jmp = + { + jmp_op: jmpop; + jmp_targ: code; + } +;; + +(* Call: destination cell and code target. *) +type call = + { + call_dst: cell; + call_targ: code + } + +(* Body of a quad: one instruction or pseudo-instruction. *) +type quad' = + Binary of binary + | Unary of unary + | Lea of lea + | Cmp of cmp + | Jmp of jmp + | Push of operand + | Pop of cell + | Call of call + | Debug (* Debug-break pseudo-instruction. *) + | Enter of fixup (* Enter-fixup-block pseudo-instruction. *) + | Leave (* Leave-fixup-block pseudo-instruction. *) + | Ret (* Return to caller. *) + | Nop (* Keep this quad here, emit CPU nop. *) + | Dead (* Keep this quad but emit nothing. *) + | Regfence (* Clobber all hregs. *) + | End (* Space past the end of quads to emit. *) +;; + +(* A quad body together with an optional fixup and a list of labels. *) +type quad = + { quad_fixup: fixup option; + quad_implicits: label list; + quad_body: quad'; } + +type quads = quad array ;; + +(* Query functions. 
*) + +(* True when the cell holds a scalar: any register, or memory of ScalarTy. *) +let cell_is_scalar (c:cell) : bool = + match c with + | Reg _ | Mem (_, ScalarTy _) -> true + | Mem _ -> false +;; + + +(* Bit width of each machine type. *) +let bits_of_ty_mach (tm:ty_mach) : bits = + match tm with + | TY_u8 | TY_i8 -> Bits8 + | TY_u16 | TY_i16 -> Bits16 + | TY_u32 | TY_i32 | TY_f32 -> Bits32 + | TY_u64 | TY_i64 | TY_f64 -> Bits64 +;; + +(* Scalar type of a cell; memory of non-scalar type is reported via bug (). *) +let cell_scalar_ty (c:cell) : scalar_ty = + match c with + | Reg (_, sty) | Mem (_, ScalarTy sty) -> sty + | Mem _ -> bug () "mem of non-scalar in Il.cell_scalar_ty" +;; + +(* Scalar type of an operand: immediates are values, ImmPtrs are addresses. *) +let operand_scalar_ty (op:operand) : scalar_ty = + match op with + | ImmPtr (_, rty) -> AddrTy rty + | Imm (_, tm) -> ValTy (bits_of_ty_mach tm) + | Cell c -> cell_scalar_ty c +;; + + +(* Width of a scalar; addresses take the supplied word width. *) +let scalar_ty_bits (word_bits:bits) (st:scalar_ty) : bits = + match st with + | AddrTy _ -> word_bits + | ValTy width -> width +;; + +(* Width of a scalar cell; memory of non-scalar type is reported via bug (). *) +let cell_bits (word_bits:bits) (c:cell) : bits = + match c with + | Reg (_, sty) | Mem (_, ScalarTy sty) -> scalar_ty_bits word_bits sty + | Mem _ -> bug () "mem of non-scalar in Il.cell_bits" +;; + +(* Width of an operand; ImmPtrs take the supplied word width. *) +let operand_bits (word_bits:bits) (op:operand) : bits = + match op with + | ImmPtr _ -> word_bits + | Imm (_, tm) -> bits_of_ty_mach tm + | Cell c -> cell_bits word_bits c +;; + +(* Size in bytes of a value of the given width. *) +let bits_size (bits:bits) : int64 = + match bits with + | Bits64 -> 8L + | Bits32 -> 4L + | Bits16 -> 2L + | Bits8 -> 1L +;; + +(* Alignment in bytes of a value of the given width. *) +let bits_align (bits:bits) : int64 = + match bits with + | Bits64 -> 8L + | Bits32 -> 4L + | Bits16 -> 2L + | Bits8 -> 1L +;; + +let scalar_ty_size (word_bits:bits) (st:scalar_ty) : int64 = + let width = scalar_ty_bits word_bits st in + bits_size width +;; + +let scalar_ty_align (word_bits:bits) (st:scalar_ty) : int64 = + let width = scalar_ty_bits word_bits st in + bits_align width +;; + +let rec referent_ty_layout (word_bits:bits) (rt:referent_ty) : (size * size) = + match rt with + ScalarTy st -> (SIZE_fixed (scalar_ty_size word_bits st), + SIZE_fixed (scalar_ty_align word_bits st)) + | StructTy rts -> + begin 
+ (* Struct: each element is placed at the running offset passed + * through align_sz with its own alignment; the struct alignment is + * the max_sz of the element alignments. The total is not passed + * through align_sz again at the end. *) + let accum (off,align) rt : (size * size) = + let (elt_size, elt_align) = referent_ty_layout word_bits rt in + let elt_off = align_sz elt_align off in + (add_sz elt_off elt_size, max_sz elt_align align) + in + Array.fold_left accum (SIZE_fixed 0L, SIZE_fixed 1L) rts + end + | UnionTy rts -> + begin + (* Union: size and alignment are the max_sz over all members. *) + let accum (sz,align) rt : (size * size) = + let (elt_size, elt_align) = referent_ty_layout word_bits rt in + (max_sz sz elt_size, max_sz elt_align align) + in + Array.fold_left accum (SIZE_fixed 0L, SIZE_fixed 1L) rts + end + | OpaqueTy -> bug () "opaque ty in referent_ty_layout" + | CodeTy -> bug () "code ty in referent_ty_layout" + | ParamTy i -> (SIZE_param_size i, SIZE_param_align i) + | NilTy -> (SIZE_fixed 0L, SIZE_fixed 1L) + +(* First component (size) of referent_ty_layout. *) +and referent_ty_size (word_bits:bits) (rt:referent_ty) : size = + (fst (referent_ty_layout word_bits rt)) + +(* Second component (alignment) of referent_ty_layout. *) +and referent_ty_align (word_bits:bits) (rt:referent_ty) : size = + (snd (referent_ty_layout word_bits rt)) + +;; + +(* + * Offset of element i among elts: the size of a struct made of the + * elements before i, passed through align_sz with element i's + * alignment. Array.sub and elts.(i) raise Invalid_argument when i is + * out of range. + *) +let get_element_offset + (word_bits:bits) + (elts:referent_ty array) + (i:int) + : size = + let elts_before = Array.sub elts 0 i in + let elt_rty = elts.(i) in + let elts_before_size = referent_ty_size word_bits (StructTy elts_before) in + let elt_align = referent_ty_align word_bits elt_rty in + let elt_off = align_sz elt_align elts_before_size in + elt_off +;; + +(* Processor. 
*) + +(* + * Table of rewriting callbacks, one per kind of quad component. Every + * callback is passed the processor itself, so it can dispatch back + * through the (possibly overridden) table for sub-components. + *) +type quad_processor = + { qp_reg: (quad_processor -> reg -> reg); + qp_mem: (quad_processor -> mem -> mem); + qp_cell_read: (quad_processor -> cell -> cell); + qp_cell_write: (quad_processor -> cell -> cell); + qp_code: (quad_processor -> code -> code); + qp_op: (quad_processor -> operand -> operand); } +;; + +(* + * Processor that rebuilds every component unchanged: registers map to + * themselves, and compound components are reconstructed after routing + * their parts through the table. + *) +let identity_processor = + let qp_cell = (fun qp c -> match c with + Reg (r, b) -> Reg (qp.qp_reg qp r, b) + | Mem (a, b) -> Mem (qp.qp_mem qp a, b)) + in + { qp_reg = (fun _ r -> r); + qp_mem = (fun qp a -> match a with + RegIn (r, o) -> RegIn (qp.qp_reg qp r, o) + | Abs _ + | Spill _ -> a); + qp_cell_read = qp_cell; + qp_cell_write = qp_cell; + qp_code = (fun qp c -> match c with + CodePtr op -> CodePtr (qp.qp_op qp op) + | CodeLabel _ + | CodeNone -> c); + qp_op = (fun qp op -> match op with + Cell c -> Cell (qp.qp_cell_read qp c) + | ImmPtr _ -> op + | Imm _ -> op) } +;; + +(* + * Rebuild a quad by running the processor over its body. Destination + * cells go through qp_cell_write, source operands through qp_op, and + * jump/call targets through qp_code; the remaining bodies are rebuilt + * unchanged. quad_fixup and quad_implicits are carried over as-is. + *) +let process_quad (qp:quad_processor) (q:quad) : quad = + { q with + quad_body = match q.quad_body with + Binary b -> + Binary { b with + binary_dst = qp.qp_cell_write qp b.binary_dst; + binary_lhs = qp.qp_op qp b.binary_lhs; + binary_rhs = qp.qp_op qp b.binary_rhs } + | Unary u -> + Unary { u with + unary_dst = qp.qp_cell_write qp u.unary_dst; + unary_src = qp.qp_op qp u.unary_src } + + | Lea le -> + Lea { lea_dst = qp.qp_cell_write qp le.lea_dst; + lea_src = qp.qp_op qp le.lea_src } + + | Cmp c -> + Cmp { cmp_lhs = qp.qp_op qp c.cmp_lhs; + cmp_rhs = qp.qp_op qp c.cmp_rhs } + + | Jmp j -> + Jmp { j with + jmp_targ = qp.qp_code qp j.jmp_targ } + + | Push op -> + Push (qp.qp_op qp op) + + | Pop c -> + Pop (qp.qp_cell_write qp c) + + | Call c -> + Call { call_dst = qp.qp_cell_write qp c.call_dst; + call_targ = qp.qp_code qp c.call_targ } + + | Ret -> Ret + | Nop -> Nop + | Debug -> Debug + | Regfence -> Regfence + | Enter f -> Enter f + | Leave -> Leave + | Dead -> Dead + | End -> End } +;; + +(* Run process_quad on every quad and discard the results. *) +let visit_quads (qp:quad_processor) (qs:quads) : unit = + Array.iter (fun x ->ignore ( 
process_quad qp x); ()) qs +;; + +(* Map process_quad over a quad array, yielding a fresh array. *) +let process_quads (qp:quad_processor) (qs:quads) : quads = + let step q = process_quad qp q in + Array.map step qs +;; + +(* Overwrite each slot of the quad array with its processed quad. *) +let rewrite_quads (qp:quad_processor) (qs:quads) : unit = + Array.iteri (fun i q -> qs.(i) <- process_quad qp q) qs +;; + + +(* A little partial-evaluator to help lowering sizes. *) + +(* + * Lower a size to an Asm.expr64 when every part of it has a lowering; + * any size form not listed below makes the whole result None. + *) +let rec size_to_expr64 (a:size) : Asm.expr64 option = + let lift1 mk x = + match size_to_expr64 x with + | Some ex -> Some (mk ex) + | None -> None + in + let lift2 mk x y = + match (size_to_expr64 x, size_to_expr64 y) with + | (Some ex, Some ey) -> Some (mk ex ey) + | (None, _) | (_, None) -> None + in + match a with + | SIZE_fixed n -> Some (Asm.IMM n) + | SIZE_fixup_mem_sz fix -> Some (Asm.M_SZ fix) + | SIZE_fixup_mem_pos fix -> Some (Asm.M_POS fix) + | SIZE_rt_neg x -> lift1 (fun e -> Asm.NEG e) x + | SIZE_rt_add (x, y) -> lift2 (fun ex ey -> Asm.ADD (ex,ey)) x y + | SIZE_rt_mul (x, y) -> lift2 (fun ex ey -> Asm.MUL (ex,ey)) x y + | SIZE_rt_max (x, y) -> lift2 (fun ex ey -> Asm.MAX (ex,ey)) x y + | SIZE_rt_align (x, y) -> lift2 (fun ex ey -> Asm.ALIGN (ex,ey)) x y + | _ -> None +;; + + +(* Formatters. *) + +(* Short name of a bit width: "b8", "b16", "b32" or "b64". *) +let string_of_bits (b:bits) : string = + match b with + | Bits64 -> "b64" + | Bits32 -> "b32" + | Bits16 -> "b16" + | Bits8 -> "b8" +;; + +(* Values print as their width; addresses as the referent followed by "*". *) +let rec string_of_scalar_ty (s:scalar_ty) : string = + match s with + | AddrTy rty -> string_of_referent_ty rty ^ "*" + | ValTy width -> string_of_bits width + +and string_of_referent_ty (r:referent_ty) : string = + match r with + ScalarTy s -> (string_of_scalar_ty s) + | StructTy rs -> + Printf.sprintf "[%s]" + (String.concat "," + (Array.to_list (Array.map string_of_referent_ty rs))) + | UnionTy rs -> + Printf.sprintf "(%s)" + (String.concat "|" + (Array.to_list (Array.map string_of_referent_ty rs))) + | ParamTy i -> Printf.sprintf "#%d" i + | OpaqueTy -> "?" + | CodeTy -> "!" 
+ | NilTy -> "()" +;; + + +(* Renders a hardware register number as text. *) +type hreg_formatter = hreg -> string;; + +(* + * Format a register: virtual registers print as "<vN>", hardware + * registers via the supplied formatter. The format string must carry + * a %d conversion because it is applied to the vreg number. + *) +let string_of_reg (f:hreg_formatter) (r:reg) : string = + match r with + Vreg i -> Printf.sprintf "<v%d>" i + | Hreg i -> f i +;; + +(* + * Format an Asm.expr64 as text. Negative immediates print as "-0x..." + * of their negation; two-operand forms print parenthesized; fixup + * references print as "<name>.fpos", ".fsz", ".mpos" or ".msz". + *) +let rec string_of_expr64 (e64:Asm.expr64) : string = + let bin op a b = + Printf.sprintf "(%s %s %s)" (string_of_expr64 a) op (string_of_expr64 b) + in + let bini op a b = + Printf.sprintf "(%s %s %d)" (string_of_expr64 a) op b + in + match e64 with + Asm.IMM i when (i64_lt i 0L) -> Printf.sprintf "-0x%Lx" (Int64.neg i) + | Asm.IMM i -> Printf.sprintf "0x%Lx" i + | Asm.ADD (a,b) -> bin "+" a b + | Asm.SUB (a,b) -> bin "-" a b + | Asm.MUL (a,b) -> bin "*" a b + | Asm.DIV (a,b) -> bin "/" a b + | Asm.REM (a,b) -> bin "%" a b + | Asm.MAX (a,b) -> + Printf.sprintf "(max %s %s)" + (string_of_expr64 a) (string_of_expr64 b) + | Asm.ALIGN (a,b) -> + Printf.sprintf "(align %s %s)" + (string_of_expr64 a) (string_of_expr64 b) + | Asm.SLL (a,b) -> bini "<<" a b + | Asm.SLR (a,b) -> bini ">>" a b + | Asm.SAR (a,b) -> bini ">>>" a b + | Asm.AND (a,b) -> bin "&" a b + | Asm.XOR (a,b) -> bin "xor" a b + | Asm.OR (a,b) -> bin "|" a b + | Asm.NOT a -> Printf.sprintf "(not %s)" (string_of_expr64 a) + | Asm.NEG a -> Printf.sprintf "-%s" (string_of_expr64 a) + | Asm.F_POS f -> Printf.sprintf "<%s>.fpos" f.fixup_name + | Asm.F_SZ f -> Printf.sprintf "<%s>.fsz" f.fixup_name + | Asm.M_POS f -> Printf.sprintf "<%s>.mpos" f.fixup_name + | Asm.M_SZ f -> Printf.sprintf "<%s>.msz" f.fixup_name + | Asm.EXT _ -> "??ext??" 
+;; + +(* + * Format an optional offset for display after a base register: nothing + * for None, " - 0x..." for a negative immediate, " + <expr>" otherwise. + *) +let string_of_off (e:Asm.expr64 option) : string = + match e with + None -> "" + | Some (Asm.IMM i) when (i64_lt i 0L) -> + Printf.sprintf " - 0x%Lx" (Int64.neg i) + | Some e' -> " + " ^ (string_of_expr64 e') +;; + +(* + * Format a memory location in square brackets. Spill slots print as + * "[<spill N>]"; the format string must carry a %d conversion because + * it is applied to the spill number. + *) +let string_of_mem (f:hreg_formatter) (a:mem) : string = + match a with + Abs e -> + Printf.sprintf "[%s]" (string_of_expr64 e) + | RegIn (r, off) -> + Printf.sprintf "[%s%s]" (string_of_reg f r) (string_of_off off) + | Spill i -> + Printf.sprintf "[<spill %d>]" i +;; + +(* Format a cell, appending ":type" when log_iltypes is set. *) +let string_of_cell (f:hreg_formatter) (c:cell) : string = + match c with + Reg (r,ty) -> + if !log_iltypes + then + Printf.sprintf "%s:%s" (string_of_reg f r) (string_of_scalar_ty ty) + else + Printf.sprintf "%s" (string_of_reg f r) + | Mem (a,ty) -> + if !log_iltypes + then + Printf.sprintf "%s:%s" + (string_of_mem f a) (string_of_referent_ty ty) + else + Printf.sprintf "%s" (string_of_mem f a) +;; + +(* + * Format an operand; immediates are prefixed with "$" and types are + * appended when log_iltypes is set. The fixup in ImmPtr is bound as + * fix so that it does not shadow the hreg formatter f. + *) +let string_of_operand (f:hreg_formatter) (op:operand) : string = + match op with + Cell c -> string_of_cell f c + | ImmPtr (fix, ty) -> + if !log_iltypes + then + Printf.sprintf "$<%s>.mpos:%s*" + fix.fixup_name (string_of_referent_ty ty) + else + Printf.sprintf "$<%s>.mpos" fix.fixup_name + | Imm (i, ty) -> + if !log_iltypes + then + Printf.sprintf "$%s:%s" (string_of_expr64 i) (string_of_ty_mach ty) + else + Printf.sprintf "$%s" (string_of_expr64 i) +;; + + +let string_of_code (f:hreg_formatter) (c:code) : string = + match c with + CodeLabel lab -> Printf.sprintf "