rt: Improve docs for main, kernel, scheduler, and task

Brian Anderson 2012-06-02 23:14:25 -07:00
parent 9b9ceea6bd
commit 94ac30c498
4 changed files with 119 additions and 36 deletions

View File

@ -1,9 +1,14 @@
/**
* Main entry point into the Rust runtime. Here we initialize the kernel,
* create the initial scheduler and run the main task.
*/
#include "rust_globals.h"
#include "rust_kernel.h"
#include "rust_util.h"
#include "rust_scheduler.h"
// Creates a rust argument vector from the platform argument vector
struct
command_line_args : public kernel_owned<command_line_args>
{
@ -61,42 +66,60 @@ command_line_args : public kernel_owned<command_line_args>
}
};
/**
* Main entry point into the Rust runtime. Here we create a Rust service,
* initialize the kernel, create the root domain and run it.
*/
// A global that indicates whether Rust typestate claim statements should be
// executed. Generated code will read this variable directly (I think).
// FIXME: This belongs somewhere else
int check_claims = 0;
/**
The runtime entrypoint. The (C ABI) main function generated by rustc calls
`rust_start`, providing the address of the Rust ABI main function, the
platform argument vector, and a `crate_map` that provides some logging
metadata.
*/
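// For illustration only, a rough sketch of the caller side of this contract.
// The names `__rust_abi_main` and `_rust_crate_map` are invented here; the
// caller is emitted by rustc, not hand-written, and the actual symbols differ.
//
//     extern "C" int rust_start(uintptr_t main_fn, int argc, char **argv,
//                               void *crate_map);
//
//     int main(int argc, char **argv) {
//         return rust_start((uintptr_t)&__rust_abi_main, argc, argv,
//                           (void*)&_rust_crate_map);
//     }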
extern "C" CDECL int
rust_start(uintptr_t main_fn, int argc, char **argv, void* crate_map) {
// Load runtime configuration options from the environment.
// FIXME #1497: Should provide a way to get these from the command
// line as well.
rust_env *env = load_env();
update_log_settings(crate_map, env->logspec);
// Maybe turn on typestate claim checking
check_claims = env->check_claims;
rust_kernel *kernel = new rust_kernel(env);
// Create the main scheduler and the main task
rust_sched_id sched_id = kernel->create_scheduler(env->num_sched_threads);
rust_scheduler *sched = kernel->get_scheduler_by_id(sched_id);
rust_task *root_task = sched->create_task(NULL, "main");
// Build the command line arguments to pass to the root task
command_line_args *args
= new (kernel, "main command line args")
command_line_args(root_task, argc, argv);
LOG(root_task, dom, "startup: %d args in 0x%" PRIxPTR,
args->argc, (uintptr_t)args->args);
for (int i = 0; i < args->argc; i++) {
LOG(root_task, dom, "startup: arg[%d] = '%s'", i, args->argv[i]);
}
// Schedule the main Rust task
root_task->start((spawn_fn)main_fn, NULL, args->args);
// At this point the task lifecycle is responsible for it,
// and our pointer may no longer be valid
root_task = NULL;
// Run the kernel until all schedulers exit
int ret = kernel->run();
delete args;
delete kernel;
free_env(env);
return ret;

View File

@ -1,4 +1,34 @@
// -*- c++ -*-
/**
A single runtime instance.
The kernel is primarily responsible for managing the lifetime of
schedulers, which in turn run rust tasks. It provides a memory
allocator and logging service for use by other runtime components,
creates unique task and port ids, and provides global access
to ports by id.
The kernel runs until there are no live schedulers.
The kernel internally runs an additional, special scheduler called
the 'osmain' (or platform) scheduler, which schedules tasks on the
thread that is running the kernel (normally the thread on which the
C main function was called). This scheduler may be used by Rust
code for interacting with platform APIs that insist on being called
from the main thread.
The requirements of the osmain scheduler have resulted in a complex
process for creating and running scheduler loops that involves
a thing called a 'rust_sched_launcher_factory' whose function I've
already forgotten. rust_scheduler is the main scheduler class,
and tasks are scheduled on individual threads by rust_sched_loop.
Ideally all the in-memory Rust state is encapsulated by a kernel
instance, but there is still some truly global data in the runtime
(like the check claims flag).
*/
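// Illustrative sketch (not part of this header) of the lifecycle described
// above, using the calls made from rust_start:
//
//     rust_kernel *kernel = new rust_kernel(env);
//     rust_sched_id id = kernel->create_scheduler(env->num_sched_threads);
//     // ... create tasks on the scheduler and start them ...
//     int ret = kernel->run();   // returns once no live schedulers remain
//     delete kernel;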
#ifndef RUST_KERNEL_H
#define RUST_KERNEL_H
@ -12,24 +42,20 @@
#include "rust_sched_reaper.h"
#include "util/hash_map.h"
struct rust_task_thread;
class rust_scheduler;
class rust_sched_driver;
class rust_sched_launcher_factory;
struct rust_task_thread;
class rust_port;
// Scheduler, task, and port handles. Within a single kernel instance these
// uniquely identify the objects they represent.
typedef intptr_t rust_sched_id;
typedef intptr_t rust_task_id;
typedef intptr_t rust_port_id;
typedef std::map<rust_sched_id, rust_scheduler*> sched_map;
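// For example (a sketch mirroring rust_start), a rust_sched_id issued by a
// kernel is resolved back to its rust_scheduler through that same kernel:
//
//     rust_scheduler *sched = kernel->get_scheduler_by_id(sched_id);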
class rust_sched_driver;
class rust_sched_launcher_factory;
/**
* A global object shared by all thread domains. Most of the data structures
* in this class are synchronized since they are accessed from multiple
* threads.
*/
class rust_kernel {
memory_region _region;
rust_log _log;

View File

@ -1,3 +1,10 @@
/**
The rust scheduler. Schedulers may be added to the kernel
dynamically and they run until there are no more tasks to
schedule. Most of the scheduler work is carried out in worker
threads by rust_sched_loop.
*/
#ifndef RUST_SCHEDULER_H
#define RUST_SCHEDULER_H

View File

@ -1,3 +1,28 @@
/**
The rust task is a cooperatively-scheduled green thread that executes
Rust code on a segmented stack.
This class has too many responsibilities:
* Working with the scheduler loop to signal and respond to state changes,
and dealing with all the thread synchronization issues involved
* Managing the dynamically resizing list of Rust stack segments
* Switching between running Rust code on the Rust segmented stack and
native C code on large stacks owned by the scheduler
The lifetime of a rust_task object closely mirrors that of a running Rust
task object, but they are not identical. In particular, the rust_task is an
atomically reference counted object that might be accessed from arbitrary
threads at any time. This may keep the task from being destroyed even after
the task is dead from a Rust task lifecycle perspective.
FIXME: The task and the scheduler have an over-complicated, undocumented
protocol for shutting down the task, hopefully without races. It would be
easier to reason about if other runtime objects could not access the task
from arbitrary threads, and didn't need to be atomically refcounted.
*/
#ifndef RUST_TASK_H
#define RUST_TASK_H
@ -17,7 +42,8 @@
// The amount of extra space at the end of each stack segment, available
// to the rt, compiler and dynamic linker for running small functions
// FIXME: We want this to be 128 but need to slim the red zone calls down
// FIXME: We want this to be 128 but need to slim the red zone calls down,
// disable lazy symbol relocation, and other things we haven't discovered yet
#define RZ_LINUX_32 (1024*2)
#define RZ_LINUX_64 (1024*2)
#define RZ_MAC_32 (1024*20)
@ -59,18 +85,6 @@
#endif
#endif
extern "C" CDECL void
record_sp_limit(void *limit);
extern "C" CDECL uintptr_t
get_sp_limit();
// The function prolog compares the amount of stack needed to the end of
// the stack. As an optimization, when the frame size is less than 256
// bytes, it will simply compare %esp to the stack limit instead of
// subtracting the frame size. As a result we need our stack limit to
// account for those 256 bytes.
const unsigned LIMIT_OFFSET = 256;
struct rust_box;
struct frame_glue_fns {
@ -323,14 +337,19 @@ template <typename T> struct task_owned {
// This stuff is on the stack-switching fast path
// Get a rough approximation of the current stack pointer
extern "C" uintptr_t get_sp();
// Records the pointer to the end of the Rust stack in a platform-
// specific location in the thread control block
extern "C" CDECL void record_sp_limit(void *limit);
extern "C" CDECL uintptr_t get_sp_limit();
// Gets a pointer to the vicinity of the current stack pointer
extern "C" uintptr_t get_sp();
// This is the function that switches stacks by calling another function with
// a single void* argument while changing the stack pointer. It has a funny
// name because gdb doesn't normally like to backtrace through split stacks
// (thinks it indicates a bug), but has a special case to allow functions
// named __morestack to move the stack pointer around.
// This is the function that switches between the C and the Rust stack by
// calling another function with a single void* argument while changing the
// stack pointer. It has a funny name because gdb doesn't normally like to
// backtrace through split stacks (thinks it indicates a bug), but has a
// special case to allow functions named __morestack to move the stack pointer
// around.
extern "C" void __morestack(void *args, void *fn_ptr, uintptr_t stack_ptr);
inline static uintptr_t
@ -490,6 +509,14 @@ rust_task::prev_stack() {
extern "C" CDECL void
record_sp_limit(void *limit);
// The LLVM-generated segmented-stack function prolog compares the amount of
// stack needed for each frame to the end-of-stack pointer stored in the
// TCB. As an optimization, when the frame size is less than 256 bytes, it
will simply compare %esp to the stack limit instead of subtracting the
// frame size. As a result we need our stack limit to account for those 256
// bytes.
const unsigned LIMIT_OFFSET = 256;
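// In other words (a sketch; `stack_lowest_usable_byte` is a name invented
// here, and record_stack_limit below does the real, platform-aware work):
//
//     uintptr_t limit = (uintptr_t)stack_lowest_usable_byte + LIMIT_OFFSET;
//     record_sp_limit((void*)limit);
//
// A frame that only compares %esp against this limit then still has at
// least 256 bytes of stack below it.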
inline void
rust_task::record_stack_limit() {
assert(stk);