From 5d6d3d056592cf4d68afbce6084245ea6733865c Mon Sep 17 00:00:00 2001
From: Ben Blum
Date: Tue, 10 Jul 2012 21:45:53 -0400
Subject: [PATCH] Linked failure in task.rs instead of rust_task.cpp (#1868, #1189)

---
 src/libcore/task.rs  | 216 ++++++++++++++++++++++++++++++++++++-------
 src/rt/rust_task.cpp |  35 +------
 src/rt/rust_task.h   |   2 -
 3 files changed, 187 insertions(+), 66 deletions(-)

diff --git a/src/libcore/task.rs b/src/libcore/task.rs
index 3d05611aa98..5f3cf1e5c26 100644
--- a/src/libcore/task.rs
+++ b/src/libcore/task.rs
@@ -26,6 +26,7 @@ import result::result;
 import dvec::extensions;
 import dvec_iter::extensions;
+import arc::methods;
 
 export task;
 export task_result;
@@ -563,7 +564,11 @@ unsafe fn unkillable(f: fn()) {
 }
 
-/* Internal */
+/****************************************************************************
+ * Internal
+ ****************************************************************************/
+
+/* spawning */
 
 type sched_id = int;
 type task_id = int;
@@ -573,42 +578,185 @@ type task_id = int;
 type rust_task = libc::c_void;
 type rust_closure = libc::c_void;
 
-fn spawn_raw(opts: task_opts, +f: fn~()) {
+/* linked failure */
 
-    let mut f = if opts.supervise {
-        f
-    } else {
-        // FIXME (#1868, #1789): The runtime supervision API is weird here
-        // because it was designed to let the child unsupervise itself,
-        // when what we actually want is for parents to unsupervise new
-        // children.
-        fn~() {
-            rustrt::unsupervise();
-            f();
+type taskgroup_arc = arc::exclusive<option<dvec<option<*rust_task>>>>;
+
+class taskgroup {
+    // FIXME (#2816): Change dvec to an O(1) data structure (and change 'me'
+    // to a node-handle or somesuch when so done (or remove the field entirely
+    // if keyed by *rust_task)).
+    let tasks: taskgroup_arc; // 'none' means the group already failed.
+    let me: *rust_task;
+    let my_pos: uint;
+    // let parent_group: taskgroup_arc; // TODO(bblum)
+    // TODO XXX bblum: add a list of empty slots to get runtime back
+    let mut failed: bool;
+    new(-tasks: taskgroup_arc, me: *rust_task, my_pos: uint) {
+        self.tasks = tasks; self.me = me; self.my_pos = my_pos;
+        self.failed = true; // This will get un-set on successful exit.
+    }
+    // Runs on task exit.
+    drop {
+        if self.failed {
+            // Take everybody down with us.
+            kill_taskgroup(self.tasks, self.me, self.my_pos);
+        } else {
+            // Remove ourselves from the group.
+            leave_taskgroup(self.tasks, self.me, self.my_pos);
         }
+    }
+}
+
+fn taskgroup_key(+_group: @taskgroup) { } // For TLS
+
+fn enlist_in_taskgroup(group_arc: taskgroup_arc,
+                       me: *rust_task) -> option<uint> {
+    do group_arc.with |_c, state| {
+        // If 'none', the group was failing. Can't enlist.
+        do state.map |tasks| {
+            // Try to find an empty slot.
+            alt tasks.position(|x| x == none) {
+                some(empty_index) {
+                    tasks.set_elt(empty_index, some(me));
+                    empty_index
+                }
+                none {
+                    tasks.push(some(me));
+                    tasks.len() - 1
+                }
+            }
+        }
+    }
+}
+
+// NB: Runs in destructor/post-exit context. Can't 'fail'.
+fn leave_taskgroup(group_arc: taskgroup_arc, me: *rust_task, index: uint) {
+    do group_arc.with |_c, state| {
+        // If 'none', already failing and we've already gotten a kill signal.
+        do state.map |tasks| {
+            assert tasks[index] == some(me);
+            tasks.set_elt(index, none);
+        };
+    };
+}
+
+// NB: Runs in destructor/post-exit context. Can't 'fail'.
+fn kill_taskgroup(group_arc: taskgroup_arc, me: *rust_task, index: uint) {
+    // NB: We could do the killing iteration outside of the group arc, by
+    // having "let mut newstate" here, swapping inside, and iterating after.
+    // But that would let other exiting tasks fall-through and exit while we
+    // were trying to kill them, causing potential use-after-free. A task's
+    // presence in the arc guarantees it's alive only while we hold the lock,
+    // so if we're failing, all concurrently exiting tasks must wait for us.
+    // To do it differently, we'd have to use the runtime's task refcounting.
+    do group_arc.with |_c, state| {
+        let mut newstate = none;
+        *state <-> newstate;
+        // Might already be none, if somebody is failing simultaneously.
+        // That's ok; only one task needs to do the dirty work. (Might also
+        // see 'none' if somebody already failed and we got a kill signal.)
+        do newstate.map |tasks| {
+            // First remove ourself (killing ourself won't do much good). This
+            // is duplicated here to avoid having to lock twice.
+            assert tasks[index] == some(me);
+            tasks.set_elt(index, none);
+            // Now send takedown signal.
+            for tasks.each |entry| {
+                do entry.map |task| {
+                    rustrt::rust_task_kill_other(task);
+                };
+            }
+        };
+    };
+}
+
+fn share_parent_taskgroup() -> taskgroup_arc {
+    let me = rustrt::rust_get_task();
+    alt unsafe { local_get(me, taskgroup_key) } {
+        some(group) {
+            group.tasks.clone()
+        }
+        none {
+            /* Main task, doing first spawn ever. */
+            let tasks = arc::exclusive(some(dvec::from_elem(some(me))));
+            let group = @taskgroup(tasks.clone(), me, 0);
+            unsafe { local_set(me, taskgroup_key, group); }
+            tasks
+        }
+    }
+}
+
+fn spawn_raw(opts: task_opts, +f: fn~()) {
+    // Decide whether the child needs to be in a new linked failure group.
+    let child_tg: taskgroup_arc = if opts.supervise {
+        share_parent_taskgroup()
+    } else {
+        arc::exclusive(some(dvec::from_elem(none)))
     };
 
     unsafe {
-        let fptr = ptr::addr_of(f);
-        let closure: *rust_closure = unsafe::reinterpret_cast(fptr);
+        let child_data_ptr = ~mut some((child_tg, f));
+        // Being killed with the unsafe task/closure pointers would leak them.
+        do unkillable {
+            // Agh. Get move-mode items into the closure. FIXME (#2829)
+            let mut child_data = none;
+            *child_data_ptr <-> child_data;
+            let (child_tg, f) = option::unwrap(child_data);
+            // Create child task.
+            let new_task = alt opts.sched {
+                none { rustrt::new_task() }
+                some(sched_opts) { new_task_in_new_sched(sched_opts) }
+            };
+            assert !new_task.is_null();
+            // Getting killed after here would leak the task.
 
-        let new_task = alt opts.sched {
-            none {
-                rustrt::new_task()
-            }
-            some(sched_opts) {
-                new_task_in_new_sched(sched_opts)
-            }
-        };
-        assert !new_task.is_null();
+            let child_wrapper =
+                make_child_wrapper(new_task, child_tg, opts.supervise, f);
+            let fptr = ptr::addr_of(child_wrapper);
+            let closure: *rust_closure = unsafe::reinterpret_cast(fptr);
 
-        do option::iter(opts.notify_chan) |c| {
-            // FIXME (#1087): Would like to do notification in Rust
-            rustrt::rust_task_config_notify(new_task, c);
+            do option::iter(opts.notify_chan) |c| {
+                // FIXME (#1087): Would like to do notification in Rust
+                rustrt::rust_task_config_notify(new_task, c);
+            }
+
+            // Getting killed between these two calls would free the child's
+            // closure. (Reordering them wouldn't help - then getting killed
+            // between them would leak.)
+            rustrt::start_task(new_task, closure);
+            unsafe::forget(child_wrapper);
         }
+    }
 
-        rustrt::start_task(new_task, closure);
-        unsafe::forget(f);
+    fn make_child_wrapper(child_task: *rust_task, -child_tg: taskgroup_arc,
+                          supervise: bool, -f: fn~()) -> fn~() {
+        let child_tg_ptr = ~mut some(child_tg);
+        fn~() {
+            // Agh. Get move-mode items into the closure. FIXME (#2829)
+            let mut child_tg_opt = none;
+            *child_tg_ptr <-> child_tg_opt;
+            let child_tg = option::unwrap(child_tg_opt);
+            // Child task runs this code.
+            if !supervise {
+                // FIXME (#1868, #1789) take this out later
+                rustrt::unsupervise();
+            }
+            // Set up membership in taskgroup. If this returns none, the
+            // parent was already failing, so don't bother doing anything.
+            alt enlist_in_taskgroup(child_tg, child_task) {
+                some(my_index) {
+                    let group = @taskgroup(child_tg, child_task, my_index);
+                    unsafe { local_set(child_task, taskgroup_key, group); }
+                    // Run the child's body.
+                    f();
+                    // Report successful exit. (TLS cleanup code will tear
+                    // down the group.)
+                    group.failed = false;
+                }
+                none { }
+            }
+        }
     }
 
     fn new_task_in_new_sched(opts: sched_opts) -> *rust_task {
@@ -640,7 +788,6 @@ fn spawn_raw(opts: task_opts, +f: fn~()) {
         };
         rustrt::rust_new_task_in_sched(sched_id)
     }
-
 }
 
 /****************************************************************************
  *
@@ -760,7 +907,7 @@ unsafe fn local_get<T>(task: *rust_task,
     local_get_helper(task, key, false)
 }
 
-unsafe fn local_set<T>(task: *rust_task, key: local_data_key<T>, -data: @T) {
+unsafe fn local_set<T>(task: *rust_task, key: local_data_key<T>, +data: @T) {
     let map = get_task_local_map(task);
     // Store key+data as *voids. Data is invisibly referenced once; key isn't.
     let keyval = key_to_key_value(key);
@@ -822,7 +969,7 @@ unsafe fn local_data_get<T>(key: local_data_key<T>) -> option<@T> {
  * Store a value in task-local data. If this key already has a value,
  * that value is overwritten (and its destructor is run).
  */
-unsafe fn local_data_set<T>(key: local_data_key<T>, -data: @T) {
+unsafe fn local_data_set<T>(key: local_data_key<T>, +data: @T) {
     local_set(rustrt::rust_get_task(), key, data)
 }
 /**
@@ -853,11 +1000,12 @@ extern mod rustrt {
     fn start_task(task: *rust_task, closure: *rust_closure);
 
-    fn rust_task_is_unwinding(rt: *rust_task) -> bool;
+    fn rust_task_is_unwinding(task: *rust_task) -> bool;
     fn unsupervise();
     fn rust_osmain_sched_id() -> sched_id;
     fn rust_task_inhibit_kill();
    fn rust_task_allow_kill();
+    fn rust_task_kill_other(task: *rust_task);
 
     #[rust_stack]
     fn rust_get_task_local_data(task: *rust_task) -> *libc::c_void;
@@ -1232,7 +1380,7 @@ fn test_unkillable() {
     let ch = po.chan();
 
     // We want to do this after failing
-    do spawn {
+    do spawn_raw({ supervise: false with default_task_opts() }) {
         for iter::repeat(10u) { yield() }
         ch.send(());
     }
@@ -1269,7 +1417,7 @@ fn test_unkillable_nested() {
     let ch = po.chan();
 
     // We want to do this after failing
-    do spawn {
+    do spawn_raw({ supervise: false with default_task_opts() }) {
         for iter::repeat(10u) { yield() }
         ch.send(());
     }
diff --git a/src/rt/rust_task.cpp b/src/rt/rust_task.cpp
index f5e2fcc9a08..d23d74b0ea2 100644
--- a/src/rt/rust_task.cpp
+++ b/src/rt/rust_task.cpp
@@ -10,6 +10,8 @@
 #include "rust_env.h"
 #include "rust_port.h"
 
+// TODO(bblum): get rid of supervisors
+
 // Tasks
 rust_task::rust_task(rust_sched_loop *sched_loop, rust_task_state state,
                      rust_task *spawner, const char *name,
@@ -146,13 +148,9 @@ cleanup_task(cleanup_args *args) {
 
     task->notify(!threw_exception);
 
-    if (threw_exception) {
-#ifndef __WIN32__
-        task->conclude_failure();
-#else
-        assert(false && "Shouldn't happen");
+#ifdef __WIN32__
+    assert(!threw_exception && "No exception-handling yet on windows builds");
 #endif
-    }
 }
 
 extern "C" CDECL void upcall_exchange_free(void *ptr);
@@ -262,10 +260,7 @@ void
 rust_task::kill() {
     scoped_lock with(kill_lock);
 
-    if (dead()) {
-        // Task is already dead, can't kill what's already dead.
-        fail_parent();
-    }
+    // XXX: bblum: kill/kill race
 
     // Note the distinction here: kill() is when you're in an upcall
     // from task A and want to force-fail task B, you do B->kill().
@@ -314,31 +309,11 @@ rust_task::begin_failure(char const *expr, char const *file, size_t line) {
     throw this;
 #else
     die();
-    conclude_failure();
     // FIXME (#908): Need unwinding on windows. This will end up aborting
     sched_loop->fail();
 #endif
 }
 
-void
-rust_task::conclude_failure() {
-    fail_parent();
-}
-
-void
-rust_task::fail_parent() {
-    scoped_lock with(supervisor_lock);
-    if (supervisor) {
-        DLOG(sched_loop, task,
-             "task %s @0x%" PRIxPTR
-             " propagating failure to supervisor %s @0x%" PRIxPTR,
-             name, this, supervisor->name, supervisor);
-        supervisor->kill();
-    }
-    if (NULL == supervisor && propagate_failure)
-        sched_loop->fail();
-}
-
 void
 rust_task::unsupervise() {
diff --git a/src/rt/rust_task.h b/src/rt/rust_task.h
index 74a8f65a23e..3bde5202954 100644
--- a/src/rt/rust_task.h
+++ b/src/rt/rust_task.h
@@ -275,8 +275,6 @@ public:
     // Fail self, assuming caller-on-stack is this task.
     void fail();
     void fail(char const *expr, char const *file, size_t line);
-    void conclude_failure();
-    void fail_parent();
 
     // Disconnect from our supervisor.
     void unsupervise();
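
For readers tracing the new bookkeeping in libcore/task.rs above, here is a minimal sketch of the same enlist/leave/kill protocol, written in modern Rust rather than the 2012 dialect of the patch. It is an illustration only, not part of the change: std::sync::{Arc, Mutex} stands in for arc::exclusive, a Vec<Option<usize>> of member ids stands in for the dvec of option<*rust_task> slots, and println! stands in for rustrt::rust_task_kill_other.

// Illustrative sketch only; not the patch's code or runtime APIs.
use std::sync::{Arc, Mutex};

// `Some(members)` while the group is healthy; `None` once any member failed.
type TaskGroup = Arc<Mutex<Option<Vec<Option<usize>>>>>;

// enlist_in_taskgroup: reuse an empty slot or append a new one. Returning
// None means the group already failed, so the new member must not run.
fn enlist(group: &TaskGroup, me: usize) -> Option<usize> {
    let mut state = group.lock().unwrap();
    state.as_mut().map(|members| {
        if let Some(i) = members.iter().position(|slot| slot.is_none()) {
            members[i] = Some(me);
            i
        } else {
            members.push(Some(me));
            members.len() - 1
        }
    })
}

// leave_taskgroup: clear our slot on clean exit (no-op if the group is gone).
fn leave(group: &TaskGroup, index: usize) {
    if let Some(members) = group.lock().unwrap().as_mut() {
        members[index] = None;
    }
}

// kill_taskgroup: take the member list out of the group so that only one
// task does the dirty work, then signal every remaining member.
fn kill_group(group: &TaskGroup, my_index: usize) {
    if let Some(mut members) = group.lock().unwrap().take() {
        members[my_index] = None; // killing ourselves wouldn't do much good
        for member in members.into_iter().flatten() {
            // stand-in for rustrt::rust_task_kill_other(task)
            println!("killing task {}", member);
        }
    }
}

fn main() {
    let group: TaskGroup = Arc::new(Mutex::new(Some(Vec::new())));
    let a = enlist(&group, 1).expect("group alive");
    let b = enlist(&group, 2).expect("group alive");
    leave(&group, a);      // task 1 exits cleanly and unlinks itself
    kill_group(&group, b); // task 2 fails and takes the group down
    assert!(enlist(&group, 3).is_none()); // late arrivals see the dead group
}

In the patch itself, the taskgroup destructor (drop) drives the same two exits: leave_taskgroup when the child's body returned normally, kill_taskgroup otherwise.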