rust/src/test/bench/task-perf-word-count-generic.rs

370 lines
9.4 KiB
Rust
Raw Normal View History

2011-08-25 22:36:55 +00:00
/**
A parallel word-frequency counting program.
This is meant primarily to demonstrate Rust's MapReduce framework.
It takes a list of files on the command line and outputs a list of
words along with how many times each word is used.
*/
2012-07-13 03:09:19 +00:00
// xfail-pretty
2011-08-25 22:36:55 +00:00
use std;
2012-09-05 19:32:05 +00:00
use option = option;
use option::Some;
use option::None;
use std::map;
use std::map::hashmap;
use io::WriterUtil;
use std::time;
use comm::Chan;
use comm::Port;
use comm::recv;
use comm::send;
// Expands to an `unsafe` block that takes the address of `$x`, moves the
// pointed-to value into a temporary with `<-`, and evaluates to that
// temporary.
// NOTE(review): presumably a workaround for moving a value out of a binding
// the compiler would otherwise refuse to move from -- confirm before reuse.
macro_rules! move_out (
{ $x:expr } => { unsafe { let y <- *ptr::addr_of($x); y } }
)
trait word_reader {
2012-08-20 19:23:37 +00:00
fn read_word() -> Option<~str>;
}
// Operations a key type must provide to be stored in a map built by
// `mk_hash`: a hash value and an equality test.
trait hash_key {
// Hash of `self`.
pure fn hash() -> uint;
// Whether `self` and `k` are equal.
pure fn eq(&&k: self) -> bool;
}
// Builds an empty `map::hashmap` whose hashing and equality are delegated
// to the key type's `hash_key` implementation.
fn mk_hash<K: Const hash_key, V: Copy>() -> map::hashmap<K, V> {
    // Equality adapter: compares two borrowed keys via `hash_key::eq`.
    pure fn keys_equal<K: Const hash_key>(lhs: &K, rhs: &K) -> bool {
        lhs.eq(*rhs)
    }

    // Hash adapter: forwards to `hash_key::hash`.
    pure fn key_hash<K: Const hash_key>(key: &K) -> uint {
        key.hash()
    }

    map::hashmap(key_hash, keys_equal)
}
2012-08-08 01:10:06 +00:00
// `hash_key` for owned strings, so `~str` can be used as a key in maps
// built by `mk_hash`.
impl ~str: hash_key {
// Hashes the string with `str::hash`.
pure fn hash() -> uint { str::hash(&self) }
// String equality via `==`.
pure fn eq(&&x: ~str) -> bool { self == x }
}
// These used to be in task, but they disappeared.
2012-08-15 21:10:46 +00:00
// Handle for a task started by `spawn_joinable`: the port on which that
// task sends `()` once its body has returned.
type joinable_task = Port<()>;
2012-06-30 23:19:07 +00:00
fn spawn_joinable(+f: fn~()) -> joinable_task {
2012-08-27 21:22:25 +00:00
let p = Port();
let c = Chan(p);
2012-06-30 23:19:07 +00:00
do task::spawn() |move f| {
f();
c.send(());
}
p
}
fn join(t: joinable_task) {
t.recv()
}
2011-08-25 22:36:55 +00:00
2012-08-14 20:38:35 +00:00
// Any `io::Reader` is a `word_reader`: the method forwards to the free
// function `read_word`.
impl io::Reader: word_reader {
    fn read_word() -> Option<~str> { read_word(self) }
}
fn file_word_reader(filename: ~str) -> word_reader {
match io::file_reader(&Path(filename)) {
2012-08-26 23:54:31 +00:00
result::Ok(f) => { f as word_reader }
result::Err(e) => { fail fmt!("%?", e) }
}
}
2011-08-25 22:36:55 +00:00
fn map(f: fn~() -> word_reader, emit: map_reduce::putter<~str, int>) {
let f = f();
loop {
2012-08-06 19:34:08 +00:00
match f.read_word() {
2012-08-20 19:23:37 +00:00
Some(w) => { emit(w, 1); }
None => { break; }
2011-08-25 22:36:55 +00:00
}
}
}
fn reduce(&&word: ~str, get: map_reduce::getter<int>) {
let mut count = 0;
2011-08-25 22:36:55 +00:00
2012-08-20 19:23:37 +00:00
loop { match get() { Some(_) => { count += 1; } None => { break; } } }
2012-08-23 00:24:52 +00:00
io::println(fmt!("%s\t%?", word, count));
2011-08-25 22:36:55 +00:00
}
2012-08-16 01:46:55 +00:00
struct box<T> {
2012-09-07 02:40:15 +00:00
mut contents: Option<T>,
fn swap(f: fn(+T) -> T) {
2012-08-20 19:23:37 +00:00
let mut tmp = None;
self.contents <-> tmp;
2012-08-20 19:23:37 +00:00
self.contents = Some(f(option::unwrap(tmp)));
}
fn unwrap() -> T {
2012-08-20 19:23:37 +00:00
let mut tmp = None;
self.contents <-> tmp;
option::unwrap(tmp)
}
}
2012-09-05 22:58:43 +00:00
// Constructs a `box` whose slot initially holds `x`.
fn box<T>(+x: T) -> box<T> {
box {
contents: Some(x)
}
}
2011-08-25 22:36:55 +00:00
mod map_reduce {
export putter;
export getter;
export mapper;
export reducer;
export map_reduce;
type putter<K: Send, V: Send> = fn(K, V);
2011-08-25 22:36:55 +00:00
type mapper<K1: Send, K2: Send, V: Send> = fn~(K1, putter<K2, V>);
2011-08-25 22:36:55 +00:00
type getter<V: Send> = fn() -> Option<V>;
2011-08-25 22:36:55 +00:00
type reducer<K: Copy Send, V: Copy Send> = fn~(K, getter<V>);
2011-08-25 22:36:55 +00:00
// Control messages: a request for the reducer of a key (carrying a channel
// on which the reducer's channel is to be returned) and a notice that a
// mapper has finished.
// NOTE(review): the functions in this module appear to use the items
// generated by `proto! ctrl_proto` rather than this enum -- confirm whether
// it is still needed.
enum ctrl_proto<K: Copy Send, V: Copy Send> {
    find_reducer(K, Chan<Chan<reduce_proto<V>>>),
    mapper_done
}
// Control protocol spoken between a map task (client end) and the master
// task (server end).
//
// In `open` the client either asks for the reducer responsible for a key or
// announces that it is done, which terminates the protocol. After
// `find_reducer` the server answers with `reducer(chan)` and the protocol
// returns to `open`.
proto! ctrl_proto (
    open: send<K: Copy Send, V: Copy Send> {
        find_reducer(K) -> reducer_response<K, V>,
        mapper_done -> !
    }

    reducer_response: recv<K: Copy Send, V: Copy Send> {
        reducer(Chan<reduce_proto<V>>) -> open<K, V>
    }
)
// Messages received by a reduce task (handled in `reduce_task`'s `get`):
// `emit_val` carries one value, `done` sets the finished flag, and
// `addref` / `release` increment and decrement the reference count that
// keeps the reduce task receiving.
enum reduce_proto<V: Copy Send> { emit_val(V), done, addref, release }
2011-08-25 22:36:55 +00:00
// Spawns one map task per element of `inputs`.
//
// For each input a fresh `ctrl_proto` pipe is created: the client end is
// wrapped in a `box` and handed to the map task, and the server end is
// pushed onto `ctrls` for the caller to service. Returns the handles of the
// spawned tasks, in input order.
fn start_mappers<K1: Copy Send, K2: Const Copy Send hash_key,
                 V: Copy Send>(
    map: mapper<K1, K2, V>,
    &ctrls: ~[ctrl_proto::server::open<K2, V>],
    inputs: ~[K1])
    -> ~[joinable_task]
{
    let mut tasks = ~[];
    for inputs.each |i| {
        let (ctrl, ctrl_server) = ctrl_proto::init();
        let ctrl = box(ctrl);
        vec::push(tasks, spawn_joinable(|| map_task(map, ctrl, i) ));
        vec::push(ctrls, ctrl_server);
    }
    return tasks;
}
// Body of one map task: runs `map` over `input` and routes every emitted
// pair to the reduce task responsible for its key.
//
// `intermediates` caches, per key, the channel of that key's reduce task.
// On a cache miss the channel is requested from the master over `ctrl`,
// cached, and sent an `addref`. Every emitted value is then forwarded as
// `emit_val`. When `map` returns, each cached channel is sent `release` and
// the master is told `mapper_done`.
fn map_task<K1: Copy Send, K2: Const Copy Send hash_key, V: Copy Send>(
    map: mapper<K1, K2, V>,
    ctrl: box<ctrl_proto::client::open<K2, V>>,
    input: K1)
{
    // log(error, "map_task " + input);
    let intermediates = mk_hash();

    do map(input) |key, val| {
        let mut c = None;
        match intermediates.find(key) {
          Some(_c) => { c = Some(_c); }
          None => {
            // Ask the master for this key's reducer. The pipe endpoint is
            // consumed by each step, so it is swapped out of the box and
            // the follow-up endpoint is stored back.
            do ctrl.swap |ctrl| {
                let ctrl = ctrl_proto::client::find_reducer(ctrl, key);
                match pipes::recv(ctrl) {
                  ctrl_proto::reducer(c_, ctrl) => {
                    c = Some(c_);
                    move_out!(ctrl)
                  }
                }
            }
            intermediates.insert(key, c.get());
            send(c.get(), addref);
          }
        }
        send(c.get(), emit_val(val));
    }

    for intermediates.each_value |v| { send(v, release) }
    ctrl_proto::client::mapper_done(ctrl.unwrap());
}
// Body of one reduce task for `key`.
//
// Creates a port for `reduce_proto` messages, sends the matching channel
// back on `out`, and then calls `reduce` with a getter that pulls values
// from that port.
fn reduce_task<K: Copy Send, V: Copy Send>(
    reduce: reducer<K, V>,
    key: K,
    out: Chan<Chan<reduce_proto<V>>>)
{
    let p = Port();

    send(out, Chan(p));

    let mut ref_count = 0;
    let mut is_done = false;

    // Receives messages until one carries a value, which is returned.
    // `done`, `addref` and `release` only update the bookkeeping. Returns
    // `None` once `done` has been seen and the reference count is no longer
    // positive.
    fn get<V: Copy Send>(p: Port<reduce_proto<V>>,
                         &ref_count: int, &is_done: bool)
        -> Option<V> {
        while !is_done || ref_count > 0 {
            match recv(p) {
              emit_val(v) => {
                // error!("received %d", v);
                return Some(v);
              }
              done => {
                // error!("all done");
                is_done = true;
              }
              addref => { ref_count += 1; }
              release => { ref_count -= 1; }
            }
        }
        return None;
    }

    reduce(key, || get(p, ref_count, is_done) );
}
// Runs a complete map/reduce job over `inputs` and returns when every
// spawned task has finished.
//
// The calling task acts as the master: it starts one map task per input,
// then services their control pipes until all of them have reported
// `mapper_done`. For each `find_reducer` request it replies with the
// channel of the reduce task for that key, spawning the reduce task the
// first time a key is seen. Afterwards every reducer is sent `done` and all
// tasks are joined.
fn map_reduce<K1: Copy Send, K2: Const Copy Send hash_key, V: Copy Send>(
    map: mapper<K1, K2, V>,
    reduce: reducer<K2, V>,
    inputs: ~[K1])
{
    let mut ctrl = ~[];

    // This task becomes the master control task. It task::_spawns
    // to do the rest.

    let reducers = mk_hash();
    let mut tasks = start_mappers(map, ctrl, inputs);
    let mut num_mappers = vec::len(inputs) as int;

    while num_mappers > 0 {
        let (_ready, message, ctrls) = pipes::select(ctrl);
        match option::unwrap(message) {
          ctrl_proto::mapper_done => {
            // error!("received mapper terminated.");
            num_mappers -= 1;
            // The finished mapper's endpoint is not put back.
            ctrl = ctrls;
          }
          ctrl_proto::find_reducer(k, cc) => {
            let c;
            // log(error, "finding reducer for " + k);
            match reducers.find(k) {
              Some(_c) => {
                // log(error,
                // "reusing existing reducer for " + k);
                c = _c;
              }
              None => {
                // log(error, "creating new reducer for " + k);
                let p = Port();
                let ch = Chan(p);
                let r = reduce, kk = k;
                vec::push(tasks,
                          spawn_joinable(|| reduce_task(r, kk, ch) ));
                // The new reduce task reports its input channel on `ch`.
                c = recv(p);
                reducers.insert(k, c);
              }
            }
            // Reply to the mapper and keep servicing its pipe.
            ctrl = vec::append_one(
                ctrls,
                ctrl_proto::server::reducer(move_out!(cc), c));
          }
        }
    }

    for reducers.each_value |v| { send(v, done) }

    for tasks.each |t| { join(t); }
}
}
fn main(argv: ~[~str]) {
if vec::len(argv) < 2u && !os::getenv(~"RUST_BENCH").is_some() {
2011-08-25 22:36:55 +00:00
let out = io::stdout();
2012-08-23 00:24:52 +00:00
out.write_line(fmt!("Usage: %s <filename> ...", argv[0]));
2011-08-25 22:36:55 +00:00
2012-08-02 00:30:05 +00:00
return;
2011-08-25 22:36:55 +00:00
}
let readers: ~[fn~() -> word_reader] = if argv.len() >= 2 {
vec::view(argv, 1u, argv.len()).map(
|f| fn~() -> word_reader { file_word_reader(f) } )
}
else {
let num_readers = 50;
let words_per_reader = 600;
vec::from_fn(
num_readers,
|_i| fn~() -> word_reader {
random_word_reader(words_per_reader) as word_reader
})
};
2011-08-25 22:36:55 +00:00
let start = time::precise_time_ns();
map_reduce::map_reduce(map, reduce, readers);
2011-08-25 22:36:55 +00:00
let stop = time::precise_time_ns();
let elapsed = (stop - start) / 1000000u64;
2011-08-25 22:36:55 +00:00
log(error, ~"MapReduce completed in "
+ u64::str(elapsed) + ~"ms");
2011-08-25 22:36:55 +00:00
}
2012-08-20 19:23:37 +00:00
fn read_word(r: io::Reader) -> Option<~str> {
let mut w = ~"";
2011-08-25 22:36:55 +00:00
while !r.eof() {
let c = r.read_char();
if is_word_char(c) {
w += str::from_char(c);
2012-08-20 19:23:37 +00:00
} else { if w != ~"" { return Some(w); } }
2011-08-25 22:36:55 +00:00
}
2012-08-20 19:23:37 +00:00
return None;
2011-08-25 22:36:55 +00:00
}
// A character belongs to a word if it is an underscore, alphabetic, or a
// digit.
fn is_word_char(c: char) -> bool {
    if c == '_' { return true; }
    char::is_alphabetic(c) || char::is_digit(c)
}
2012-08-16 01:46:55 +00:00
// A `word_reader` that produces a fixed number of random strings.
struct random_word_reader: word_reader {
    // Number of words still to be produced.
    mut remaining: uint,
    rng: rand::Rng,

    // Returns a random string while `remaining` is positive, decrementing it
    // each time; returns `None` afterwards. The string length comes from
    // `gen_uint_range(1, 4)`.
    fn read_word() -> Option<~str> {
        if self.remaining > 0 {
            self.remaining -= 1;
            let len = self.rng.gen_uint_range(1, 4);
            Some(self.rng.gen_str(len))
        }
        else { None }
    }
}
2012-09-05 22:58:43 +00:00
// Constructs a `random_word_reader` that will produce `count` words, using
// a generator obtained from `rand::Rng()`.
fn random_word_reader(count: uint) -> random_word_reader {
random_word_reader {
remaining: count,
rng: rand::Rng()
}
}