mirror of
https://github.com/rust-lang/rust.git
synced 2025-01-19 11:12:43 +00:00
Add string_buffer (not used yet)
This commit is contained in:
parent
7417ab5aed
commit
4fd5f86732
291
src/string_buffer.rs
Normal file
291
src/string_buffer.rs
Normal file
@ -0,0 +1,291 @@
|
||||
// Copyright 2015 The Rust Project Developers. See the COPYRIGHT
|
||||
// file at the top-level directory of this distribution and at
|
||||
// http://rust-lang.org/COPYRIGHT.
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or
|
||||
// http://www.apache.org/licenses/LICENSE-2.0> or the MIT license
|
||||
// <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your
|
||||
// option. This file may not be copied, modified, or distributed
|
||||
// except according to those terms.
|
||||
|
||||
// A specialised string-like structure that is optimised for appending text and
|
||||
// sequential reading.
|
||||
|
||||
// TODO
|
||||
// Debug
|
||||
// docs
|
||||
// char iterator
|
||||
// Chars -> CharsAndPos
|
||||
// Eq
|
||||
|
||||
extern crate unicode;
|
||||
|
||||
use std::str::FromStr;
|
||||
use std::{cmp, fmt};
|
||||
|
||||
/// Upper bound applied when the capacity of a follow-on node is grown
/// geometrically in `StringNode::push_str`. A single string longer than this
/// still gets a node large enough to hold it.
const MAX_CAPACITY: usize = 0xffff;
/// Capacity of the first node created by `StringBuffer::new`, and the minimum
/// base value that is doubled when sizing a follow-on node.
const INIT_CAPACITY: usize = 0xff;
|
||||
|
||||
pub struct StringBuffer {
|
||||
first: Box<StringNode>,
|
||||
// last: &self StringNode
|
||||
// Optimisation that saves us from walking the whole list of nodes everytime
|
||||
// we append a string.
|
||||
last: *mut StringNode,
|
||||
// The length of the whole StringBuffer.
|
||||
len: usize,
|
||||
}
|
||||
|
||||
pub struct Chars<'a> {
|
||||
// Node we're currently iterating over.
|
||||
cur_node: &'a StringNode,
|
||||
// Byte in cur_node.
|
||||
cur_byte: usize,
|
||||
// Byte since start of StringBuffer.
|
||||
abs_byte: usize,
|
||||
}
|
||||
|
||||
/// One link of the list backing a `StringBuffer`.
struct StringNode {
    /// The text held by this node.
    data: String,
    /// The following node, or `None` when this is the last node of the list.
    next: Option<Box<StringNode>>,
}
|
||||
|
||||
impl StringBuffer {
|
||||
pub fn new() -> StringBuffer {
|
||||
StringBuffer::with_capacity(INIT_CAPACITY)
|
||||
}
|
||||
|
||||
pub fn with_capacity(capacity: usize) -> StringBuffer {
|
||||
let mut result = StringBuffer {
|
||||
first: box StringNode::with_capacity(capacity),
|
||||
last: 0 as *mut StringNode,
|
||||
len: 0,
|
||||
};
|
||||
result.last = &mut *result.first;
|
||||
result
|
||||
}
|
||||
|
||||
pub fn push_str(&mut self, text: &str) {
|
||||
self.len += text.len();
|
||||
unsafe {
|
||||
// Safety invariant: the `last` field will only ever point to
|
||||
// a node owned by self, and will live until destruction of self.
|
||||
self.last = (&mut *self.last).push_str(text);
|
||||
}
|
||||
}
|
||||
|
||||
pub fn chars<'a>(&'a self) -> Chars<'a> {
|
||||
Chars::new(&self.first)
|
||||
}
|
||||
}
|
||||
|
||||
impl StringNode {
|
||||
fn with_capacity(capacity: usize) -> StringNode {
|
||||
StringNode {
|
||||
data: String::with_capacity(capacity),
|
||||
next: None,
|
||||
}
|
||||
}
|
||||
|
||||
// Returns a reference to the new last node.
|
||||
fn push_str(&mut self, text: &str) -> &mut StringNode {
|
||||
if let Some(ref mut n) = self.next {
|
||||
return n.push_str(text);
|
||||
}
|
||||
|
||||
if self.data.capacity() - self.data.len() >= text.len() {
|
||||
self.data.push_str(text);
|
||||
self
|
||||
} else {
|
||||
self.data.shrink_to_fit();
|
||||
let next_cap = cmp::min(cmp::max(self.data.capacity(),
|
||||
INIT_CAPACITY) * 2,
|
||||
MAX_CAPACITY);
|
||||
let next_cap = cmp::max(next_cap, text.len());
|
||||
self.next = Some(box StringNode::with_capacity(next_cap));
|
||||
let next = self.next.as_mut().unwrap();
|
||||
next.push_str(text);
|
||||
&mut **next
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl FromStr for StringBuffer {
|
||||
type Err = ();
|
||||
fn from_str(text: &str) -> Result<StringBuffer, ()> {
|
||||
let mut result = StringBuffer::with_capacity(cmp::max(INIT_CAPACITY, text.len()));
|
||||
result.push_str(text);
|
||||
Ok(result)
|
||||
}
|
||||
}
|
||||
|
||||
impl fmt::Display for StringBuffer {
|
||||
fn fmt(&self, fmt: &mut fmt::Formatter) -> Result<(), fmt::Error> {
|
||||
fn fmt_node(node: &StringNode, fmt: &mut fmt::Formatter) -> Result<(), fmt::Error> {
|
||||
try!(write!(fmt, "{}", node.data));
|
||||
if let Some(ref n) = node.next {
|
||||
fmt_node(n, fmt)
|
||||
} else {
|
||||
Ok(())
|
||||
}
|
||||
}
|
||||
|
||||
fmt_node(&self.first, fmt)
|
||||
}
|
||||
}
|
||||
|
||||
impl<'a> Iterator for Chars<'a> {
|
||||
type Item = (char, usize);
|
||||
|
||||
fn next(&mut self) -> Option<(char, usize)> {
|
||||
while self.cur_byte >= self.cur_node.data.len() {
|
||||
if let Some(ref n) = self.cur_node.next {
|
||||
self.cur_byte = 0;
|
||||
self.cur_node = n;
|
||||
} else {
|
||||
return None;
|
||||
}
|
||||
}
|
||||
|
||||
let byte = self.abs_byte;
|
||||
let result = self.read_char();
|
||||
|
||||
return Some((result, byte));
|
||||
}
|
||||
}
|
||||
|
||||
impl<'a> Chars<'a> {
|
||||
fn new<'b>(first_node: &'b StringNode) -> Chars<'b> {
|
||||
Chars {
|
||||
cur_node: first_node,
|
||||
cur_byte: 0,
|
||||
abs_byte: 0,
|
||||
}
|
||||
}
|
||||
|
||||
fn read_char(&mut self) -> char {
|
||||
let first_byte = self.read_byte();
|
||||
let width = unicode::str::utf8_char_width(first_byte);
|
||||
if width == 1 {
|
||||
return first_byte as char
|
||||
}
|
||||
if width == 0 {
|
||||
panic!("non-utf8 char in StringBuffer");
|
||||
}
|
||||
let mut buf = [first_byte, 0, 0, 0];
|
||||
{
|
||||
let mut start = 1;
|
||||
while start < width {
|
||||
buf[start] = self.read_byte();
|
||||
start += 1;
|
||||
}
|
||||
}
|
||||
match ::std::str::from_utf8(&buf[..width]).ok() {
|
||||
Some(s) => s.char_at(0),
|
||||
None => panic!("bad chars in StringBuffer")
|
||||
}
|
||||
}
|
||||
|
||||
fn read_byte(&mut self) -> u8 {
|
||||
let result = self.cur_node.data.as_bytes()[self.cur_byte];
|
||||
self.cur_byte += 1;
|
||||
self.abs_byte += 1;
|
||||
result
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
#[cfg(test)]
mod test {
    use super::*;
    // Bug #23157
    use super::{StringNode, INIT_CAPACITY};

    #[test]
    fn test_new() {
        let s = StringBuffer::new();
        assert_eq!(s.len, 0);
        assert_eq!(s.to_string(), "");
        assert_eq!(count_nodes(&s), 1);
        assert_eq!(first_capacity(&s), INIT_CAPACITY);

        let s = StringBuffer::with_capacity(64);
        assert_eq!(s.len, 0);
        assert_eq!(s.to_string(), "");
        assert_eq!(count_nodes(&s), 1);
        assert_eq!(first_capacity(&s), 64);
    }

    #[test]
    fn test_from_str() {
        let s: StringBuffer = "Hello".parse().unwrap();
        assert_eq!(s.len, 5);
        assert_eq!(s.to_string(), "Hello");
        assert_eq!(count_nodes(&s), 1);
        assert_eq!(first_capacity(&s), INIT_CAPACITY);

        assert_chars(&s, "Hello");
    }

    #[test]
    fn test_push_str() {
        let mut s: StringBuffer = "Hello".parse().unwrap();
        assert_eq!(first_capacity(&s), INIT_CAPACITY);

        s.push_str(" world!");
        assert_eq!(s.to_string(), "Hello world!");
        assert_eq!(s.len, 12);
        s.push_str(" foo");
        assert_eq!(s.to_string(), "Hello world! foo");
        assert_eq!(s.len, 16);

        assert_eq!(count_nodes(&s), 1);

        assert_chars(&s, "Hello world! foo");
    }

    // push_str requiring multiple nodes
    #[test]
    fn test_push_str_multi() {
        let mut s: StringBuffer = StringBuffer::with_capacity(2);
        assert_eq!(first_capacity(&s), 2);

        s.push_str("Hello");
        assert_eq!(s.to_string(), "Hello");
        assert_eq!(s.len, 5);
        assert_eq!(count_nodes(&s), 2);
        s.push_str(" world!");
        assert_eq!(s.to_string(), "Hello world!");
        assert_eq!(s.len, 12);
        assert_eq!(count_nodes(&s), 2);

        assert_chars(&s, "Hello world!");
    }

    // Multi-byte characters, spread over more than one node.
    #[test]
    fn test_unicode() {
        let mut s: StringBuffer = StringBuffer::with_capacity(2);
        s.push_str("a\u{e9}");
        s.push_str("\u{20ac}\u{1f600}");
        assert_eq!(s.to_string(), "a\u{e9}\u{20ac}\u{1f600}");
        assert_eq!(s.len, 10);
        assert_eq!(count_nodes(&s), 2);

        assert_chars(&s, "a\u{e9}\u{20ac}\u{1f600}");
    }

    // Checks that iterating `s` yields exactly the characters of `expected`,
    // each paired with its byte offset from the start of the text.
    fn assert_chars(s: &StringBuffer, expected: &str) {
        let got: Vec<(char, usize)> = s.chars().collect();
        let want: Vec<(char, usize)> = expected.char_indices().map(|(b, c)| (c, b)).collect();
        assert_eq!(got, want);
    }

    fn count_nodes(s: &StringBuffer) -> usize {
        count_nodes_from(&s.first)
    }

    fn count_nodes_from(s: &StringNode) -> usize {
        match s.next {
            Some(ref n) => 1 + count_nodes_from(n),
            None => 1,
        }
    }

    fn first_capacity(s: &StringBuffer) -> usize {
        s.first.data.capacity()
    }
}
|
Loading…
Reference in New Issue
Block a user