diff --git a/Cargo.lock b/Cargo.lock index be9e8be98ca..63854ccae46 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -8,6 +8,7 @@ dependencies = [ "strings 0.0.1 (git+https://github.com/nrc/strings.rs.git)", "term 0.2.11 (registry+https://github.com/rust-lang/crates.io-index)", "toml 0.1.21 (registry+https://github.com/rust-lang/crates.io-index)", + "unicode-segmentation 0.1.2 (registry+https://github.com/rust-lang/crates.io-index)", ] [[package]] @@ -87,6 +88,11 @@ dependencies = [ "rustc-serialize 0.3.15 (registry+https://github.com/rust-lang/crates.io-index)", ] +[[package]] +name = "unicode-segmentation" +version = "0.1.2" +source = "registry+https://github.com/rust-lang/crates.io-index" + [[package]] name = "winapi" version = "0.2.2" diff --git a/Cargo.toml b/Cargo.toml index 4716be7098e..377c88a955b 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -15,6 +15,8 @@ git = "https://github.com/nrc/strings.rs.git" [dependencies] toml = "0.1.20" rustc-serialize = "0.3.14" +unicode-segmentation = "0.1.2" +regex = "0.1.41" [dev-dependencies] diff = "0.1.0" diff --git a/src/expr.rs b/src/expr.rs index aac13f552d0..66a4327a7ac 100644 --- a/src/expr.rs +++ b/src/expr.rs @@ -28,8 +28,8 @@ impl Rewrite for ast::Expr { match self.node { ast::Expr_::ExprLit(ref l) => { match l.node { - ast::Lit_::LitStr(ref is, ast::StrStyle::CookedStr) => { - rewrite_string_lit(context, &is, l.span, width, offset) + ast::Lit_::LitStr(_, ast::StrStyle::CookedStr) => { + rewrite_string_lit(context, l.span, width, offset) } _ => Some(context.snippet(self.span)), } @@ -823,7 +823,6 @@ fn rewrite_pat_expr(context: &RewriteContext, } fn rewrite_string_lit(context: &RewriteContext, - s: &str, span: Span, width: usize, offset: usize) @@ -842,7 +841,10 @@ fn rewrite_string_lit(context: &RewriteContext, trim_end: false, }; - Some(rewrite_string(&s.escape_default(), &fmt)) + let string_lit = context.snippet(span); + let str_lit = &string_lit[1..string_lit.len() - 1]; // Remove the quote characters. 
+ + Some(rewrite_string(str_lit, &fmt)) } fn rewrite_call(context: &RewriteContext, diff --git a/src/lib.rs b/src/lib.rs index 8214012a908..ab748f62a95 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -9,8 +9,6 @@ // except according to those terms. #![feature(rustc_private)] -#![feature(str_escape)] -#![feature(str_char)] #![feature(custom_attribute)] #![allow(unused_attributes)] @@ -30,6 +28,9 @@ extern crate rustc_serialize; extern crate strings; +extern crate unicode_segmentation; +extern crate regex; + use rustc::session::Session; use rustc::session::config as rustc_config; use rustc::session::config::Input; diff --git a/src/string.rs b/src/string.rs index 413237e182c..ba93b2db81d 100644 --- a/src/string.rs +++ b/src/string.rs @@ -10,7 +10,12 @@ // Format string literals. -use utils::{make_indent, next_char, prev_char, round_up_to_power_of_two}; + + +use unicode_segmentation::UnicodeSegmentation; +use regex::Regex; + +use utils::{make_indent, round_up_to_power_of_two}; use MIN_STRING; @@ -26,8 +31,12 @@ pub struct StringFormat<'a> { // TODO: simplify this! pub fn rewrite_string<'a>(s: &str, fmt: &StringFormat<'a>) -> String { - // FIXME I bet this stomps unicode escapes in the source string // TODO if lo.col > IDEAL - 10, start a new line (need cur indent for that) + // Strip line breaks. 
+ let re = Regex::new(r"(\\[:space:]+)").unwrap(); + let stripped_str = re.replace_all(s, ""); + + let graphemes = UnicodeSegmentation::graphemes(&*stripped_str, false).collect::<Vec<&str>>(); let indent = make_indent(fmt.offset); let indent = &indent; @@ -39,41 +48,36 @@ pub fn rewrite_string<'a>(s: &str, fmt: &StringFormat<'a>) -> String { let ender_length = fmt.line_end.len(); let max_chars = fmt.width.checked_sub(fmt.opener.len()).unwrap_or(0) .checked_sub(ender_length).unwrap_or(1); - loop { let mut cur_end = cur_start + max_chars; - if cur_end >= s.len() { - result.push_str(&s[cur_start..]); + if cur_end >= graphemes.len() { + let line = &graphemes[cur_start..].join(""); + result.push_str(line); break; } - - // Make sure we're on a char boundary. - cur_end = next_char(&s, cur_end); - // Push cur_end left until we reach whitespace. - while !s.char_at(cur_end - 1).is_whitespace() { - cur_end = prev_char(&s, cur_end); - + while !(graphemes[cur_end - 1].trim().len() == 0) { + cur_end -= 1; if cur_end - cur_start < MIN_STRING { // We can't break at whitespace, fall back to splitting // anywhere that doesn't break an escape sequence. - cur_end = next_char(&s, cur_start + max_chars); - while s.char_at(prev_char(&s, cur_end)) == '\\' { - cur_end = prev_char(&s, cur_end); + cur_end = cur_start + max_chars; + while graphemes[cur_end - 1] == "\\" { + cur_end -= 1; } break; } } // Make sure there is no whitespace to the right of the break. 
- while cur_end < s.len() && s.char_at(cur_end).is_whitespace() { - cur_end = next_char(&s, cur_end + 1); + while cur_end < s.len() && graphemes[cur_end].trim().len() == 0 { + cur_end += 1; } - + let raw_line = graphemes[cur_start..cur_end].join(""); let line: &str = if fmt.trim_end { - &s[cur_start..cur_end].trim_right_matches(char::is_whitespace) + &(raw_line.trim()) } else { - &s[cur_start..cur_end] + &raw_line }; result.push_str(line); diff --git a/src/utils.rs b/src/utils.rs index 00e18c3a65b..936a712ca18 100644 --- a/src/utils.rs +++ b/src/utils.rs @@ -32,31 +32,6 @@ pub fn span_after(original: Span, needle: &str, codemap: &CodeMap) -> BytePos { original.lo + BytePos(snippet.find_uncommented(needle).unwrap() as u32 + 1) } -#[inline] -pub fn prev_char(s: &str, mut i: usize) -> usize { - if i == 0 { - return 0; - } - - i -= 1; - while !s.is_char_boundary(i) { - i -= 1; - } - i -} - -#[inline] -pub fn next_char(s: &str, mut i: usize) -> usize { - if i >= s.len() { - return s.len(); - } - - while !s.is_char_boundary(i) { - i += 1; - } - i -} - #[inline] pub fn make_indent(width: usize) -> String { let mut indent = String::with_capacity(width); diff --git a/tests/source/string-lit.rs b/tests/source/string-lit.rs index d7e57ea65c7..e95aaae75e0 100644 --- a/tests/source/string-lit.rs +++ b/tests/source/string-lit.rs @@ -24,6 +24,11 @@ formatting"#; let xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx = funktion("yyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyy"); + + let unicode = "a̐éö̲\r\n"; + let unicode2 = "Löwe 老虎 Léopard"; + let unicode3 = "中华Việt Nam"; + let unicode4 = "☃☃☃☃☃☃☃☃☃☃☃☃☃☃☃☃☃☃☃☃☃☃☃☃☃☃☃☃☃☃☃☃☃☃☃☃☃☃☃☃☃☃☃☃☃☃☃☃☃☃☃☃☃☃☃☃☃☃☃☃☃☃☃☃☃☃☃☃☃☃☃☃☃☃☃☃☃☃☃☃☃☃☃☃☃☃☃☃☃☃☃☃☃☃☃☃☃☃☃☃☃☃☃☃☃☃☃☃☃☃☃☃☃☃☃☃☃☃☃☃"; "stuff" } diff --git a/tests/target/string-lit.rs b/tests/target/string-lit.rs index 2107d902e19..21cdc199d6e 100644 --- a/tests/target/string-lit.rs +++ b/tests/target/string-lit.rs @@ -30,5 +30,11 @@ formatting"#; yyyyyyyyyyyyyyyyyyyyy\ 
yyyyyyyyyy"); + let unicode = "a̐éö̲\r\n"; + let unicode2 = "Löwe 老虎 Léopard"; + let unicode3 = "中华Việt Nam"; + let unicode4 = "☃☃☃☃☃☃☃☃☃☃☃☃☃☃☃☃☃☃☃☃☃☃☃☃☃☃☃☃☃☃☃☃☃☃☃☃☃☃☃☃☃☃☃☃☃☃☃☃☃☃☃☃☃☃☃☃☃☃☃☃☃☃☃☃☃☃☃☃☃☃☃☃☃☃☃☃☃☃\ + ☃☃☃☃☃☃☃☃☃☃☃☃☃☃☃☃☃☃☃☃☃☃☃☃☃☃☃☃☃☃☃☃☃☃☃☃☃☃☃☃☃☃"; + "stuff" }