Auto merge of #25839 - bluss:str-split-at-impl, r=alexcrichton

Implement RFC rust-lang/rfcs#1123: add the str method `str::split_at(mid: usize) -> (&str, &str)`. Also a minor cleanup in the collections::str module: remove redundant slicing of `self`.
2024-11-23 07:14:28 +00:00 · 2015-06-11 00:22:27 +00:00 · 2015-06-11 00:22:27 +00:00 · fbb13543fc
commit fbb13543fc
parent 01ab4f761c 1112a05b8f
3 changed files with 105 additions and 45 deletions
--- a/src/libcollections/str.rs
+++ b/src/libcollections/str.rs
@ -439,7 +439,7 @@ impl str {
    #[stable(feature = "rust1", since = "1.0.0")]
    #[inline]
    pub fn len(&self) -> usize {
-        core_str::StrExt::len(&self[..])
+        core_str::StrExt::len(self)
    }

    /// Returns true if this slice has a length of zero bytes.
@ -452,7 +452,7 @@ impl str {
    #[inline]
    #[stable(feature = "rust1", since = "1.0.0")]
    pub fn is_empty(&self) -> bool {
-        core_str::StrExt::is_empty(&self[..])
+        core_str::StrExt::is_empty(self)
    }

    /// Returns a string's displayed width in columns.
@ -473,7 +473,7 @@ impl str {
    #[unstable(feature = "unicode",
               reason = "this functionality may only be provided by libunicode")]
    pub fn width(&self, is_cjk: bool) -> usize {
-        UnicodeStr::width(&self[..], is_cjk)
+        UnicodeStr::width(self, is_cjk)
    }

    /// Checks that `index`-th byte lies at the start and/or end of a
@ -509,7 +509,7 @@ impl str {
                         this method may want to be replaced with checked \
                         slicing")]
    pub fn is_char_boundary(&self, index: usize) -> bool {
-        core_str::StrExt::is_char_boundary(&self[..], index)
+        core_str::StrExt::is_char_boundary(self, index)
    }

    /// Converts `self` to a byte slice.
@ -522,7 +522,7 @@ impl str {
    #[stable(feature = "rust1", since = "1.0.0")]
    #[inline(always)]
    pub fn as_bytes(&self) -> &[u8] {
-        core_str::StrExt::as_bytes(&self[..])
+        core_str::StrExt::as_bytes(self)
    }

    /// Returns a raw pointer to the `&str`'s buffer.
@ -540,7 +540,7 @@ impl str {
    #[stable(feature = "rust1", since = "1.0.0")]
    #[inline]
    pub fn as_ptr(&self) -> *const u8 {
-        core_str::StrExt::as_ptr(&self[..])
+        core_str::StrExt::as_ptr(self)
    }

    /// Takes a bytewise slice from a string.
@ -564,7 +564,7 @@ impl str {
    /// ```
    #[stable(feature = "rust1", since = "1.0.0")]
    pub unsafe fn slice_unchecked(&self, begin: usize, end: usize) -> &str {
-        core_str::StrExt::slice_unchecked(&self[..], begin, end)
+        core_str::StrExt::slice_unchecked(self, begin, end)
    }

    /// Returns a slice of the string from the character range [`begin`..`end`).
@ -594,7 +594,7 @@ impl str {
    #[unstable(feature = "collections",
               reason = "may have yet to prove its worth")]
    pub fn slice_chars(&self, begin: usize, end: usize) -> &str {
-        core_str::StrExt::slice_chars(&self[..], begin, end)
+        core_str::StrExt::slice_chars(self, begin, end)
    }

    /// Given a byte position, return the next char and its index.
@ -644,7 +644,7 @@ impl str {
                         be removed in favor of just char_at() or eventually \
                         removed altogether")]
    pub fn char_range_at(&self, start: usize) -> CharRange {
-        core_str::StrExt::char_range_at(&self[..], start)
+        core_str::StrExt::char_range_at(self, start)
    }

    /// Given a byte position, return the previous `char` and its position.
@ -696,7 +696,7 @@ impl str {
                         be removed in favor of just char_at_reverse() or \
                         eventually removed altogether")]
    pub fn char_range_at_reverse(&self, start: usize) -> CharRange {
-        core_str::StrExt::char_range_at_reverse(&self[..], start)
+        core_str::StrExt::char_range_at_reverse(self, start)
    }

    /// Given a byte position, return the `char` at that position.
@ -721,7 +721,7 @@ impl str {
                         iterators or by getting the first char from a \
                         subslice")]
    pub fn char_at(&self, i: usize) -> char {
-        core_str::StrExt::char_at(&self[..], i)
+        core_str::StrExt::char_at(self, i)
    }

    /// Given a byte position, return the `char` at that position, counting
@ -745,7 +745,7 @@ impl str {
                         are also somewhat unclear, especially with which \
                         cases generate panics")]
    pub fn char_at_reverse(&self, i: usize) -> char {
-        core_str::StrExt::char_at_reverse(&self[..], i)
+        core_str::StrExt::char_at_reverse(self, i)
    }

    /// Retrieves the first character from a `&str` and returns it.
@ -776,7 +776,34 @@ impl str {
                         may not be warranted with the existence of the chars \
                         and/or char_indices iterators")]
    pub fn slice_shift_char(&self) -> Option<(char, &str)> {
-        core_str::StrExt::slice_shift_char(&self[..])
+        core_str::StrExt::slice_shift_char(self)
+    }
+
+    /// Divide one string slice into two at an index.
+    ///
+    /// The index `mid` is a byte offset from the start of the string
+    /// that must be on a character boundary.
+    ///
+    /// Return slices `&self[..mid]` and `&self[mid..]`.
+    ///
+    /// # Panics
+    ///
+    /// Panics if `mid` is beyond the last character of the string,
+    /// or if it is not on a character boundary.
+    ///
+    /// # Examples
+    /// ```
+    /// # #![feature(collections)]
+    /// let s = "Löwe 老虎 Léopard";
+    /// let first_space = s.find(' ').unwrap_or(s.len());
+    /// let (a, b) = s.split_at(first_space);
+    ///
+    /// assert_eq!(a, "Löwe");
+    /// assert_eq!(b, " 老虎 Léopard");
+    /// ```
+    #[inline]
+    pub fn split_at(&self, mid: usize) -> (&str, &str) {
+        core_str::StrExt::split_at(self, mid)
    }

    /// An iterator over the codepoints of `self`.
@ -790,7 +817,7 @@ impl str {
    /// ```
    #[stable(feature = "rust1", since = "1.0.0")]
    pub fn chars(&self) -> Chars {
-        core_str::StrExt::chars(&self[..])
+        core_str::StrExt::chars(self)
    }

    /// An iterator over the characters of `self` and their byte offsets.
@ -805,7 +832,7 @@ impl str {
    /// ```
    #[stable(feature = "rust1", since = "1.0.0")]
    pub fn char_indices(&self) -> CharIndices {
-        core_str::StrExt::char_indices(&self[..])
+        core_str::StrExt::char_indices(self)
    }

    /// An iterator over the bytes of `self`.
@ -819,7 +846,7 @@ impl str {
    /// ```
    #[stable(feature = "rust1", since = "1.0.0")]
    pub fn bytes(&self) -> Bytes {
-        core_str::StrExt::bytes(&self[..])
+        core_str::StrExt::bytes(self)
    }

    /// An iterator over the non-empty substrings of `self` which contain no whitespace,
@ -835,7 +862,7 @@ impl str {
    /// ```
    #[stable(feature = "split_whitespace", since = "1.1.0")]
    pub fn split_whitespace(&self) -> SplitWhitespace {
-        UnicodeStr::split_whitespace(&self[..])
+        UnicodeStr::split_whitespace(self)
    }

    /// An iterator over the non-empty substrings of `self` which contain no whitespace,
@ -857,7 +884,7 @@ impl str {
               reason = "the precise algorithm to use is unclear")]
    #[allow(deprecated)]
    pub fn words(&self) -> Words {
-        UnicodeStr::words(&self[..])
+        UnicodeStr::words(self)
    }

    /// An iterator over the lines of a string, separated by `\n`.
@ -883,7 +910,7 @@ impl str {
    /// ```
    #[stable(feature = "rust1", since = "1.0.0")]
    pub fn lines(&self) -> Lines {
-        core_str::StrExt::lines(&self[..])
+        core_str::StrExt::lines(self)
    }

    /// An iterator over the lines of a string, separated by either
@ -910,7 +937,7 @@ impl str {
    /// ```
    #[stable(feature = "rust1", since = "1.0.0")]
    pub fn lines_any(&self) -> LinesAny {
-        core_str::StrExt::lines_any(&self[..])
+        core_str::StrExt::lines_any(self)
    }

    /// Returns an iterator over the string in Unicode Normalization Form D
@ -1016,7 +1043,7 @@ impl str {
    #[unstable(feature = "unicode",
               reason = "this functionality may only be provided by libunicode")]
    pub fn graphemes(&self, is_extended: bool) -> Graphemes {
-        UnicodeStr::graphemes(&self[..], is_extended)
+        UnicodeStr::graphemes(self, is_extended)
    }

    /// Returns an iterator over the grapheme clusters of `self` and their
@ -1037,7 +1064,7 @@ impl str {
    #[unstable(feature = "unicode",
               reason = "this functionality may only be provided by libunicode")]
    pub fn grapheme_indices(&self, is_extended: bool) -> GraphemeIndices {
-        UnicodeStr::grapheme_indices(&self[..], is_extended)
+        UnicodeStr::grapheme_indices(self, is_extended)
    }

    /// Returns an iterator of `u16` over the string encoded as UTF-16.
@ -1058,7 +1085,7 @@ impl str {
    /// ```
    #[stable(feature = "rust1", since = "1.0.0")]
    pub fn contains<'a, P: Pattern<'a>>(&'a self, pat: P) -> bool {
-        core_str::StrExt::contains(&self[..], pat)
+        core_str::StrExt::contains(self, pat)
    }

    /// Returns `true` if the given `&str` is a prefix of the string.
@ -1070,7 +1097,7 @@ impl str {
    /// ```
    #[stable(feature = "rust1", since = "1.0.0")]
    pub fn starts_with<'a, P: Pattern<'a>>(&'a self, pat: P) -> bool {
-        core_str::StrExt::starts_with(&self[..], pat)
+        core_str::StrExt::starts_with(self, pat)
    }

    /// Returns true if the given `&str` is a suffix of the string.
@ -1084,7 +1111,7 @@ impl str {
    pub fn ends_with<'a, P: Pattern<'a>>(&'a self, pat: P) -> bool
        where P::Searcher: ReverseSearcher<'a>
    {
-        core_str::StrExt::ends_with(&self[..], pat)
+        core_str::StrExt::ends_with(self, pat)
    }

    /// Returns the byte index of the first character of `self` that matches
@ -1129,7 +1156,7 @@ impl str {
    /// ```
    #[stable(feature = "rust1", since = "1.0.0")]
    pub fn find<'a, P: Pattern<'a>>(&'a self, pat: P) -> Option<usize> {
-        core_str::StrExt::find(&self[..], pat)
+        core_str::StrExt::find(self, pat)
    }

    /// Returns the byte index of the last character of `self` that
@ -1173,7 +1200,7 @@ impl str {
    pub fn rfind<'a, P: Pattern<'a>>(&'a self, pat: P) -> Option<usize>
        where P::Searcher: ReverseSearcher<'a>
    {
-        core_str::StrExt::rfind(&self[..], pat)
+        core_str::StrExt::rfind(self, pat)
    }

    /// An iterator over substrings of `self`, separated by characters
@ -1251,7 +1278,7 @@ impl str {
    /// ```
    #[stable(feature = "rust1", since = "1.0.0")]
    pub fn split<'a, P: Pattern<'a>>(&'a self, pat: P) -> Split<'a, P> {
-        core_str::StrExt::split(&self[..], pat)
+        core_str::StrExt::split(self, pat)
    }

    /// An iterator over substrings of `self`, separated by characters
@ -1299,7 +1326,7 @@ impl str {
    pub fn rsplit<'a, P: Pattern<'a>>(&'a self, pat: P) -> RSplit<'a, P>
        where P::Searcher: ReverseSearcher<'a>
    {
-        core_str::StrExt::rsplit(&self[..], pat)
+        core_str::StrExt::rsplit(self, pat)
    }

    /// An iterator over substrings of `self`, separated by characters
@ -1337,7 +1364,7 @@ impl str {
    /// ```
    #[stable(feature = "rust1", since = "1.0.0")]
    pub fn split_terminator<'a, P: Pattern<'a>>(&'a self, pat: P) -> SplitTerminator<'a, P> {
-        core_str::StrExt::split_terminator(&self[..], pat)
+        core_str::StrExt::split_terminator(self, pat)
    }

    /// An iterator over substrings of `self`, separated by characters
@ -1375,7 +1402,7 @@ impl str {
    pub fn rsplit_terminator<'a, P: Pattern<'a>>(&'a self, pat: P) -> RSplitTerminator<'a, P>
        where P::Searcher: ReverseSearcher<'a>
    {
-        core_str::StrExt::rsplit_terminator(&self[..], pat)
+        core_str::StrExt::rsplit_terminator(self, pat)
    }

    /// An iterator over substrings of `self`, separated by a pattern,
@ -1422,7 +1449,7 @@ impl str {
    /// ```
    #[stable(feature = "rust1", since = "1.0.0")]
    pub fn splitn<'a, P: Pattern<'a>>(&'a self, count: usize, pat: P) -> SplitN<'a, P> {
-        core_str::StrExt::splitn(&self[..], count, pat)
+        core_str::StrExt::splitn(self, count, pat)
    }

    /// An iterator over substrings of `self`, separated by a pattern,
@ -1469,7 +1496,7 @@ impl str {
    pub fn rsplitn<'a, P: Pattern<'a>>(&'a self, count: usize, pat: P) -> RSplitN<'a, P>
        where P::Searcher: ReverseSearcher<'a>
    {
-        core_str::StrExt::rsplitn(&self[..], count, pat)
+        core_str::StrExt::rsplitn(self, count, pat)
    }

    /// An iterator over the matches of a pattern within `self`.
@ -1503,7 +1530,7 @@ impl str {
    #[unstable(feature = "collections",
               reason = "method got recently added")]
    pub fn matches<'a, P: Pattern<'a>>(&'a self, pat: P) -> Matches<'a, P> {
-        core_str::StrExt::matches(&self[..], pat)
+        core_str::StrExt::matches(self, pat)
    }

    /// An iterator over the matches of a pattern within `self`, yielded in
@ -1538,7 +1565,7 @@ impl str {
    pub fn rmatches<'a, P: Pattern<'a>>(&'a self, pat: P) -> RMatches<'a, P>
        where P::Searcher: ReverseSearcher<'a>
    {
-        core_str::StrExt::rmatches(&self[..], pat)
+        core_str::StrExt::rmatches(self, pat)
    }

    /// An iterator over the start and end indices of the disjoint matches
@ -1583,7 +1610,7 @@ impl str {
    // NB: Right now MatchIndices yields `(usize, usize)`, but it would
    // be more consistent with `matches` and `char_indices` to return `(usize, &str)`
    pub fn match_indices<'a, P: Pattern<'a>>(&'a self, pat: P) -> MatchIndices<'a, P> {
-        core_str::StrExt::match_indices(&self[..], pat)
+        core_str::StrExt::match_indices(self, pat)
    }

    /// An iterator over the start and end indices of the disjoint matches of
@ -1629,7 +1656,7 @@ impl str {
    pub fn rmatch_indices<'a, P: Pattern<'a>>(&'a self, pat: P) -> RMatchIndices<'a, P>
        where P::Searcher: ReverseSearcher<'a>
    {
-        core_str::StrExt::rmatch_indices(&self[..], pat)
+        core_str::StrExt::rmatch_indices(self, pat)
    }

    /// Returns the byte offset of an inner slice relative to an enclosing
@ -1653,7 +1680,7 @@ impl str {
    #[unstable(feature = "collections",
               reason = "awaiting convention about comparability of arbitrary slices")]
    pub fn subslice_offset(&self, inner: &str) -> usize {
-        core_str::StrExt::subslice_offset(&self[..], inner)
+        core_str::StrExt::subslice_offset(self, inner)
    }

    /// Returns a `&str` with leading and trailing whitespace removed.
@ -1666,7 +1693,7 @@ impl str {
    /// ```
    #[stable(feature = "rust1", since = "1.0.0")]
    pub fn trim(&self) -> &str {
-        UnicodeStr::trim(&self[..])
+        UnicodeStr::trim(self)
    }

    /// Returns a `&str` with leading whitespace removed.
@ -1679,7 +1706,7 @@ impl str {
    /// ```
    #[stable(feature = "rust1", since = "1.0.0")]
    pub fn trim_left(&self) -> &str {
-        UnicodeStr::trim_left(&self[..])
+        UnicodeStr::trim_left(self)
    }

    /// Returns a `&str` with trailing whitespace removed.
@ -1692,7 +1719,7 @@ impl str {
    /// ```
    #[stable(feature = "rust1", since = "1.0.0")]
    pub fn trim_right(&self) -> &str {
-        UnicodeStr::trim_right(&self[..])
+        UnicodeStr::trim_right(self)
    }

    /// Returns a string with all pre- and suffixes that match a pattern
@ -1722,7 +1749,7 @@ impl str {
    pub fn trim_matches<'a, P: Pattern<'a>>(&'a self, pat: P) -> &'a str
        where P::Searcher: DoubleEndedSearcher<'a>
    {
-        core_str::StrExt::trim_matches(&self[..], pat)
+        core_str::StrExt::trim_matches(self, pat)
    }

    /// Returns a string with all prefixes that match a pattern
@ -1742,7 +1769,7 @@ impl str {
    /// ```
    #[stable(feature = "rust1", since = "1.0.0")]
    pub fn trim_left_matches<'a, P: Pattern<'a>>(&'a self, pat: P) -> &'a str {
-        core_str::StrExt::trim_left_matches(&self[..], pat)
+        core_str::StrExt::trim_left_matches(self, pat)
    }

    /// Returns a string with all suffixes that match a pattern
@ -1772,7 +1799,7 @@ impl str {
    pub fn trim_right_matches<'a, P: Pattern<'a>>(&'a self, pat: P) -> &'a str
        where P::Searcher: ReverseSearcher<'a>
    {
-        core_str::StrExt::trim_right_matches(&self[..], pat)
+        core_str::StrExt::trim_right_matches(self, pat)
    }

    /// Parses `self` into the specified type.
@ -1795,7 +1822,7 @@ impl str {
    #[inline]
    #[stable(feature = "rust1", since = "1.0.0")]
    pub fn parse<F: FromStr>(&self) -> Result<F, F::Err> {
-        core_str::StrExt::parse(&self[..])
+        core_str::StrExt::parse(self)
    }

    /// Replaces all occurrences of one string with another.
--- a/src/libcollectionstest/str.rs
+++ b/src/libcollectionstest/str.rs
@ -688,6 +688,26 @@ fn test_char_at_reverse() {
    }
 }

+#[test]
+fn test_split_at() {
+    let s = "ศไทย中华Việt Nam";
+    for (index, _) in s.char_indices() {
+        let (a, b) = s.split_at(index);
+        assert_eq!(&s[..a.len()], a);
+        assert_eq!(&s[a.len()..], b);
+    }
+    let (a, b) = s.split_at(s.len());
+    assert_eq!(a, s);
+    assert_eq!(b, "");
+}
+
+#[test]
+#[should_panic]
+fn test_split_at_boundscheck() {
+    let s = "ศไทย中华Việt Nam";
+    let (a, b) = s.split_at(1);
+}
+
 #[test]
 fn test_escape_unicode() {
    assert_eq!("abc".escape_unicode(),
--- a/src/libcore/str/mod.rs
+++ b/src/libcore/str/mod.rs
@ -1517,6 +1517,7 @@ pub trait StrExt {
    fn rfind<'a, P: Pattern<'a>>(&'a self, pat: P) -> Option<usize>
        where P::Searcher: ReverseSearcher<'a>;
    fn find_str<'a, P: Pattern<'a>>(&'a self, pat: P) -> Option<usize>;
+    fn split_at(&self, mid: usize) -> (&str, &str);
    fn slice_shift_char<'a>(&'a self) -> Option<(char, &'a str)>;
    fn subslice_offset(&self, inner: &str) -> usize;
    fn as_ptr(&self) -> *const u8;
@ -1809,6 +1810,18 @@ impl StrExt for str {
        self.find(pat)
    }

+    fn split_at(&self, mid: usize) -> (&str, &str) {
+        // is_char_boundary checks that the index is in [0, .len()]
+        if self.is_char_boundary(mid) {
+            unsafe {
+                (self.slice_unchecked(0, mid),
+                 self.slice_unchecked(mid, self.len()))
+            }
+        } else {
+            slice_error_fail(self, 0, mid)
+        }
+    }
+
    #[inline]
    fn slice_shift_char(&self) -> Option<(char, &str)> {
        if self.is_empty() {