mirror of
https://github.com/rust-lang/rust.git
synced 2025-02-08 21:13:55 +00:00
std: Rename str::Normalizations to str::Decompositions
The Normalizations iterator has been renamed to Decompositions. It does not currently include all forms of Unicode normalization, but only encompasses decompositions. If implemented, recomposition would likely be a separate iterator which works on the result of this one. [breaking-change]
This commit is contained in:
parent
8c54d5bf40
commit
df802a2754
@ -256,13 +256,13 @@ def format_table_content(f, content, indent):
|
|||||||
line = " "*indent + chunk
|
line = " "*indent + chunk
|
||||||
f.write(line)
|
f.write(line)
|
||||||
|
|
||||||
def emit_core_decomp_module(f, canon, compat):
|
def emit_core_norm_module(f, canon, compat):
|
||||||
canon_keys = canon.keys()
|
canon_keys = canon.keys()
|
||||||
canon_keys.sort()
|
canon_keys.sort()
|
||||||
|
|
||||||
compat_keys = compat.keys()
|
compat_keys = compat.keys()
|
||||||
compat_keys.sort()
|
compat_keys.sort()
|
||||||
f.write("pub mod decompose {\n");
|
f.write("pub mod normalization {\n");
|
||||||
f.write(" use option::Option;\n");
|
f.write(" use option::Option;\n");
|
||||||
f.write(" use option::{Some, None};\n");
|
f.write(" use option::{Some, None};\n");
|
||||||
f.write(" use slice::ImmutableVector;\n");
|
f.write(" use slice::ImmutableVector;\n");
|
||||||
@ -401,8 +401,8 @@ def emit_core_decomp_module(f, canon, compat):
|
|||||||
|
|
||||||
""")
|
""")
|
||||||
|
|
||||||
def emit_std_decomp_module(f, combine):
|
def emit_std_norm_module(f, combine):
|
||||||
f.write("pub mod decompose {\n");
|
f.write("pub mod normalization {\n");
|
||||||
f.write(" use option::{Some, None};\n");
|
f.write(" use option::{Some, None};\n");
|
||||||
f.write(" use slice::ImmutableVector;\n");
|
f.write(" use slice::ImmutableVector;\n");
|
||||||
|
|
||||||
@ -467,7 +467,7 @@ def gen_core_unicode():
|
|||||||
emit_bsearch_range_table(rf);
|
emit_bsearch_range_table(rf);
|
||||||
emit_property_module(rf, "general_category", gencats)
|
emit_property_module(rf, "general_category", gencats)
|
||||||
|
|
||||||
emit_core_decomp_module(rf, canon_decomp, compat_decomp)
|
emit_core_norm_module(rf, canon_decomp, compat_decomp)
|
||||||
|
|
||||||
derived = load_properties("DerivedCoreProperties.txt",
|
derived = load_properties("DerivedCoreProperties.txt",
|
||||||
["XID_Start", "XID_Continue", "Alphabetic", "Lowercase", "Uppercase"])
|
["XID_Start", "XID_Continue", "Alphabetic", "Lowercase", "Uppercase"])
|
||||||
@ -485,7 +485,7 @@ def gen_std_unicode():
|
|||||||
with open(r, "w") as rf:
|
with open(r, "w") as rf:
|
||||||
# Preamble
|
# Preamble
|
||||||
rf.write(preamble)
|
rf.write(preamble)
|
||||||
emit_std_decomp_module(rf, combines)
|
emit_std_norm_module(rf, combines)
|
||||||
|
|
||||||
gen_core_unicode()
|
gen_core_unicode()
|
||||||
gen_std_unicode()
|
gen_std_unicode()
|
||||||
|
@ -30,9 +30,9 @@ use iter::{Iterator, range_step};
|
|||||||
use unicode::{derived_property, property, general_category, conversions};
|
use unicode::{derived_property, property, general_category, conversions};
|
||||||
|
|
||||||
/// Returns the canonical decomposition of a character.
|
/// Returns the canonical decomposition of a character.
|
||||||
pub use unicode::decompose::decompose_canonical;
|
pub use unicode::normalization::decompose_canonical;
|
||||||
/// Returns the compatibility decomposition of a character.
|
/// Returns the compatibility decomposition of a character.
|
||||||
pub use unicode::decompose::decompose_compatible;
|
pub use unicode::normalization::decompose_compatible;
|
||||||
|
|
||||||
#[cfg(not(test))] use cmp::{Eq, Ord, TotalEq, TotalOrd, Ordering};
|
#[cfg(not(test))] use cmp::{Eq, Ord, TotalEq, TotalOrd, Ordering};
|
||||||
#[cfg(not(test))] use default::Default;
|
#[cfg(not(test))] use default::Default;
|
||||||
|
@ -104,7 +104,7 @@ pub mod general_category {
|
|||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
pub mod decompose {
|
pub mod normalization {
|
||||||
use option::Option;
|
use option::Option;
|
||||||
use option::{Some, None};
|
use option::{Some, None};
|
||||||
use slice::ImmutableVector;
|
use slice::ImmutableVector;
|
||||||
|
@ -228,25 +228,25 @@ fn canonical_sort(comb: &mut [(char, u8)]) {
|
|||||||
}
|
}
|
||||||
|
|
||||||
#[deriving(Clone)]
|
#[deriving(Clone)]
|
||||||
enum NormalizationForm {
|
enum DecompositionType {
|
||||||
NFD,
|
Canonical,
|
||||||
NFKD
|
Compatible
|
||||||
}
|
}
|
||||||
|
|
||||||
/// External iterator for a string's normalization's characters.
|
/// External iterator for a string's decomposition's characters.
|
||||||
/// Use with the `std::iter` module.
|
/// Use with the `std::iter` module.
|
||||||
#[deriving(Clone)]
|
#[deriving(Clone)]
|
||||||
pub struct Normalizations<'a> {
|
pub struct Decompositions<'a> {
|
||||||
kind: NormalizationForm,
|
kind: DecompositionType,
|
||||||
iter: Chars<'a>,
|
iter: Chars<'a>,
|
||||||
buffer: Vec<(char, u8)>,
|
buffer: Vec<(char, u8)>,
|
||||||
sorted: bool
|
sorted: bool
|
||||||
}
|
}
|
||||||
|
|
||||||
impl<'a> Iterator<char> for Normalizations<'a> {
|
impl<'a> Iterator<char> for Decompositions<'a> {
|
||||||
#[inline]
|
#[inline]
|
||||||
fn next(&mut self) -> Option<char> {
|
fn next(&mut self) -> Option<char> {
|
||||||
use unicode::decompose::canonical_combining_class;
|
use unicode::normalization::canonical_combining_class;
|
||||||
|
|
||||||
match self.buffer.as_slice().head() {
|
match self.buffer.as_slice().head() {
|
||||||
Some(&(c, 0)) => {
|
Some(&(c, 0)) => {
|
||||||
@ -262,8 +262,8 @@ impl<'a> Iterator<char> for Normalizations<'a> {
|
|||||||
}
|
}
|
||||||
|
|
||||||
let decomposer = match self.kind {
|
let decomposer = match self.kind {
|
||||||
NFD => char::decompose_canonical,
|
Canonical => char::decompose_canonical,
|
||||||
NFKD => char::decompose_compatible
|
Compatible => char::decompose_compatible
|
||||||
};
|
};
|
||||||
|
|
||||||
if !self.sorted {
|
if !self.sorted {
|
||||||
@ -887,24 +887,24 @@ pub trait StrAllocating: Str {
|
|||||||
/// An Iterator over the string in Unicode Normalization Form D
|
/// An Iterator over the string in Unicode Normalization Form D
|
||||||
/// (canonical decomposition).
|
/// (canonical decomposition).
|
||||||
#[inline]
|
#[inline]
|
||||||
fn nfd_chars<'a>(&'a self) -> Normalizations<'a> {
|
fn nfd_chars<'a>(&'a self) -> Decompositions<'a> {
|
||||||
Normalizations {
|
Decompositions {
|
||||||
iter: self.as_slice().chars(),
|
iter: self.as_slice().chars(),
|
||||||
buffer: Vec::new(),
|
buffer: Vec::new(),
|
||||||
sorted: false,
|
sorted: false,
|
||||||
kind: NFD
|
kind: Canonical
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
/// An Iterator over the string in Unicode Normalization Form KD
|
/// An Iterator over the string in Unicode Normalization Form KD
|
||||||
/// (compatibility decomposition).
|
/// (compatibility decomposition).
|
||||||
#[inline]
|
#[inline]
|
||||||
fn nfkd_chars<'a>(&'a self) -> Normalizations<'a> {
|
fn nfkd_chars<'a>(&'a self) -> Decompositions<'a> {
|
||||||
Normalizations {
|
Decompositions {
|
||||||
iter: self.as_slice().chars(),
|
iter: self.as_slice().chars(),
|
||||||
buffer: Vec::new(),
|
buffer: Vec::new(),
|
||||||
sorted: false,
|
sorted: false,
|
||||||
kind: NFKD
|
kind: Compatible
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -12,7 +12,7 @@
|
|||||||
|
|
||||||
#![allow(missing_doc, non_uppercase_statics)]
|
#![allow(missing_doc, non_uppercase_statics)]
|
||||||
|
|
||||||
pub mod decompose {
|
pub mod normalization {
|
||||||
use option::{Some, None};
|
use option::{Some, None};
|
||||||
use slice::ImmutableVector;
|
use slice::ImmutableVector;
|
||||||
|
|
||||||
|
Loading…
Reference in New Issue
Block a user