std, core: Generate unicode.rs using unicode.py

This commit is contained in:
Florian Zeitz 2014-05-12 19:56:41 +02:00 committed by Alex Crichton
parent 21867fa127
commit 74ad023674
3 changed files with 85 additions and 61 deletions

View File

@ -169,7 +169,7 @@ fn bsearch_range_table(c: char, r: &'static [(char,char)]) -> bool {
else if hi < c { Less }
else { Greater }
}) != None
}\n\n
}\n
""");
def emit_property_module(f, mod, tbl):
@ -193,11 +193,11 @@ def emit_property_module(f, mod, tbl):
f.write(" pub fn %s(c: char) -> bool {\n" % cat)
f.write(" super::bsearch_range_table(c, %s_table)\n" % cat)
f.write(" }\n\n")
f.write("}\n")
f.write("}\n\n")
def emit_conversions_module(f, lowerupper, upperlower):
f.write("pub mod conversions {\n")
f.write("pub mod conversions {")
f.write("""
use cmp::{Equal, Less, Greater};
use slice::ImmutableVector;
@ -225,13 +225,14 @@ def emit_conversions_module(f, lowerupper, upperlower):
else { Greater }
})
}
""");
emit_caseconversion_table(f, "LuLl", upperlower)
emit_caseconversion_table(f, "LlLu", lowerupper)
f.write("}\n")
def emit_caseconversion_table(f, name, table):
f.write(" static %s_table : &'static [(char, char)] = &[\n" % name)
f.write(" static %s_table : &'static [(char, char)] = &[\n" % name)
sorted_table = sorted(table.iteritems(), key=operator.itemgetter(0))
ix = 0
for key, value in sorted_table:
@ -255,7 +256,7 @@ def format_table_content(f, content, indent):
line = " "*indent + chunk
f.write(line)
def emit_decomp_module(f, canon, compat, combine):
def emit_core_decomp_module(f, canon, compat):
canon_keys = canon.keys()
canon_keys.sort()
@ -279,23 +280,6 @@ def emit_decomp_module(f, canon, compat, combine):
}
None => None
}
}\n
""")
f.write("""
fn bsearch_range_value_table(c: char, r: &'static [(char, char, u8)]) -> u8 {
use cmp::{Equal, Less, Greater};
match r.bsearch(|&(lo, hi, _)| {
if lo <= c && c <= hi { Equal }
else if hi < c { Less }
else { Greater }
}) {
Some(idx) => {
let (_, _, result) = r[idx];
result
}
None => 0
}
}\n\n
""")
@ -337,21 +321,10 @@ def emit_decomp_module(f, canon, compat, combine):
format_table_content(f, data, 8)
f.write("\n ];\n\n")
f.write(" static combining_class_table : &'static [(char, char, u8)] = &[\n")
ix = 0
for pair in combine:
f.write(ch_prefix(ix))
f.write("(%s, %s, %s)" % (escape_char(pair[0]), escape_char(pair[1]), pair[2]))
ix += 1
f.write("\n ];\n")
f.write(" pub fn canonical(c: char, i: |char|) "
+ "{ d(c, i, false); }\n\n")
f.write(" pub fn compatibility(c: char, i: |char|) "
+"{ d(c, i, true); }\n\n")
f.write(" pub fn canonical_combining_class(c: char) -> u8 {\n"
+ " bsearch_range_value_table(c, combining_class_table)\n"
+ " }\n\n")
f.write(" fn d(c: char, i: |char|, k: bool) {\n")
f.write(" use iter::Iterator;\n");
@ -389,17 +362,43 @@ def emit_decomp_module(f, canon, compat, combine):
f.write(" }\n")
f.write("}\n\n")
r = "unicode.rs"
for i in [r]:
if os.path.exists(i):
os.remove(i);
rf = open(r, "w")
def emit_std_decomp_module(f, combine):
f.write("pub mod decompose {\n");
f.write(" use option::{Some, None};\n");
f.write(" use slice::ImmutableVector;\n");
(canon_decomp, compat_decomp, gencats,
combines, lowerupper, upperlower) = load_unicode_data("UnicodeData.txt")
f.write("""
fn bsearch_range_value_table(c: char, r: &'static [(char, char, u8)]) -> u8 {
use cmp::{Equal, Less, Greater};
match r.bsearch(|&(lo, hi, _)| {
if lo <= c && c <= hi { Equal }
else if hi < c { Less }
else { Greater }
}) {
Some(idx) => {
let (_, _, result) = r[idx];
result
}
None => 0
}
}\n\n
""")
# Preamble
rf.write('''// Copyright 2012-2013 The Rust Project Developers. See the COPYRIGHT
f.write(" static combining_class_table : &'static [(char, char, u8)] = &[\n")
ix = 0
for pair in combine:
f.write(ch_prefix(ix))
f.write("(%s, %s, %s)" % (escape_char(pair[0]), escape_char(pair[1]), pair[2]))
ix += 1
f.write("\n ];\n\n")
f.write(" pub fn canonical_combining_class(c: char) -> u8 {\n"
+ " bsearch_range_value_table(c, combining_class_table)\n"
+ " }\n")
f.write("}\n")
preamble = '''// Copyright 2012-2014 The Rust Project Developers. See the COPYRIGHT
// file at the top-level directory of this distribution and at
// http://rust-lang.org/COPYRIGHT.
//
@ -409,23 +408,45 @@ rf.write('''// Copyright 2012-2013 The Rust Project Developers. See the COPYRIGH
// option. This file may not be copied, modified, or distributed
// except according to those terms.
// The following code was generated by "src/etc/unicode.py"
// NOTE: The following code was generated by "src/etc/unicode.py", do not edit directly
#![allow(missing_doc)]
#![allow(non_uppercase_statics)]
#![allow(missing_doc, non_uppercase_statics)]
''')
'''
emit_bsearch_range_table(rf);
emit_property_module(rf, "general_category", gencats)
(canon_decomp, compat_decomp, gencats,
combines, lowerupper, upperlower) = load_unicode_data("UnicodeData.txt")
emit_decomp_module(rf, canon_decomp, compat_decomp, combines)
def gen_core_unicode():
r = "core_unicode.rs"
if os.path.exists(r):
os.remove(r);
with open(r, "w") as rf:
# Preamble
rf.write(preamble)
derived = load_properties("DerivedCoreProperties.txt",
["XID_Start", "XID_Continue", "Alphabetic", "Lowercase", "Uppercase"])
emit_bsearch_range_table(rf);
emit_property_module(rf, "general_category", gencats)
emit_property_module(rf, "derived_property", derived)
emit_core_decomp_module(rf, canon_decomp, compat_decomp)
props = load_properties("PropList.txt", ["White_Space"])
emit_property_module(rf, "property", props)
emit_conversions_module(rf, lowerupper, upperlower)
derived = load_properties("DerivedCoreProperties.txt",
["XID_Start", "XID_Continue", "Alphabetic", "Lowercase", "Uppercase"])
emit_property_module(rf, "derived_property", derived)
props = load_properties("PropList.txt", ["White_Space"])
emit_property_module(rf, "property", props)
emit_conversions_module(rf, lowerupper, upperlower)
def gen_std_unicode():
    """Write the generated file ``std_unicode.rs``.

    An existing file with that name is removed first. The new file receives
    the ``preamble`` text followed by whatever ``emit_std_decomp_module``
    emits for the module-level ``combines`` data.
    """
    out_path = "std_unicode.rs"
    if os.path.exists(out_path):
        os.remove(out_path)
    with open(out_path, "w") as out:
        # Preamble
        out.write(preamble)
        emit_std_decomp_module(out, combines)
# Script entry point: produce both generated files, core first, then std.
gen_core_unicode()
gen_std_unicode()

View File

@ -1,4 +1,4 @@
// Copyright 2012-2013 The Rust Project Developers. See the COPYRIGHT
// Copyright 2012-2014 The Rust Project Developers. See the COPYRIGHT
// file at the top-level directory of this distribution and at
// http://rust-lang.org/COPYRIGHT.
//
@ -8,10 +8,11 @@
// option. This file may not be copied, modified, or distributed
// except according to those terms.
// The following code was generated by "src/etc/unicode.py"
// NOTE: The following code was generated by "src/etc/unicode.py", do not edit directly
#![allow(missing_doc, non_uppercase_statics)]
fn bsearch_range_table(c: char, r: &'static [(char,char)]) -> bool {
use cmp::{Equal, Less, Greater};
use slice::ImmutableVector;
@ -102,6 +103,7 @@ pub mod general_category {
}
}
pub mod decompose {
use option::Option;
use option::{Some, None};
@ -123,7 +125,6 @@ pub mod decompose {
}
// Canonical decompositions
static canonical_table : &'static [(char, &'static [char])] = &[
('\xc0', &['\x41', '\u0300']), ('\xc1', &['\x41', '\u0301']), ('\xc2', &['\x41', '\u0302']),
@ -3968,6 +3969,7 @@ pub mod derived_property {
pub fn XID_Start(c: char) -> bool {
super::bsearch_range_table(c, XID_Start_table)
}
}
pub mod property {
@ -3983,6 +3985,7 @@ pub mod property {
pub fn White_Space(c: char) -> bool {
super::bsearch_range_table(c, White_Space_table)
}
}
pub mod conversions {
@ -4501,7 +4504,7 @@ pub mod conversions {
('\U00010426', '\U0001044e'), ('\U00010427', '\U0001044f')
];
static LlLu_table : &'static [(char, char)] = &[
static LlLu_table : &'static [(char, char)] = &[
('\x61', '\x41'), ('\x62', '\x42'),
('\x63', '\x43'), ('\x64', '\x44'),
('\x65', '\x45'), ('\x66', '\x46'),

View File

@ -1,4 +1,4 @@
// Copyright 2012-2013 The Rust Project Developers. See the COPYRIGHT
// Copyright 2012-2014 The Rust Project Developers. See the COPYRIGHT
// file at the top-level directory of this distribution and at
// http://rust-lang.org/COPYRIGHT.
//
@ -8,7 +8,7 @@
// option. This file may not be copied, modified, or distributed
// except according to those terms.
// The following code was generated by "src/etc/unicode.py"
// NOTE: The following code was generated by "src/etc/unicode.py", do not edit directly
#![allow(missing_doc, non_uppercase_statics)]