auto merge of #15385 : jroweboy/rust/master, r=brson

This enables the docs search function to be more forgiving for spelling mistakes. The algorithm works as a dynamic programming algorithm to detect the minimum number of changes required to the search parameter string in order to match any string in the search index. If the number of changes is less then a threshold (currently defined as 3), then the search parameter will be included as it is a possible misspelling of the word. Any results returned by the algorithm are sorted by distance and are ranked lower than results that are partial or exact matches (aka the matches returned by the original search algorithm). Additionally, the increment in the for loops in this file were using one of three different ways to increment (`i += 1` `i++` and `++i`) so I just standardized it to `++i`.

As an example, consider searching for the word `String` and accidentally typing in `Strnig`. The old system would return no results because it is a misspelling, but the Levenshtein distance between these two inputs is only two, which means that this will return `String` as a result. Additionally, it will return a few other results such as `strong`, and `StdRng` because these are also similar to `Strnig`. Because of the ranking system though, this change should be unobtrusive to anyone that spells the words correctly, as those are still ranked first before any Levenshtein results.
This commit is contained in:
bors 2014-07-31 07:51:38 +00:00
commit 28ae6f5329

View File

@ -61,7 +61,7 @@
}
$('#' + from)[0].scrollIntoView();
$('.line-numbers span').removeClass('line-highlighted');
for (i = from; i <= to; i += 1) {
for (i = from; i <= to; ++i) {
$('#' + i).addClass('line-highlighted');
}
}
@ -102,7 +102,7 @@
stripped = '',
len = rootPath.match(/\.\.\//g).length + 1;
for (i = 0; i < len; i += 1) {
for (i = 0; i < len; ++i) {
match = url.match(/\/[^\/]*$/);
if (i < len - 1) {
stripped = match[0] + stripped;
@ -114,9 +114,47 @@
document.location.href = url;
});
/**
* A function to compute the Levenshtein distance between two strings
* Licensed under the Creative Commons Attribution-ShareAlike 3.0 Unported
* Full License can be found at http://creativecommons.org/licenses/by-sa/3.0/legalcode
* This code is an unmodified version of the code written by Marco de Wit
* and was found at http://stackoverflow.com/a/18514751/745719
*/
var levenshtein = (function() {
var row2 = [];
return function(s1, s2) {
if (s1 === s2) {
return 0;
} else {
var s1_len = s1.length, s2_len = s2.length;
if (s1_len && s2_len) {
var i1 = 0, i2 = 0, a, b, c, c2, row = row2;
while (i1 < s1_len)
row[i1] = ++i1;
while (i2 < s2_len) {
c2 = s2.charCodeAt(i2);
a = i2;
++i2;
b = i2;
for (i1 = 0; i1 < s1_len; ++i1) {
c = a + (s1.charCodeAt(i1) !== c2 ? 1 : 0);
a = row[i1];
b = b < a ? (b < c ? b + 1 : c) : (a < c ? a + 1 : c);
row[i1] = b;
}
}
return b;
} else {
return s1_len + s2_len;
}
}
};
})();
function initSearch(rawSearchIndex) {
var currentResults, index, searchIndex;
var MAX_LEV_DISTANCE = 3;
var params = getQueryStringParams();
// Populate search bar with query string search term when provided,
@ -143,7 +181,7 @@
split = valLower.split("::");
//remove empty keywords
for (var j = 0; j < split.length; j++) {
for (var j = 0; j < split.length; ++j) {
split[j].toLowerCase();
if (split[j] === "") {
split.splice(j, 1);
@ -156,7 +194,7 @@
val.charAt(val.length - 1) === val.charAt(0))
{
val = val.substr(1, val.length - 2);
for (var i = 0; i < nSearchWords; i += 1) {
for (var i = 0; i < nSearchWords; ++i) {
if (searchWords[i] === val) {
// filter type: ... queries
if (typeFilter < 0 || typeFilter === searchIndex[i].ty) {
@ -170,15 +208,31 @@
} else {
// gather matching search results up to a certain maximum
val = val.replace(/\_/g, "");
for (var i = 0; i < split.length; i++) {
for (var j = 0; j < nSearchWords; j += 1) {
for (var i = 0; i < split.length; ++i) {
for (var j = 0; j < nSearchWords; ++j) {
var lev_distance;
if (searchWords[j].indexOf(split[i]) > -1 ||
searchWords[j].indexOf(val) > -1 ||
searchWords[j].replace(/_/g, "").indexOf(val) > -1)
{
// filter type: ... queries
if (typeFilter < 0 || typeFilter === searchIndex[j].ty) {
results.push({id: j, index: searchWords[j].replace(/_/g, "").indexOf(val)});
results.push({
id: j,
index: searchWords[j].replace(/_/g, "").indexOf(val),
lev: 0,
});
}
} else if (
(lev_distance = levenshtein(searchWords[j], val)) <=
MAX_LEV_DISTANCE) {
if (typeFilter < 0 || typeFilter === searchIndex[j].ty) {
results.push({
id: j,
index: 0,
// we want lev results to go lower than others
lev: lev_distance,
});
}
}
if (results.length === max) {
@ -189,7 +243,7 @@
}
var nresults = results.length;
for (var i = 0; i < nresults; i += 1) {
for (var i = 0; i < nresults; ++i) {
results[i].word = searchWords[results[i].id];
results[i].item = searchIndex[results[i].id] || {};
}
@ -201,6 +255,12 @@
results.sort(function(aaa, bbb) {
var a, b;
// Sort by non levenshtein results and then levenshtein results by the distance
// (less changes required to match means higher rankings)
a = (aaa.lev);
b = (bbb.lev);
if (a !== b) return a - b;
// sort by crate (non-current crate goes later)
a = (aaa.item.crate !== window.currentCrate);
b = (bbb.item.crate !== window.currentCrate);
@ -258,7 +318,7 @@
results[i].id = -1;
}
}
for (var i = 0; i < results.length; i++) {
for (var i = 0; i < results.length; ++i) {
var result = results[i],
name = result.item.name.toLowerCase(),
path = result.item.path.toLowerCase(),
@ -288,38 +348,23 @@
* @return {[boolean]} [Whether the result is valid or not]
*/
function validateResult(name, path, keys, parent) {
//initially valid
var validate = true;
//if there is a parent, then validate against parent
if (parent !== undefined) {
for (var i = 0; i < keys.length; i++) {
// if previous keys are valid and current key is in the
// path, name or parent
if ((validate) &&
(name.toLowerCase().indexOf(keys[i]) > -1 ||
path.toLowerCase().indexOf(keys[i]) > -1 ||
parent.name.toLowerCase().indexOf(keys[i]) > -1))
{
validate = true;
} else {
validate = false;
}
}
} else {
for (var i = 0; i < keys.length; i++) {
// if previous keys are valid and current key is in the
// path, name
if ((validate) &&
(name.toLowerCase().indexOf(keys[i]) > -1 ||
path.toLowerCase().indexOf(keys[i]) > -1))
{
validate = true;
} else {
validate = false;
}
for (var i=0; i < keys.length; ++i) {
// each check is for validation so we negate the conditions and invalidate
if (!(
// check for an exact name match
name.toLowerCase().indexOf(keys[i]) > -1 ||
// then an exact path match
path.toLowerCase().indexOf(keys[i]) > -1 ||
// next if there is a parent, check for exact parent match
(parent !== undefined &&
parent.name.toLowerCase().indexOf(keys[i]) > -1) ||
// lastly check to see if the name was a levenshtein match
levenshtein(name.toLowerCase(), keys[i]) <=
MAX_LEV_DISTANCE)) {
return false;
}
}
return validate;
return true;
}
function getQuery() {
@ -499,7 +544,7 @@
resultIndex = execQuery(query, 20000, index);
len = resultIndex.length;
for (i = 0; i < len; i += 1) {
for (i = 0; i < len; ++i) {
if (resultIndex[i].id > -1) {
obj = searchIndex[resultIndex[i].id];
filterdata.push([obj.name, obj.ty, obj.path, obj.desc]);
@ -571,7 +616,7 @@
// faster analysis operations
var len = items.length;
var lastPath = "";
for (var i = 0; i < len; i += 1) {
for (var i = 0; i < len; ++i) {
var rawRow = items[i];
var row = {crate: crate, ty: rawRow[0], name: rawRow[1],
path: rawRow[2] || lastPath, desc: rawRow[3],
@ -643,7 +688,7 @@
crates.push(crate);
}
crates.sort();
for (var i = 0; i < crates.length; i++) {
for (var i = 0; i < crates.length; ++i) {
var klass = 'crate';
if (crates[i] == window.currentCrate) {
klass += ' current';
@ -660,10 +705,10 @@
window.register_implementors = function(imp) {
var list = $('#implementors-list');
var libs = Object.getOwnPropertyNames(imp);
for (var i = 0; i < libs.length; i++) {
for (var i = 0; i < libs.length; ++i) {
if (libs[i] == currentCrate) continue;
var structs = imp[libs[i]];
for (var j = 0; j < structs.length; j++) {
for (var j = 0; j < structs.length; ++j) {
var code = $('<code>').append(structs[j]);
$.each(code.find('a'), function(idx, a) {
var href = $(a).attr('href');