Rollup merge of #128578 - camelid:cache-index-cleanup, r=notriddle

rustdoc: Clean up `CacheBuilder` code for building search index

This code was very convoluted and hard to reason about. It is now (I hope) much
clearer and more suitable for both future enhancements and future cleanups.

I'm doing this as a precursor, with no UI changes, to changing rustdoc to
[ignore blanket impls][1] in type-based search.

[1]: https://github.com/rust-lang/rust/pull/128471#discussion_r1699475342

r? ``@notriddle``
This commit is contained in:
Matthias Krüger 2024-08-04 11:32:35 +02:00 committed by GitHub
commit 8c826923ae
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
2 changed files with 167 additions and 157 deletions

View File

@ -1677,13 +1677,16 @@ impl Type {
}
}
fn inner_def_id(&self, cache: Option<&Cache>) -> Option<DefId> {
/// Use this method to get the [DefId] of a [clean] AST node, including [PrimitiveType]s.
///
/// [clean]: crate::clean
pub(crate) fn def_id(&self, cache: &Cache) -> Option<DefId> {
let t: PrimitiveType = match *self {
Type::Path { ref path } => return Some(path.def_id()),
DynTrait(ref bounds, _) => return bounds.get(0).map(|b| b.trait_.def_id()),
Primitive(p) => return cache.and_then(|c| c.primitive_locations.get(&p).cloned()),
Primitive(p) => return cache.primitive_locations.get(&p).cloned(),
BorrowedRef { type_: box Generic(..), .. } => PrimitiveType::Reference,
BorrowedRef { ref type_, .. } => return type_.inner_def_id(cache),
BorrowedRef { ref type_, .. } => return type_.def_id(cache),
Tuple(ref tys) => {
if tys.is_empty() {
PrimitiveType::Unit
@ -1696,17 +1699,10 @@ impl Type {
Array(..) => PrimitiveType::Array,
Type::Pat(..) => PrimitiveType::Pat,
RawPointer(..) => PrimitiveType::RawPointer,
QPath(box QPathData { ref self_type, .. }) => return self_type.inner_def_id(cache),
QPath(box QPathData { ref self_type, .. }) => return self_type.def_id(cache),
Generic(_) | Infer | ImplTrait(_) => return None,
};
cache.and_then(|c| Primitive(t).def_id(c))
}
/// Use this method to get the [DefId] of a [clean] AST node, including [PrimitiveType]s.
///
/// [clean]: crate::clean
pub(crate) fn def_id(&self, cache: &Cache) -> Option<DefId> {
self.inner_def_id(Some(cache))
Primitive(t).def_id(cache)
}
}

View File

@ -260,153 +260,21 @@ impl<'a, 'tcx> DocFolder for CacheBuilder<'a, 'tcx> {
}
// Index this method for searching later on.
if let Some(s) = item.name.or_else(|| {
if item.is_stripped() {
None
} else if let clean::ImportItem(ref i) = *item.kind
&& let clean::ImportKind::Simple(s) = i.kind
{
Some(s)
} else {
None
}
}) {
let (parent, is_inherent_impl_item) = match *item.kind {
clean::StrippedItem(..) => ((None, None), false),
clean::AssocConstItem(..) | clean::AssocTypeItem(..)
if self
.cache
.parent_stack
.last()
.is_some_and(|parent| parent.is_trait_impl()) =>
let search_name = if !item.is_stripped() {
item.name.or_else(|| {
if let clean::ImportItem(ref i) = *item.kind
&& let clean::ImportKind::Simple(s) = i.kind
{
// skip associated items in trait impls
((None, None), false)
Some(s)
} else {
None
}
clean::TyMethodItem(..)
| clean::TyAssocConstItem(..)
| clean::TyAssocTypeItem(..)
| clean::StructFieldItem(..)
| clean::VariantItem(..) => (
(
Some(
self.cache
.parent_stack
.last()
.expect("parent_stack is empty")
.item_id()
.expect_def_id(),
),
Some(&self.cache.stack[..self.cache.stack.len() - 1]),
),
false,
),
clean::MethodItem(..) | clean::AssocConstItem(..) | clean::AssocTypeItem(..) => {
if self.cache.parent_stack.is_empty() {
((None, None), false)
} else {
let last = self.cache.parent_stack.last().expect("parent_stack is empty 2");
let did = match &*last {
ParentStackItem::Impl {
// impl Trait for &T { fn method(self); }
//
// When generating a function index with the above shape, we want it
// associated with `T`, not with the primitive reference type. It should
// show up as `T::method`, rather than `reference::method`, in the search
// results page.
for_: clean::Type::BorrowedRef { type_, .. },
..
} => type_.def_id(&self.cache),
ParentStackItem::Impl { for_, .. } => for_.def_id(&self.cache),
ParentStackItem::Type(item_id) => item_id.as_def_id(),
};
let path = did
.and_then(|did| self.cache.paths.get(&did))
// The current stack not necessarily has correlation
// for where the type was defined. On the other
// hand, `paths` always has the right
// information if present.
.map(|(fqp, _)| &fqp[..fqp.len() - 1]);
((did, path), true)
}
}
_ => ((None, Some(&*self.cache.stack)), false),
};
match parent {
(parent, Some(path)) if is_inherent_impl_item || !self.cache.stripped_mod => {
debug_assert!(!item.is_stripped());
// A crate has a module at its root, containing all items,
// which should not be indexed. The crate-item itself is
// inserted later on when serializing the search-index.
if item.item_id.as_def_id().is_some_and(|idx| !idx.is_crate_root())
&& let ty = item.type_()
&& (ty != ItemType::StructField
|| u16::from_str_radix(s.as_str(), 10).is_err())
{
let desc =
short_markdown_summary(&item.doc_value(), &item.link_names(self.cache));
// For searching purposes, a re-export is a duplicate if:
//
// - It's either an inline, or a true re-export
// - It's got the same name
// - Both of them have the same exact path
let defid = (match &*item.kind {
&clean::ItemKind::ImportItem(ref import) => import.source.did,
_ => None,
})
.or_else(|| item.item_id.as_def_id());
// In case this is a field from a tuple struct, we don't add it into
// the search index because its name is something like "0", which is
// not useful for rustdoc search.
self.cache.search_index.push(IndexItem {
ty,
defid,
name: s,
path: join_with_double_colon(path),
desc,
parent,
parent_idx: None,
exact_path: None,
impl_id: if let Some(ParentStackItem::Impl { item_id, .. }) =
self.cache.parent_stack.last()
{
item_id.as_def_id()
} else {
None
},
search_type: get_function_type_for_search(
&item,
self.tcx,
clean_impl_generics(self.cache.parent_stack.last()).as_ref(),
parent,
self.cache,
),
aliases: item.attrs.get_doc_aliases(),
deprecation: item.deprecation(self.tcx),
});
}
}
(Some(parent), None) if is_inherent_impl_item => {
// We have a parent, but we don't know where they're
// defined yet. Wait for later to index this item.
let impl_generics = clean_impl_generics(self.cache.parent_stack.last());
self.cache.orphan_impl_items.push(OrphanImplItem {
parent,
item: item.clone(),
impl_generics,
impl_id: if let Some(ParentStackItem::Impl { item_id, .. }) =
self.cache.parent_stack.last()
{
item_id.as_def_id()
} else {
None
},
});
}
_ => {}
}
})
} else {
None
};
if let Some(name) = search_name {
add_item_to_search_index(self.tcx, &mut self.cache, &item, name)
}
// Keep track of the fully qualified path for this item.
@ -572,6 +440,152 @@ impl<'a, 'tcx> DocFolder for CacheBuilder<'a, 'tcx> {
}
}
/// Adds `item` to the search index (`cache.search_index`) under `name`.
///
/// Nothing is pushed to the index when:
/// - the item is stripped;
/// - it is an associated const/type and the top of `cache.parent_stack` is a trait impl;
/// - it is a trait item, struct field or variant, and either `cache.stripped_mod` is set
///   or it is a struct field whose name consists only of ASCII digits;
/// - it is a method or associated item whose parent `DefId` cannot be determined;
/// - it is any other kind of item, and it is the crate root or `cache.stripped_mod` is set.
///
/// A method or associated item whose parent has no entry in `cache.paths` yet is not
/// indexed here; it is handed to `handle_orphan_impl_child` instead.
///
/// # Panics
///
/// Panics if `item` has no `DefId`, or if `cache.parent_stack` is empty for an item kind
/// that needs a parent (trait items, fields, variants, methods and associated items).
fn add_item_to_search_index(tcx: TyCtxt<'_>, cache: &mut Cache, item: &clean::Item, name: Symbol) {
    // Item has a name, so it must also have a DefId (can't be an impl, let alone a blanket or auto impl).
    let item_def_id = item.item_id.as_def_id().unwrap();

    let (parent_did, parent_path) = match *item.kind {
        clean::StrippedItem(..) => return,
        clean::AssocConstItem(..) | clean::AssocTypeItem(..)
            if cache.parent_stack.last().is_some_and(|parent| parent.is_trait_impl()) =>
        {
            // skip associated items in trait impls
            return;
        }
        clean::TyMethodItem(..)
        | clean::TyAssocConstItem(..)
        | clean::TyAssocTypeItem(..)
        | clean::StructFieldItem(..)
        | clean::VariantItem(..) => {
            // Don't index if containing module is stripped (i.e., private),
            // or if item is tuple struct/variant field (name is a number -> not useful for search).
            // The parentheses only make the existing `||` / `&&` precedence explicit.
            if cache.stripped_mod
                || (item.type_() == ItemType::StructField
                    && name.as_str().chars().all(|c| c.is_ascii_digit()))
            {
                return;
            }
            let parent_did =
                cache.parent_stack.last().expect("parent_stack is empty").item_id().expect_def_id();
            // Drop the last component of the current stack to get the parent's path.
            let parent_path = &cache.stack[..cache.stack.len() - 1];
            (Some(parent_did), parent_path)
        }
        clean::MethodItem(..) | clean::AssocConstItem(..) | clean::AssocTypeItem(..) => {
            let last = cache.parent_stack.last().expect("parent_stack is empty 2");
            let parent_did = match last {
                // impl Trait for &T { fn method(self); }
                //
                // When generating a function index with the above shape, we want it
                // associated with `T`, not with the primitive reference type. It should
                // show up as `T::method`, rather than `reference::method`, in the search
                // results page.
                ParentStackItem::Impl { for_: clean::Type::BorrowedRef { type_, .. }, .. } => {
                    type_.def_id(cache)
                }
                ParentStackItem::Impl { for_, .. } => for_.def_id(cache),
                ParentStackItem::Type(item_id) => item_id.as_def_id(),
            };
            let Some(parent_did) = parent_did else { return };
            // The current stack reflects the CacheBuilder's recursive
            // walk over HIR. For associated items, this is the module
            // where the `impl` block is defined. That's an implementation
            // detail that we don't want to affect the search engine.
            //
            // In particular, you can arrange things like this:
            //
            //     #![crate_name="me"]
            //     mod private_mod {
            //         impl Clone for MyThing { fn clone(&self) -> MyThing { MyThing } }
            //     }
            //     pub struct MyThing;
            //
            // When that happens, we need to:
            // - ignore the `cache.stripped_mod` flag, since the Clone impl is actually
            //   part of the public API even though it's defined in a private module
            // - present the method as `me::MyThing::clone`, its publicly-visible path
            // - deal with the fact that the recursive walk hasn't actually reached `MyThing`
            //   until it's already past `private_mod`, since that's first, and doesn't know
            //   yet if `MyThing` will actually be public or not (it could be re-exported)
            //
            // We accomplish the last two points by recording children of "orphan impls"
            // in a field of the cache whose elements are added to the search index later,
            // after cache building is complete (see `handle_orphan_impl_child`).
            match cache.paths.get(&parent_did) {
                // Use the parent's recorded path minus its own name.
                Some((fqp, _)) => (Some(parent_did), &fqp[..fqp.len() - 1]),
                None => {
                    handle_orphan_impl_child(cache, item, parent_did);
                    return;
                }
            }
        }
        _ => {
            // Don't index if item is crate root, which is inserted later on when serializing the index.
            // Don't index if containing module is stripped (i.e., private).
            if item_def_id.is_crate_root() || cache.stripped_mod {
                return;
            }
            (None, &*cache.stack)
        }
    };

    debug_assert!(!item.is_stripped());

    let desc = short_markdown_summary(&item.doc_value(), &item.link_names(cache));
    // For searching purposes, a re-export is a duplicate if:
    //
    // - It's either an inline, or a true re-export
    // - It's got the same name
    // - Both of them have the same exact path
    let defid = match &*item.kind {
        clean::ItemKind::ImportItem(import) => import.source.did.unwrap_or(item_def_id),
        _ => item_def_id,
    };
    let path = join_with_double_colon(parent_path);
    // Only items directly inside an impl record that impl's id.
    let impl_id = match cache.parent_stack.last() {
        Some(ParentStackItem::Impl { item_id, .. }) => item_id.as_def_id(),
        _ => None,
    };
    let search_type = get_function_type_for_search(
        item,
        tcx,
        clean_impl_generics(cache.parent_stack.last()).as_ref(),
        parent_did,
        cache,
    );
    let aliases = item.attrs.get_doc_aliases();
    let deprecation = item.deprecation(tcx);

    let index_item = IndexItem {
        ty: item.type_(),
        defid: Some(defid),
        name,
        path,
        desc,
        parent: parent_did,
        parent_idx: None,
        exact_path: None,
        impl_id,
        search_type,
        aliases,
        deprecation,
    };
    cache.search_index.push(index_item);
}
/// Defers indexing of an impl item whose parent is known by `DefId` but not yet by path.
///
/// A clone of `item` is queued in [`Cache::orphan_impl_items`] together with `parent_did`,
/// the result of `clean_impl_generics` for the top of `cache.parent_stack`, and the `DefId`
/// of that top entry when it is an impl (`None` otherwise).
fn handle_orphan_impl_child(cache: &mut Cache, item: &clean::Item, parent_did: DefId) {
    // Look at the innermost parent once; both pieces of impl metadata come from it.
    let enclosing = cache.parent_stack.last();
    let impl_generics = clean_impl_generics(enclosing);
    let impl_id = match enclosing {
        Some(ParentStackItem::Impl { item_id, .. }) => item_id.as_def_id(),
        _ => None,
    };

    cache.orphan_impl_items.push(OrphanImplItem {
        parent: parent_did,
        item: item.clone(),
        impl_generics,
        impl_id,
    });
}
pub(crate) struct OrphanImplItem {
pub(crate) parent: DefId,
pub(crate) impl_id: Option<DefId>,