Preprocess intra-doc links consistently

Previously, rustdoc would panic on links to external crates if they were
surrounded by backticks.
This commit is contained in:
Joshua Nelson 2021-04-10 13:48:54 -04:00
parent 4029d4d0be
commit 47d1ed9690
5 changed files with 160 additions and 92 deletions

View File

@ -365,21 +365,20 @@ crate fn create_resolver<'a>(
} }
impl ast::visit::Visitor<'_> for IntraLinkCrateLoader { impl ast::visit::Visitor<'_> for IntraLinkCrateLoader {
fn visit_attribute(&mut self, attr: &ast::Attribute) { fn visit_attribute(&mut self, attr: &ast::Attribute) {
use crate::html::markdown::{markdown_links, MarkdownLink}; use crate::html::markdown::markdown_links;
use crate::passes::collect_intra_doc_links::Disambiguator; use crate::passes::collect_intra_doc_links::preprocess_link;
if let Some(doc) = attr.doc_str() { if let Some(doc) = attr.doc_str() {
for MarkdownLink { link, .. } in markdown_links(&doc.as_str()) { for link in markdown_links(&doc.as_str()) {
// FIXME: this misses a *lot* of the preprocessing done in collect_intra_doc_links let path_str = if let Some(Ok(x)) = preprocess_link(&link) {
// I think most of it shouldn't be necessary since we only need the crate prefix? x.path_str
let path_str = match Disambiguator::from_str(&link) { } else {
Ok(x) => x.map_or(link.as_str(), |(_, p)| p), continue;
Err(_) => continue,
}; };
self.resolver.borrow_mut().access(|resolver| { self.resolver.borrow_mut().access(|resolver| {
let _ = resolver.resolve_str_path_error( let _ = resolver.resolve_str_path_error(
attr.span, attr.span,
path_str, &path_str,
TypeNS, TypeNS,
self.current_mod, self.current_mod,
); );

View File

@ -68,7 +68,7 @@ impl<'a> From<ResolutionFailure<'a>> for ErrorKind<'a> {
} }
#[derive(Copy, Clone, Debug, Hash)] #[derive(Copy, Clone, Debug, Hash)]
enum Res { crate enum Res {
Def(DefKind, DefId), Def(DefKind, DefId),
Primitive(PrimitiveType), Primitive(PrimitiveType),
} }
@ -134,7 +134,7 @@ impl TryFrom<ResolveRes> for Res {
/// A link failed to resolve. /// A link failed to resolve.
#[derive(Debug)] #[derive(Debug)]
enum ResolutionFailure<'a> { crate enum ResolutionFailure<'a> {
/// This resolved, but with the wrong namespace. /// This resolved, but with the wrong namespace.
WrongNamespace { WrongNamespace {
/// What the link resolved to. /// What the link resolved to.
@ -172,7 +172,7 @@ enum ResolutionFailure<'a> {
} }
#[derive(Debug)] #[derive(Debug)]
enum MalformedGenerics { crate enum MalformedGenerics {
/// This link has unbalanced angle brackets. /// This link has unbalanced angle brackets.
/// ///
/// For example, `Vec<T` should trigger this, as should `Vec<T>>`. /// For example, `Vec<T` should trigger this, as should `Vec<T>>`.
@ -224,7 +224,7 @@ impl ResolutionFailure<'a> {
} }
} }
enum AnchorFailure { crate enum AnchorFailure {
/// User error: `[std#x#y]` is not valid /// User error: `[std#x#y]` is not valid
MultipleAnchors, MultipleAnchors,
/// The anchor provided by the user conflicts with Rustdoc's generated anchor. /// The anchor provided by the user conflicts with Rustdoc's generated anchor.
@ -892,6 +892,117 @@ impl<'a, 'tcx> DocFolder for LinkCollector<'a, 'tcx> {
} }
} }
crate enum PreprocessingError<'a> {
Anchor(AnchorFailure),
Disambiguator(Range<usize>, String),
Resolution(ResolutionFailure<'a>, String, Option<Disambiguator>),
}
impl From<AnchorFailure> for PreprocessingError<'_> {
fn from(err: AnchorFailure) -> Self {
Self::Anchor(err)
}
}
crate struct PreprocessingInfo {
crate path_str: String,
disambiguator: Option<Disambiguator>,
extra_fragment: Option<String>,
link_text: String,
}
/// Returns:
/// - `None` if the link should be ignored.
/// - `Some(Err)` if the link should emit an error
/// - `Some(Ok)` if the link is valid
///
/// `link_buffer` is needed for lifetime reasons; it will always be overwritten and the contents ignored.
crate fn preprocess_link<'a>(
ori_link: &'a MarkdownLink,
) -> Option<Result<PreprocessingInfo, PreprocessingError<'a>>> {
// [] is mostly likely not supposed to be a link
if ori_link.link.is_empty() {
return None;
}
// Bail early for real links.
if ori_link.link.contains('/') {
return None;
}
let stripped = ori_link.link.replace("`", "");
let mut parts = stripped.split('#');
let link = parts.next().unwrap();
if link.trim().is_empty() {
// This is an anchor to an element of the current page, nothing to do in here!
return None;
}
let extra_fragment = parts.next();
if parts.next().is_some() {
// A valid link can't have multiple #'s
return Some(Err(AnchorFailure::MultipleAnchors.into()));
}
// Parse and strip the disambiguator from the link, if present.
let (path_str, disambiguator) = match Disambiguator::from_str(&link) {
Ok(Some((d, path))) => (path.trim(), Some(d)),
Ok(None) => (link.trim(), None),
Err((err_msg, relative_range)) => {
// Only report error if we would not have ignored this link. See issue #83859.
if !should_ignore_link_with_disambiguators(link) {
let no_backticks_range = range_between_backticks(&ori_link);
let disambiguator_range = (no_backticks_range.start + relative_range.start)
..(no_backticks_range.start + relative_range.end);
return Some(Err(PreprocessingError::Disambiguator(disambiguator_range, err_msg)));
} else {
return None;
}
}
};
if should_ignore_link(path_str) {
return None;
}
// We stripped `()` and `!` when parsing the disambiguator.
// Add them back to be displayed, but not prefix disambiguators.
let link_text =
disambiguator.map(|d| d.display_for(path_str)).unwrap_or_else(|| path_str.to_owned());
// Strip generics from the path.
let path_str = if path_str.contains(['<', '>'].as_slice()) {
match strip_generics_from_path(&path_str) {
Ok(path) => path,
Err(err_kind) => {
debug!("link has malformed generics: {}", path_str);
return Some(Err(PreprocessingError::Resolution(
err_kind,
path_str.to_owned(),
disambiguator,
)));
}
}
} else {
path_str.to_owned()
};
// Sanity check to make sure we don't have any angle brackets after stripping generics.
assert!(!path_str.contains(['<', '>'].as_slice()));
// The link is not an intra-doc link if it still contains spaces after stripping generics.
if path_str.contains(' ') {
return None;
}
Some(Ok(PreprocessingInfo {
path_str,
disambiguator,
extra_fragment: extra_fragment.map(String::from),
link_text,
}))
}
impl LinkCollector<'_, '_> { impl LinkCollector<'_, '_> {
/// This is the entry point for resolving an intra-doc link. /// This is the entry point for resolving an intra-doc link.
/// ///
@ -907,16 +1018,6 @@ impl LinkCollector<'_, '_> {
) -> Option<ItemLink> { ) -> Option<ItemLink> {
trace!("considering link '{}'", ori_link.link); trace!("considering link '{}'", ori_link.link);
// Bail early for real links.
if ori_link.link.contains('/') {
return None;
}
// [] is mostly likely not supposed to be a link
if ori_link.link.is_empty() {
return None;
}
let diag_info = DiagnosticInfo { let diag_info = DiagnosticInfo {
item, item,
dox, dox,
@ -924,47 +1025,29 @@ impl LinkCollector<'_, '_> {
link_range: ori_link.range.clone(), link_range: ori_link.range.clone(),
}; };
let link = ori_link.link.replace("`", ""); let PreprocessingInfo { path_str, disambiguator, extra_fragment, link_text } =
let no_backticks_range = range_between_backticks(&ori_link); match preprocess_link(&ori_link)? {
let parts = link.split('#').collect::<Vec<_>>(); Ok(x) => x,
let (link, extra_fragment) = if parts.len() > 2 { Err(err) => {
// A valid link can't have multiple #'s match err {
anchor_failure(self.cx, diag_info, AnchorFailure::MultipleAnchors); PreprocessingError::Anchor(err) => anchor_failure(self.cx, diag_info, err),
return None; PreprocessingError::Disambiguator(range, msg) => {
} else if parts.len() == 2 { disambiguator_error(self.cx, diag_info, range, &msg)
if parts[0].trim().is_empty() { }
// This is an anchor to an element of the current page, nothing to do in here! PreprocessingError::Resolution(err, path_str, disambiguator) => {
return None; resolution_failure(
} self,
(parts[0], Some(parts[1].to_owned())) diag_info,
} else { &path_str,
(parts[0], None) disambiguator,
}; smallvec![err],
);
// Parse and strip the disambiguator from the link, if present. }
let (mut path_str, disambiguator) = match Disambiguator::from_str(&link) { }
Ok(Some((d, path))) => (path.trim(), Some(d)), return None;
Ok(None) => (link.trim(), None),
Err((err_msg, relative_range)) => {
if !should_ignore_link_with_disambiguators(link) {
// Only report error if we would not have ignored this link.
// See issue #83859.
let disambiguator_range = (no_backticks_range.start + relative_range.start)
..(no_backticks_range.start + relative_range.end);
disambiguator_error(self.cx, diag_info, disambiguator_range, &err_msg);
} }
return None; };
} let mut path_str = &*path_str;
};
if should_ignore_link(path_str) {
return None;
}
// We stripped `()` and `!` when parsing the disambiguator.
// Add them back to be displayed, but not prefix disambiguators.
let link_text =
disambiguator.map(|d| d.display_for(path_str)).unwrap_or_else(|| path_str.to_owned());
// In order to correctly resolve intra-doc links we need to // In order to correctly resolve intra-doc links we need to
// pick a base AST node to work from. If the documentation for // pick a base AST node to work from. If the documentation for
@ -1029,39 +1112,12 @@ impl LinkCollector<'_, '_> {
module_id = DefId { krate, index: CRATE_DEF_INDEX }; module_id = DefId { krate, index: CRATE_DEF_INDEX };
} }
// Strip generics from the path.
let stripped_path_string;
if path_str.contains(['<', '>'].as_slice()) {
stripped_path_string = match strip_generics_from_path(path_str) {
Ok(path) => path,
Err(err_kind) => {
debug!("link has malformed generics: {}", path_str);
resolution_failure(
self,
diag_info,
path_str,
disambiguator,
smallvec![err_kind],
);
return None;
}
};
path_str = &stripped_path_string;
}
// Sanity check to make sure we don't have any angle brackets after stripping generics.
assert!(!path_str.contains(['<', '>'].as_slice()));
// The link is not an intra-doc link if it still contains spaces after stripping generics.
if path_str.contains(' ') {
return None;
}
let (mut res, mut fragment) = self.resolve_with_disambiguator_cached( let (mut res, mut fragment) = self.resolve_with_disambiguator_cached(
ResolutionInfo { ResolutionInfo {
module_id, module_id,
dis: disambiguator, dis: disambiguator,
path_str: path_str.to_owned(), path_str: path_str.to_owned(),
extra_fragment, extra_fragment: extra_fragment.map(String::from),
}, },
diag_info.clone(), // this struct should really be Copy, but Range is not :( diag_info.clone(), // this struct should really be Copy, but Range is not :(
matches!(ori_link.kind, LinkType::Reference | LinkType::Shortcut), matches!(ori_link.kind, LinkType::Reference | LinkType::Shortcut),

View File

@ -0,0 +1 @@
// intentionally empty

View File

@ -0,0 +1 @@
// intentionally empty

View File

@ -1,8 +1,19 @@
// This test is just a little cursed.
// aux-build:issue-66159-1.rs // aux-build:issue-66159-1.rs
// aux-crate:priv:issue_66159_1=issue-66159-1.rs // aux-crate:priv:issue_66159_1=issue-66159-1.rs
// aux-build:empty.rs
// aux-crate:priv:empty=empty.rs
// aux-build:empty2.rs
// aux-crate:priv:empty2=empty2.rs
// build-aux-docs // build-aux-docs
// compile-flags:-Z unstable-options // compile-flags:-Z unstable-options --edition 2018
// @has extern_crate_only_used_in_link/index.html // @has extern_crate_only_used_in_link/index.html
// @has - '//a[@href="../issue_66159_1/struct.Something.html"]' 'issue_66159_1::Something' // @has - '//a[@href="../issue_66159_1/struct.Something.html"]' 'issue_66159_1::Something'
//! [issue_66159_1::Something] //! [issue_66159_1::Something]
// @has - '//a[@href="../empty/index.html"]' 'empty'
//! [`empty`]
// @has - '//a[@href="../empty2/index.html"]' 'empty2'
//! [empty2<x>]