Created
November 2, 2020 19:36
-
-
Save snasphysicist/9d9f3f6d91d4a77b414767e2438fa3b9 to your computer and use it in GitHub Desktop.
Experiment using Rust Analyzer & rustc to extract dev comments
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
use ra_syntax::{AstNode, File, SyntaxNodeRef, SyntaxKind}; | |
use regex::Regex; | |
use rustc_lexer::{tokenize, Token, TokenKind}; | |
/* | |
* [dependencies] | |
* ra_syntax = "0.1.0" | |
* regex = "1.4.1" | |
* rustc_lexer = "0.1.0" | |
*/ | |
/// Hard-coded sample source fed to both tokenizers below. It exercises every
/// comment flavour we care about: a block comment, a plain (non-doc) line
/// comment, an outer doc comment (`///`) and an inner doc comment (`//!`).
const RUST_SOURCE: &str =
"/* block content */ \n\
// non-documentation line content \n\
/// outer documentation line content \n\
fn main() {\n\
//! inner documentation line content \n\
}";
/// We wrap the three types of object we're interested in - two types of non-documentation comment
/// and everything else - in this enum to make storing them in a single vector & matching against
/// them easier
#[derive(Debug)]
enum TokenType {
    /// A developer `/* ... */` comment (not documentation).
    BlockComment(BlockComment),
    /// A developer `//` comment (not documentation).
    LineComment(LineComment),
    /// Anything else: code, whitespace, doc comments, etc.
    Other(Other)
}
impl TokenType { | |
/// For use with `rustc`, converts a piece of text content and the location of the start of | |
/// that content into a `TokenType`. Delegates to the `from_content` methods on the wrapped | |
/// types. | |
fn from_content(content: &str, location: usize) -> TokenType { | |
match BlockComment::from_content(content, location) { | |
Some(bc) => return TokenType::BlockComment(bc), | |
None => () | |
} | |
match LineComment::from_content(content, location) { | |
Some(lc) => return TokenType::LineComment(lc), | |
None => () | |
} | |
TokenType::Other(Other::from_content(content, location)) | |
} | |
/// For use with `rust_analyzer`, converts a syntax node into a `TokenType`. Delegates to the | |
/// `from_node` methods on the wrapped types. | |
fn from_node(node: SyntaxNodeRef) -> TokenType { | |
match BlockComment::from_node(node) { | |
Some(bc) => return TokenType::BlockComment(bc), | |
None => () | |
} | |
match LineComment::from_node(node) { | |
Some(lc) => return TokenType::LineComment(lc), | |
None => () | |
} | |
TokenType::Other(Other::from_node(node)) | |
} | |
} | |
/// Probably no longer needed, could be useful. Basically all the things we're interested in know
/// how to print themselves as source code.
trait TextToken {
    /// Render this token exactly as it would appear in the original source,
    /// including any comment markers that were stripped during parsing.
    fn as_source(&self) -> String;
}
/// Represents a developer block comment. The start offset is stored so we don't lose ordering
/// information if we want to print the source file back out. Note that the content is the content
/// of the comment, not including the comment markers (/* */).
#[derive(Debug)]
struct BlockComment {
    /// Byte offset of the `/*` marker within the source text.
    start: usize,
    /// Text between the `/*` and `*/` markers.
    content: String
}
impl BlockComment { | |
/// For use with `rustc`, converts a piece of text content and the location of the start of | |
/// the content to a `BlockComment`, if possible. | |
fn from_content(content: &str, location: usize) -> Option<BlockComment> { | |
let block_comment = Regex::new(r"^/\*(?P<content>.*)\*/$").unwrap(); | |
match block_comment.captures(content) { | |
Some(c) => match c.get(1) { | |
Some(m) => Some(BlockComment{ start: location, content: m.as_str().to_string() }), | |
None => None | |
} | |
None => None | |
} | |
} | |
/// For use with `rust_analyzer`, converts a syntax node into a `BlockComment` if possible. | |
fn from_node(node: SyntaxNodeRef) -> Option<BlockComment> { | |
match node.kind() { | |
SyntaxKind::COMMENT => BlockComment::from_content( | |
&node.text().to_string(), node.range().start().to_usize()), | |
_ => None | |
} | |
} | |
} | |
impl TextToken for BlockComment { | |
/// Print the block comment out as it would appear in the source file. | |
fn as_source(&self) -> String { | |
format!("/*{}*/", self.content) | |
} | |
} | |
/// Represents a developer line comment. The start offset is stored so we don't lose ordering
/// information if we want to print the source file back out. Note that the content is the content
/// of the comment, not including the comment marker (//).
#[derive(Debug)]
struct LineComment {
    /// Byte offset of the `//` marker within the source text.
    start: usize,
    /// Text following the `//` marker, up to the end of the line.
    content: String
}
impl LineComment { | |
/// For use with `rustc`, converts a piece of text content and the location of the start of | |
/// the content to a `LineComment`, if possible. | |
fn from_content(content: &str, location: usize) -> Option<LineComment> { | |
let line_comment = Regex::new(r"^//([^/].*)$").unwrap(); | |
match line_comment.captures(content) { | |
Some(c) => match c.get(1) { | |
Some(m) => Some(LineComment{ start: location, content: m.as_str().to_string() }), | |
None => None | |
} | |
None => None | |
} | |
} | |
/// For use with `rust_analyzer`, converts a syntax node into a `LineComment` if possible. | |
fn from_node(node: SyntaxNodeRef) -> Option<LineComment> { | |
match node.kind() { | |
SyntaxKind::COMMENT => LineComment::from_content( | |
&node.text().to_string(), node.range().start().to_usize()), | |
_ => None | |
} | |
} | |
} | |
impl TextToken for LineComment { | |
/// Print the line comment out as it would appear in the source file. | |
fn as_source(&self) -> String { | |
format!("//{}", self.content) | |
} | |
} | |
/// For anything which is not a developer block comment or a developer line comment - we don't care
/// about the details because the goal here is only to spellcheck those. We store the full text for
/// that part of the source.
#[derive(Debug)]
struct Other {
    /// Byte offset of this token within the source text.
    start: usize,
    /// The full, unmodified text of the token.
    content: String
}
impl Other { | |
/// For use with `rustc`, converts a piece of text content and the location of the start of | |
/// the content to an `Other`. Never fails, so try after `BlockComment`/`LineComment`. | |
fn from_content(content: &str, location: usize) -> Other { | |
Other{ start: location, content: content.to_string() } | |
} | |
/// For use with `rust_analyzer`, converts a syntax node to an `Other`. | |
/// Never fails, so try after `BlockComment`/`LineComment`. | |
fn from_node(node: SyntaxNodeRef) -> Other { | |
Other{ start: node.range().start().to_usize(), content: node.text().to_string()} | |
} | |
} | |
/// For stuff we aren't spell-checking here, we print the content straight out and don't have to | |
/// add any other content. | |
impl TextToken for Other { | |
fn as_source(&self) -> String { | |
format!("{}", self.content) | |
} | |
} | |
/// Intermediate step when using `rustc`, that associates the location information to the text.
/// Probably not all fields in this struct are required.
/// NOTE(review): `kind` and `end` are never read by the code visible in this file;
/// confirm they are needed before removing.
struct TokenWithLocation {
    /// The lexer's classification of this token.
    kind: TokenKind,
    /// Byte offset of the token's first character in the source.
    start: usize,
    /// Byte offset one past the token's last character in the source.
    end: usize,
    /// The exact source text covered by the token.
    content: String
}
fn main() { | |
println!("\nRUSTC\n"); | |
let rustc = parse_with_rustc(RUST_SOURCE); | |
print_tokens(&rustc); | |
println!("\nRUST_ANALYZER\n"); | |
let rust_analyzer = parse_with_rust_analyzer(RUST_SOURCE); | |
print_tokens(&rust_analyzer); | |
} | |
/// Prints a series of `TokenTypes` out, one per line, for debug. | |
fn print_tokens(tokens: &Vec<TokenType>) -> () { | |
for t in tokens { | |
println!("{:?}", t); | |
} | |
} | |
/// Parse the provided source into `TokenTypes` using `rustc`. | |
fn parse_with_rustc(source: &str) -> Vec<TokenType> { | |
let tokenized = tokenize_with_location(source); | |
tokenized.into_iter() | |
.map(|t| TokenType::from_content(&t.content, t.start)) | |
.collect() | |
} | |
/// Tokenize the provided source using `rustc` and extract the content/location for each token | |
fn tokenize_with_location(source: &str) -> Vec<TokenWithLocation> { | |
let tokenized = tokenize(source); | |
let mut location = 0; | |
let mut tokens = vec!(); | |
for token in tokenized { | |
tokens.push(TokenWithLocation{ | |
kind: token.kind, | |
start: location, | |
end: location + token.len, | |
content: source[location..location + token.len].to_string() | |
}); | |
location += token.len; | |
} | |
tokens | |
} | |
/// Parse the given source with `rust_analyzer` and convert into `TokenType`s | |
fn parse_with_rust_analyzer(source: &str) -> Vec<TokenType> { | |
let parsed = File::parse(source); | |
parsed.ast() | |
.syntax() | |
.descendants() | |
.filter(|node| node.is_leaf() ) | |
.map(|node| TokenType::from_node(node)) | |
.collect() | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment