rojas-diego/document.rs

## document.rs
use ropey::{Rope, RopeSlice};
use serde::{Deserialize, Serialize};
use std::fmt;
use thiserror::Error;
use tower_lsp::lsp_types::{Position, TextDocumentContentChangeEvent};
use tree_sitter::{InputEdit, Parser, Point, Tree};

pub struct TextDocument {
    pub rope: Rope,
    pub tree: Option<Tree>,
    pub language_id: LanguageId,
    parser: Parser,
}

#[derive(Error, Debug)]
pub enum DocumentError {
    #[error("position {0}:{1} is out of bounds")]
    PositionOutOfBounds(u32, u32),
}

#[derive(Clone, Debug, Copy)]
/// We redeclare this enum here because the `lsp_types` crate exports a Cow
/// type that is unconvenient to deal with.
pub enum PositionEncodingKind {
    #[allow(dead_code)]
    UTF8,
    UTF16,
    #[allow(dead_code)]
    UTF32,
}

impl TextDocument {
    /// Creates a new document from the given text and language id. It creates
    /// a rope, parser and syntax tree from the text.
    pub fn new(language_id: LanguageId, text: &str) -> Self {
        let rope = Rope::from_str(text);
        let mut parser = Parser::new();

        let language = match language_id {
            LanguageId::Bash => tree_sitter_bash::language(),
            LanguageId::C => tree_sitter_c::language(),
            LanguageId::Cpp => tree_sitter_cpp::language(),
            LanguageId::CSharp => tree_sitter_c_sharp::language(),
            LanguageId::Elixir => tree_sitter_elixir::language(),
            LanguageId::Erlang => tree_sitter_erlang::language(),
            LanguageId::Go => tree_sitter_go::language(),
            LanguageId::Html => tree_sitter_html::language(),
            LanguageId::Java => tree_sitter_java::language(),
            LanguageId::JavaScript => tree_sitter_javascript::language(),
            LanguageId::JavaScriptReact => tree_sitter_javascript::language(),
            LanguageId::Json => tree_sitter_json::language(),
            LanguageId::Lua => tree_sitter_lua::language(),
            LanguageId::Markdown => tree_sitter_md::language(),
            LanguageId::ObjectiveC => tree_sitter_objc::language(),
            LanguageId::Python => tree_sitter_python::language(),
            LanguageId::R => tree_sitter_r::language(),
            LanguageId::Ruby => tree_sitter_ruby::language(),
            LanguageId::Rust => tree_sitter_rust::language(),
            LanguageId::Scala => tree_sitter_scala::language(),
            LanguageId::Swift => tree_sitter_swift::language(),
            LanguageId::TypeScript => tree_sitter_typescript::language_typescript(),
            LanguageId::TypeScriptReact => tree_sitter_typescript::language_tsx(),
            LanguageId::Unknown => {
                return Self {
                    rope,
                    tree: None,
                    language_id,
                    parser,
                };
            }
        };

        parser
            .set_language(language)
            .expect("set parser language should always succeed");

        let tree = parser
            .parse(text, None)
            .expect("parse should always return a tree when the language was set and no timeout was specified");

        Self {
            rope,
            tree: Some(tree),
            language_id,
            parser,
        }
    }

    /// Apply a change to the document.
    pub fn apply_content_change(
        &mut self,
        change: &TextDocumentContentChangeEvent,
        position_encoding: PositionEncodingKind,
    ) -> Result<(), DocumentError> {
        match change.range {
            Some(range) => {
                assert!(
                    range.start.line < range.end.line
                        || (range.start.line == range.end.line
                            && range.start.character <= range.end.character)
                );

                let same_line = range.start.line == range.end.line;
                let same_character = range.start.character == range.end.character;

                let change_start_line_cu_idx = range.start.character as usize;
                let change_end_line_cu_idx = range.end.character as usize;

                // 1. Get the line at which the change starts.
                let change_start_line_idx = range.start.line as usize;
                let change_start_line = match self.rope.get_line(change_start_line_idx) {
                    Some(line) => line,
                    None => {
                        return Err(DocumentError::PositionOutOfBounds(
                            range.start.line,
                            range.start.character,
                        ))
                    }
                };

                // 2. Get the line at which the change ends. (Small optimization
                // where we first check whether start and end line are the
                // same O(log N) lookup. We repeat this all throughout this
                // function).
                let change_end_line_idx = range.end.line as usize;
                let change_end_line = match same_line {
                    true => change_start_line,
                    false => match self.rope.get_line(change_end_line_idx) {
                        Some(line) => line,
                        None => {
                            return Err(DocumentError::PositionOutOfBounds(
                                range.end.line,
                                range.end.character,
                            ));
                        }
                    },
                };

                fn compute_char_idx(
                    position_encoding: PositionEncodingKind,
                    position: &Position,
                    slice: &RopeSlice,
                ) -> Result<usize, DocumentError> {
                    match position_encoding {
                        PositionEncodingKind::UTF8 => {
                            slice.try_byte_to_char(position.character as usize)
                        }
                        PositionEncodingKind::UTF16 => {
                            slice.try_utf16_cu_to_char(position.character as usize)
                        }
                        PositionEncodingKind::UTF32 => Ok(position.character as usize),
                    }
                    .map_err(|_| {
                        DocumentError::PositionOutOfBounds(position.line, position.character)
                    })
                }

                // 3. Compute the character offset into the start/end line where
                // the change starts/ends.
                let change_start_line_char_idx =
                    compute_char_idx(position_encoding, &range.start, &change_start_line)?;
                let change_end_line_char_idx = match same_line && same_character {
                    true => change_start_line_char_idx,
                    false => compute_char_idx(position_encoding, &range.end, &change_end_line)?,
                };

                // 4. Compute the character and byte offset into the document
                // where the change starts/ends.
                let change_start_doc_char_idx =
                    self.rope.line_to_char(change_start_line_idx) + change_start_line_char_idx;
                let change_end_doc_char_idx = match same_line && same_character {
                    true => change_start_doc_char_idx,
                    false => self.rope.line_to_char(change_end_line_idx) + change_end_line_char_idx,
                };
                let change_start_doc_byte_idx = self.rope.char_to_byte(change_start_doc_char_idx);
                let change_end_doc_byte_idx = match same_line && same_character {
                    true => change_start_doc_byte_idx,
                    false => self.rope.char_to_byte(change_end_doc_char_idx),
                };

                // 5. Compute the byte offset into the start/end line where the
                // change starts/ends. Required for tree-sitter.
                let change_start_line_byte_idx = match position_encoding {
                    PositionEncodingKind::UTF8 => change_start_line_cu_idx,
                    PositionEncodingKind::UTF16 => {
                        change_start_line.char_to_utf16_cu(change_start_line_char_idx)
                    }
                    PositionEncodingKind::UTF32 => change_start_line_char_idx,
                };
                let change_end_line_byte_idx = match same_line && same_character {
                    true => change_start_line_byte_idx,
                    false => match position_encoding {
                        PositionEncodingKind::UTF8 => change_end_line_cu_idx,
                        PositionEncodingKind::UTF16 => {
                            change_end_line.char_to_utf16_cu(change_end_line_char_idx)
                        }
                        PositionEncodingKind::UTF32 => change_end_line_char_idx,
                    },
                };

                self.rope
                    .remove(change_start_doc_char_idx..change_end_doc_char_idx);
                self.rope.insert(change_start_doc_char_idx, &change.text);

                if let Some(tree) = &mut self.tree {
                    // 6. Compute the byte index into the new end line where the
                    // change ends. Required for tree-sitter.
                    let change_new_end_line_idx = self
                        .rope
                        .byte_to_line(change_start_doc_byte_idx + change.text.len());
                    let change_new_end_line_byte_idx =
                        change_start_doc_byte_idx + change.text.len();

                    // 7. Construct the tree-sitter edit. We stay mindful that
                    // tree-sitter Point::column is a byte offset.
                    let edit = InputEdit {
                        start_byte: change_start_doc_byte_idx,
                        old_end_byte: change_end_doc_byte_idx,
                        new_end_byte: change_start_doc_byte_idx + change.text.len(),
                        start_position: Point {
                            row: change_start_line_idx,
                            column: change_start_line_byte_idx,
                        },
                        old_end_position: Point {
                            row: change_end_line_idx,
                            column: change_end_line_byte_idx,
                        },
                        new_end_position: Point {
                            row: change_new_end_line_idx,
                            column: change_new_end_line_byte_idx,
                        },
                    };

                    tree.edit(&edit);

                    self.tree = Some(self
                                .parser
                                .parse(self.rope.to_string(), Some(tree))
                                .expect("parse should always return a tree when the language was set and no timeout was specified"));
                }

                return Ok(());
            }
            None => {
                self.rope = Rope::from_str(&change.text);
                self.tree = self.parser.parse(&change.text, None);

                return Ok(());
            }
        }
    }
}

#[derive(Clone, Copy, Serialize, Deserialize)]
pub enum LanguageId {
    Bash,
    C,
    Cpp,
    CSharp,
    Elixir,
    Erlang,
    Go,
    Html,
    Java,
    JavaScript,
    JavaScriptReact,
    Json,
    Lua,
    Markdown,
    ObjectiveC,
    Python,
    R,
    Ruby,
    Rust,
    Scala,
    Swift,
    TypeScript,
    TypeScriptReact,
    Unknown,
}

impl fmt::Display for LanguageId {
    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
        match self {
            Self::Bash => write!(f, "shellscript"),
            Self::C => write!(f, "c"),
            Self::Cpp => write!(f, "cpp"),
            Self::CSharp => write!(f, "csharp"),
            Self::Elixir => write!(f, "elixir"),
            Self::Erlang => write!(f, "erlang"),
            Self::Go => write!(f, "go"),
            Self::Html => write!(f, "html"),
            Self::Java => write!(f, "java"),
            Self::JavaScript => write!(f, "javascript"),
            Self::JavaScriptReact => write!(f, "javascriptreact"),
            Self::Json => write!(f, "json"),
            Self::Lua => write!(f, "lua"),
            Self::Markdown => write!(f, "markdown"),
            Self::ObjectiveC => write!(f, "objective-c"),
            Self::Python => write!(f, "python"),
            Self::R => write!(f, "r"),
            Self::Ruby => write!(f, "ruby"),
            Self::Rust => write!(f, "rust"),
            Self::Scala => write!(f, "scala"),
            Self::Swift => write!(f, "swift"),
            Self::TypeScript => write!(f, "typescript"),
            Self::TypeScriptReact => write!(f, "typescriptreact"),
            Self::Unknown => write!(f, "unknown"),
        }
    }
}

pub struct LanguageIdError {
    language_id: String,
}

impl fmt::Display for LanguageIdError {
    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
        write!(f, "invalid language id: {}", self.language_id)
    }
}

impl From<&str> for LanguageId {
    fn from(value: &str) -> Self {
        match value {
            "c" => Self::C,
            "cpp" => Self::Cpp,
            "csharp" => Self::CSharp,
            "elixir" => Self::Elixir,
            "erlang" => Self::Erlang,
            "go" => Self::Go,
            "html" => Self::Html,
            "java" => Self::Java,
            "javascript" => Self::JavaScript,
            "javascriptreact" => Self::JavaScriptReact,
            "json" => Self::Json,
            "lua" => Self::Lua,
            "markdown" => Self::Markdown,
            "objective-c" => Self::ObjectiveC,
            "python" => Self::Python,
            "r" => Self::R,
            "ruby" => Self::Ruby,
            "rust" => Self::Rust,
            "scala" => Self::Scala,
            "shellscript" => Self::Bash,
            "swift" => Self::Swift,
            "typescript" => Self::TypeScript,
            "typescriptreact" => Self::TypeScriptReact,
            _ => Self::Unknown,
        }
    }
}

impl From<String> for LanguageId {
    fn from(value: String) -> Self {
        Self::from(value.as_str())
    }
}

#[cfg(test)]
mod test {
    use tower_lsp::lsp_types::{Position, Range};
    use tree_sitter::Node;

    use super::*;

    macro_rules! new_change {
        ($start_line:expr, $start_char:expr, $end_line:expr, $end_char:expr, $text:expr) => {
            &TextDocumentContentChangeEvent {
                range: Some(Range::new(
                    Position::new($start_line as u32, $start_char as u32),
                    Position::new($end_line as u32, $end_char as u32),
                )),
                range_length: None,
                text: $text.to_string(),
            }
        };
    }

    #[test]
    fn test_text_document_apply_content_change() {
        let mut rope = Rope::from_str("🤗 Hello 🤗\nABC 🇫🇷\n world!");
        let mut doc = TextDocument::new(LanguageId::Unknown, &rope.to_string());

        doc.apply_content_change(new_change!(0, 0, 0, 3, ""), PositionEncodingKind::UTF16)
            .unwrap();
        rope = Rope::from_str("Hello 🤗\nABC 🇫🇷\n world!");
        assert_eq!(doc.rope.to_string(), rope.to_string());

        doc.apply_content_change(
            new_change!(1, 4 + "🇫🇷".len(), 1, 4 + "🇫🇷".len(), " DEF"),
            PositionEncodingKind::UTF8,
        )
        .unwrap();
        rope = Rope::from_str("Hello 🤗\nABC 🇫🇷 DEF\n world!");
        assert_eq!(doc.rope.to_string(), rope.to_string());

        doc.apply_content_change(
            new_change!(1, 0, 1, 4 + "🇫🇷".chars().count() + 4, ""),
            PositionEncodingKind::UTF32,
        )
        .unwrap();
        rope = Rope::from_str("Hello 🤗\n\n world!");
        assert_eq!(doc.rope.to_string(), rope.to_string());

        doc.apply_content_change(new_change!(1, 0, 1, 1, ""), PositionEncodingKind::UTF16)
            .unwrap();
        rope = Rope::from_str("Hello 🤗\n world!");
        assert_eq!(doc.rope.to_string(), rope.to_string());

        doc.apply_content_change(new_change!(0, 5, 1, 1, "，"), PositionEncodingKind::UTF16)
            .unwrap();
        rope = Rope::from_str("Hello，world!");
        assert_eq!(doc.rope.to_string(), rope.to_string());

        doc.apply_content_change(
            new_change!(0, 0, 0, rope.len_utf16_cu(), ""),
            PositionEncodingKind::UTF16,
        )
        .unwrap();
        assert_eq!(doc.rope.to_string(), "");
    }

    #[test]
    fn test_text_document_apply_content_change_no_range() {
        let mut rope = Rope::from_str(
            "let a = '🥸 你好';\rfunction helloWorld() { return '🤲🏿'; }\nlet b = 'Hi, 😊';",
        );
        let mut doc = TextDocument::new(LanguageId::JavaScript, &rope.to_string());
        let mut parser = Parser::new();

        parser
            .set_language(tree_sitter_javascript::language())
            .unwrap();

        assert!(doc.apply_content_change(
            &TextDocumentContentChangeEvent {
                range: None,
                range_length: None,
                text: "let a = '🥸 你好';\rfunction helloWorld() { return '🤲🏿'; }\nlet b = 'Hi, 😊';".to_owned(),
            },
            PositionEncodingKind::UTF16,
        ).is_ok());
        assert_eq!(doc.rope.to_string(), rope.to_string());

        let tree = parser.parse(&rope.to_string(), None).unwrap();

        assert!(nodes_are_equal_recursive(
            &doc.tree.as_ref().unwrap().root_node(),
            &tree.root_node()
        ));

        assert!(doc
            .apply_content_change(
                &TextDocumentContentChangeEvent {
                    range: None,
                    range_length: None,
                    text: "let a = '🥸 你好，😊';".to_owned(),
                },
                PositionEncodingKind::UTF16,
            )
            .is_ok());
        rope = Rope::from_str("let a = '🥸 你好，😊';");
        assert_eq!(doc.rope.to_string(), rope.to_string());

        let tree = parser.parse(&rope.to_string(), None).unwrap();

        assert!(nodes_are_equal_recursive(
            &doc.tree.as_ref().unwrap().root_node(),
            &tree.root_node()
        ));
    }

    #[test]
    fn test_text_document_apply_content_change_bounds() {
        let rope = Rope::from_str("");
        let mut doc = TextDocument::new(LanguageId::Unknown, &rope.to_string());

        assert!(doc
            .apply_content_change(new_change!(0, 0, 0, 1, ""), PositionEncodingKind::UTF16)
            .is_err());

        assert!(doc
            .apply_content_change(new_change!(1, 0, 1, 0, ""), PositionEncodingKind::UTF16)
            .is_err());

        assert!(doc
            .apply_content_change(new_change!(0, 0, 0, 0, "🤗"), PositionEncodingKind::UTF16)
            .is_ok());
        let rope = Rope::from_str("🤗");
        assert_eq!(doc.rope.to_string(), rope.to_string());

        assert!(doc
            .apply_content_change(
                new_change!(0, rope.len_utf16_cu(), 0, rope.len_utf16_cu(), "\r\n"),
                PositionEncodingKind::UTF16
            )
            .is_ok());
        let rope = Rope::from_str("🤗\r\n");
        assert_eq!(doc.rope.to_string(), rope.to_string());

        assert!(doc
            .apply_content_change(
                new_change!(0, '🤗'.len_utf16(), 0, '🤗'.len_utf16(), "\n"),
                PositionEncodingKind::UTF16
            )
            .is_ok());
        let rope = Rope::from_str("🤗\n\r\n");
        assert_eq!(doc.rope.to_string(), rope.to_string());

        assert!(doc
            .apply_content_change(
                new_change!(0, '🤗'.len_utf16(), 2, 0, ""),
                PositionEncodingKind::UTF16
            )
            .is_ok());
        let rope = Rope::from_str("🤗");
        assert_eq!(doc.rope.to_string(), rope.to_string());
    }

    #[test]
    // Ensure that the three stays consistent across updates.
    fn test_document_update_tree_consistency_easy() {
        let a = "let a = '你好';\rlet b = 'Hi, 😊';";

        let mut document = TextDocument::new(LanguageId::JavaScript, a);

        document
            .apply_content_change(new_change!(0, 9, 0, 11, "𐐀"), PositionEncodingKind::UTF16)
            .unwrap();

        let b = "let a = '𐐀';\rlet b = 'Hi, 😊';";

        assert_eq!(document.rope.to_string(), b);

        let mut parser = Parser::new();

        parser
            .set_language(tree_sitter_javascript::language())
            .unwrap();

        let b_tree = parser.parse(b, None).unwrap();

        assert!(nodes_are_equal_recursive(
            &document.tree.unwrap().root_node(),
            &b_tree.root_node()
        ));
    }

    #[test]
    fn test_document_update_tree_consistency_medium() {
        let a = "let a = '🥸 你好';\rfunction helloWorld() { return '🤲🏿'; }\nlet b = 'Hi, 😊';";

        let mut document = TextDocument::new(LanguageId::JavaScript, a);

        document
            .apply_content_change(new_change!(0, 14, 2, 13, "，"), PositionEncodingKind::UTF16)
            .unwrap();

        let b = "let a = '🥸 你好，😊';";

        assert_eq!(document.rope.to_string(), b);

        let mut parser = Parser::new();

        parser
            .set_language(tree_sitter_javascript::language())
            .unwrap();

        let b_tree = parser.parse(b, None).unwrap();

        assert!(nodes_are_equal_recursive(
            &document.tree.unwrap().root_node(),
            &b_tree.root_node()
        ));
    }

    #[test]
    /// I wrote this test because I was unsure whether tree-sitter's Point
    /// struct represents a byte or character offset. Turns out it's a byte
    /// offset.
    fn test_tree_sitter_point() {
        let mut parser = Parser::new();

        let language = tree_sitter_javascript::language();

        parser.set_language(language).unwrap();

        let source_code = "let a = \"你好\";\nlet x = 'Hi, 😊';";
        let parsed = parser.parse(source_code, None).unwrap();
        let root_node = parsed.root_node();

        // Expect the '🤗' to be in "program.lexical_declaration.variable_declarator.string.string_fragment"
        // and to take up 4 bytes.
        let mut node = root_node
            .descendant_for_point_range(
                tree_sitter::Point { row: 1, column: 9 },
                tree_sitter::Point { row: 1, column: 17 },
            )
            .unwrap();

        assert_eq!(node.kind(), "string_fragment");

        // Assert the ancestor chain
        node = node.parent().unwrap();
        assert_eq!(node.kind(), "string");
        node = node.parent().unwrap();
        assert_eq!(node.kind(), "variable_declarator");
        node = node.parent().unwrap();
        assert_eq!(node.kind(), "lexical_declaration");
        node = node.parent().unwrap();
        assert_eq!(node.kind(), "program");
        assert_eq!(node.parent(), None);

        // Conversely, we check that ropey effectively uses character offsets.
        let rope = Rope::from_str(source_code);

        let second_line = rope.line(1);
        assert_eq!(second_line.len_chars(), 16);
        assert_eq!(second_line.len_bytes(), 19);
        assert_eq!(second_line.chars().nth(13), Some('😊'));
    }

    #[test]
    /// I wrote this test to better understand ropey's handling of ranges.
    fn test_rope_remove_and_insert() {
        let source_code = "let x = 'Hi, 😊';".to_owned();
        let mut rope = Rope::from_str(&source_code);

        assert_eq!(rope.len_chars(), source_code.chars().count());

        rope.remove(0..source_code.chars().count());
        assert_eq!(rope.len_chars(), 0);

        rope.insert(0, &source_code);
        assert_eq!(rope.len_chars(), source_code.chars().count());

        rope.remove(0..source_code.chars().count() - 1);
        assert_eq!(rope.len_chars(), 1);

        rope.remove(0..1);
        assert_eq!(rope.len_chars(), 0);

        rope = Rope::from_str("");

        assert_eq!(rope.lines().nth(0), Some(rope.slice(0..0)));
    }

    fn nodes_are_equal_recursive(node1: &Node, node2: &Node) -> bool {
        if node1.kind() != node2.kind() {
            return false;
        }

        if node1.start_byte() != node2.start_byte() {
            return false;
        }

        if node1.end_byte() != node2.end_byte() {
            return false;
        }

        if node1.start_position() != node2.start_position() {
            return false;
        }

        if node1.end_position() != node2.end_position() {
            return false;
        }

        if node1.child_count() != node2.child_count() {
            return false;
        }

        for i in 0..node1.child_count() {
            let child1 = node1.child(i).unwrap();
            let child2 = node2.child(i).unwrap();

            if !nodes_are_equal_recursive(&child1, &child2) {
                return false;
            }
        }

        true
    }
}
	use ropey::{Rope, RopeSlice};
	use serde::{Deserialize, Serialize};
	use std::fmt;
	use thiserror::Error;
	use tower_lsp::lsp_types::{Position, TextDocumentContentChangeEvent};
	use tree_sitter::{InputEdit, Parser, Point, Tree};

	pub struct TextDocument {
	pub rope: Rope,
	pub tree: Option<Tree>,
	pub language_id: LanguageId,
	parser: Parser,
	}

	#[derive(Error, Debug)]
	pub enum DocumentError {
	#[error("position {0}:{1} is out of bounds")]
	PositionOutOfBounds(u32, u32),
	}

	#[derive(Clone, Debug, Copy)]
	/// We redeclare this enum here because the `lsp_types` crate exports a Cow
	/// type that is unconvenient to deal with.
	pub enum PositionEncodingKind {
	#[allow(dead_code)]
	UTF8,
	UTF16,
	#[allow(dead_code)]
	UTF32,
	}

	impl TextDocument {
	/// Creates a new document from the given text and language id. It creates
	/// a rope, parser and syntax tree from the text.
	pub fn new(language_id: LanguageId, text: &str) -> Self {
	let rope = Rope::from_str(text);
	let mut parser = Parser::new();

	let language = match language_id {
	LanguageId::Bash => tree_sitter_bash::language(),
	LanguageId::C => tree_sitter_c::language(),
	LanguageId::Cpp => tree_sitter_cpp::language(),
	LanguageId::CSharp => tree_sitter_c_sharp::language(),
	LanguageId::Elixir => tree_sitter_elixir::language(),
	LanguageId::Erlang => tree_sitter_erlang::language(),
	LanguageId::Go => tree_sitter_go::language(),
	LanguageId::Html => tree_sitter_html::language(),
	LanguageId::Java => tree_sitter_java::language(),
	LanguageId::JavaScript => tree_sitter_javascript::language(),
	LanguageId::JavaScriptReact => tree_sitter_javascript::language(),
	LanguageId::Json => tree_sitter_json::language(),
	LanguageId::Lua => tree_sitter_lua::language(),
	LanguageId::Markdown => tree_sitter_md::language(),
	LanguageId::ObjectiveC => tree_sitter_objc::language(),
	LanguageId::Python => tree_sitter_python::language(),
	LanguageId::R => tree_sitter_r::language(),
	LanguageId::Ruby => tree_sitter_ruby::language(),
	LanguageId::Rust => tree_sitter_rust::language(),
	LanguageId::Scala => tree_sitter_scala::language(),
	LanguageId::Swift => tree_sitter_swift::language(),
	LanguageId::TypeScript => tree_sitter_typescript::language_typescript(),
	LanguageId::TypeScriptReact => tree_sitter_typescript::language_tsx(),
	LanguageId::Unknown => {
	return Self {
	rope,
	tree: None,
	language_id,
	parser,
	};
	}
	};

	parser
	.set_language(language)
	.expect("set parser language should always succeed");

	let tree = parser
	.parse(text, None)
	.expect("parse should always return a tree when the language was set and no timeout was specified");

	Self {
	rope,
	tree: Some(tree),
	language_id,
	parser,
	}
	}

	/// Apply a change to the document.
	pub fn apply_content_change(
	&mut self,
	change: &TextDocumentContentChangeEvent,
	position_encoding: PositionEncodingKind,
	) -> Result<(), DocumentError> {
	match change.range {
	Some(range) => {
	assert!(
	range.start.line < range.end.line
	\|\| (range.start.line == range.end.line
	&& range.start.character <= range.end.character)
	);

	let same_line = range.start.line == range.end.line;
	let same_character = range.start.character == range.end.character;

	let change_start_line_cu_idx = range.start.character as usize;
	let change_end_line_cu_idx = range.end.character as usize;

	// 1. Get the line at which the change starts.
	let change_start_line_idx = range.start.line as usize;
	let change_start_line = match self.rope.get_line(change_start_line_idx) {
	Some(line) => line,
	None => {
	return Err(DocumentError::PositionOutOfBounds(
	range.start.line,
	range.start.character,
	))
	}
	};

	// 2. Get the line at which the change ends. (Small optimization
	// where we first check whether start and end line are the
	// same O(log N) lookup. We repeat this all throughout this
	// function).
	let change_end_line_idx = range.end.line as usize;
	let change_end_line = match same_line {
	true => change_start_line,
	false => match self.rope.get_line(change_end_line_idx) {
	Some(line) => line,
	None => {
	return Err(DocumentError::PositionOutOfBounds(
	range.end.line,
	range.end.character,
	));
	}
	},
	};

	fn compute_char_idx(
	position_encoding: PositionEncodingKind,
	position: &Position,
	slice: &RopeSlice,
	) -> Result<usize, DocumentError> {
	match position_encoding {
	PositionEncodingKind::UTF8 => {
	slice.try_byte_to_char(position.character as usize)
	}
	PositionEncodingKind::UTF16 => {
	slice.try_utf16_cu_to_char(position.character as usize)
	}
	PositionEncodingKind::UTF32 => Ok(position.character as usize),
	}
	.map_err(\|_\| {
	DocumentError::PositionOutOfBounds(position.line, position.character)
	})
	}

	// 3. Compute the character offset into the start/end line where
	// the change starts/ends.
	let change_start_line_char_idx =
	compute_char_idx(position_encoding, &range.start, &change_start_line)?;
	let change_end_line_char_idx = match same_line && same_character {
	true => change_start_line_char_idx,
	false => compute_char_idx(position_encoding, &range.end, &change_end_line)?,
	};

	// 4. Compute the character and byte offset into the document
	// where the change starts/ends.
	let change_start_doc_char_idx =
	self.rope.line_to_char(change_start_line_idx) + change_start_line_char_idx;
	let change_end_doc_char_idx = match same_line && same_character {
	true => change_start_doc_char_idx,
	false => self.rope.line_to_char(change_end_line_idx) + change_end_line_char_idx,
	};
	let change_start_doc_byte_idx = self.rope.char_to_byte(change_start_doc_char_idx);
	let change_end_doc_byte_idx = match same_line && same_character {
	true => change_start_doc_byte_idx,
	false => self.rope.char_to_byte(change_end_doc_char_idx),
	};

	// 5. Compute the byte offset into the start/end line where the
	// change starts/ends. Required for tree-sitter.
	let change_start_line_byte_idx = match position_encoding {
	PositionEncodingKind::UTF8 => change_start_line_cu_idx,
	PositionEncodingKind::UTF16 => {
	change_start_line.char_to_utf16_cu(change_start_line_char_idx)
	}
	PositionEncodingKind::UTF32 => change_start_line_char_idx,
	};
	let change_end_line_byte_idx = match same_line && same_character {
	true => change_start_line_byte_idx,
	false => match position_encoding {
	PositionEncodingKind::UTF8 => change_end_line_cu_idx,
	PositionEncodingKind::UTF16 => {
	change_end_line.char_to_utf16_cu(change_end_line_char_idx)
	}
	PositionEncodingKind::UTF32 => change_end_line_char_idx,
	},
	};

	self.rope
	.remove(change_start_doc_char_idx..change_end_doc_char_idx);
	self.rope.insert(change_start_doc_char_idx, &change.text);

	if let Some(tree) = &mut self.tree {
	// 6. Compute the byte index into the new end line where the
	// change ends. Required for tree-sitter.
	let change_new_end_line_idx = self
	.rope
	.byte_to_line(change_start_doc_byte_idx + change.text.len());
	let change_new_end_line_byte_idx =
	change_start_doc_byte_idx + change.text.len();

	// 7. Construct the tree-sitter edit. We stay mindful that
	// tree-sitter Point::column is a byte offset.
	let edit = InputEdit {
	start_byte: change_start_doc_byte_idx,
	old_end_byte: change_end_doc_byte_idx,
	new_end_byte: change_start_doc_byte_idx + change.text.len(),
	start_position: Point {
	row: change_start_line_idx,
	column: change_start_line_byte_idx,
	},
	old_end_position: Point {
	row: change_end_line_idx,
	column: change_end_line_byte_idx,
	},
	new_end_position: Point {
	row: change_new_end_line_idx,
	column: change_new_end_line_byte_idx,
	},
	};

	tree.edit(&edit);

	self.tree = Some(self
	.parser
	.parse(self.rope.to_string(), Some(tree))
	.expect("parse should always return a tree when the language was set and no timeout was specified"));
	}

	return Ok(());
	}
	None => {
	self.rope = Rope::from_str(&change.text);
	self.tree = self.parser.parse(&change.text, None);

	return Ok(());
	}
	}
	}
	}

	#[derive(Clone, Copy, Serialize, Deserialize)]
	pub enum LanguageId {
	Bash,
	C,
	Cpp,
	CSharp,
	Elixir,
	Erlang,
	Go,
	Html,
	Java,
	JavaScript,
	JavaScriptReact,
	Json,
	Lua,
	Markdown,
	ObjectiveC,
	Python,
	R,
	Ruby,
	Rust,
	Scala,
	Swift,
	TypeScript,
	TypeScriptReact,
	Unknown,
	}

	impl fmt::Display for LanguageId {
	fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
	match self {
	Self::Bash => write!(f, "shellscript"),
	Self::C => write!(f, "c"),
	Self::Cpp => write!(f, "cpp"),
	Self::CSharp => write!(f, "csharp"),
	Self::Elixir => write!(f, "elixir"),
	Self::Erlang => write!(f, "erlang"),
	Self::Go => write!(f, "go"),
	Self::Html => write!(f, "html"),
	Self::Java => write!(f, "java"),
	Self::JavaScript => write!(f, "javascript"),
	Self::JavaScriptReact => write!(f, "javascriptreact"),
	Self::Json => write!(f, "json"),
	Self::Lua => write!(f, "lua"),
	Self::Markdown => write!(f, "markdown"),
	Self::ObjectiveC => write!(f, "objective-c"),
	Self::Python => write!(f, "python"),
	Self::R => write!(f, "r"),
	Self::Ruby => write!(f, "ruby"),
	Self::Rust => write!(f, "rust"),
	Self::Scala => write!(f, "scala"),
	Self::Swift => write!(f, "swift"),
	Self::TypeScript => write!(f, "typescript"),
	Self::TypeScriptReact => write!(f, "typescriptreact"),
	Self::Unknown => write!(f, "unknown"),
	}
	}
	}

	pub struct LanguageIdError {
	language_id: String,
	}

	impl fmt::Display for LanguageIdError {
	fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
	write!(f, "invalid language id: {}", self.language_id)
	}
	}

	impl From<&str> for LanguageId {
	fn from(value: &str) -> Self {
	match value {
	"c" => Self::C,
	"cpp" => Self::Cpp,
	"csharp" => Self::CSharp,
	"elixir" => Self::Elixir,
	"erlang" => Self::Erlang,
	"go" => Self::Go,
	"html" => Self::Html,
	"java" => Self::Java,
	"javascript" => Self::JavaScript,
	"javascriptreact" => Self::JavaScriptReact,
	"json" => Self::Json,
	"lua" => Self::Lua,
	"markdown" => Self::Markdown,
	"objective-c" => Self::ObjectiveC,
	"python" => Self::Python,
	"r" => Self::R,
	"ruby" => Self::Ruby,
	"rust" => Self::Rust,
	"scala" => Self::Scala,
	"shellscript" => Self::Bash,
	"swift" => Self::Swift,
	"typescript" => Self::TypeScript,
	"typescriptreact" => Self::TypeScriptReact,
	_ => Self::Unknown,
	}
	}
	}

	impl From<String> for LanguageId {
	fn from(value: String) -> Self {
	Self::from(value.as_str())
	}
	}

	#[cfg(test)]
	mod test {
	use tower_lsp::lsp_types::{Position, Range};
	use tree_sitter::Node;

	use super::*;

	macro_rules! new_change {
	($start_line:expr, $start_char:expr, $end_line:expr, $end_char:expr, $text:expr) => {
	&TextDocumentContentChangeEvent {
	range: Some(Range::new(
	Position::new($start_line as u32, $start_char as u32),
	Position::new($end_line as u32, $end_char as u32),
	)),
	range_length: None,
	text: $text.to_string(),
	}
	};
	}

	#[test]
	fn test_text_document_apply_content_change() {
	let mut rope = Rope::from_str("🤗 Hello 🤗\nABC 🇫🇷\n world!");
	let mut doc = TextDocument::new(LanguageId::Unknown, &rope.to_string());

	doc.apply_content_change(new_change!(0, 0, 0, 3, ""), PositionEncodingKind::UTF16)
	.unwrap();
	rope = Rope::from_str("Hello 🤗\nABC 🇫🇷\n world!");
	assert_eq!(doc.rope.to_string(), rope.to_string());

	doc.apply_content_change(
	new_change!(1, 4 + "🇫🇷".len(), 1, 4 + "🇫🇷".len(), " DEF"),
	PositionEncodingKind::UTF8,
	)
	.unwrap();
	rope = Rope::from_str("Hello 🤗\nABC 🇫🇷 DEF\n world!");
	assert_eq!(doc.rope.to_string(), rope.to_string());

	doc.apply_content_change(
	new_change!(1, 0, 1, 4 + "🇫🇷".chars().count() + 4, ""),
	PositionEncodingKind::UTF32,
	)
	.unwrap();
	rope = Rope::from_str("Hello 🤗\n\n world!");
	assert_eq!(doc.rope.to_string(), rope.to_string());

	doc.apply_content_change(new_change!(1, 0, 1, 1, ""), PositionEncodingKind::UTF16)
	.unwrap();
	rope = Rope::from_str("Hello 🤗\n world!");
	assert_eq!(doc.rope.to_string(), rope.to_string());

	doc.apply_content_change(new_change!(0, 5, 1, 1, "，"), PositionEncodingKind::UTF16)
	.unwrap();
	rope = Rope::from_str("Hello，world!");
	assert_eq!(doc.rope.to_string(), rope.to_string());

	doc.apply_content_change(
	new_change!(0, 0, 0, rope.len_utf16_cu(), ""),
	PositionEncodingKind::UTF16,
	)
	.unwrap();
	assert_eq!(doc.rope.to_string(), "");
	}

	#[test]
	fn test_text_document_apply_content_change_no_range() {
	let mut rope = Rope::from_str(
	"let a = '🥸 你好';\rfunction helloWorld() { return '🤲🏿'; }\nlet b = 'Hi, 😊';",
	);
	let mut doc = TextDocument::new(LanguageId::JavaScript, &rope.to_string());
	let mut parser = Parser::new();

	parser
	.set_language(tree_sitter_javascript::language())
	.unwrap();

	assert!(doc.apply_content_change(
	&TextDocumentContentChangeEvent {
	range: None,
	range_length: None,
	text: "let a = '🥸 你好';\rfunction helloWorld() { return '🤲🏿'; }\nlet b = 'Hi, 😊';".to_owned(),
	},
	PositionEncodingKind::UTF16,
	).is_ok());
	assert_eq!(doc.rope.to_string(), rope.to_string());

	let tree = parser.parse(&rope.to_string(), None).unwrap();

	assert!(nodes_are_equal_recursive(
	&doc.tree.as_ref().unwrap().root_node(),
	&tree.root_node()
	));

	assert!(doc
	.apply_content_change(
	&TextDocumentContentChangeEvent {
	range: None,
	range_length: None,
	text: "let a = '🥸 你好，😊';".to_owned(),
	},
	PositionEncodingKind::UTF16,
	)
	.is_ok());
	rope = Rope::from_str("let a = '🥸 你好，😊';");
	assert_eq!(doc.rope.to_string(), rope.to_string());

	let tree = parser.parse(&rope.to_string(), None).unwrap();

	assert!(nodes_are_equal_recursive(
	&doc.tree.as_ref().unwrap().root_node(),
	&tree.root_node()
	));
	}

	#[test]
	fn test_text_document_apply_content_change_bounds() {
	let rope = Rope::from_str("");
	let mut doc = TextDocument::new(LanguageId::Unknown, &rope.to_string());

	assert!(doc
	.apply_content_change(new_change!(0, 0, 0, 1, ""), PositionEncodingKind::UTF16)
	.is_err());

	assert!(doc
	.apply_content_change(new_change!(1, 0, 1, 0, ""), PositionEncodingKind::UTF16)
	.is_err());

	assert!(doc
	.apply_content_change(new_change!(0, 0, 0, 0, "🤗"), PositionEncodingKind::UTF16)
	.is_ok());
	let rope = Rope::from_str("🤗");
	assert_eq!(doc.rope.to_string(), rope.to_string());

	assert!(doc
	.apply_content_change(
	new_change!(0, rope.len_utf16_cu(), 0, rope.len_utf16_cu(), "\r\n"),
	PositionEncodingKind::UTF16
	)
	.is_ok());
	let rope = Rope::from_str("🤗\r\n");
	assert_eq!(doc.rope.to_string(), rope.to_string());

	assert!(doc
	.apply_content_change(
	new_change!(0, '🤗'.len_utf16(), 0, '🤗'.len_utf16(), "\n"),
	PositionEncodingKind::UTF16
	)
	.is_ok());
	let rope = Rope::from_str("🤗\n\r\n");
	assert_eq!(doc.rope.to_string(), rope.to_string());

	assert!(doc
	.apply_content_change(
	new_change!(0, '🤗'.len_utf16(), 2, 0, ""),
	PositionEncodingKind::UTF16
	)
	.is_ok());
	let rope = Rope::from_str("🤗");
	assert_eq!(doc.rope.to_string(), rope.to_string());
	}

	#[test]
	// Ensure that the three stays consistent across updates.
	fn test_document_update_tree_consistency_easy() {
	let a = "let a = '你好';\rlet b = 'Hi, 😊';";

	let mut document = TextDocument::new(LanguageId::JavaScript, a);

	document
	.apply_content_change(new_change!(0, 9, 0, 11, "𐐀"), PositionEncodingKind::UTF16)
	.unwrap();

	let b = "let a = '𐐀';\rlet b = 'Hi, 😊';";

	assert_eq!(document.rope.to_string(), b);

	let mut parser = Parser::new();

	parser
	.set_language(tree_sitter_javascript::language())
	.unwrap();

	let b_tree = parser.parse(b, None).unwrap();

	assert!(nodes_are_equal_recursive(
	&document.tree.unwrap().root_node(),
	&b_tree.root_node()
	));
	}

	#[test]
	fn test_document_update_tree_consistency_medium() {
	let a = "let a = '🥸 你好';\rfunction helloWorld() { return '🤲🏿'; }\nlet b = 'Hi, 😊';";

	let mut document = TextDocument::new(LanguageId::JavaScript, a);

	document
	.apply_content_change(new_change!(0, 14, 2, 13, "，"), PositionEncodingKind::UTF16)
	.unwrap();

	let b = "let a = '🥸 你好，😊';";

	assert_eq!(document.rope.to_string(), b);

	let mut parser = Parser::new();

	parser
	.set_language(tree_sitter_javascript::language())
	.unwrap();

	let b_tree = parser.parse(b, None).unwrap();

	assert!(nodes_are_equal_recursive(
	&document.tree.unwrap().root_node(),
	&b_tree.root_node()
	));
	}

	#[test]
	/// I wrote this test because I was unsure whether tree-sitter's Point
	/// struct represents a byte or character offset. Turns out it's a byte
	/// offset.
	fn test_tree_sitter_point() {
	let mut parser = Parser::new();

	let language = tree_sitter_javascript::language();

	parser.set_language(language).unwrap();

	let source_code = "let a = \"你好\";\nlet x = 'Hi, 😊';";
	let parsed = parser.parse(source_code, None).unwrap();
	let root_node = parsed.root_node();

	// Expect the '🤗' to be in "program.lexical_declaration.variable_declarator.string.string_fragment"
	// and to take up 4 bytes.
	let mut node = root_node
	.descendant_for_point_range(
	tree_sitter::Point { row: 1, column: 9 },
	tree_sitter::Point { row: 1, column: 17 },
	)
	.unwrap();

	assert_eq!(node.kind(), "string_fragment");

	// Assert the ancestor chain
	node = node.parent().unwrap();
	assert_eq!(node.kind(), "string");
	node = node.parent().unwrap();
	assert_eq!(node.kind(), "variable_declarator");
	node = node.parent().unwrap();
	assert_eq!(node.kind(), "lexical_declaration");
	node = node.parent().unwrap();
	assert_eq!(node.kind(), "program");
	assert_eq!(node.parent(), None);

	// Conversely, we check that ropey effectively uses character offsets.
	let rope = Rope::from_str(source_code);

	let second_line = rope.line(1);
	assert_eq!(second_line.len_chars(), 16);
	assert_eq!(second_line.len_bytes(), 19);
	assert_eq!(second_line.chars().nth(13), Some('😊'));
	}

	#[test]
	/// I wrote this test to better understand ropey's handling of ranges.
	fn test_rope_remove_and_insert() {
	let source_code = "let x = 'Hi, 😊';".to_owned();
	let mut rope = Rope::from_str(&source_code);

	assert_eq!(rope.len_chars(), source_code.chars().count());

	rope.remove(0..source_code.chars().count());
	assert_eq!(rope.len_chars(), 0);

	rope.insert(0, &source_code);
	assert_eq!(rope.len_chars(), source_code.chars().count());

	rope.remove(0..source_code.chars().count() - 1);
	assert_eq!(rope.len_chars(), 1);

	rope.remove(0..1);
	assert_eq!(rope.len_chars(), 0);

	rope = Rope::from_str("");

	assert_eq!(rope.lines().nth(0), Some(rope.slice(0..0)));
	}

	fn nodes_are_equal_recursive(node1: &Node, node2: &Node) -> bool {
	if node1.kind() != node2.kind() {
	return false;
	}

	if node1.start_byte() != node2.start_byte() {
	return false;
	}

	if node1.end_byte() != node2.end_byte() {
	return false;
	}

	if node1.start_position() != node2.start_position() {
	return false;
	}

	if node1.end_position() != node2.end_position() {
	return false;
	}

	if node1.child_count() != node2.child_count() {
	return false;
	}

	for i in 0..node1.child_count() {
	let child1 = node1.child(i).unwrap();
	let child2 = node2.child(i).unwrap();

	if !nodes_are_equal_recursive(&child1, &child2) {
	return false;
	}
	}

	true
	}
	}