tsl-devkit/lsp-server/src/service/parser.cpp

391 lines
14 KiB
C++
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

#include <algorithm>
#include <cctype>
#include <filesystem>
#include <memory>
#include <mutex>
#include <shared_mutex>
#include <spdlog/spdlog.h>
#include "./utils/text_coordinates.hpp"
#include "../language/ast/deserializer.hpp"
#include "./parser.hpp"
namespace lsp::service
{
// ============= TreeSitter ================== //
// Creates the underlying tree-sitter parser handle.
// Throws std::runtime_error when ts_parser_new() returns a null handle.
TreeSitter::TreeSitter()
{
    parser_ = ts_parser_new();
    if (parser_ == nullptr)
    {
        throw std::runtime_error("Failed to create tree-sitter parser");
    }
}
// Deletes the parser handle if this object still holds one. A moved-from
// instance holds nullptr (see the move constructor) and skips the delete.
TreeSitter::~TreeSitter()
{
    if (parser_ == nullptr)
        return;
    ts_parser_delete(parser_);
}
TreeSitter::TreeSitter(TreeSitter&& other) noexcept :
parser_(other.parser_)
{
other.parser_ = nullptr;
}
// Assigns `language` to the parser.
// Returns false when there is no parser handle; otherwise returns the result
// of ts_parser_set_language().
bool TreeSitter::SetLanguage(const TSLanguage* language)
{
    return parser_ != nullptr && ts_parser_set_language(parser_, language);
}
// Parses `length` bytes starting at `content` with ts_parser_parse_string().
// `old_tree` is forwarded as the previous tree for that call.
// Returns nullptr when there is no parser handle; otherwise returns whatever
// ts_parser_parse_string() returns. Callers in this file wrap the result in an
// owner that releases it with ts_tree_delete.
TSTree* TreeSitter::Parse(const char* content, size_t length, TSTree* old_tree)
{
    return parser_ ? ts_parser_parse_string(parser_, old_tree, content, length)
                   : nullptr;
}
// Exposes the raw tree-sitter parser handle without transferring ownership.
// The pointer is nullptr for a moved-from instance.
TSParser* TreeSitter::GetRawParser() const
{
    return parser_;
}
// ============= SyntaxTree ================== //
// Takes ownership of `tree`; it is released through ts_tree_delete when the
// owning smart pointer is destroyed or reset.
SyntaxTree::SyntaxTree(TSTree* tree)
    : tree_(tree, &ts_tree_delete)
{
}
// Defaulted: destroying tree_ runs the ts_tree_delete deleter that the
// constructor installed, so no explicit cleanup is needed here.
SyntaxTree::~SyntaxTree() = default;
// Returns the wrapped tree as a non-owning pointer (nullptr if there is none).
// Ownership stays with this SyntaxTree.
TSTree* SyntaxTree::Get() const
{
    TSTree* const raw_tree = tree_.get();
    return raw_tree;
}
// Describes one LSP content change to tree-sitter via ts_tree_edit(), so the
// wrapped tree can afterwards be passed as the old tree of a reparse.
//
// change:  the edit; its range supplies the start / old-end positions and its
//          text supplies the inserted bytes.
// content: text used to turn the range positions into byte offsets.
//
// Does nothing when no tree is held.
//
// NOTE(review): the caller visible in this file passes the text that is also
// handed to the reparse (i.e. the post-change document), so the old-end byte
// offset is measured in that text. Confirm this is what ToOffset expects for
// edits that add or remove lines.
void SyntaxTree::ApplyEdit(const protocol::TextDocumentContentChangeEvent& change, const protocol::string& content)
{
    if (tree_ == nullptr)
        return;

    const protocol::uinteger first_byte = utils::text_coordinates::ToOffset(change.range.start, content);
    const protocol::uinteger last_byte = utils::text_coordinates::ToOffset(change.range.end, content);

    TSInputEdit edit = {};

    // Byte extents: the replaced span, and where the inserted text ends.
    edit.start_byte = first_byte;
    edit.old_end_byte = last_byte;
    edit.new_end_byte = first_byte + change.text.length();

    // Row/column extents; the new end point is derived from the inserted text
    // relative to the start point, so start_point must be filled in first.
    edit.start_point = utils::text_coordinates::ToPoint(change.range.start);
    edit.old_end_point = utils::text_coordinates::ToPoint(change.range.end);
    edit.new_end_point = utils::text_coordinates::CalculateEndPoint(change.text, edit.start_point);

    ts_tree_edit(tree_.get(), &edit);
}
// Returns the root node of the wrapped tree, or a value-initialized TSNode
// when no tree is held.
TSNode SyntaxTree::GetRootNode() const
{
    if (!tree_)
    {
        return TSNode{};
    }
    return ts_tree_root_node(tree_.get());
}
// ============= SyntaxTreeManager ================== //
// Stores `tree` under `uri` while holding the exclusive lock. Any tree that
// was previously stored for the same URI is replaced (and thereby destroyed).
void SyntaxTreeManager::StoreTree(const protocol::DocumentUri& uri, std::unique_ptr<SyntaxTree> tree)
{
    std::unique_lock<std::shared_mutex> write_guard{ mutex_ };
    auto& slot = trees_[uri];
    slot = std::move(tree);
}
// Erases the tree stored for `uri`, if any, while holding the exclusive lock.
void SyntaxTreeManager::RemoveTree(const protocol::DocumentUri& uri)
{
    std::unique_lock<std::shared_mutex> write_guard{ mutex_ };
    trees_.erase(uri);
}
// Looks up the tree stored for `uri` under a shared lock.
// Returns a non-owning pointer, or nullptr when nothing is stored.
// The lock only covers the lookup: it is released on return, so the pointer
// is not protected against a later StoreTree/RemoveTree/Clear.
SyntaxTree* SyntaxTreeManager::GetTree(const protocol::DocumentUri& uri)
{
    std::shared_lock<std::shared_mutex> read_guard{ mutex_ };
    if (auto entry = trees_.find(uri); entry != trees_.end())
    {
        return entry->second.get();
    }
    return nullptr;
}
// Const overload: looks up the tree stored for `uri` under a shared lock.
// Returns a non-owning pointer, or nullptr when nothing is stored.
// The lock only covers the lookup: it is released on return, so the pointer
// is not protected against a later StoreTree/RemoveTree/Clear.
const SyntaxTree* SyntaxTreeManager::GetTree(const protocol::DocumentUri& uri) const
{
    std::shared_lock<std::shared_mutex> read_guard{ mutex_ };
    if (auto entry = trees_.find(uri); entry != trees_.end())
    {
        return entry->second.get();
    }
    return nullptr;
}
size_t SyntaxTreeManager::GetTreeCount() const
{
std::shared_lock<std::shared_mutex> lock(mutex_);
return trees_.size();
}
void SyntaxTreeManager::Clear()
{
std::unique_lock<std::shared_mutex> lock(mutex_);
trees_.clear();
}
// ============= Parser ================== //
// Configures the member tree-sitter parser with the TSF grammar and registers
// this object's handlers for document opened / changed / closed events.
//
// event_bus: bus used both for the subscriptions made here and for the
//            events this class publishes later. It is dereferenced
//            unconditionally, so it must not be null.
//
// A failure to set the grammar is only logged; construction continues.
// The registered callbacks capture `this`.
Parser::Parser(std::shared_ptr<EventBus> event_bus)
    : event_bus_(std::move(event_bus))
{
    const bool language_ready = parser_.SetLanguage(tree_sitter_tsf());
    if (!language_ready)
        spdlog::error("Failed to set tree-sitter language");
    else
        spdlog::info("Set tree-sitter-tsf successfully");

    event_bus_->Subscribe<events::DocumentOpend>(
        [this](const auto& opened) { OnDocumentOpened(opened); });
    event_bus_->Subscribe<events::DocumentChanged>(
        [this](const auto& changed) { OnDocumentChanged(changed); });
    event_bus_->Subscribe<events::DocumentClosed>(
        [this](const auto& closed) { OnDocumentClosed(closed); });
}
// Defaulted destructor; members clean up through their own destructors.
// NOTE(review): the constructor registers event-bus callbacks that capture
// `this`, and nothing here unregisters them — confirm the EventBus cannot
// invoke them after this object is destroyed.
Parser::~Parser() = default;
// Returns the raw handle of the member tree-sitter parser (non-owning).
TSParser* Parser::GetRawParser() const
{
    return parser_.GetRawParser();
}
// Returns the raw tree currently stored for `uri`, or nullptr when the
// manager has no entry for it. The pointer is non-owning.
TSTree* Parser::GetTree(const protocol::DocumentUri& uri) const
{
    const auto* entry = syntax_tree_manager_.GetTree(uri);
    if (entry == nullptr)
    {
        return nullptr;
    }
    return entry->Get();
}
std::optional<symbol::EditingSymbolTable> Parser::ParseTsfFile(const std::string& file_path)
{
std::string extension = std::filesystem::path(file_path).extension().string();
std::transform(extension.begin(), extension.end(), extension.begin(), ::tolower);
if (extension != ".tsf" && extension != ".tsl")
return std::nullopt;
spdlog::debug("Parse tsf file: {}", file_path);
// 读取文件
std::ifstream file(file_path);
if (!file.is_open())
{
spdlog::trace("Cannot open file: {}", file_path);
return std::nullopt;
}
std::string content((std::istreambuf_iterator<char>(file)), std::istreambuf_iterator<char>());
file.close();
// 创建局部的 parser 和 extractor线程安全
TreeSitter local_parser;
if (!local_parser.SetLanguage(tree_sitter_tsf()))
{
spdlog::error("Failed to set tree-sitter language in ParseTsfFile");
return std::nullopt;
}
// 解析
TSTree* tree = local_parser.Parse(content.c_str(), content.length());
if (!tree)
{
spdlog::trace("Failed to parse file: {}", file_path);
return std::nullopt;
}
// 解析
auto tree_deleter = [](TSTree* t) { ts_tree_delete(t); };
std::unique_ptr<TSTree, decltype(tree_deleter)> tree_guard(tree, tree_deleter);
symbol::EditingSymbolTable table;
table.uri = "file://" + std::filesystem::absolute(file_path).string();
table.version = 0;
table.last_parsed = std::chrono::system_clock::now();
table.is_dirty = false;
table.deserializer = std::make_unique<language::ast::Deserializer>();
table.file_path = std::filesystem::absolute(file_path).string();
auto ast_result = table.deserializer->Parse(ts_tree_root_node(tree), content);
if (ast_result.HasErrors())
{
for (const auto& it : ast_result.errors)
spdlog::warn("Parse Error = {}", it.message);
}
if (!ast_result.root)
{
spdlog::error("AST root is null for: {}", file_path);
return std::nullopt;
}
table.ast_root = std::move(ast_result.root);
table.symbol_table = std::make_unique<language::symbol::SymbolTable>();
try
{
// 使用 Builder 构建符号表
table.symbol_table->Build(*table.ast_root);
spdlog::debug("Successfully built symbol table for: {}", file_path);
}
catch (const std::exception& e)
{
spdlog::error("Failed to build symbol table for {}: {}", file_path, e.what());
return std::nullopt;
}
auto metadata = InferFileMetadata(*table.symbol_table);
if (!metadata)
{
spdlog::warn("Cannot infer file metadata from symbol table: {}", file_path);
return std::nullopt;
}
table.file_type = metadata->file_type;
// 验证TSF文件
std::string file_stem = std::filesystem::path(file_path).stem().string();
auto normalize_name = [](std::string s) {
std::transform(s.begin(), s.end(), s.begin(), ::tolower);
size_t at_pos = s.find("@");
if (at_pos != std::string::npos)
s = s.substr(0, at_pos);
return s;
};
if (normalize_name(file_stem) != normalize_name(metadata->primary_symbol))
spdlog::warn("File name '{}' doesn't match primary symbol '{}' in: {}", file_stem, metadata->primary_symbol, file_path);
spdlog::debug("Successfully parsed file: {} (type: {}, symbol: {})",
file_path,
static_cast<int>(metadata->file_type),
metadata->primary_symbol);
return table;
}
// Derives file-level metadata from the earliest top-level symbol in `table`.
//
// The symbol named "::" (the global namespace) is ignored. Among the others,
// the one that starts first (ordered by start byte, then start line, then
// start column) becomes the primary symbol, and its kind decides the file
// type: Module -> kUnit, Class -> kClass, Function -> kFunction, anything
// else -> kScript.
//
// Returns std::nullopt when the table has no document symbols at all, or only
// the global-namespace symbol.
//
// The earliest symbol is found in a single pass rather than by sorting a
// filtered copy, since only the minimum is needed. When two symbols compare
// equal, the one returned first by GetDocumentSymbols() wins.
std::optional<Parser::FileMetadata> Parser::InferFileMetadata(const language::symbol::SymbolTable& table)
{
    using SymbolPtr = const language::symbol::SymbolDefinition*;

    auto top_level_symbols = table.GetDocumentSymbols();
    if (top_level_symbols.empty())
    {
        spdlog::warn("No top-level symbol definitions found in symbol table");
        return std::nullopt;
    }

    // Strict "a starts before b" ordering on source location.
    const auto starts_before = [](SymbolPtr a, SymbolPtr b) {
        if (a->location.start_byte != b->location.start_byte)
        {
            return a->location.start_byte < b->location.start_byte;
        }
        if (a->location.start_line != b->location.start_line)
        {
            return a->location.start_line < b->location.start_line;
        }
        return a->location.start_column < b->location.start_column;
    };

    // Pick the earliest real top-level symbol, skipping the special
    // global-namespace symbol "::".
    SymbolPtr first_top_level = nullptr;
    for (const auto* sym : top_level_symbols)
    {
        if (sym->name == "::")
        {
            continue;
        }
        if (first_top_level == nullptr || starts_before(sym, first_top_level))
        {
            first_top_level = sym;
        }
    }
    if (first_top_level == nullptr)
    {
        spdlog::warn("No valid top-level symbols found (only global namespace)");
        return std::nullopt;
    }

    FileMetadata metadata;
    metadata.primary_symbol = first_top_level->name;
    metadata.primary_kind = first_top_level->kind;

    // Infer the file type from the kind of that first symbol.
    switch (first_top_level->kind)
    {
    case protocol::SymbolKind::Module:
        metadata.file_type = symbol::TsfFileType::kUnit;
        spdlog::trace("File type inferred as Unit from first symbol: '{}'", first_top_level->name);
        break;
    case protocol::SymbolKind::Class:
        metadata.file_type = symbol::TsfFileType::kClass;
        spdlog::trace("File type inferred as Class from first symbol: '{}'", first_top_level->name);
        break;
    case protocol::SymbolKind::Function:
        metadata.file_type = symbol::TsfFileType::kFunction;
        spdlog::trace("File type inferred as Function from first symbol: '{}'", first_top_level->name);
        break;
    default:
        metadata.file_type = symbol::TsfFileType::kScript;
        spdlog::trace("File type inferred as Script (default) from first symbol: '{}'", first_top_level->name);
        break;
    }

    spdlog::debug("Inferred file metadata: type={}, symbol='{}', kind={}, location={}:{}",
                  static_cast<int>(metadata.file_type),
                  metadata.primary_symbol,
                  static_cast<int>(metadata.primary_kind),
                  first_top_level->location.start_line,
                  first_top_level->location.start_column);
    return metadata;
}
// Handler for DocumentOpend events: parses the opened document from scratch
// with the member parser, stores the resulting tree under the document URI
// and publishes DocumentParsed carrying the document item and the raw tree
// pointer. A failed parse is logged and nothing is stored or published.
void Parser::OnDocumentOpened(const events::DocumentOpend& event)
{
    const auto& document = event.textDocument;

    TSTree* parsed = parser_.Parse(document.text.c_str(), document.text.length());
    if (parsed == nullptr)
    {
        spdlog::error("Failed to parsed document: {}", document.uri);
        return;
    }

    // The manager takes ownership of the tree; the event only carries a
    // non-owning pointer to it.
    syntax_tree_manager_.StoreTree(document.uri, std::make_unique<SyntaxTree>(parsed));
    event_bus_->Publish(events::DocumentParsed{
        .item = document,
        .tree = parsed });
    spdlog::debug("Successfully parsed document: {}", document.uri);
}
// Handler for DocumentChanged events: reparses the document incrementally.
//
// If a tree is already stored for the URI, every change in the event is first
// applied to it via SyntaxTree::ApplyEdit, and the edited tree is passed to
// the parser as the old tree. Without a stored tree the document is parsed
// from scratch. On success the new tree replaces the stored one and
// DocumentReparsed is published with a copy of the new text and a non-owning
// pointer to the new tree. A failed parse is only logged.
//
// NOTE(review): every change is converted to byte offsets against the final
// event.content. Confirm this is correct when an event carries several
// changes, or a change that adds or removes lines.
void Parser::OnDocumentChanged(const events::DocumentChanged& event)
{
    SyntaxTree* syntax_tree = syntax_tree_manager_.GetTree(event.uri);
    TSTree* old_tree = syntax_tree ? syntax_tree->Get() : nullptr;

    // Apply the incremental edits; a non-null old_tree implies a non-null
    // syntax_tree.
    if (old_tree)
    {
        for (const auto& change : event.changes)
            syntax_tree->ApplyEdit(change, event.content);
    }

    // Incremental parse.
    TSTree* tree = parser_.Parse(event.content.c_str(), event.content.length(), old_tree);
    if (!tree)
    {
        spdlog::error("Failed to reparse document: {}", event.uri);
        return;
    }

    // Storing the new tree replaces the previous SyntaxTree, which releases
    // old_tree; old_tree must not be used past this point.
    syntax_tree_manager_.StoreTree(event.uri, std::make_unique<SyntaxTree>(tree));

    // `event` is a const reference, so std::move on event.content could only
    // ever copy; the copy is written out plainly instead.
    event_bus_->Publish(events::DocumentReparsed{
        .item{ .uri = event.uri, .languageId = "", .version = event.version, .text = event.content },
        .tree = tree,
    });
    spdlog::debug("Document reparsed successfully: {}", event.uri);
}
// Handler for DocumentClosed events: drops the syntax tree stored for the
// closed document and logs the removal.
void Parser::OnDocumentClosed(const events::DocumentClosed& event)
{
    const auto& closed_uri = event.textDocument.uri;
    syntax_tree_manager_.RemoveTree(closed_uri);
    spdlog::debug("Removed syntax tree for: {}", closed_uri);
}
}