// Listing metadata: 391 lines, 14 KiB, C++
#include <algorithm>
#include <cctype>
#include <chrono>
#include <filesystem>
#include <fstream>
#include <iterator>
#include <memory>
#include <mutex>
#include <optional>
#include <shared_mutex>
#include <stdexcept>
#include <string>
#include <utility>

#include <spdlog/spdlog.h>

#include "./utils/text_coordinates.hpp"
#include "../language/ast/deserializer.hpp"
#include "./parser.hpp"
|
||
|
||
namespace lsp::service
|
||
{
|
||
// ============= TreeSitter ================== //
|
||
|
||
/// Creates the underlying tree-sitter parser handle via ts_parser_new().
/// @throws std::runtime_error if ts_parser_new() returns a null handle.
TreeSitter::TreeSitter() :
    parser_(ts_parser_new())
{
    if (parser_ == nullptr)
    {
        throw std::runtime_error("Failed to create tree-sitter parser");
    }
}
|
||
|
||
/// Deletes the parser handle if this object still holds one.
/// A moved-from instance has a null handle and skips the delete.
TreeSitter::~TreeSitter()
{
    if (parser_ == nullptr)
    {
        return;
    }
    ts_parser_delete(parser_);
}
|
||
|
||
TreeSitter::TreeSitter(TreeSitter&& other) noexcept :
|
||
parser_(other.parser_)
|
||
{
|
||
other.parser_ = nullptr;
|
||
}
|
||
|
||
bool TreeSitter::SetLanguage(const TSLanguage* language)
|
||
{
|
||
if (!parser_)
|
||
return false;
|
||
return ts_parser_set_language(parser_, language);
|
||
}
|
||
|
||
/// Parses a text buffer with ts_parser_parse_string().
/// @param content  Pointer to the text to parse.
/// @param length   Number of bytes of `content` to parse.
/// @param old_tree Previous tree forwarded to tree-sitter (may be null).
/// @return The tree returned by tree-sitter, or nullptr when no parser handle
///         is held. This function does not free the returned tree.
TSTree* TreeSitter::Parse(const char* content, size_t length, TSTree* old_tree)
{
    return parser_ ? ts_parser_parse_string(parser_, old_tree, content, length) : nullptr;
}
|
||
|
||
/// Exposes the raw tree-sitter parser handle without transferring ownership.
/// @return The stored handle; null for a moved-from instance.
TSParser* TreeSitter::GetRawParser() const
{
    return parser_;
}
|
||
|
||
// ============= SyntaxTree ================== //
|
||
|
||
/// Wraps a tree-sitter tree; the stored smart pointer is given ts_tree_delete
/// as its deleter, so the tree is freed when this wrapper releases it.
/// @param tree Tree to manage (may be null).
SyntaxTree::SyntaxTree(TSTree* tree) :
    tree_(tree, &ts_tree_delete)
{
}
|
||
|
||
/// Defaulted destructor; cleanup is left to the destructor of the tree_ member.
SyntaxTree::~SyntaxTree() = default;
|
||
|
||
/// Returns the managed tree without giving up ownership.
/// @return Raw pointer held by tree_, or nullptr when no tree is held.
TSTree* SyntaxTree::Get() const
{
    TSTree* const raw_tree = tree_.get();
    return raw_tree;
}
|
||
|
||
/// Records a text edit on the managed tree via ts_tree_edit(), so the tree can
/// afterwards be handed to the parser as the previous tree.
/// Does nothing when no tree is held.
/// @param change  Edit whose range and replacement text describe the change.
/// @param content Text used to convert the range's positions into offsets.
/// NOTE(review): the caller visible in this file passes the document text
/// *after* the change; confirm that resolving `change.range.end` against that
/// text yields the intended old end offset.
void SyntaxTree::ApplyEdit(const protocol::TextDocumentContentChangeEvent& change, const protocol::string& content)
{
    if (!tree_)
    {
        return;
    }

    namespace coords = utils::text_coordinates;

    const protocol::uinteger start_offset = coords::ToOffset(change.range.start, content);
    const protocol::uinteger end_offset = coords::ToOffset(change.range.end, content);

    TSInputEdit edit = {};

    // Offsets: the replaced span is [start, old_end); the inserted text ends at new_end.
    edit.start_byte = start_offset;
    edit.old_end_byte = end_offset;
    edit.new_end_byte = start_offset + change.text.length();

    // The same three boundaries expressed as points.
    edit.start_point = coords::ToPoint(change.range.start);
    edit.old_end_point = coords::ToPoint(change.range.end);
    edit.new_end_point = coords::CalculateEndPoint(change.text, edit.start_point);

    ts_tree_edit(tree_.get(), &edit);
}
|
||
|
||
/// Returns the root node of the managed tree.
/// @return ts_tree_root_node() of the tree, or a value-initialized TSNode when
///         no tree is held.
TSNode SyntaxTree::GetRootNode() const
{
    if (!tree_)
    {
        return TSNode{};
    }
    return ts_tree_root_node(tree_.get());
}
|
||
|
||
// ============= SyntaxTreeManager ================== //
|
||
|
||
/// Stores `tree` under `uri` while holding the mutex exclusively.
/// Any tree previously stored under the same key is replaced by the assignment.
/// @param uri  Document key.
/// @param tree Tree to take ownership of.
void SyntaxTreeManager::StoreTree(const protocol::DocumentUri& uri, std::unique_ptr<SyntaxTree> tree)
{
    std::lock_guard<std::shared_mutex> exclusive(mutex_);
    auto& slot = trees_[uri];
    slot = std::move(tree);
}
|
||
|
||
/// Erases the entry stored under `uri` while holding the mutex exclusively.
/// @param uri Document key to erase.
void SyntaxTreeManager::RemoveTree(const protocol::DocumentUri& uri)
{
    std::lock_guard<std::shared_mutex> exclusive(mutex_);
    trees_.erase(uri);
}
|
||
|
||
/// Looks up the tree stored under `uri` while holding the mutex in shared mode.
/// @param uri Document key.
/// @return Non-owning pointer to the stored tree, or nullptr when absent.
/// NOTE(review): the lock is released on return, so the pointer is not
/// protected against a later StoreTree/RemoveTree/Clear for the same key;
/// confirm callers cannot race with those.
SyntaxTree* SyntaxTreeManager::GetTree(const protocol::DocumentUri& uri)
{
    std::shared_lock<std::shared_mutex> shared(mutex_);
    if (auto entry = trees_.find(uri); entry != trees_.end())
    {
        return entry->second.get();
    }
    return nullptr;
}
|
||
|
||
/// Const overload: looks up the tree stored under `uri` while holding the
/// mutex in shared mode.
/// @param uri Document key.
/// @return Non-owning pointer to the stored tree, or nullptr when absent.
/// NOTE(review): the lock is released on return, so the pointer is not
/// protected against a later StoreTree/RemoveTree/Clear for the same key;
/// confirm callers cannot race with those.
const SyntaxTree* SyntaxTreeManager::GetTree(const protocol::DocumentUri& uri) const
{
    std::shared_lock<std::shared_mutex> shared(mutex_);
    if (auto entry = trees_.find(uri); entry != trees_.end())
    {
        return entry->second.get();
    }
    return nullptr;
}
|
||
|
||
size_t SyntaxTreeManager::GetTreeCount() const
|
||
{
|
||
std::shared_lock<std::shared_mutex> lock(mutex_);
|
||
return trees_.size();
|
||
}
|
||
|
||
void SyntaxTreeManager::Clear()
|
||
{
|
||
std::unique_lock<std::shared_mutex> lock(mutex_);
|
||
trees_.clear();
|
||
}
|
||
|
||
// ============= Parser ================== //
|
||
|
||
/// Configures the shared parser for the TSF grammar and registers the
/// document open/change/close handlers on the event bus.
/// A failure to set the grammar is logged, not thrown.
/// @param event_bus Bus used to subscribe to document events and, later, to
///                  publish parse results. It is dereferenced unconditionally.
/// NOTE(review): the handlers capture `this`; nothing in this constructor
/// unsubscribes them, so confirm the bus cannot invoke them after this object
/// is destroyed.
Parser::Parser(std::shared_ptr<EventBus> event_bus) :
    event_bus_(std::move(event_bus))
{
    const bool language_ready = parser_.SetLanguage(tree_sitter_tsf());
    if (!language_ready)
    {
        spdlog::error("Failed to set tree-sitter language");
    }
    else
    {
        spdlog::info("Set tree-sitter-tsf successfully");
    }

    event_bus_->Subscribe<events::DocumentOpend>(
        [this](const auto& opened) { OnDocumentOpened(opened); });
    event_bus_->Subscribe<events::DocumentChanged>(
        [this](const auto& changed) { OnDocumentChanged(changed); });
    event_bus_->Subscribe<events::DocumentClosed>(
        [this](const auto& closed) { OnDocumentClosed(closed); });
}
|
||
|
||
/// Defaulted destructor; members are destroyed by their own destructors.
Parser::~Parser() = default;
|
||
|
||
/// Exposes the raw tree-sitter handle of the shared parser without
/// transferring ownership.
/// @return Whatever parser_.GetRawParser() returns.
TSParser* Parser::GetRawParser() const
{
    return parser_.GetRawParser();
}
|
||
|
||
/// Returns the raw tree-sitter tree stored for `uri`.
/// @param uri Document key.
/// @return Non-owning tree pointer, or nullptr when no tree is stored for
///         `uri`.
TSTree* Parser::GetTree(const protocol::DocumentUri& uri) const
{
    const SyntaxTree* stored = syntax_tree_manager_.GetTree(uri);
    if (stored == nullptr)
    {
        return nullptr;
    }
    return stored->Get();
}
|
||
|
||
std::optional<symbol::EditingSymbolTable> Parser::ParseTsfFile(const std::string& file_path)
|
||
{
|
||
std::string extension = std::filesystem::path(file_path).extension().string();
|
||
std::transform(extension.begin(), extension.end(), extension.begin(), ::tolower);
|
||
if (extension != ".tsf" && extension != ".tsl")
|
||
return std::nullopt;
|
||
spdlog::debug("Parse tsf file: {}", file_path);
|
||
|
||
// 读取文件
|
||
std::ifstream file(file_path);
|
||
if (!file.is_open())
|
||
{
|
||
spdlog::trace("Cannot open file: {}", file_path);
|
||
return std::nullopt;
|
||
}
|
||
|
||
std::string content((std::istreambuf_iterator<char>(file)), std::istreambuf_iterator<char>());
|
||
file.close();
|
||
|
||
// 创建局部的 parser 和 extractor(线程安全)
|
||
TreeSitter local_parser;
|
||
if (!local_parser.SetLanguage(tree_sitter_tsf()))
|
||
{
|
||
spdlog::error("Failed to set tree-sitter language in ParseTsfFile");
|
||
return std::nullopt;
|
||
}
|
||
|
||
// 解析
|
||
TSTree* tree = local_parser.Parse(content.c_str(), content.length());
|
||
if (!tree)
|
||
{
|
||
spdlog::trace("Failed to parse file: {}", file_path);
|
||
return std::nullopt;
|
||
}
|
||
// 解析
|
||
auto tree_deleter = [](TSTree* t) { ts_tree_delete(t); };
|
||
std::unique_ptr<TSTree, decltype(tree_deleter)> tree_guard(tree, tree_deleter);
|
||
|
||
symbol::EditingSymbolTable table;
|
||
table.uri = "file://" + std::filesystem::absolute(file_path).string();
|
||
table.version = 0;
|
||
table.last_parsed = std::chrono::system_clock::now();
|
||
table.is_dirty = false;
|
||
table.deserializer = std::make_unique<language::ast::Deserializer>();
|
||
table.file_path = std::filesystem::absolute(file_path).string();
|
||
|
||
auto ast_result = table.deserializer->Parse(ts_tree_root_node(tree), content);
|
||
if (ast_result.HasErrors())
|
||
{
|
||
for (const auto& it : ast_result.errors)
|
||
spdlog::warn("Parse Error = {}", it.message);
|
||
}
|
||
if (!ast_result.root)
|
||
{
|
||
spdlog::error("AST root is null for: {}", file_path);
|
||
return std::nullopt;
|
||
}
|
||
table.ast_root = std::move(ast_result.root);
|
||
table.symbol_table = std::make_unique<language::symbol::SymbolTable>();
|
||
|
||
try
|
||
{
|
||
// 使用 Builder 构建符号表
|
||
table.symbol_table->Build(*table.ast_root);
|
||
spdlog::debug("Successfully built symbol table for: {}", file_path);
|
||
}
|
||
catch (const std::exception& e)
|
||
{
|
||
spdlog::error("Failed to build symbol table for {}: {}", file_path, e.what());
|
||
return std::nullopt;
|
||
}
|
||
|
||
auto metadata = InferFileMetadata(*table.symbol_table);
|
||
if (!metadata)
|
||
{
|
||
spdlog::warn("Cannot infer file metadata from symbol table: {}", file_path);
|
||
return std::nullopt;
|
||
}
|
||
table.file_type = metadata->file_type;
|
||
|
||
// 验证TSF文件
|
||
std::string file_stem = std::filesystem::path(file_path).stem().string();
|
||
auto normalize_name = [](std::string s) {
|
||
std::transform(s.begin(), s.end(), s.begin(), ::tolower);
|
||
size_t at_pos = s.find("@");
|
||
if (at_pos != std::string::npos)
|
||
s = s.substr(0, at_pos);
|
||
return s;
|
||
};
|
||
if (normalize_name(file_stem) != normalize_name(metadata->primary_symbol))
|
||
spdlog::warn("File name '{}' doesn't match primary symbol '{}' in: {}", file_stem, metadata->primary_symbol, file_path);
|
||
|
||
spdlog::debug("Successfully parsed file: {} (type: {}, symbol: {})",
|
||
file_path,
|
||
static_cast<int>(metadata->file_type),
|
||
metadata->primary_symbol);
|
||
return table;
|
||
}
|
||
|
||
/// Infers the file type from the earliest top-level symbol in `table`.
///
/// The symbol named "::" (global namespace) is ignored. Among the remaining
/// symbols the one with the smallest (start_byte, start_line, start_column)
/// is taken as the primary symbol; on a full tie the first one encountered
/// wins. Its kind maps to the file type: Module -> kUnit, Class -> kClass,
/// Function -> kFunction, anything else -> kScript.
///
/// @param table Symbol table whose document symbols are inspected.
/// @return The inferred metadata, or std::nullopt when the table has no
///         document symbols or only the "::" symbol.
std::optional<Parser::FileMetadata> Parser::InferFileMetadata(const language::symbol::SymbolTable& table)
{
    auto top_level_symbols = table.GetDocumentSymbols();

    if (top_level_symbols.empty())
    {
        spdlog::warn("No top-level symbol definitions found in symbol table");
        return std::nullopt;
    }

    // Orders symbols by position: byte offset first, then line, then column.
    const auto starts_before = [](const language::symbol::SymbolDefinition& lhs,
                                  const language::symbol::SymbolDefinition& rhs) {
        if (lhs.location.start_byte != rhs.location.start_byte)
            return lhs.location.start_byte < rhs.location.start_byte;
        if (lhs.location.start_line != rhs.location.start_line)
            return lhs.location.start_line < rhs.location.start_line;
        return lhs.location.start_column < rhs.location.start_column;
    };

    // Only the earliest symbol is needed, so track the minimum in one pass
    // instead of collecting and sorting every candidate.
    const language::symbol::SymbolDefinition* first_top_level = nullptr;
    for (const auto* sym : top_level_symbols)
    {
        // Skip the special global-namespace symbol "::".
        if (sym->name == "::")
            continue;
        if (first_top_level == nullptr || starts_before(*sym, *first_top_level))
            first_top_level = sym;
    }

    if (first_top_level == nullptr)
    {
        spdlog::warn("No valid top-level symbols found (only global namespace)");
        return std::nullopt;
    }

    FileMetadata metadata;
    metadata.primary_symbol = first_top_level->name;
    metadata.primary_kind = first_top_level->kind;

    // Infer the file type from the kind of the first real top-level symbol.
    switch (first_top_level->kind)
    {
        case protocol::SymbolKind::Module:
            metadata.file_type = symbol::TsfFileType::kUnit;
            spdlog::trace("File type inferred as Unit from first symbol: '{}'", first_top_level->name);
            break;

        case protocol::SymbolKind::Class:
            metadata.file_type = symbol::TsfFileType::kClass;
            spdlog::trace("File type inferred as Class from first symbol: '{}'", first_top_level->name);
            break;

        case protocol::SymbolKind::Function:
            metadata.file_type = symbol::TsfFileType::kFunction;
            spdlog::trace("File type inferred as Function from first symbol: '{}'", first_top_level->name);
            break;

        default:
            metadata.file_type = symbol::TsfFileType::kScript;
            spdlog::trace("File type inferred as Script (default) from first symbol: '{}'", first_top_level->name);
            break;
    }

    spdlog::debug("Inferred file metadata: type={}, symbol='{}', kind={}, location={}:{}",
                  static_cast<int>(metadata.file_type),
                  metadata.primary_symbol,
                  static_cast<int>(metadata.primary_kind),
                  first_top_level->location.start_line,
                  first_top_level->location.start_column);

    return metadata;
}
|
||
|
||
/// Handles a document-opened event: parses the full text with the shared
/// parser, stores the resulting tree under the document uri and publishes a
/// DocumentParsed event carrying the document and the raw tree pointer.
/// A failed parse is logged and nothing is stored or published.
/// @param event Event carrying the opened document (uri and text).
/// NOTE(review): the published raw tree pointer is owned by the tree stored in
/// syntax_tree_manager_; confirm subscribers do not keep it past a later
/// replacement or removal of that entry.
void Parser::OnDocumentOpened(const events::DocumentOpend& event)
{
    const auto& document = event.textDocument;

    TSTree* tree = parser_.Parse(document.text.c_str(), document.text.length());
    if (tree == nullptr)
    {
        spdlog::error("Failed to parsed document: {}", document.uri);
        return;
    }

    syntax_tree_manager_.StoreTree(document.uri, std::make_unique<SyntaxTree>(tree));
    event_bus_->Publish(events::DocumentParsed{
        .item = document,
        .tree = tree });
    spdlog::debug("Successfully parsed document: {}", document.uri);
}
|
||
|
||
/// Handles a document-changed event: applies the reported edits to the stored
/// tree (if any), reparses the new text using that tree as the previous tree,
/// stores the new tree under the uri and publishes a DocumentReparsed event.
/// When no tree is stored for the uri the text is parsed without a previous
/// tree. A failed parse is logged and nothing is stored or published.
/// @param event Event carrying the uri, version, change list and new content.
/// NOTE(review): every change is applied against `event.content`, i.e. the
/// text after all changes; confirm this is correct for events that carry more
/// than one change.
/// NOTE(review): the stored tree is reached through a pointer obtained after
/// the manager's lock is released; confirm no other thread can replace or
/// remove the entry while this handler runs.
void Parser::OnDocumentChanged(const events::DocumentChanged& event)
{
    SyntaxTree* syntax_tree = syntax_tree_manager_.GetTree(event.uri);
    TSTree* old_tree = syntax_tree ? syntax_tree->Get() : nullptr;

    // Apply the incremental edits to the previous tree.
    if (syntax_tree && old_tree)
    {
        for (const auto& change : event.changes)
        {
            syntax_tree->ApplyEdit(change, event.content);
        }
    }

    // Incremental parse; old_tree stays alive until StoreTree replaces it below.
    TSTree* tree = parser_.Parse(event.content.c_str(), event.content.length(), old_tree);

    if (tree)
    {
        syntax_tree_manager_.StoreTree(event.uri, std::make_unique<SyntaxTree>(tree));

        // `event` is const, so its content can only be copied; the former
        // std::move here was a silent copy as well.
        event_bus_->Publish(events::DocumentReparsed{
            .item{ .uri = event.uri, .languageId = "", .version = event.version, .text = event.content },
            .tree = tree,
        });

        spdlog::debug("Document reparsed successfully: {}", event.uri);
    }
    else
    {
        spdlog::error("Failed to reparse document: {}", event.uri);
    }
}
|
||
|
||
/// Handles a document-closed event by removing the tree stored for the
/// document's uri and logging the removal.
/// @param event Event carrying the closed document's identifier.
void Parser::OnDocumentClosed(const events::DocumentClosed& event)
{
    const auto& closed_uri = event.textDocument.uri;
    syntax_tree_manager_.RemoveTree(closed_uri);
    spdlog::debug("Removed syntax tree for: {}", closed_uri);
}
|
||
|
||
}
|