tsl-devkit/lsp-server/src/manager/symbol.cppm

856 lines
28 KiB
C++

module;
export module lsp.manager.symbol;
import tree_sitter;
import spdlog;
import std;
import lsp.protocol;
import lsp.manager.event_bus;
import lsp.manager.events;
import lsp.language.ast;
import lsp.language.symbol;
import lsp.language.semantic;
import lsp.utils.string;
export namespace lsp::manager
{
// Owns the symbol tables and semantic models built for three groups of files
// (system library, workspace, and documents currently being edited) and keeps
// a name-keyed index of their top-level functions, classes and modules.
// Every member function that touches the stores takes `mutex_` (shared for
// reads, exclusive for writes).
class Symbol
{
public:
// Identifies which store an indexed symbol was taken from.
enum class SymbolSource
{
kEditing,
kWorkspace,
kSystem
};
// One entry of the name index: where the symbol lives and how to find it
// again inside that file's symbol table.
struct IndexedSymbol
{
protocol::DocumentUri uri;
std::string name;
protocol::SymbolKind kind;
SymbolSource source;
language::symbol::SymbolId id;
};
// Subscribes to DocumentParsed / DocumentReparsed / DocumentClosed on `event_bus`.
explicit Symbol(EventBus& event_bus);
~Symbol();
// Recursively scans `lib_path` for `.tsf` files and replaces the system store.
void LoadSystemLibrary(const std::string& lib_path);
// Recursively scans the directory behind `workspace_uri` for `.tsf`/`.tsl`
// files and replaces the workspace store.
void LoadWorkspace(const protocol::DocumentUri& workspace_uri);
// Re-reads the given files from disk; files that are missing or cannot be
// indexed are dropped from the workspace store.
void IndexWorkspaceFiles(const std::vector<protocol::DocumentUri>& uris);
// Drops the given files from the workspace store.
void RemoveWorkspaceFiles(const std::vector<protocol::DocumentUri>& uris);
// For each (old, new) pair: drops the old entry and indexes the new file.
void RenameWorkspaceFiles(const std::vector<std::pair<protocol::DocumentUri, protocol::DocumentUri>>& files);
// Both lookups search the editing store first, then workspace, then system,
// and return nullptr when `uri` is in none of them. The lock is released on
// return, so the pointer is only valid until the entry is replaced or erased.
const language::symbol::SymbolTable* GetSymbolTable(const protocol::DocumentUri& uri) const;
const language::semantic::SemanticModel* GetSemanticModel(const protocol::DocumentUri& uri) const;
// Snapshot of raw pointers to every table in the respective store (same
// lifetime caveat as above).
std::vector<const language::symbol::SymbolTable*> GetWorkspaceSymbolTables() const;
std::vector<const language::symbol::SymbolTable*> GetSystemSymbolTables() const;
// Returns copies of all indexed symbols of `kind`, optionally restricted to one source.
std::vector<IndexedSymbol> QueryIndexedSymbols(protocol::SymbolKind kind, std::optional<SymbolSource> source = std::nullopt) const;
private:
// Event handlers registered in the constructor.
void OnDocumentParsed(const events::DocumentParsed& event);
void OnDocumentReparsed(const events::DocumentReparsed& event);
void OnDocumentClosed(const events::DocumentClosed& event);
// Everything produced while analysing an open document. The deserializer and
// AST are stored alongside the symbol table and semantic model.
// NOTE(review): presumably the table/model refer back into the AST, which is
// why they are kept together -- confirm against language::symbol::Builder.
struct DocumentAnalysis
{
protocol::DocumentUri uri;
protocol::integer version;
std::unique_ptr<language::ast::Deserializer> deserializer;
std::unique_ptr<language::ast::Program> ast;
std::unique_ptr<language::symbol::SymbolTable> symbol_table;
std::unique_ptr<language::semantic::SemanticModel> semantic_model;
};
// Per-file data kept for system and workspace files (no AST is retained).
struct StoredSymbolEntry
{
std::unique_ptr<language::symbol::SymbolTable> symbol_table;
std::unique_ptr<language::semantic::SemanticModel> semantic_model;
};
// Clears and refills `index_by_name_` from all three stores. Takes no lock
// itself; every caller in this file holds `mutex_` exclusively.
void RebuildIndex();
// Adds the top-level Function/Class/Module symbols of `table` to the index.
void AddTableToIndex(const language::symbol::SymbolTable& table, const protocol::DocumentUri& uri, SymbolSource source);
// True when `id` is registered in the global scope of `table`.
bool IsTopLevelSymbol(const language::symbol::SymbolTable& table, language::symbol::SymbolId id) const;
// Stores keyed by document URI.
std::unordered_map<std::string, StoredSymbolEntry> system_symbols_;
std::unordered_map<std::string, StoredSymbolEntry> workspace_symbols_;
std::unordered_map<protocol::DocumentUri, DocumentAnalysis> editing_symbols_;
// Index keyed by lower-cased symbol name.
// NOTE(review): IHasher/IEqualTo look like case-insensitive functors -- confirm in lsp.utils.string.
std::unordered_map<std::string, std::vector<IndexedSymbol>, utils::IHasher, utils::IEqualTo> index_by_name_;
EventBus& event_bus_;
// Guards the three stores and the index.
mutable std::shared_mutex mutex_;
};
}
namespace lsp::manager
{
extern "C" const TSLanguage* tree_sitter_tsf(void);
namespace
{
// Converts a filesystem path into a `file://` URI string.
//
// The path is made absolute and rendered with forward slashes; a leading '/'
// is added when the result does not already start with one (e.g. a Windows
// drive path). No percent-encoding is applied to the path characters.
std::string PathToUri(const std::filesystem::path& path)
{
    std::string location = std::filesystem::absolute(path).generic_string();
#ifdef _WIN32
    for (char& ch : location)
    {
        if (ch == '\\')
            ch = '/';
    }
#endif
    const bool rooted = !location.empty() && location.front() == '/';

    std::string uri = "file://";
    if (!rooted)
        uri += '/';
    uri += location;
    return uri;
}
// Converts a `file://` URI (or a plain path) into a native filesystem path.
//
// Steps:
//   1. strip a leading "file://" if present;
//   2. on Windows, drop the leading '/' in front of the drive letter and
//      switch to backslashes;
//   3. percent-decode `%XX` escapes.
//
// Decoding never throws: a '%' that is not followed by two hexadecimal digits
// is copied through unchanged. A literal '+' is also kept as-is -- "+ means
// space" belongs to form encoding, not to URI paths, and rewriting it would
// corrupt names such as "c++".
std::string UriToPath(const std::string& uri)
{
    constexpr std::string_view kScheme = "file://";

    std::string path = uri;
    if (path.compare(0, kScheme.size(), kScheme) == 0)
        path.erase(0, kScheme.size());
#ifdef _WIN32
    if (!path.empty() && path[0] == '/')
        path = path.substr(1);
    std::replace(path.begin(), path.end(), '/', '\\');
#endif

    // Value of one hexadecimal digit, or -1 when `ch` is not a hex digit.
    const auto hex_value = [](char ch) -> int {
        if (ch >= '0' && ch <= '9')
            return ch - '0';
        if (ch >= 'a' && ch <= 'f')
            return ch - 'a' + 10;
        if (ch >= 'A' && ch <= 'F')
            return ch - 'A' + 10;
        return -1;
    };

    std::string decoded;
    decoded.reserve(path.size());
    for (std::size_t i = 0; i < path.size(); ++i)
    {
        if (path[i] == '%' && i + 2 < path.size())
        {
            const int high = hex_value(path[i + 1]);
            const int low = hex_value(path[i + 2]);
            if (high >= 0 && low >= 0)
            {
                decoded.push_back(static_cast<char>((high << 4) | low));
                i += 2; // skip the two digits just consumed
                continue;
            }
        }
        decoded.push_back(path[i]);
    }
    return decoded;
}
// Classification of a file by its extension.
enum class TslFileKind
{
    kLibraryTsf, // extension is ".tsf"
    kScriptTsl,  // extension is ".tsl"
    kOther,      // no extension, or any other extension
};

// Classifies `path` by its extension, compared case-insensitively.
TslFileKind GetTslFileKind(const std::filesystem::path& path)
{
    if (!path.has_extension())
        return TslFileKind::kOther;

    std::string lowered = path.extension().string();
    for (char& ch : lowered)
        ch = static_cast<char>(std::tolower(static_cast<unsigned char>(ch)));

    if (lowered == ".tsl")
        return TslFileKind::kScriptTsl;
    return lowered == ".tsf" ? TslFileKind::kLibraryTsf : TslFileKind::kOther;
}
std::unique_ptr<language::symbol::SymbolTable> BuildSymbolTableFromFile(
const std::filesystem::path& file_path)
{
if (GetTslFileKind(file_path) == TslFileKind::kOther)
return nullptr;
std::ifstream file(file_path, std::ios::binary);
if (!file.is_open())
{
spdlog::warn("Failed to open symbol file: {}", file_path.string());
return nullptr;
}
std::string content((std::istreambuf_iterator<char>(file)), std::istreambuf_iterator<char>());
auto parser_deleter = [](TSParser* parser) {
if (parser)
ts_parser_delete(parser);
};
std::unique_ptr<TSParser, decltype(parser_deleter)> parser(ts_parser_new(), parser_deleter);
if (!parser || !ts_parser_set_language(parser.get(), tree_sitter_tsf()))
{
spdlog::error("Failed to create tree-sitter parser for file: {}", file_path.string());
return nullptr;
}
TSTree* tree_handle = ts_parser_parse_string(parser.get(), nullptr, content.c_str(), content.length());
if (!tree_handle)
{
spdlog::warn("tree-sitter failed to parse file: {}", file_path.string());
return nullptr;
}
auto tree_deleter = [](TSTree* tree) {
if (tree)
ts_tree_delete(tree);
};
std::unique_ptr<TSTree, decltype(tree_deleter)> tree(tree_handle, tree_deleter);
language::ast::Deserializer deserializer;
auto ast_result = deserializer.Parse(ts_tree_root_node(tree.get()), content);
if (!ast_result.root)
{
spdlog::warn("Failed to deserialize AST for file: {}", file_path.string());
return nullptr;
}
auto symbol_table = std::make_unique<language::symbol::SymbolTable>();
try
{
language::symbol::Builder builder(*symbol_table);
builder.Build(*ast_result.root);
}
catch (const std::exception& e)
{
spdlog::error("Exception building symbol table for {}: {}", file_path.string(), e.what());
return nullptr;
}
return symbol_table;
}
// Returns true when `table` defines a Function, Class or Module whose name
// equals `stem` according to utils::IEquals.
// Note: this walks `all_definitions()`; it does not check which scope a
// definition belongs to.
bool HasMatchingTopLevelSymbol(const language::symbol::SymbolTable& table, const std::string& stem)
{
    const auto is_unit_kind = [](protocol::SymbolKind kind) {
        return kind == protocol::SymbolKind::Function ||
               kind == protocol::SymbolKind::Class ||
               kind == protocol::SymbolKind::Module;
    };

    for (const auto& definition : table.all_definitions())
    {
        const auto& candidate = definition.get();
        if (is_unit_kind(candidate.kind()) && utils::IEquals(candidate.name(), stem))
            return true;
    }
    return false;
}
// Builds a human-readable, comma-separated summary of the Function / Class /
// Module definitions in `table`, e.g. "function:Foo, class:Bar, unit:Baz".
// Returns "<none>" when the table has no such definitions. Used for logging.
std::string DescribeTopLevelSymbols(const language::symbol::SymbolTable& table)
{
    std::string summary;
    for (const auto& definition : table.all_definitions())
    {
        const auto& entry = definition.get();
        const auto kind = entry.kind();

        const char* label = nullptr;
        if (kind == protocol::SymbolKind::Function)
            label = "function:";
        else if (kind == protocol::SymbolKind::Class)
            label = "class:";
        else if (kind == protocol::SymbolKind::Module)
            label = "unit:";

        if (label == nullptr)
            continue;

        // Every appended part is non-empty, so a non-empty summary means
        // at least one part precedes this one.
        if (!summary.empty())
            summary += ", ";
        summary += label;
        summary += entry.name();
    }

    if (summary.empty())
        return "<none>";
    return summary;
}
}
// Stores the bus reference and registers the three document event handlers.
// Each handler captures `this`, so the bus must not invoke them after this
// object has been destroyed.
Symbol::Symbol(EventBus& event_bus) : event_bus_(event_bus)
{
    event_bus_.Subscribe<events::DocumentParsed>(
        [this](const events::DocumentParsed& parsed) { OnDocumentParsed(parsed); });
    event_bus_.Subscribe<events::DocumentReparsed>(
        [this](const events::DocumentReparsed& reparsed) { OnDocumentReparsed(reparsed); });
    event_bus_.Subscribe<events::DocumentClosed>(
        [this](const events::DocumentClosed& closed) { OnDocumentClosed(closed); });
}
// Defaulted: only the members are destroyed. The handlers registered on the
// event bus in the constructor are not removed here.
// NOTE(review): if the EventBus can outlive this object, those handlers hold a
// dangling `this` -- confirm the ownership/lifetime contract with EventBus.
Symbol::~Symbol() = default;
// Recursively scans `lib_path` for `.tsf` library files, builds a symbol table
// and semantic model for each, and replaces the whole system store with the
// result before rebuilding the name index.
//
// Filesystem access uses the non-throwing std::error_code overloads: a path
// that is not a directory, or a traversal error part-way through (for example
// a symlink loop), is logged instead of letting std::filesystem_error escape.
// If the scan cannot be started the existing store is left untouched; if it
// stops early, the files indexed so far are still committed.
void Symbol::LoadSystemLibrary(const std::string& lib_path)
{
    spdlog::info("Loading system library from: {}", lib_path);
    const auto start = std::chrono::steady_clock::now();

    std::error_code scan_ec;
    if (!std::filesystem::exists(lib_path, scan_ec))
    {
        spdlog::warn("System library path does not exist: {}", lib_path);
        return;
    }

    const auto options = std::filesystem::directory_options::follow_directory_symlink |
                         std::filesystem::directory_options::skip_permission_denied;
    std::filesystem::recursive_directory_iterator it(lib_path, options, scan_ec);
    if (scan_ec)
    {
        spdlog::warn("Failed to scan system library path {}: {}", lib_path, scan_ec.message());
        return;
    }

    size_t loaded = 0;
    size_t failed = 0;
    std::unordered_map<std::string, StoredSymbolEntry> new_symbols;

    const std::filesystem::recursive_directory_iterator end;
    for (; !scan_ec && it != end; it.increment(scan_ec))
    {
        const auto& entry = *it;

        std::error_code entry_ec;
        if (!entry.is_regular_file(entry_ec))
            continue;
        // System library only accepts `.tsf` as a library unit. `.tsl` is a script and should be ignored here.
        if (GetTslFileKind(entry.path()) != TslFileKind::kLibraryTsf)
            continue;

        spdlog::trace("Indexing library file: {}", entry.path().string());
        auto table = BuildSymbolTableFromFile(entry.path());
        if (!table)
        {
            spdlog::trace("Failed to build symbol table for: {}", entry.path().string());
            ++failed;
            continue;
        }

        // A mismatch between file name and top-level symbol is only reported
        // for system files; the file is still indexed.
        const auto stem = entry.path().stem().string();
        if (!HasMatchingTopLevelSymbol(*table, stem))
        {
            spdlog::debug("Indexing system file {} with unmatched top-level symbol (stem='{}', top-level={})",
                          entry.path().string(),
                          stem,
                          DescribeTopLevelSymbols(*table));
        }

        StoredSymbolEntry stored;
        stored.symbol_table = std::move(table);
        stored.semantic_model = std::make_unique<language::semantic::SemanticModel>(*stored.symbol_table);
        new_symbols[PathToUri(entry.path())] = std::move(stored);
        ++loaded;
    }
    if (scan_ec)
    {
        spdlog::warn("System library scan of {} stopped early: {}", lib_path, scan_ec.message());
    }

    // All parsing happened above without the lock; only the swap and the
    // index rebuild run under it.
    {
        std::unique_lock<std::shared_mutex> lock(mutex_);
        system_symbols_ = std::move(new_symbols);
        RebuildIndex();
    }

    const auto duration = std::chrono::duration_cast<std::chrono::milliseconds>(
                              std::chrono::steady_clock::now() - start)
                              .count();
    spdlog::info("System library loaded: {} files, {} failed, {}ms",
                 loaded,
                 failed,
                 duration);
}
// Recursively scans the directory behind `workspace_uri` for `.tsf` and `.tsl`
// files, builds a symbol table and semantic model for each, and replaces the
// whole workspace store with the result before rebuilding the name index.
//
// Filesystem access uses the non-throwing std::error_code overloads: a path
// that is not a directory, or a traversal error part-way through (for example
// a symlink loop), is logged instead of letting std::filesystem_error escape.
// If the scan cannot be started the existing store is left untouched; if it
// stops early, the files indexed so far are still committed.
void Symbol::LoadWorkspace(const protocol::DocumentUri& workspace_uri)
{
    const auto workspace_path = UriToPath(workspace_uri);
    spdlog::info("Loading workspace from: {}", workspace_path);
    const auto start = std::chrono::steady_clock::now();

    std::error_code scan_ec;
    if (!std::filesystem::exists(workspace_path, scan_ec))
    {
        spdlog::warn("Workspace path does not exist: {}", workspace_path);
        return;
    }

    const auto options = std::filesystem::directory_options::follow_directory_symlink |
                         std::filesystem::directory_options::skip_permission_denied;
    std::filesystem::recursive_directory_iterator it(workspace_path, options, scan_ec);
    if (scan_ec)
    {
        spdlog::warn("Failed to scan workspace path {}: {}", workspace_path, scan_ec.message());
        return;
    }

    size_t loaded = 0;
    size_t failed = 0;
    std::unordered_map<std::string, StoredSymbolEntry> new_symbols;

    const std::filesystem::recursive_directory_iterator end;
    for (; !scan_ec && it != end; it.increment(scan_ec))
    {
        const auto& entry = *it;

        std::error_code entry_ec;
        if (!entry.is_regular_file(entry_ec))
            continue;
        const auto kind = GetTslFileKind(entry.path());
        if (kind == TslFileKind::kOther)
            continue;

        auto table = BuildSymbolTableFromFile(entry.path());
        if (!table)
        {
            ++failed;
            continue;
        }

        const auto stem = entry.path().stem().string();
        // Only `.tsf` is a library unit that must match the file name.
        // `.tsl` is a script and should not be forced to have a top-level symbol.
        if (kind == TslFileKind::kLibraryTsf && !HasMatchingTopLevelSymbol(*table, stem))
        {
            spdlog::warn("Skipping workspace file {}: top-level symbol does not match file name", entry.path().string());
            ++failed;
            continue;
        }

        StoredSymbolEntry stored;
        stored.symbol_table = std::move(table);
        stored.semantic_model = std::make_unique<language::semantic::SemanticModel>(*stored.symbol_table);
        new_symbols[PathToUri(entry.path())] = std::move(stored);
        ++loaded;
    }
    if (scan_ec)
    {
        spdlog::warn("Workspace scan of {} stopped early: {}", workspace_path, scan_ec.message());
    }

    // All parsing happened above without the lock; only the swap and the
    // index rebuild run under it.
    {
        std::unique_lock<std::shared_mutex> lock(mutex_);
        workspace_symbols_ = std::move(new_symbols);
        RebuildIndex();
    }

    const auto duration = std::chrono::duration_cast<std::chrono::milliseconds>(
                              std::chrono::steady_clock::now() - start)
                              .count();
    spdlog::info("Workspace loaded: {} files, {} failed, {}ms",
                 loaded,
                 failed,
                 duration);
}
// Re-indexes individual workspace files from disk.
//
// For every `.tsf`/`.tsl` URI: if the file exists, parses, and (for `.tsf`)
// has a top-level symbol matching its file name, its entry is replaced;
// otherwise its entry is scheduled for removal. Other extensions are ignored.
// Parsing happens without the lock; updates are applied first, then removals,
// then the name index is rebuilt.
void Symbol::IndexWorkspaceFiles(const std::vector<protocol::DocumentUri>& uris)
{
    std::unordered_map<std::string, StoredSymbolEntry> updates;
    std::vector<std::string> removals;
    updates.reserve(uris.size());
    removals.reserve(uris.size());

    for (const auto& uri : uris)
    {
        const std::filesystem::path file_path(UriToPath(uri));
        const auto kind = GetTslFileKind(file_path);
        if (kind == TslFileKind::kOther)
            continue;

        // Key under which the file is stored in workspace_symbols_.
        std::string key = PathToUri(file_path);

        std::unique_ptr<language::symbol::SymbolTable> table;
        if (std::filesystem::exists(file_path))
            table = BuildSymbolTableFromFile(file_path);

        if (table && kind == TslFileKind::kLibraryTsf &&
            !HasMatchingTopLevelSymbol(*table, file_path.stem().string()))
        {
            spdlog::warn("Skipping workspace file {}: top-level symbol does not match file name", file_path.string());
            table.reset();
        }

        if (!table)
        {
            removals.push_back(std::move(key));
            continue;
        }

        StoredSymbolEntry stored;
        stored.symbol_table = std::move(table);
        stored.semantic_model = std::make_unique<language::semantic::SemanticModel>(*stored.symbol_table);
        updates.insert_or_assign(std::move(key), std::move(stored));
    }

    if (updates.empty() && removals.empty())
        return;

    std::unique_lock<std::shared_mutex> lock(mutex_);
    for (auto& [key, stored] : updates)
        workspace_symbols_.insert_or_assign(key, std::move(stored));
    for (const auto& key : removals)
        workspace_symbols_.erase(key);
    RebuildIndex();
}
// Drops the given files from the workspace store and rebuilds the name index.
// URIs whose extension is neither `.tsf` nor `.tsl` are ignored; if nothing
// remains to remove the call is a no-op.
void Symbol::RemoveWorkspaceFiles(const std::vector<protocol::DocumentUri>& uris)
{
    std::vector<std::string> keys;
    keys.reserve(uris.size());
    for (const auto& uri : uris)
    {
        const std::filesystem::path file_path(UriToPath(uri));
        if (GetTslFileKind(file_path) != TslFileKind::kOther)
            keys.push_back(PathToUri(file_path)); // same normalisation as used when storing
    }

    // Covers both an empty input list and a list with no relevant files.
    if (keys.empty())
        return;

    std::unique_lock<std::shared_mutex> lock(mutex_);
    for (const auto& key : keys)
        workspace_symbols_.erase(key);
    RebuildIndex();
}
// Applies a batch of file renames to the workspace store.
//
// For each (old, new) pair the old entry is scheduled for removal (when it is
// a `.tsf`/`.tsl` file) and the new file is indexed from disk under the same
// rules as IndexWorkspaceFiles. Under the lock, removals are applied before
// updates, so a URI that is both removed and updated ends up present.
void Symbol::RenameWorkspaceFiles(const std::vector<std::pair<protocol::DocumentUri, protocol::DocumentUri>>& files)
{
    if (files.empty())
        return;

    std::unordered_map<std::string, StoredSymbolEntry> updates;
    std::vector<std::string> removals;
    updates.reserve(files.size());
    removals.reserve(files.size());

    for (const auto& [old_uri, new_uri] : files)
    {
        const std::filesystem::path source_path(UriToPath(old_uri));
        if (GetTslFileKind(source_path) != TslFileKind::kOther)
            removals.push_back(PathToUri(source_path));

        const std::filesystem::path target_path(UriToPath(new_uri));
        const auto kind = GetTslFileKind(target_path);
        if (kind == TslFileKind::kOther)
            continue;

        std::string key = PathToUri(target_path);

        std::unique_ptr<language::symbol::SymbolTable> table;
        if (std::filesystem::exists(target_path))
            table = BuildSymbolTableFromFile(target_path);

        if (table && kind == TslFileKind::kLibraryTsf &&
            !HasMatchingTopLevelSymbol(*table, target_path.stem().string()))
        {
            spdlog::warn("Skipping workspace file {}: top-level symbol does not match file name", target_path.string());
            table.reset();
        }

        if (!table)
        {
            // The new file could not be indexed; drop any stale entry for it.
            removals.push_back(std::move(key));
            continue;
        }

        StoredSymbolEntry stored;
        stored.symbol_table = std::move(table);
        stored.semantic_model = std::make_unique<language::semantic::SemanticModel>(*stored.symbol_table);
        updates.insert_or_assign(std::move(key), std::move(stored));
    }

    if (updates.empty() && removals.empty())
        return;

    std::unique_lock<std::shared_mutex> lock(mutex_);
    for (const auto& key : removals)
        workspace_symbols_.erase(key);
    for (auto& [key, stored] : updates)
        workspace_symbols_.insert_or_assign(key, std::move(stored));
    RebuildIndex();
}
// Returns the symbol table stored for `uri`, or nullptr when there is none.
// Lookup order: editing store, then workspace store, then system store.
// The shared lock is released on return; the pointer stays valid only until
// the owning entry is replaced or erased.
const language::symbol::SymbolTable* Symbol::GetSymbolTable(
    const protocol::DocumentUri& uri) const
{
    std::shared_lock<std::shared_mutex> guard(mutex_);

    const auto editing = editing_symbols_.find(uri);
    if (editing != editing_symbols_.end())
        return editing->second.symbol_table.get();

    const auto workspace = workspace_symbols_.find(uri);
    if (workspace != workspace_symbols_.end())
        return workspace->second.symbol_table.get();

    const auto system = system_symbols_.find(uri);
    if (system != system_symbols_.end())
        return system->second.symbol_table.get();

    return nullptr;
}
// Returns the semantic model stored for `uri`, or nullptr when there is none.
// Lookup order: editing store, then workspace store, then system store.
// The shared lock is released on return; the pointer stays valid only until
// the owning entry is replaced or erased.
const language::semantic::SemanticModel* Symbol::GetSemanticModel(
    const protocol::DocumentUri& uri) const
{
    std::shared_lock<std::shared_mutex> guard(mutex_);

    const auto editing = editing_symbols_.find(uri);
    if (editing != editing_symbols_.end())
        return editing->second.semantic_model.get();

    const auto workspace = workspace_symbols_.find(uri);
    if (workspace != workspace_symbols_.end())
        return workspace->second.semantic_model.get();

    const auto system = system_symbols_.find(uri);
    if (system != system_symbols_.end())
        return system->second.semantic_model.get();

    return nullptr;
}
std::vector<const language::symbol::SymbolTable*> Symbol::GetWorkspaceSymbolTables() const
{
std::shared_lock<std::shared_mutex> lock(mutex_);
std::vector<const language::symbol::SymbolTable*> result;
result.reserve(workspace_symbols_.size());
for (const auto& [uri, entry] : workspace_symbols_)
{
(void)uri;
result.push_back(entry.symbol_table.get());
}
return result;
}
std::vector<const language::symbol::SymbolTable*> Symbol::GetSystemSymbolTables() const
{
std::shared_lock<std::shared_mutex> lock(mutex_);
std::vector<const language::symbol::SymbolTable*> result;
result.reserve(system_symbols_.size());
for (const auto& [uri, entry] : system_symbols_)
{
(void)uri;
result.push_back(entry.symbol_table.get());
}
return result;
}
// Returns copies of every indexed symbol whose kind equals `kind`. When
// `source` holds a value, only symbols from that source are returned.
std::vector<Symbol::IndexedSymbol> Symbol::QueryIndexedSymbols(protocol::SymbolKind kind, std::optional<SymbolSource> source) const
{
    std::shared_lock<std::shared_mutex> guard(mutex_);

    const auto accepted = [&](const IndexedSymbol& candidate) {
        if (candidate.kind != kind)
            return false;
        return !source.has_value() || candidate.source == *source;
    };

    std::vector<IndexedSymbol> matches;
    for (const auto& bucket : index_by_name_)
    {
        for (const auto& candidate : bucket.second)
        {
            if (accepted(candidate))
                matches.push_back(candidate);
        }
    }
    return matches;
}
// Handles a freshly parsed document: deserializes the AST from the
// tree-sitter tree, builds the symbol table, runs semantic analysis, then
// stores the result in the editing store and rebuilds the name index.
//
// A null tree or a failed deserialization is logged and leaves any previous
// entry for the document in place. std::exception is caught and logged.
void Symbol::OnDocumentParsed(const events::DocumentParsed& event)
{
    const auto& document = event.item;
    if (!event.tree)
    {
        spdlog::warn("Received null tree for document: {}", document.uri);
        return;
    }

    try
    {
        DocumentAnalysis pending;
        pending.uri = document.uri;
        pending.version = document.version;
        pending.deserializer = std::make_unique<language::ast::Deserializer>();

        auto parsed = pending.deserializer->Parse(ts_tree_root_node(event.tree), document.text);
        if (!parsed.IsSuccess())
        {
            spdlog::error("Failed to deserialize AST for: {}", document.uri);
            return;
        }
        pending.ast = std::move(parsed.root);

        pending.symbol_table = std::make_unique<language::symbol::SymbolTable>();
        language::symbol::Builder table_builder(*pending.symbol_table);
        table_builder.Build(*pending.ast);

        pending.semantic_model = std::make_unique<language::semantic::SemanticModel>(*pending.symbol_table);
        {
            // The analyzer is scoped so it is destroyed before `pending` is moved.
            language::semantic::Analyzer semantic_pass(*pending.symbol_table, *pending.semantic_model);
            semantic_pass.Analyze(*pending.ast);
        }

        {
            std::unique_lock<std::shared_mutex> guard(mutex_);
            editing_symbols_.insert_or_assign(document.uri, std::move(pending));
            RebuildIndex();
        }
        spdlog::debug("Document parsed and symbols built: {}", document.uri);
    }
    catch (const std::exception& error)
    {
        spdlog::error("Exception building symbols for {}: {}",
                      document.uri,
                      error.what());
    }
}
// A re-parse is processed exactly like an initial parse: the event's item and
// tree are repackaged as a DocumentParsed event and forwarded.
void Symbol::OnDocumentReparsed(const events::DocumentReparsed& event)
{
    const events::DocumentParsed forwarded{
        .item = event.item,
        .tree = event.tree
    };
    OnDocumentParsed(forwarded);
}
// Discards the analysis of a closed document and rebuilds the name index.
// The whole handler, including the log call, runs under the exclusive lock.
void Symbol::OnDocumentClosed(const events::DocumentClosed& event)
{
    const auto& closed_uri = event.textDocument.uri;

    std::unique_lock<std::shared_mutex> guard(mutex_);
    editing_symbols_.erase(closed_uri);
    RebuildIndex();
    spdlog::debug("Document closed and symbols removed: {}",
                  closed_uri);
}
// Rebuilds `index_by_name_` from scratch: system files first, then workspace
// files, then documents being edited. Entries without a symbol table are
// skipped. Takes no lock itself; callers in this file hold `mutex_` exclusively.
void Symbol::RebuildIndex()
{
    index_by_name_.clear();

    for (const auto& [uri, stored] : system_symbols_)
    {
        if (stored.symbol_table != nullptr)
            AddTableToIndex(*stored.symbol_table, uri, SymbolSource::kSystem);
    }

    for (const auto& [uri, stored] : workspace_symbols_)
    {
        if (stored.symbol_table != nullptr)
            AddTableToIndex(*stored.symbol_table, uri, SymbolSource::kWorkspace);
    }

    for (const auto& [uri, document] : editing_symbols_)
    {
        if (document.symbol_table != nullptr)
            AddTableToIndex(*document.symbol_table, uri, SymbolSource::kEditing);
    }
}
// Returns true when `id` appears among the symbol ids registered in the
// global scope of `table`; false when it does not or when the table has no
// entry for its global scope.
bool Symbol::IsTopLevelSymbol(const language::symbol::SymbolTable& table, language::symbol::SymbolId id) const
{
    const auto& scope_map = table.scopes().all_scopes();
    const auto global_scope = scope_map.find(table.scopes().global_scope());
    if (global_scope == scope_map.end())
        return false;

    // Each entry of the scope's `symbols` carries a list of symbol ids.
    for (const auto& [entry_name, ids] : global_scope->second.symbols)
    {
        const bool listed = std::any_of(ids.begin(), ids.end(), [&id](const auto& candidate) {
            return candidate == id;
        });
        if (listed)
            return true;
    }
    return false;
}
// Adds every top-level Function, Class and Module definition of `table` to
// `index_by_name_`, recorded as coming from `uri` / `source`. The index key
// is the lower-cased symbol name; the stored entry keeps the original name.
void Symbol::AddTableToIndex(const language::symbol::SymbolTable& table, const protocol::DocumentUri& uri, SymbolSource source)
{
    for (const auto& definition : table.all_definitions())
    {
        const auto& candidate = definition.get();

        // Only these three kinds are indexed.
        switch (candidate.kind())
        {
        case protocol::SymbolKind::Function:
        case protocol::SymbolKind::Class:
        case protocol::SymbolKind::Module:
            break;
        default:
            continue;
        }

        if (!IsTopLevelSymbol(table, candidate.id()))
            continue;

        index_by_name_[utils::ToLower(candidate.name())].push_back(IndexedSymbol{
            .uri = uri,
            .name = candidate.name(),
            .kind = candidate.kind(),
            .source = source,
            .id = candidate.id() });
    }
}
}