module; export module lsp.manager.symbol; import tree_sitter; import spdlog; import std; import lsp.protocol; import lsp.manager.event_bus; import lsp.manager.events; import lsp.language.ast; import lsp.language.symbol; import lsp.language.semantic; import lsp.utils.string; export namespace lsp::manager { class Symbol { public: enum class SymbolSource { kEditing, kWorkspace, kSystem }; struct IndexedSymbol { protocol::DocumentUri uri; std::string name; protocol::SymbolKind kind; SymbolSource source; language::symbol::SymbolId id; }; explicit Symbol(EventBus& event_bus); ~Symbol(); void LoadSystemLibrary(const std::string& lib_path); void LoadWorkspace(const protocol::DocumentUri& workspace_uri); void IndexWorkspaceFiles(const std::vector& uris); void RemoveWorkspaceFiles(const std::vector& uris); void RenameWorkspaceFiles(const std::vector>& files); const language::symbol::SymbolTable* GetSymbolTable(const protocol::DocumentUri& uri) const; const language::semantic::SemanticModel* GetSemanticModel(const protocol::DocumentUri& uri) const; std::vector GetWorkspaceSymbolTables() const; std::vector GetSystemSymbolTables() const; std::vector QueryIndexedSymbols(protocol::SymbolKind kind, std::optional source = std::nullopt) const; private: void OnDocumentParsed(const events::DocumentParsed& event); void OnDocumentReparsed(const events::DocumentReparsed& event); void OnDocumentClosed(const events::DocumentClosed& event); struct DocumentAnalysis { protocol::DocumentUri uri; protocol::integer version; std::unique_ptr deserializer; std::unique_ptr ast; std::unique_ptr symbol_table; std::unique_ptr semantic_model; }; struct StoredSymbolEntry { std::unique_ptr symbol_table; std::unique_ptr semantic_model; }; void RebuildIndex(); void AddTableToIndex(const language::symbol::SymbolTable& table, const protocol::DocumentUri& uri, SymbolSource source); bool IsTopLevelSymbol(const language::symbol::SymbolTable& table, language::symbol::SymbolId id) const; std::unordered_map system_symbols_; std::unordered_map workspace_symbols_; std::unordered_map editing_symbols_; std::unordered_map, utils::IHasher, utils::IEqualTo> index_by_name_; EventBus& event_bus_; mutable std::shared_mutex mutex_; }; } namespace lsp::manager { extern "C" const TSLanguage* tree_sitter_tsf(void); namespace { std::string PathToUri(const std::filesystem::path& path) { auto absolute = std::filesystem::absolute(path).generic_string(); #ifdef _WIN32 std::replace(absolute.begin(), absolute.end(), '\\', '/'); #endif if (!absolute.starts_with("/")) absolute = "/" + absolute; return "file://" + absolute; } std::string UriToPath(const std::string& uri) { std::string path = uri; if (path.starts_with("file://")) path = path.substr(7); #ifdef _WIN32 if (!path.empty() && path[0] == '/') path = path.substr(1); std::replace(path.begin(), path.end(), '/', '\\'); #endif // Percent-decoding std::string decoded; decoded.reserve(path.size()); for (size_t i = 0; i < path.size(); ++i) { if (path[i] == '%' && i + 2 < path.size()) { std::string hex = path.substr(i + 1, 2); char ch = static_cast(std::stoi(hex, nullptr, 16)); decoded.push_back(ch); i += 2; } else if (path[i] == '+') { decoded.push_back(' '); } else { decoded.push_back(path[i]); } } return decoded; } enum class TslFileKind { kLibraryTsf, kScriptTsl, kOther, }; TslFileKind GetTslFileKind(const std::filesystem::path& path) { if (!path.has_extension()) { return TslFileKind::kOther; } std::string ext = path.extension().string(); std::transform( ext.begin(), ext.end(), ext.begin(), [](unsigned char ch) { return static_cast(std::tolower(ch)); }); if (ext == ".tsf") { return TslFileKind::kLibraryTsf; } if (ext == ".tsl") { return TslFileKind::kScriptTsl; } return TslFileKind::kOther; } std::unique_ptr BuildSymbolTableFromFile( const std::filesystem::path& file_path) { if (GetTslFileKind(file_path) == TslFileKind::kOther) return nullptr; std::ifstream file(file_path, std::ios::binary); if (!file.is_open()) { spdlog::warn("Failed to open symbol file: {}", file_path.string()); return nullptr; } std::string content((std::istreambuf_iterator(file)), std::istreambuf_iterator()); auto parser_deleter = [](TSParser* parser) { if (parser) ts_parser_delete(parser); }; std::unique_ptr parser(ts_parser_new(), parser_deleter); if (!parser || !ts_parser_set_language(parser.get(), tree_sitter_tsf())) { spdlog::error("Failed to create tree-sitter parser for file: {}", file_path.string()); return nullptr; } TSTree* tree_handle = ts_parser_parse_string(parser.get(), nullptr, content.c_str(), content.length()); if (!tree_handle) { spdlog::warn("tree-sitter failed to parse file: {}", file_path.string()); return nullptr; } auto tree_deleter = [](TSTree* tree) { if (tree) ts_tree_delete(tree); }; std::unique_ptr tree(tree_handle, tree_deleter); language::ast::Deserializer deserializer; auto ast_result = deserializer.Parse(ts_tree_root_node(tree.get()), content); if (!ast_result.root) { spdlog::warn("Failed to deserialize AST for file: {}", file_path.string()); return nullptr; } auto symbol_table = std::make_unique(); try { language::symbol::Builder builder(*symbol_table); builder.Build(*ast_result.root); } catch (const std::exception& e) { spdlog::error("Exception building symbol table for {}: {}", file_path.string(), e.what()); return nullptr; } return symbol_table; } bool HasMatchingTopLevelSymbol(const language::symbol::SymbolTable& table, const std::string& stem) { for (const auto& wrapper : table.all_definitions()) { const auto& symbol = wrapper.get(); switch (symbol.kind()) { case protocol::SymbolKind::Function: case protocol::SymbolKind::Class: case protocol::SymbolKind::Module: if (utils::IEquals(symbol.name(), stem)) return true; break; default: break; } } return false; } std::string DescribeTopLevelSymbols(const language::symbol::SymbolTable& table) { std::vector parts; for (const auto& wrapper : table.all_definitions()) { const auto& symbol = wrapper.get(); switch (symbol.kind()) { case protocol::SymbolKind::Function: parts.push_back("function:" + symbol.name()); break; case protocol::SymbolKind::Class: parts.push_back("class:" + symbol.name()); break; case protocol::SymbolKind::Module: parts.push_back("unit:" + symbol.name()); break; default: break; } } if (parts.empty()) { return ""; } std::string result; for (std::size_t i = 0; i < parts.size(); ++i) { if (i > 0) { result += ", "; } result += parts[i]; } return result; } } Symbol::Symbol(EventBus& event_bus) : event_bus_(event_bus) { event_bus_.Subscribe( [this](const auto& e) { OnDocumentParsed(e); }); event_bus_.Subscribe( [this](const auto& e) { OnDocumentReparsed(e); }); event_bus_.Subscribe( [this](const auto& e) { OnDocumentClosed(e); }); } Symbol::~Symbol() = default; void Symbol::LoadSystemLibrary(const std::string& lib_path) { spdlog::info("Loading system library from: {}", lib_path); auto start = std::chrono::steady_clock::now(); if (!std::filesystem::exists(lib_path)) { spdlog::warn("System library path does not exist: {}", lib_path); return; } size_t loaded = 0; size_t failed = 0; std::unordered_map new_symbols; auto options = std::filesystem::directory_options::follow_directory_symlink | std::filesystem::directory_options::skip_permission_denied; for (const auto& entry : std::filesystem::recursive_directory_iterator(lib_path, options)) { if (!entry.is_regular_file()) continue; // System library only accepts `.tsf` as a library unit. `.tsl` is a script and should be ignored here. if (GetTslFileKind(entry.path()) != TslFileKind::kLibraryTsf) { continue; } spdlog::trace("Indexing library file: {}", entry.path().string()); auto table = BuildSymbolTableFromFile(entry.path()); if (!table) { spdlog::trace("Failed to build symbol table for: {}", entry.path().string()); ++failed; continue; } auto stem = entry.path().stem().string(); if (!HasMatchingTopLevelSymbol(*table, stem)) { spdlog::debug("Indexing system file {} with unmatched top-level symbol (stem='{}', top-level={})", entry.path().string(), stem, DescribeTopLevelSymbols(*table)); } StoredSymbolEntry stored; stored.symbol_table = std::move(table); stored.semantic_model = std::make_unique(*stored.symbol_table); new_symbols[PathToUri(entry.path())] = std::move(stored); ++loaded; } { std::unique_lock lock(mutex_); system_symbols_ = std::move(new_symbols); RebuildIndex(); } auto duration = std::chrono::duration_cast( std::chrono::steady_clock::now() - start) .count(); spdlog::info("System library loaded: {} files, {} failed, {}ms", loaded, failed, duration); } void Symbol::LoadWorkspace(const protocol::DocumentUri& workspace_uri) { auto workspace_path = UriToPath(workspace_uri); spdlog::info("Loading workspace from: {}", workspace_path); auto start = std::chrono::steady_clock::now(); if (!std::filesystem::exists(workspace_path)) { spdlog::warn("Workspace path does not exist: {}", workspace_path); return; } size_t loaded = 0; size_t failed = 0; std::unordered_map new_symbols; auto options = std::filesystem::directory_options::follow_directory_symlink | std::filesystem::directory_options::skip_permission_denied; for (const auto& entry : std::filesystem::recursive_directory_iterator(workspace_path, options)) { if (!entry.is_regular_file()) continue; auto kind = GetTslFileKind(entry.path()); if (kind == TslFileKind::kOther) continue; auto table = BuildSymbolTableFromFile(entry.path()); if (!table) { ++failed; continue; } auto stem = entry.path().stem().string(); // Only `.tsf` is a library unit that must match the file name. // `.tsl` is a script and should not be forced to have a top-level symbol. if (kind == TslFileKind::kLibraryTsf && !HasMatchingTopLevelSymbol(*table, stem)) { spdlog::warn("Skipping workspace file {}: top-level symbol does not match file name", entry.path().string()); ++failed; continue; } StoredSymbolEntry stored; stored.symbol_table = std::move(table); stored.semantic_model = std::make_unique(*stored.symbol_table); new_symbols[PathToUri(entry.path())] = std::move(stored); ++loaded; } { std::unique_lock lock(mutex_); workspace_symbols_ = std::move(new_symbols); RebuildIndex(); } auto duration = std::chrono::duration_cast( std::chrono::steady_clock::now() - start) .count(); spdlog::info("Workspace loaded: {} files, {} failed, {}ms", loaded, failed, duration); } void Symbol::IndexWorkspaceFiles(const std::vector& uris) { std::unordered_map updates; std::vector removals; updates.reserve(uris.size()); removals.reserve(uris.size()); for (const auto& uri : uris) { auto file_path = std::filesystem::path(UriToPath(uri)); auto kind = GetTslFileKind(file_path); if (kind == TslFileKind::kOther) { continue; } auto normalized_uri = PathToUri(file_path); if (!std::filesystem::exists(file_path)) { removals.push_back(std::move(normalized_uri)); continue; } auto table = BuildSymbolTableFromFile(file_path); if (!table) { removals.push_back(std::move(normalized_uri)); continue; } auto stem = file_path.stem().string(); if (kind == TslFileKind::kLibraryTsf && !HasMatchingTopLevelSymbol(*table, stem)) { spdlog::warn("Skipping workspace file {}: top-level symbol does not match file name", file_path.string()); removals.push_back(std::move(normalized_uri)); continue; } StoredSymbolEntry stored; stored.symbol_table = std::move(table); stored.semantic_model = std::make_unique(*stored.symbol_table); updates[normalized_uri] = std::move(stored); } if (updates.empty() && removals.empty()) { return; } { std::unique_lock lock(mutex_); for (auto& [uri, entry] : updates) { workspace_symbols_[uri] = std::move(entry); } for (auto& uri : removals) { workspace_symbols_.erase(uri); } RebuildIndex(); } } void Symbol::RemoveWorkspaceFiles(const std::vector& uris) { if (uris.empty()) { return; } std::vector removals; removals.reserve(uris.size()); for (const auto& uri : uris) { auto file_path = std::filesystem::path(UriToPath(uri)); auto kind = GetTslFileKind(file_path); if (kind == TslFileKind::kOther) { continue; } removals.push_back(PathToUri(file_path)); } if (removals.empty()) { return; } { std::unique_lock lock(mutex_); for (auto& uri : removals) { workspace_symbols_.erase(uri); } RebuildIndex(); } } void Symbol::RenameWorkspaceFiles(const std::vector>& files) { if (files.empty()) { return; } std::unordered_map updates; std::vector removals; updates.reserve(files.size()); removals.reserve(files.size()); for (const auto& [old_uri, new_uri] : files) { auto old_path = std::filesystem::path(UriToPath(old_uri)); if (GetTslFileKind(old_path) != TslFileKind::kOther) { removals.push_back(PathToUri(old_path)); } auto new_path = std::filesystem::path(UriToPath(new_uri)); auto kind = GetTslFileKind(new_path); if (kind == TslFileKind::kOther) { continue; } auto normalized_uri = PathToUri(new_path); if (!std::filesystem::exists(new_path)) { removals.push_back(std::move(normalized_uri)); continue; } auto table = BuildSymbolTableFromFile(new_path); if (!table) { removals.push_back(std::move(normalized_uri)); continue; } auto stem = new_path.stem().string(); if (kind == TslFileKind::kLibraryTsf && !HasMatchingTopLevelSymbol(*table, stem)) { spdlog::warn("Skipping workspace file {}: top-level symbol does not match file name", new_path.string()); removals.push_back(std::move(normalized_uri)); continue; } StoredSymbolEntry stored; stored.symbol_table = std::move(table); stored.semantic_model = std::make_unique(*stored.symbol_table); updates[normalized_uri] = std::move(stored); } if (updates.empty() && removals.empty()) { return; } { std::unique_lock lock(mutex_); for (auto& uri : removals) { workspace_symbols_.erase(uri); } for (auto& [uri, entry] : updates) { workspace_symbols_[uri] = std::move(entry); } RebuildIndex(); } } const language::symbol::SymbolTable* Symbol::GetSymbolTable( const protocol::DocumentUri& uri) const { std::shared_lock lock(mutex_); if (auto it = editing_symbols_.find(uri); it != editing_symbols_.end()) { return it->second.symbol_table.get(); } if (auto it = workspace_symbols_.find(uri); it != workspace_symbols_.end()) { return it->second.symbol_table.get(); } if (auto it = system_symbols_.find(uri); it != system_symbols_.end()) { return it->second.symbol_table.get(); } return nullptr; } const language::semantic::SemanticModel* Symbol::GetSemanticModel( const protocol::DocumentUri& uri) const { std::shared_lock lock(mutex_); if (auto it = editing_symbols_.find(uri); it != editing_symbols_.end()) { return it->second.semantic_model.get(); } if (auto it = workspace_symbols_.find(uri); it != workspace_symbols_.end()) { return it->second.semantic_model.get(); } if (auto it = system_symbols_.find(uri); it != system_symbols_.end()) { return it->second.semantic_model.get(); } return nullptr; } std::vector Symbol::GetWorkspaceSymbolTables() const { std::shared_lock lock(mutex_); std::vector result; result.reserve(workspace_symbols_.size()); for (const auto& [uri, entry] : workspace_symbols_) { (void)uri; result.push_back(entry.symbol_table.get()); } return result; } std::vector Symbol::GetSystemSymbolTables() const { std::shared_lock lock(mutex_); std::vector result; result.reserve(system_symbols_.size()); for (const auto& [uri, entry] : system_symbols_) { (void)uri; result.push_back(entry.symbol_table.get()); } return result; } std::vector Symbol::QueryIndexedSymbols(protocol::SymbolKind kind, std::optional source) const { std::shared_lock lock(mutex_); std::vector result; for (const auto& [_, symbols] : index_by_name_) { for (const auto& item : symbols) { if (item.kind != kind) continue; if (source.has_value() && item.source != *source) continue; result.push_back(item); } } return result; } void Symbol::OnDocumentParsed(const events::DocumentParsed& event) { if (!event.tree) { spdlog::warn("Received null tree for document: {}", event.item.uri); return; } try { DocumentAnalysis analysis; analysis.uri = event.item.uri; analysis.version = event.item.version; analysis.deserializer = std::make_unique(); auto ast_result = analysis.deserializer->Parse( ts_tree_root_node(event.tree), event.item.text); if (!ast_result.IsSuccess()) { spdlog::error("Failed to deserialize AST for: {}", event.item.uri); return; } analysis.ast = std::move(ast_result.root); analysis.symbol_table = std::make_unique(); language::symbol::Builder builder(*analysis.symbol_table); builder.Build(*analysis.ast); analysis.semantic_model = std::make_unique(*analysis.symbol_table); { language::semantic::Analyzer analyzer(*analysis.symbol_table, *analysis.semantic_model); analyzer.Analyze(*analysis.ast); } { std::unique_lock lock(mutex_); editing_symbols_[event.item.uri] = std::move(analysis); RebuildIndex(); } spdlog::debug("Document parsed and symbols built: {}", event.item.uri); } catch (const std::exception& e) { spdlog::error("Exception building symbols for {}: {}", event.item.uri, e.what()); } } void Symbol::OnDocumentReparsed(const events::DocumentReparsed& event) { OnDocumentParsed(events::DocumentParsed{ .item = event.item, .tree = event.tree }); } void Symbol::OnDocumentClosed(const events::DocumentClosed& event) { std::unique_lock lock(mutex_); editing_symbols_.erase(event.textDocument.uri); RebuildIndex(); spdlog::debug("Document closed and symbols removed: {}", event.textDocument.uri); } void Symbol::RebuildIndex() { index_by_name_.clear(); auto add_container = [this](const auto& container, SymbolSource source) { for (const auto& [uri, entry] : container) { if (entry.symbol_table) AddTableToIndex(*entry.symbol_table, uri, source); } }; add_container(system_symbols_, SymbolSource::kSystem); add_container(workspace_symbols_, SymbolSource::kWorkspace); for (const auto& [uri, analysis] : editing_symbols_) { if (analysis.symbol_table) AddTableToIndex(*analysis.symbol_table, uri, SymbolSource::kEditing); } } bool Symbol::IsTopLevelSymbol(const language::symbol::SymbolTable& table, language::symbol::SymbolId id) const { const auto& scopes = table.scopes().all_scopes(); auto global_id = table.scopes().global_scope(); auto it = scopes.find(global_id); if (it == scopes.end()) return false; const auto& symbols = it->second.symbols; for (const auto& [_, ids] : symbols) { if (std::find(ids.begin(), ids.end(), id) != ids.end()) return true; } return false; } void Symbol::AddTableToIndex(const language::symbol::SymbolTable& table, const protocol::DocumentUri& uri, SymbolSource source) { for (const auto& wrapper : table.all_definitions()) { const auto& symbol = wrapper.get(); if (symbol.kind() != protocol::SymbolKind::Function && symbol.kind() != protocol::SymbolKind::Class && symbol.kind() != protocol::SymbolKind::Module) { continue; } if (!IsTopLevelSymbol(table, symbol.id())) continue; IndexedSymbol item{ .uri = uri, .name = symbol.name(), .kind = symbol.kind(), .source = source, .id = symbol.id() }; auto key = utils::ToLower(symbol.name()); index_by_name_[key].push_back(std::move(item)); } } }