diff --git a/CMakeLists.txt b/CMakeLists.txt
new file mode 100644
index 0000000..87043b8
--- /dev/null
+++ b/CMakeLists.txt
@@ -0,0 +1,45 @@
+cmake_minimum_required(VERSION 3.15)
+project(sherpa_server LANGUAGES CXX)
+
+# 64-bit helper. Must match sherpa-onnx DLL bitness (shipped x64). Runs as a
+# separate process from the 32-bit FNV plugin, so mismatched bitness is fine.
+#
+# Test the pointer size of the toolchain that project() just probed instead
+# of CMAKE_GENERATOR_PLATFORM: that variable is unset for Ninja / NMake builds
+# started from an x64 developer prompt (false failure) and is defined, but
+# wrong, for "-A Win32" (missed failure).
+if(MSVC AND NOT CMAKE_SIZEOF_VOID_P EQUAL 8)
+  message(FATAL_ERROR
+    "sherpa_server must be 64-bit to match the shipped sherpa-onnx DLLs. "
+    "Configure with -A x64 (Visual Studio generators) or from an x64 "
+    "developer prompt (Ninja / NMake).")
+endif()
+
+set(CMAKE_CXX_STANDARD 17)
+set(CMAKE_CXX_STANDARD_REQUIRED ON)
+
+get_filename_component(_default_sherpa_dir
+  "${CMAKE_CURRENT_LIST_DIR}/../../third-party/sherpa-onnx" ABSOLUTE)
+set(SHERPA_DIR
+  "${_default_sherpa_dir}"
+  CACHE PATH "Path to the sherpa-onnx SDK (must have include/ and lib/)")
+
+if(NOT EXISTS "${SHERPA_DIR}/include/sherpa-onnx/c-api/c-api.h")
+  message(FATAL_ERROR
+    "sherpa-onnx headers not found under ${SHERPA_DIR}. "
+    "Run 'bash mod/install-sherpa-onnx.sh' to stage the SDK, or "
+    "point -DSHERPA_DIR at the folder containing include/ and lib/.")
+endif()
+
+add_executable(sherpa_server main.cpp tray_icon_win32.cpp)
+
+target_include_directories(sherpa_server PRIVATE "${SHERPA_DIR}/include")
+target_link_directories(sherpa_server PRIVATE "${SHERPA_DIR}/lib")
+target_link_libraries(sherpa_server PRIVATE sherpa-onnx-c-api)
+
+if(MSVC)
+  # /O2 cannot be combined with the /RTC1 that CMake's default MSVC Debug
+  # flags carry (cl error D8016), so only optimised configurations get it.
+  target_compile_options(sherpa_server PRIVATE
+    /W3 $<$<NOT:$<CONFIG:Debug>>:/O2>)
+  target_compile_definitions(sherpa_server PRIVATE _CRT_SECURE_NO_WARNINGS)
+  # Static CRT in every configuration.
+  set_property(TARGET sherpa_server PROPERTY
+    MSVC_RUNTIME_LIBRARY "MultiThreaded")
+  # GUI subsystem (no console window) while keeping the plain main() entry.
+  set_target_properties(sherpa_server PROPERTIES
+    LINK_FLAGS "/SUBSYSTEM:WINDOWS /ENTRY:mainCRTStartup")
+endif()
+
+set_target_properties(sherpa_server PROPERTIES
+  OUTPUT_NAME "sherpa_server"
+  SUFFIX ".exe"
+)
diff --git a/main.cpp b/main.cpp
new file mode 100644
index 0000000..3ace91b
--- /dev/null
+++ b/main.cpp
@@ -0,0 +1,578 @@
+#include <sherpa-onnx/c-api/c-api.h>
+
+#include <algorithm>
+#include <cctype>
+#include <cstdint>
+#include <cstdio>
+#include <cstdlib>
+#include <cstring>
+#include <string>
+#include <unordered_map>
+#include <vector>
+
+#include <fcntl.h>
+#include <io.h>
+
+#define WIN32_LEAN_AND_MEAN
+#include <windows.h>
+
+#include "tray_icon_win32.hpp"
+
+// ---------------------------------------------------------------------------
+// CLI parsing — tiny, just enough for the flags we need
+// ---------------------------------------------------------------------------
+
+// Look up `name` (e.g. "--provider") in argv and return its value. Both
+// "--name=value" and "--name value" are accepted. Returns nullptr when the
+// flag is absent, or when it is the last argument and has no value after it.
+// The returned pointer aliases argv.
+static const char* FindFlag(int argc, char** argv, const char* name) {
+    const size_t nameLen = strlen(name);
+    for (int i = 1; i < argc; i++) {
+        const char* arg = argv[i];
+        if (strncmp(arg, name, nameLen) != 0) continue;
+        // The character right after the prefix decides the form; anything
+        // else means this is a different, longer flag name.
+        if (arg[nameLen] == '=') return arg + nameLen + 1;
+        if (arg[nameLen] == '\0' && i + 1 < argc) return argv[i + 1];
+    }
+    return nullptr;
+}
+
+// Same as FindFlag, but maps "flag absent" to the empty string.
+static const char* FlagOrEmpty(int argc, char** argv, const char* name) {
+    const char* v = FindFlag(argc, argv, name);
+    return v ? v : "";
+}
+
+// Integer flag. Returns `def` when the flag is absent or when its value does
+// not start with a number (previously such a value silently became 0, e.g.
+// "--num-threads auto" requested zero threads).
+static int FlagInt(int argc, char** argv, const char* name, int def) {
+    const char* v = FindFlag(argc, argv, name);
+    if (!v) return def;
+    char* end = nullptr;
+    const long parsed = strtol(v, &end, 10);
+    if (end == v) {
+        fprintf(stderr, "sherpa_server: %s value \"%s\" is not a number — using %d\n",
+                name, v, def);
+        fflush(stderr);
+        return def;
+    }
+    return (int)parsed;
+}
+
+// Float flag. Returns `def` when the flag is absent or when its value does
+// not start with a number.
+static float FlagFloat(int argc, char** argv, const char* name, float def) {
+    const char* v = FindFlag(argc, argv, name);
+    if (!v) return def;
+    char* end = nullptr;
+    const double parsed = strtod(v, &end);
+    if (end == v) {
+        fprintf(stderr, "sherpa_server: %s value \"%s\" is not a number — using %g\n",
+                name, v, (double)def);
+        fflush(stderr);
+        return def;
+    }
+    return (float)parsed;
+}
+
+// ---------------------------------------------------------------------------
+// voice_presets.ini [Model.<key>] loader
+// ---------------------------------------------------------------------------
+struct ModelDef {
+    std::string key;   // section-name suffix
+    std::string type;  // "kokoro" / "kitten" / "vits" / "matcha" / ...
+    // Field bag — name -> value. Owned strings stay alive for the
+    // server's lifetime so SherpaOnnx* configs can keep raw pointers.
+    std::unordered_map<std::string, std::string> fields;
+
+    // Value of field `name` as a C string, or "" when the field is missing
+    // or empty. The pointer refers to storage owned by `fields`.
+    const char* GetCStr(const char* name) const {
+        auto it = fields.find(name);
+        return (it != fields.end() && !it->second.empty()) ? it->second.c_str() : "";
+    }
+    // Value of field `name` parsed with atof, or `def` when the field is
+    // missing or empty.
+    float GetFloat(const char* name, float def) const {
+        auto it = fields.find(name);
+        if (it == fields.end() || it->second.empty()) return def;
+        return (float)atof(it->second.c_str());
+    }
+};
+
+// Trim leading/trailing ASCII whitespace + an inline " ;" comment tail
+// from a string in-place.
+static void TrimAndStripComment(std::string& s) {
+    // A ';' only starts a comment when a space or tab precedes it, so a ';'
+    // embedded in a value (or at position 0) is kept.
+    for (size_t i = 0; i + 1 < s.size(); i++) {
+        if ((s[i] == ' ' || s[i] == '\t') && s[i + 1] == ';') {
+            s.resize(i);
+            break;
+        }
+    }
+    while (!s.empty() && (s.back() == ' ' || s.back() == '\t' ||
+                          s.back() == '\r' || s.back() == '\n')) {
+        s.pop_back();
+    }
+    size_t lead = 0;
+    while (lead < s.size() && (s[lead] == ' ' || s[lead] == '\t')) lead++;
+    if (lead > 0) s.erase(0, lead);
+}
+
+// Walk a GetPrivateProfileSectionA payload ("k=v\0k=v\0...\0\0") and call
+// visit(key, value) for each entry.
+template <typename Fn>
+static void ForEachIniEntry(const char* buf, Fn&& visit) {
+    // The payload is a run of NUL-terminated strings closed by an empty
+    // string, so stepping by strlen + 1 lands on the next entry.
+    for (const char* entry = buf; *entry; entry += strlen(entry) + 1) {
+        const char* eq = strchr(entry, '=');
+        if (!eq || eq == entry) continue;  // no '=' at all, or empty key
+        std::string key(entry, eq - entry);
+        std::string val(eq + 1);
+        TrimAndStripComment(key);
+        TrimAndStripComment(val);
+        if (key.empty()) continue;
+        visit(key, val);
+    }
+}
+
+// Case-insensitive lookup of a model definition by key. Returns nullptr
+// when no definition matches.
+static ModelDef* FindModelDefCi(std::vector<ModelDef>& defs,
+                                const std::string& key) {
+    for (auto& d : defs) {
+        if (_stricmp(d.key.c_str(), key.c_str()) == 0) return &d;
+    }
+    return nullptr;
+}
+static const ModelDef* FindModelDefCi(const std::vector<ModelDef>& defs,
+                                      const std::string& key) {
+    for (const auto& d : defs) {
+        if (_stricmp(d.key.c_str(), key.c_str()) == 0) return &d;
+    }
+    return nullptr;
+}
+
+// Discover every [Model.<key>] section in one INI and merge into `out`.
+// Last-wins on duplicate keys (case-insensitive) — a later file's
+// redefinition replaces the existing entry in place, mirroring the
+// plugin-side loader.
+//
+// `iniPath` should be a full path: the GetPrivateProfile* functions look a
+// non-absolute file name up in the Windows directory, not the working
+// directory.
+static void LoadModelDefsFromFile(const char* iniPath,
+                                  std::vector<ModelDef>& out) {
+    static char nameBuf[8192];
+    DWORD n = GetPrivateProfileSectionNamesA(nameBuf, sizeof(nameBuf), iniPath);
+    if (n == 0) return;
+    // A return of (size - 2) is the API's "buffer too small" signal.
+    if (n >= sizeof(nameBuf) - 2) {
+        fprintf(stderr, "sherpa_server: warning — section-name list in %s "
+                        "exceeded %zu bytes, some sections may be missed\n",
+                iniPath, sizeof(nameBuf));
+        fflush(stderr);
+    }
+
+    static char sectionBuf[16384];
+    for (const char* sect = nameBuf; *sect; sect += strlen(sect) + 1) {
+        if (_strnicmp(sect, "Model.", 6) != 0) continue;
+        const char* keyPart = sect + 6;
+        if (!*keyPart) {
+            fprintf(stderr, "sherpa_server: [%s] in %s missing model-key suffix — skipping\n",
+                    sect, iniPath);
+            fflush(stderr);
+            continue;
+        }
+        DWORD len = GetPrivateProfileSectionA(sect, sectionBuf, sizeof(sectionBuf), iniPath);
+        if (len == 0) {
+            fprintf(stderr, "sherpa_server: [%s] in %s is empty — skipping\n",
+                    sect, iniPath);
+            fflush(stderr);
+            continue;
+        }
+        // Same truncation signal as above; the entries that did fit are
+        // still used, but say so instead of dropping the rest silently.
+        if (len >= sizeof(sectionBuf) - 2) {
+            fprintf(stderr, "sherpa_server: warning — [%s] in %s exceeded %zu bytes, "
+                            "some fields may be missed\n",
+                    sect, iniPath, sizeof(sectionBuf));
+            fflush(stderr);
+        }
+        ModelDef def;
+        def.key = keyPart;
+        ForEachIniEntry(sectionBuf, [&](const std::string& k, const std::string& v) {
+            if (_stricmp(k.c_str(), "type") == 0) {
+                def.type = v;
+                return;
+            }
+            // INI key names are case-insensitive and `type` is matched that
+            // way, so store field names lower-cased: "Model=" must satisfy a
+            // later lookup of "model" rather than yield an empty path.
+            std::string lowered = k;
+            for (auto& c : lowered) c = (char)tolower((unsigned char)c);
+            def.fields[lowered] = v;
+        });
+        if (def.type.empty()) {
+            fprintf(stderr, "sherpa_server: [%s] in %s missing type= — skipping\n",
+                    sect, iniPath);
+            fflush(stderr);
+            continue;
+        }
+        if (ModelDef* existing = FindModelDefCi(out, def.key)) {
+            fprintf(stderr, "sherpa_server: [%s] in %s — overriding previous definition\n",
+                    sect, iniPath);
+            fflush(stderr);
+            *existing = std::move(def);
+        } else {
+            fprintf(stderr, "sherpa_server: registered model '%s' (type=%s, %zu fields) from %s\n",
+                    def.key.c_str(), def.type.c_str(), def.fields.size(), iniPath);
+            fflush(stderr);
+            out.push_back(std::move(def));
+        }
+    }
+}
+
+// Enumerate every *.ini under `dirPath` (alphabetical, case-insensitive)
+// and merge their [Model.*] sections into `out`. Last-wins on duplicates.
+// Returns the number of definitions in `out` afterwards, or 0 when the
+// directory cannot be enumerated.
+static int LoadModelDefsFromDir(const char* dirPath,
+                                std::vector<ModelDef>& out) {
+    // Resolve to an absolute directory first. The profile API searches the
+    // Windows directory for any file name that is not a full path, so a
+    // relative --voice-presets would otherwise enumerate files here and then
+    // read nothing from them.
+    char absDir[MAX_PATH];
+    DWORD absLen = GetFullPathNameA(dirPath, sizeof(absDir), absDir, nullptr);
+    if (absLen == 0 || absLen >= sizeof(absDir)) {
+        fprintf(stderr, "sherpa_server: voice_presets path too long: %s\n", dirPath);
+        fflush(stderr);
+        return 0;
+    }
+
+    char pattern[MAX_PATH];
+    int wrote = snprintf(pattern, sizeof(pattern), "%s\\*.ini", absDir);
+    if (wrote < 0 || wrote >= (int)sizeof(pattern)) {
+        fprintf(stderr, "sherpa_server: voice_presets path too long: %s\n", dirPath);
+        fflush(stderr);
+        return 0;
+    }
+
+    std::vector<std::string> names;
+    WIN32_FIND_DATAA fd;
+    HANDLE h = FindFirstFileA(pattern, &fd);
+    if (h == INVALID_HANDLE_VALUE) {
+        fprintf(stderr, "sherpa_server: no *.ini files in %s\n", dirPath);
+        fflush(stderr);
+        return 0;
+    }
+    do {
+        if (fd.dwFileAttributes & FILE_ATTRIBUTE_DIRECTORY) continue;
+        names.push_back(fd.cFileName);
+    } while (FindNextFileA(h, &fd));
+    FindClose(h);
+
+    // Deterministic load order so "last wins" does not depend on the order
+    // the file system happens to enumerate in.
+    std::sort(names.begin(), names.end(),
+              [](const std::string& a, const std::string& b) {
+                  return _stricmp(a.c_str(), b.c_str()) < 0;
+              });
+
+    fprintf(stderr, "sherpa_server: loading %zu voice_presets file(s) from %s\n",
+            names.size(), absDir);
+    fflush(stderr);
+    for (const auto& nm : names) {
+        char full[MAX_PATH];
+        int fullLen = snprintf(full, sizeof(full), "%s\\%s", absDir, nm.c_str());
+        // A truncated path would name some other (or no) file — skip it.
+        if (fullLen < 0 || fullLen >= (int)sizeof(full)) {
+            fprintf(stderr, "sherpa_server: voice_presets path too long: %s\\%s\n",
+                    absDir, nm.c_str());
+            fflush(stderr);
+            continue;
+        }
+        LoadModelDefsFromFile(full, out);
+    }
+    return (int)out.size();
+}
+
+// ---------------------------------------------------------------------------
+// Engine creation per family
+// ---------------------------------------------------------------------------
+
+// Settings shared by every engine, taken from the command line.
+struct EngineGlobals {
+    const char* provider;
+    int numThreads;
+    int debug;
+    int maxNumSentences;
+    float silenceScale;
+};
+
+// Preflight the non-CPU provider DLL so we can fall back to CPU without
+// onnxruntime's CUDA init aborting the whole process. Returns the
+// effective provider string the caller should use.
+static const char* PreflightProvider(const char* provider) {
+    const char* providerDll = nullptr;
+    if (_stricmp(provider, "cuda") == 0) providerDll = "onnxruntime_providers_cuda.dll";
+    else if (_stricmp(provider, "tensorrt") == 0) providerDll = "onnxruntime_providers_tensorrt.dll";
+    // Any other provider has no DLL to probe; pass it through unchanged.
+    if (!providerDll) return provider;
+
+    HMODULE h = LoadLibraryA(providerDll);
+    if (!h) {
+        DWORD err = GetLastError();
+        fprintf(stderr,
+                "sherpa_server: cannot load %s (err=%lu) — provider=%s "
+                "unavailable on this machine. Falling back to CPU.\n",
+                providerDll, err, provider);
+        fflush(stderr);
+        return "cpu";
+    }
+    // This was only a load probe; release our reference again.
+    FreeLibrary(h);
+    return provider;
+}
+
+// Build a SherpaOnnxOfflineTtsConfig populated for one ModelDef and try
+// to create the engine. Falls back from non-CPU providers to CPU on
+// failure. Returns nullptr on hard failure; writes the loaded engine's
+// sample rate / speaker count to *outSampleRate / *outNumSpeakers on
+// success.
+static const SherpaOnnxOfflineTts*
+CreateEngineForDef(const ModelDef& def, const EngineGlobals& g,
+                   int* outSampleRate, int* outNumSpeakers) {
+    // Zero-initialise so every model family we do not fill in stays empty.
+    SherpaOnnxOfflineTtsConfig cfg = {};
+    cfg.model.num_threads = g.numThreads;
+    cfg.model.debug = g.debug;
+    cfg.model.provider = PreflightProvider(g.provider);
+    cfg.max_num_sentences = g.maxNumSentences;
+    cfg.silence_scale = g.silenceScale;
+
+    // The const char* values below point into `def.fields`; `def` has to
+    // outlive the SherpaOnnxCreateOfflineTts calls further down.
+    const char* t = def.type.c_str();
+    if (_stricmp(t, "kokoro") == 0) {
+        cfg.model.kokoro.model = def.GetCStr("model");
+        cfg.model.kokoro.voices = def.GetCStr("voices");
+        cfg.model.kokoro.tokens = def.GetCStr("tokens");
+        cfg.model.kokoro.data_dir = def.GetCStr("data_dir");
+        cfg.model.kokoro.lexicon = def.GetCStr("lexicon");
+        cfg.model.kokoro.lang = def.GetCStr("lang");
+        cfg.model.kokoro.length_scale = def.GetFloat("length_scale", 1.0f);
+    } else if (_stricmp(t, "kitten") == 0) {
+        cfg.model.kitten.model = def.GetCStr("model");
+        cfg.model.kitten.voices = def.GetCStr("voices");
+        cfg.model.kitten.tokens = def.GetCStr("tokens");
+        cfg.model.kitten.data_dir = def.GetCStr("data_dir");
+        cfg.model.kitten.length_scale = def.GetFloat("length_scale", 1.0f);
+    } else if (_stricmp(t, "vits") == 0) {
+        cfg.model.vits.model = def.GetCStr("model");
+        cfg.model.vits.lexicon = def.GetCStr("lexicon");
+        cfg.model.vits.tokens = def.GetCStr("tokens");
+        cfg.model.vits.data_dir = def.GetCStr("data_dir");
+        cfg.model.vits.noise_scale = def.GetFloat("noise_scale", 0.667f);
+        cfg.model.vits.noise_scale_w = def.GetFloat("noise_scale_w", 0.8f);
+        cfg.model.vits.length_scale = def.GetFloat("length_scale", 1.0f);
+    } else if (_stricmp(t, "matcha") == 0) {
+        cfg.model.matcha.acoustic_model = def.GetCStr("acoustic_model");
+        cfg.model.matcha.vocoder = def.GetCStr("vocoder");
+        cfg.model.matcha.lexicon = def.GetCStr("lexicon");
+        cfg.model.matcha.tokens = def.GetCStr("tokens");
+        cfg.model.matcha.data_dir = def.GetCStr("data_dir");
+        cfg.model.matcha.noise_scale = def.GetFloat("noise_scale", 1.0f);
+        cfg.model.matcha.length_scale = def.GetFloat("length_scale", 1.0f);
+    } else {
+        fprintf(stderr, "sherpa_server: model '%s' has unsupported type='%s' — "
+                        "supported: kokoro, kitten, vits, matcha\n",
+                def.key.c_str(), t);
+        fflush(stderr);
+        return nullptr;
+    }
+
+    fprintf(stderr, "sherpa_server: loading '%s' (type=%s, provider=%s, threads=%d)\n",
+            def.key.c_str(), t, cfg.model.provider, cfg.model.num_threads);
+    fflush(stderr);
+
+    const SherpaOnnxOfflineTts* tts = SherpaOnnxCreateOfflineTts(&cfg);
+    // Retry on CPU only when the failed attempt was not already CPU. The
+    // comparison is case-insensitive, like the provider matching in
+    // PreflightProvider, so "--provider CPU" does not trigger a second,
+    // identical attempt reported as a fallback.
+    if (!tts && _stricmp(cfg.model.provider, "cpu") != 0) {
+        fprintf(stderr, "sherpa_server: provider=%s failed for '%s' — "
+                        "falling back to CPU\n",
+                cfg.model.provider, def.key.c_str());
+        fflush(stderr);
+        cfg.model.provider = "cpu";
+        tts = SherpaOnnxCreateOfflineTts(&cfg);
+    }
+    if (!tts) {
+        fprintf(stderr, "sherpa_server: SherpaOnnxCreateOfflineTts failed for '%s'\n",
+                def.key.c_str());
+        fflush(stderr);
+        return nullptr;
+    }
+
+    // Out-parameters are written on success only.
+    *outSampleRate = SherpaOnnxOfflineTtsSampleRate(tts);
+    *outNumSpeakers = SherpaOnnxOfflineTtsNumSpeakers(tts);
+    fprintf(stderr, "sherpa_server: '%s' ready sr=%d speakers=%d\n",
+            def.key.c_str(), *outSampleRate, *outNumSpeakers);
+    fflush(stderr);
+    return tts;
+}
+
+// ---------------------------------------------------------------------------
+// WAV building — sherpa returns float [-1, 1] samples; we serialise to a
+// mono 16-bit PCM RIFF file and hand raw bytes to the plugin.
+// ---------------------------------------------------------------------------
+
+// Serialise `n` float samples as a mono 16-bit PCM WAV image (44-byte RIFF
+// header followed by the samples) and return the bytes. Samples are scaled
+// by 32767 and clamped to the int16 range; NaN samples become silence.
+// Header fields and samples are written in host byte order.
+static std::string BuildWav(const float* samples, int32_t n, int32_t sampleRate) {
+    // No usable input -> header-only file instead of a huge bogus data size.
+    if (!samples || n < 0) n = 0;
+
+    const uint16_t channels = 1;
+    const uint16_t bitsPerSample = 16;
+    const uint16_t blockAlign = channels * (bitsPerSample / 8);
+    const uint32_t byteRate = (uint32_t)sampleRate * blockAlign;
+    const uint32_t dataSize = (uint32_t)n * blockAlign;
+    const uint32_t riffSize = 36 + dataSize;  // file size minus "RIFF" + size field
+    const uint32_t fmtSize = 16;
+    const uint16_t audioFormat = 1;           // 1 = PCM
+
+    std::string wav;
+    wav.reserve(44 + dataSize);
+
+    auto putBytes = [&](const void* p, size_t len) {
+        wav.append((const char*)p, len);
+    };
+    auto putU32 = [&](uint32_t v) { putBytes(&v, 4); };
+    auto putU16 = [&](uint16_t v) { putBytes(&v, 2); };
+
+    putBytes("RIFF", 4); putU32(riffSize);
+    putBytes("WAVE", 4);
+    putBytes("fmt ", 4); putU32(fmtSize);
+    putU16(audioFormat); putU16(channels);
+    putU32((uint32_t)sampleRate); putU32(byteRate);
+    putU16(blockAlign); putU16(bitsPerSample);
+    putBytes("data", 4); putU32(dataSize);
+
+    const size_t pcmOffset = wav.size();
+    wav.resize(pcmOffset + dataSize);
+    char* pcm = wav.data() + pcmOffset;
+    for (int32_t i = 0; i < n; i++) {
+        const float scaled = samples[i] * 32767.0f;
+        int16_t s;
+        if (scaled >= 32767.0f) s = 32767;
+        else if (scaled <= -32768.0f) s = -32768;
+        else if (scaled == scaled) s = (int16_t)scaled;
+        else s = 0;  // NaN: converting it to an integer is undefined
+        // memcpy instead of writing through an int16_t* aliasing the
+        // string's char storage.
+        memcpy(pcm + (size_t)i * sizeof(s), &s, sizeof(s));
+    }
+    return wav;
+}
+
+// Reply framing on stdout: a 4-byte length followed by that many WAV bytes.
+static void WriteResponse(const char* wavBytes, int32_t wavLen) {
+    fwrite(&wavLen, 4, 1, stdout);
+    if (wavLen > 0 && wavBytes) fwrite(wavBytes, 1, (size_t)wavLen, stdout);
+    fflush(stdout);
+}
+
+// Failure reply: a zero length and no payload.
+static void WriteFailure() {
+    int32_t zero = 0;
+    fwrite(&zero, 4, 1, stdout);
+    fflush(stdout);
+}
+
+// ---------------------------------------------------------------------------
+// Per-engine state
+// ---------------------------------------------------------------------------
+struct EngineState {
+    const SherpaOnnxOfflineTts* tts;
+    int sampleRate;
+    int numSpeakers;
+    bool loadFailed;  // sticky: once a model fails to load, don't retry
+};
+
+// ---------------------------------------------------------------------------
+// main
+// ---------------------------------------------------------------------------
+int main(int argc, char** argv) {
+    // Binary mode on stdout so Windows doesn't mangle \n -> \r\n in PCM.
+    _setmode(_fileno(stdout), _O_BINARY);
+    // Binary mode on stdin too: in text mode a 0x1A (Ctrl-Z) byte inside a
+    // request is reported as end-of-file and would shut the server down.
+    // The request loop already discards '\r' itself.
+    _setmode(_fileno(stdin), _O_BINARY);
+
+    const char* presetsPath = FindFlag(argc, argv, "--voice-presets");
+    if (!presetsPath || !*presetsPath) {
+        fprintf(stderr, "sherpa_server: --voice-presets is required\n");
+        return 2;
+    }
+    DWORD presetsAttrs = GetFileAttributesA(presetsPath);
+    if (presetsAttrs == INVALID_FILE_ATTRIBUTES ||
+        !(presetsAttrs & FILE_ATTRIBUTE_DIRECTORY)) {
+        fprintf(stderr, "sherpa_server: --voice-presets must name an existing "
+                        "directory (got %s)\n", presetsPath);
+        return 2;
+    }
+
+    EngineGlobals globals = {};
+    {
+        const char* provider = FindFlag(argc, argv, "--provider");
+        globals.provider = (provider && provider[0]) ? provider : "cpu";
+        globals.numThreads = FlagInt(argc, argv, "--num-threads", 2);
+        globals.debug = FlagInt(argc, argv, "--debug", 0);
+        // Kokoro ignores max_num_sentences != 1 (it streams the full text
+        // through a single forward pass). Default 1 avoids a spurious warning.
+        globals.maxNumSentences = FlagInt(argc, argv, "--max-num-sentences", 1);
+        globals.silenceScale = FlagFloat(argc, argv, "--silence-scale", 0.2f);
+    }
+    float speed = FlagFloat(argc, argv, "--speed", 1.0f);
+
+    fprintf(stderr, "sherpa_server: voice_presets=%s provider=%s threads=%d speed=%.2f\n",
+            presetsPath, globals.provider, globals.numThreads, speed);
+    fflush(stderr);
+
+    std::vector<ModelDef> modelDefs;
+    int modelCount = LoadModelDefsFromDir(presetsPath, modelDefs);
+    if (modelCount == 0) {
+        fprintf(stderr, "sherpa_server: voice_presets/ has no [Model.*] sections — "
+                        "every request will fail until at least one model is declared\n");
+        fflush(stderr);
+    }
+
+    // Keyed by lower-cased model key.
+    std::unordered_map<std::string, EngineState> engines;
+
+    // Lazy-load an engine on first reference. Subsequent requests for the
+    // same key reuse the loaded handle. Failed loads are sticky — we
+    // don't retry on every line, just log once and reply failure.
+    auto GetEngine = [&](const std::string& key) -> EngineState* {
+        std::string lower = key;
+        for (auto& c : lower) c = (char)tolower((unsigned char)c);
+
+        auto it = engines.find(lower);
+        if (it != engines.end()) {
+            return it->second.loadFailed ? nullptr : &it->second;
+        }
+
+        // Insert the entry before loading so a failure is remembered too.
+        EngineState& st = engines[lower];
+        st.tts = nullptr;
+        st.sampleRate = 0;
+        st.numSpeakers = 0;
+        st.loadFailed = false;
+
+        const ModelDef* def = FindModelDefCi(modelDefs, key);
+        if (!def) {
+            fprintf(stderr, "sherpa_server: model '%s' not declared in any voice_presets/*.ini\n",
+                    key.c_str());
+            fflush(stderr);
+            st.loadFailed = true;
+            return nullptr;
+        }
+        st.tts = CreateEngineForDef(*def, globals, &st.sampleRate, &st.numSpeakers);
+        if (!st.tts) {
+            st.loadFailed = true;
+            return nullptr;
+        }
+        return &st;
+    };
+
+    // Tray-icon indicator: lets the user see that sherpa_server.exe is
+    // alive in the background. The exe is WIN32 subsystem (no console
+    // window), so without this the only sign of life is the process in
+    // Task Manager.
+    {
+        char tip[160];
+        snprintf(tip, sizeof(tip),
+                 "Sherpa-onnx TTS (Numen) - %d model(s) registered",
+                 modelCount);
+        sherpa::StartTrayIcon(tip);
+    }
+
+    // Utterance loop. Read one request line, synthesise, emit WAV.
+    // Every request gets exactly one reply (WAV or failure).
+    std::string line;
+    line.reserve(4096);
+
+    while (true) {
+        int ch = fgetc(stdin);
+        if (ch == EOF) break;
+        if (ch == '\r') continue;
+        if (ch != '\n') {
+            line.push_back((char)ch);
+            // Guard runaway input; drop the rest of the line on overflow.
+            if (line.size() > 16 * 1024) {
+                while ((ch = fgetc(stdin)) != EOF && ch != '\n') {}
+                fprintf(stderr, "sherpa_server: dropped oversized request line\n");
+                fflush(stderr);
+                line.clear();
+                WriteFailure();
+            }
+            continue;
+        }
+
+        // Parse "<model>\t<sid>\t<text>". Two tabs minimum; the text
+        // is everything past the second tab and may itself contain
+        // anything (we don't strip).
+        size_t tab1 = line.find('\t');
+        size_t tab2 = (tab1 == std::string::npos) ? std::string::npos
+                                                  : line.find('\t', tab1 + 1);
+        if (tab1 == std::string::npos || tab2 == std::string::npos) {
+            fprintf(stderr, "sherpa_server: request missing tab separator(s): \"%.80s\"\n",
+                    line.c_str());
+            fflush(stderr);
+            WriteFailure();
+            line.clear();
+            continue;
+        }
+
+        std::string modelKey(line.data(), tab1);
+        std::string sidStr(line.data() + tab1 + 1, tab2 - tab1 - 1);
+        // `text` points into `line`, which is not modified until the reply
+        // for this request has been written.
+        const char* text = line.c_str() + tab2 + 1;
+
+        int32_t sid = atoi(sidStr.c_str());
+
+        EngineState* eng = GetEngine(modelKey);
+        if (!eng) {
+            WriteFailure();
+            line.clear();
+            continue;
+        }
+
+        SherpaOnnxGenerationConfig gcfg = {};
+        gcfg.sid = sid;
+        gcfg.speed = speed;
+
+        const SherpaOnnxGeneratedAudio* audio =
+            SherpaOnnxOfflineTtsGenerateWithConfig(eng->tts, text, &gcfg, nullptr, nullptr);
+        if (!audio || !audio->samples || audio->n <= 0) {
+            fprintf(stderr, "sherpa_server: synthesis failed (model=%s sid=%d, text=\"%.80s\")\n",
+                    modelKey.c_str(), sid, text);
+            fflush(stderr);
+            if (audio) SherpaOnnxDestroyOfflineTtsGeneratedAudio(audio);
+            WriteFailure();
+            line.clear();
+            continue;
+        }
+
+        std::string wav = BuildWav(audio->samples, audio->n, audio->sample_rate);
+        fprintf(stderr, "sherpa_server: model=%s sid=%d samples=%d sr=%d bytes=%zu\n",
+                modelKey.c_str(), sid, audio->n, audio->sample_rate, wav.size());
+        fflush(stderr);
+        SherpaOnnxDestroyOfflineTtsGeneratedAudio(audio);
+
+        WriteResponse(wav.data(), (int32_t)wav.size());
+        line.clear();
+    }
+
+    fprintf(stderr, "sherpa_server: stdin EOF, shutting down\n");
+    fflush(stderr);
+    sherpa::StopTrayIcon();
+    for (auto& kv : engines) {
+        if (kv.second.tts) SherpaOnnxDestroyOfflineTts(kv.second.tts);
+    }
+    return 0;
+}
diff --git a/tray_icon_win32.cpp b/tray_icon_win32.cpp
new file mode 100644
index 0000000..1e00b12
--- /dev/null
+++ b/tray_icon_win32.cpp
@@ -0,0 +1,195 @@
+#ifndef _CRT_SECURE_NO_WARNINGS
+#define _CRT_SECURE_NO_WARNINGS
+#endif
+
+#include "tray_icon_win32.hpp"
+
+#ifdef _WIN32
+
+#include <windows.h>
+#include <shellapi.h>
+
+#include <atomic>
+#include <cstring>
+#include <functional>
+#include <string>
+
+#pragma comment(lib, "user32.lib")
+#pragma comment(lib, "shell32.lib")
+
+namespace sherpa {
+
+namespace {
+
+// WM_APP-based callback ID for our Shell_NotifyIcon. Anything in [WM_APP,
+// 0xBFFF] is safe to use for app-defined messages.
+constexpr UINT WM_TRAYICON = WM_APP + 1;
+
+// Unique ID for our notification icon within the HWND. Arbitrary value.
+constexpr UINT TRAY_ICON_UID = 0xC1A2;
+
+// Right-click menu command ids.
+constexpr UINT IDM_TOOLTIP = 1;  // disabled label echoing the tooltip
+constexpr UINT IDM_EXIT = 2;     // shut the helper down
+
+HANDLE g_thread = nullptr;
+DWORD g_threadId = 0;
+char g_tooltip[128] = {};
+
+// True while the icon is registered with the shell: set on the tray thread
+// once NIM_ADD succeeds, cleared again when the icon is removed.
+std::atomic<bool> g_iconLive{false};
+
+// Optional graceful-shutdown hook, invoked on the tray thread when the user
+// picks "Exit". Empty -> Exit terminates the process via ExitProcess.
+std::function<void()> g_onExit;
+
+// Handle the "Exit" menu pick (tray thread). Remove the icon immediately so
+// no ghost lingers, then either hand back to the caller's shutdown hook or
+// terminate outright.
+void RequestExit(HWND hwnd) {
+    NOTIFYICONDATAA nidDel = {};
+    nidDel.cbSize = sizeof(nidDel);
+    nidDel.hWnd = hwnd;
+    nidDel.uID = TRAY_ICON_UID;
+    Shell_NotifyIconA(NIM_DELETE, &nidDel);
+    g_iconLive.store(false, std::memory_order_release);
+
+    if (g_onExit) {
+        // Graceful: e.g. stop the listen loop so main() returns and calls
+        // StopTrayIcon(), which unwinds this message thread.
+        g_onExit();
+    } else {
+        // No graceful hook (blocking stdin loop): terminate. The OS reaps
+        // the already-removed icon and this thread.
+        ExitProcess(0);
+    }
+}
+
+// Window procedure of the hidden message-only window that receives the
+// icon's callback messages.
+LRESULT CALLBACK TrayWndProc(HWND hwnd, UINT msg,
+                             WPARAM wParam, LPARAM lParam) {
+    if (msg == WM_TRAYICON) {
+        UINT event = LOWORD(lParam);
+        if (event == WM_RBUTTONUP || event == WM_CONTEXTMENU) {
+            // Right-click menu: a disabled label mirroring the tooltip, a
+            // separator, then an "Exit" item that shuts the helper down.
+            POINT pt;
+            GetCursorPos(&pt);
+            HMENU menu = CreatePopupMenu();
+            if (menu) {
+                AppendMenuA(menu, MF_STRING | MF_GRAYED, IDM_TOOLTIP, g_tooltip);
+                AppendMenuA(menu, MF_SEPARATOR, 0, nullptr);
+                AppendMenuA(menu, MF_STRING, IDM_EXIT, "Exit");
+                // SetForegroundWindow is required so the menu dismisses
+                // correctly when the user clicks outside it.
+                SetForegroundWindow(hwnd);
+                // TPM_RETURNCMD returns the picked id inline instead of
+                // posting WM_COMMAND, so we can act on it right here.
+                UINT cmd = TrackPopupMenu(menu, TPM_RIGHTBUTTON | TPM_RETURNCMD,
+                                          pt.x, pt.y, 0, hwnd, NULL);
+                DestroyMenu(menu);
+                if (cmd == IDM_EXIT) RequestExit(hwnd);
+            }
+        }
+        return 0;
+    }
+    if (msg == WM_DESTROY) {
+        PostQuitMessage(0);
+        return 0;
+    }
+    return DefWindowProcA(hwnd, msg, wParam, lParam);
+}
+
+// Tray thread body: create the message-only window, add the icon, pump
+// messages until WM_QUIT, then remove the icon and destroy the window.
+// Returns 1 when setup fails, 0 after a normal shutdown.
+DWORD WINAPI TrayThreadProc(LPVOID /*param*/) {
+    HINSTANCE hInst = GetModuleHandleA(NULL);
+
+    WNDCLASSEXA wc = {};
+    wc.cbSize = sizeof(wc);
+    wc.lpfnWndProc = TrayWndProc;
+    wc.hInstance = hInst;
+    wc.lpszClassName = "SherpaTrayClass";
+    // A second Start/Stop cycle finds the class still registered; that is
+    // fine.
+    if (!RegisterClassExA(&wc) && GetLastError() != ERROR_CLASS_ALREADY_EXISTS) {
+        return 1;
+    }
+
+    HWND hwnd = CreateWindowExA(0, "SherpaTrayClass", "Sherpa TTS",
+                                0, 0, 0, 0, 0,
+                                HWND_MESSAGE,  // message-only window
+                                NULL, hInst, NULL);
+    if (!hwnd) return 1;
+
+    NOTIFYICONDATAA nid = {};
+    nid.cbSize = sizeof(nid);
+    nid.hWnd = hwnd;
+    nid.uID = TRAY_ICON_UID;
+    nid.uFlags = NIF_MESSAGE | NIF_ICON | NIF_TIP;
+    nid.uCallbackMessage = WM_TRAYICON;
+    nid.hIcon = LoadIconA(NULL, IDI_APPLICATION);
+    // `nid` is zero-initialised, so copying at most size - 1 characters
+    // always leaves szTip NUL-terminated.
+    strncpy(nid.szTip, g_tooltip, sizeof(nid.szTip) - 1);
+
+    if (!Shell_NotifyIconA(NIM_ADD, &nid)) {
+        DestroyWindow(hwnd);
+        return 1;
+    }
+    g_iconLive.store(true, std::memory_order_release);
+
+    MSG msg;
+    while (GetMessageA(&msg, NULL, 0, 0) > 0) {
+        TranslateMessage(&msg);
+        DispatchMessageA(&msg);
+    }
+
+    NOTIFYICONDATAA nidDel = {};
+    nidDel.cbSize = sizeof(nidDel);
+    nidDel.hWnd = hwnd;
+    nidDel.uID = TRAY_ICON_UID;
+    Shell_NotifyIconA(NIM_DELETE, &nidDel);
+    g_iconLive.store(false, std::memory_order_release);
+
+    DestroyWindow(hwnd);
+    return 0;
+}
+
+}  // namespace
+
+void StartTrayIcon(const std::string& tooltip, std::function<void()> onExit) {
+    if (g_thread) return;  // already started — silently ignore
+
+    g_onExit = std::move(onExit);
+
+    // Snapshot the tooltip into our static buffer so the worker thread
+    // doesn't read the caller's std::string.
+    strncpy(g_tooltip, tooltip.c_str(), sizeof(g_tooltip) - 1);
+    g_tooltip[sizeof(g_tooltip) - 1] = '\0';
+
+    g_thread = CreateThread(NULL, 0, TrayThreadProc, NULL, 0, &g_threadId);
+    if (!g_thread) {
+        // No tray thread means nobody will ever call the hook; do not keep
+        // whatever it captured alive.
+        g_threadId = 0;
+        g_onExit = nullptr;
+    }
+}
+
+void StopTrayIcon() {
+    if (!g_thread) return;
+
+    // PostThreadMessage delivers a WM_QUIT directly to the tray thread's
+    // queue; GetMessage returns 0 and the loop exits. The post fails while
+    // the thread has not created its message queue yet (Stop called right
+    // after Start), so retry for a bounded time instead of posting once and
+    // then waiting for a quit that never arrives.
+    const ULONGLONG deadline = GetTickCount64() + 2000;
+    while (g_threadId != 0 &&
+           !PostThreadMessageA(g_threadId, WM_QUIT, 0, 0)) {
+        // The thread already ended (e.g. setup failed): nothing to stop.
+        if (WaitForSingleObject(g_thread, 10) != WAIT_TIMEOUT) break;
+        if (GetTickCount64() >= deadline) break;
+    }
+    // 2 s is plenty for the icon-removal + window-destroy path.
+    WaitForSingleObject(g_thread, 2000);
+    CloseHandle(g_thread);
+    g_thread = nullptr;
+    g_threadId = 0;
+    g_onExit = nullptr;  // drop any captured references
+}
+
+}  // namespace sherpa
+
+#else  // !_WIN32
+
+namespace sherpa {
+void StartTrayIcon(const std::string&, std::function<void()>) {}
+void StopTrayIcon() {}
+}  // namespace sherpa
+
+#endif
diff --git a/tray_icon_win32.hpp b/tray_icon_win32.hpp
new file mode 100644
index 0000000..083175d
--- /dev/null
+++ b/tray_icon_win32.hpp
@@ -0,0 +1,37 @@
+// Optional tray-icon indicator for the long-running sherpa_server process.
+// Windows-only; on other platforms the StartTrayIcon / StopTrayIcon calls
+// are no-ops via the #ifdef _WIN32 stub at the bottom of the .cpp.
+//
+// The icon tells the user a background TTS helper is running and offers a
+// right-click "Exit" to shut it down. The process is normally reaped by the
+// parent (the Numen NVSE plugin spawns sherpa_server.exe into a kill-on-close
+// Job Object); Exit is the manual path for when the helper is left running
+// standalone. The stdin request loop can't be unblocked from the tray thread,
+// so sherpa_server passes no onExit hook and Exit calls ExitProcess.
+//
+// Mirrors third-party/piper/src/cpp/tray_icon_win32.{hpp,cpp}; duplicated
+// rather than shared because the two helpers ship out of independent CMake
+// trees (piper is a submodule, sherpa_server is in this repo). Keep the
+// two copies in sync when touching either.
+#pragma once
+
+#include <functional>
+#include <string>
+
+namespace sherpa {
+
+// Add a tray icon with the given tooltip text. The optional onExit hook is
+// invoked on the tray thread when the user picks "Exit"; pass it to shut the
+// host down gracefully. When empty, "Exit" terminates the process via
+// ExitProcess. Idempotent; subsequent calls are ignored while an icon is
+// already active. The icon and its hidden message-only window live on a
+// dedicated thread so the request loop on the main thread is unaffected.
+void StartTrayIcon(const std::string& tooltip,
+                   std::function<void()> onExit = {});
+
+// Remove the tray icon and wait briefly for the message-pump thread to
+// exit. Safe to call from a normal exit path; on TerminateProcess the
+// OS reaps the icon anyway.
+void StopTrayIcon();
+
+}  // namespace sherpa