From 4d8518f43345086c8445aba7cf83dcd0baaaa232 Mon Sep 17 00:00:00 2001 From: Clive Blackledge Date: Tue, 14 Oct 2025 21:23:14 -0700 Subject: [PATCH 1/2] Fixing handshake stalls for Android devices --- src/mesh/PhoneAPI.cpp | 36 +++++++++++++++++++- src/mesh/PhoneAPI.h | 11 +++++++ src/nimble/NimbleBluetooth.cpp | 60 +++++++++++++++++++++++++++------- 3 files changed, 95 insertions(+), 12 deletions(-) diff --git a/src/mesh/PhoneAPI.cpp b/src/mesh/PhoneAPI.cpp index 9eeadf5a21..74c5c496da 100644 --- a/src/mesh/PhoneAPI.cpp +++ b/src/mesh/PhoneAPI.cpp @@ -33,7 +33,6 @@ // Flag to indicate a heartbeat was received and we should send queue status bool heartbeatReceived = false; - PhoneAPI::PhoneAPI() { lastContactMsec = millis(); @@ -74,6 +73,9 @@ void PhoneAPI::handleStartConfig() nodeInfoForPhone.num = 0; // Don't keep returning old nodeinfos nodeInfoQueue.clear(); resetReadIndex(); + configStartMsec = millis(); + configHandshakeRestarted = false; + onConfigHandshakeStarted(); } void PhoneAPI::close() @@ -101,8 +103,12 @@ void PhoneAPI::close() fromRadioNum = 0; config_nonce = 0; config_state = 0; + // Reset duplicate filter so each new connection starts clean + std::fill(std::begin(recentToRadioPacketIds), std::end(recentToRadioPacketIds), 0); pauseBluetoothLogging = false; heartbeatReceived = false; + configStartMsec = 0; + configHandshakeRestarted = false; } } @@ -757,7 +763,35 @@ int PhoneAPI::onNotify(uint32_t newValue) onNowHasData(newValue); } else { LOG_DEBUG("Client not yet interested in packets (state=%d)", state); + checkConfigHandshakeTimeout(); } return timeout ? -1 : 0; // If we timed out, MeshService should stop iterating through observers as we just removed one } + +bool PhoneAPI::isConfigHandshakeActive() const +{ + return !configHandshakeRestarted && state == STATE_SEND_MY_INFO && configStartMsec != 0; +} + +uint32_t PhoneAPI::getConfigHandshakeElapsedMs() const +{ + if (configStartMsec == 0) + return 0; + return millis() - configStartMsec; +} + +bool PhoneAPI::checkConfigHandshakeTimeout() +{ + if (!isConfigHandshakeActive()) + return false; + + uint32_t elapsedMs = getConfigHandshakeElapsedMs(); + if (elapsedMs > kConfigHandshakeTimeoutMs) { + LOG_WARN("Config handshake stuck in state=%d for %u ms, forcing transport restart", state, elapsedMs); + configHandshakeRestarted = true; + onConfigHandshakeTimeout(); + return true; + } + return false; +} diff --git a/src/mesh/PhoneAPI.h b/src/mesh/PhoneAPI.h index 692fdd0b99..91720e5955 100644 --- a/src/mesh/PhoneAPI.h +++ b/src/mesh/PhoneAPI.h @@ -91,6 +91,8 @@ class PhoneAPI /// Use to ensure that clients don't get confused about old messages from the radio uint32_t config_nonce = 0; uint32_t readIndex = 0; + uint32_t configStartMsec = 0; + bool configHandshakeRestarted = false; std::vector filesManifest = {}; @@ -135,6 +137,7 @@ class PhoneAPI bool isConnected() { return state != STATE_SEND_NOTHING; } protected: + static constexpr uint32_t kConfigHandshakeTimeoutMs = 3000; /// Our fromradio packet while it is being assembled meshtastic_FromRadio fromRadioScratch = {}; @@ -144,12 +147,20 @@ class PhoneAPI /// Hookable to find out when connection changes virtual void onConnectionChanged(bool connected) {} + /// Invoked if the config handshake stalls long enough that we want to drop the BLE link. + virtual void onConfigHandshakeTimeout() {} + virtual void onConfigHandshakeStarted() {} + /// If we haven't heard from the other side in a while then say not connected. Returns true if timeout occurred bool checkConnectionTimeout(); /// Check the current underlying physical link to see if the client is currently connected virtual bool checkIsConnected() = 0; + bool checkConfigHandshakeTimeout(); + bool isConfigHandshakeActive() const; + uint32_t getConfigHandshakeElapsedMs() const; + /** * Subclasses can use this as a hook to provide custom notifications for their transport (i.e. bluetooth notifies) */ diff --git a/src/nimble/NimbleBluetooth.cpp b/src/nimble/NimbleBluetooth.cpp index 4b0c336092..945f4f706b 100644 --- a/src/nimble/NimbleBluetooth.cpp +++ b/src/nimble/NimbleBluetooth.cpp @@ -54,22 +54,39 @@ class BluetoothPhoneAPI : public PhoneAPI, public concurrency::OSThread protected: virtual int32_t runOnce() override { - std::lock_guard guard(nimble_mutex); - if (queue_size > 0) { - for (uint8_t i = 0; i < queue_size; i++) { - handleToRadio(nimble_queue.at(i).data(), nimble_queue.at(i).length()); + bool scheduledImmediate = false; + { + std::lock_guard guard(nimble_mutex); + if (queue_size > 0) { + for (uint8_t i = 0; i < queue_size; i++) { + handleToRadio(nimble_queue.at(i).data(), nimble_queue.at(i).length()); + } + LOG_DEBUG("Queue_size %u", queue_size); + queue_size = 0; + // Reset our timer so any newly queued work is handled right away. + setIntervalFromNow(0); + scheduledImmediate = true; + } + if (!hasChecked && phoneWants) { + // Pull fresh data while we're outside of the NimBLE callback context. + numBytes = getFromRadio(fromRadioBytes); + hasChecked = true; + // Make sure we wake immediately to publish the prefetched data. + setIntervalFromNow(0); + scheduledImmediate = true; } - LOG_DEBUG("Queue_size %u", queue_size); - queue_size = 0; } - if (!hasChecked && phoneWants) { - // Pull fresh data while we're outside of the NimBLE callback context. - numBytes = getFromRadio(fromRadioBytes); - hasChecked = true; + + bool timedOut = checkConfigHandshakeTimeout(); + if (!timedOut && !scheduledImmediate && isConfigHandshakeActive()) { + uint32_t elapsed = getConfigHandshakeElapsedMs(); + uint32_t remaining = elapsed >= kConfigHandshakeTimeoutMs ? 1 : (kConfigHandshakeTimeoutMs - elapsed); + // Keep nudging the thread while the config handshake is in flight. + setIntervalFromNow(remaining); } // the run is triggered via NimbleBluetoothToRadioCallback and NimbleBluetoothFromRadioCallback - return INT32_MAX; + return RUN_SAME; } /** * Subclasses can use this as a hook to provide custom notifications for their transport (i.e. bluetooth notifies) @@ -92,6 +109,27 @@ class BluetoothPhoneAPI : public PhoneAPI, public concurrency::OSThread #endif } + virtual void onConfigHandshakeStarted() override { setIntervalFromNow(kConfigHandshakeTimeoutMs); } + + virtual void onConfigHandshakeTimeout() override + { + LOG_WARN("Config handshake stalled; restarting BLE connection"); + if (!bleServer) { + return; + } + auto peers = bleServer->getPeerDevices(); + if (peers.empty()) { + LOG_WARN("No BLE peers to disconnect during restart"); + return; + } + for (auto connHandle : peers) { + int rc = bleServer->disconnect(connHandle); + if (rc != 0) { + LOG_WARN("Failed to disconnect BLE handle %u (rc=%d)", connHandle, rc); + } + } + } + /// Check the current underlying physical link to see if the client is currently connected virtual bool checkIsConnected() { return bleServer && bleServer->getConnectedCount() > 0; } }; From 70390c9f63dc9459526266e14db9541a7ef5c1a9 Mon Sep 17 00:00:00 2001 From: Clive Blackledge Date: Tue, 14 Oct 2025 22:50:10 -0700 Subject: [PATCH 2/2] To make copilot happy. --- src/mesh/PhoneAPI.cpp | 1 + 1 file changed, 1 insertion(+) diff --git a/src/mesh/PhoneAPI.cpp b/src/mesh/PhoneAPI.cpp index 74c5c496da..e97dae6495 100644 --- a/src/mesh/PhoneAPI.cpp +++ b/src/mesh/PhoneAPI.cpp @@ -33,6 +33,7 @@ // Flag to indicate a heartbeat was received and we should send queue status bool heartbeatReceived = false; + PhoneAPI::PhoneAPI() { lastContactMsec = millis();