FiPy crashing using WiFi
-
I have a FiPy connected to WiFi and publishing data to my MQTT broker every second. The system crashes at random times. I have attached the last 5 crash dumps. The second one happened just under an hour after the first, and the third happened about 1.5 hours after the second. It started to have a fourth crash about an hour after the third, but the system just stopped and did not write out any crash dump. Then my FiPy just sat there not doing anything for the rest of the weekend. The last 2 files were produced while I was watching the unit this morning, and the last one happened just 3 minutes after the fourth.
Here is my main code that handles connecting to the internet and publishing data to my mqtt broker.
# NOTE(review): the original paste lost all newlines and indentation. The
# nesting below is reconstructed from the control-flow keywords; spots where
# more than one reading is possible are marked NOTE(review) -- confirm them
# against the real file.
import pycom
import utime
import ujson
import gc
from debug import log
import config
import machine
import _thread
from microWebCli import MicroWebCli
from constants import const
from machine import Timer
from rtc import rtc
# from lora import NanoGateway
from wifi import WiFi
from lte import lte
from schedule import schedule
from mqtt_old import MQTTClient
from OTA import WiFiOTA, lteOTA

gc.enable()
gc.threshold(1000000)

# Initialize LED
pycom.heartbeat(False)
pycom.rgbled(0x0000ff)


def sub_cb(topic, msg):
    """Handle an incoming MQTT message.

    The topic is split on b'/'; messages whose topic contains b'commands'
    are treated as commands, those containing b'attributes' as attribute
    updates, and anything else is logged as unknown.
    """
    log.debug('{}', topic)
    log.debug('{}', msg)
    sub = topic.split(b'/')
    json_msg = ujson.loads(msg)
    if b'commands' in sub:
        # The last topic segment carries the command definition id.
        command_id = int(sub[-1])
        # id = json_msg["commandDefinitionId"]
        if command_id == 0:
            command = json_msg["command"]
            submit_cur = json_msg["submitted"]
            submit_last = config.read('submitted')
            log.debug('{}***{}', submit_cur, submit_last)
            # Only act on a command whose submission stamp differs from the
            # one stored by the previous command.
            if submit_cur != submit_last:
                log.debug('Command: {}', command)
                config.write('submitted', submit_cur)
                cmdObj = ujson.loads(command)
                if cmdObj.get('cmd') is not None:
                    cmd = cmdObj["cmd"].lower()
                    if submit_last:
                        if 'schedule_start' in cmd:
                            schedule.start()
                        elif 'manual_zone' in cmd:
                            zone = cmdObj["zone"]
                            if "state" in cmdObj:
                                schedule.relay(zone, cmdObj["state"])
                            elif "duration" in cmdObj:
                                schedule.zone_on(zone, cmdObj["duration"])
                            else:
                                log.warning('Unkown manual zone command: {}', cmdObj)
                        elif 'update' in cmd:
                            config.update = True
                        else:
                            log.warning('Uknown command: {}', cmd)
                    else:
                        log.warning('Ignoring \"{}\" because last submitted time is empty', cmd)
                else:
                    log.warning('Unknown command object: {}', cmdObj)
    elif b'attributes' in sub:
        # attr_name = json_msg["attributeName"].lower()
        attr_name = sub[-1]
        attr_value = json_msg["attributeValue"]
        if 'schedule' in attr_name:
            # config.write('schedule', ujson.loads(attr_value))
            pass
        elif 'starttime' in attr_name:
            # config.write('start_time', ujson.loads(attr_value))
            pass
        elif 'auto' in attr_name:
            config.write('auto', ujson.loads(attr_value)["Auto"])
        elif 'timezone' in attr_name:
            tz = ujson.loads(attr_value)
            config.write('timezone', tz)
            if not rtc.synced():
                rtc.setTimezone(tz['offset'])
                rtc.setDST(tz['dst'])
                rtc.update()
            else:
                rtc.updateTimezone(timezone=tz['offset'], dst=tz['dst'])
        elif 'sprinkler-programs' in attr_name:
            config.write('programs', ujson.loads(attr_value))
        elif 'zone-config' in attr_name:
            config.write('zones', ujson.loads(attr_value))
        elif 'assigned-programs' in attr_name:
            config.write('active', ujson.loads(attr_value))
        else:
            log.warning('Undefined attribute {}: {}', attr_name, attr_value)
    else:
        log.warning('Unknown message received: {}', msg)


# def _publishTimeout(topic, msg):
#     global _pub_task
#     try:
#         mqtt_client.publish(topic, msg)
#     finally:
#         _pub_task = None
#
# global _pub_task = None


def mqtt_loop(timeout):
    """Thread body: once a second, publish the queued messages over MQTT.

    `timeout` is only referenced by the commented-out publish-timeout code.
    """
    # NOTE(review): this thread and the main loop both call methods on the
    # shared mqtt_client with no lock between them -- worth checking as a
    # possible source of the crashes.
    # global _pub_task
    while True:
        utime.sleep_ms(1000)
        packets = config.flush_msg()
        if not config.update:
            if config.mqtt_connected:
                i = 0
                try:
                    for packet in packets:
                        log.debug('{}', packet)
                        mqtt_client.publish(const.MQTT_DATA, packet)
                        # task = None
                        # start = utime.ticks_ms()
                        # while utime.ticks_diff(utime.ticks_ms(), start) < timeout:
                        #     if _pub_task is None and task is None:
                        #         task = _thread.start_new_thread(_publishTimeout, (const.MQTT_DATA, packet,))
                        #         _pub_task = task
                        #     elif task is not None:
                        #         if _pub_task != task:
                        #             return
                        #     utime.sleep_ms(20)
                        # if task is not None:
                        #     task.exit()
                        i += 1
                    # NOTE(review): placed after the publish loop; the paste
                    # does not show whether it ran once per packet instead.
                    mqtt_client.check_msg()
                except Exception as ex:
                    log.error('MQTT error: {}', ex)
                    mqtt_close()
                log.info('MQTT published packets: {}', i)
        else:
            # An update is pending: drop the MQTT session.
            if config.mqtt_connected:
                mqtt_disconnect()


def connect():
    """Bring up the first network that connects, then fetch the asset info.

    Returns True once a network is up and an asset id is known (the MQTT
    topic is then set on const), otherwise False.
    """
    for net in const.NETWORK_PREFERENCES:
        log.info('Attempting to connect to network: {}', net)
        if net == 'lte' or net == 'nbiot':
            if lte.connect():
                # rtc.setTimezone(lte.timezone())
                config.network_type = const.NETWORK_TYPE_LTE
                break
        elif net == 'wifi':
            if wifi.connect():
                config.network_type = const.NETWORK_TYPE_WIFI
                break
        # NOTE(review): sleep assumed to sit inside the loop (pause before
        # trying the next network); it may have followed the loop instead.
        utime.sleep(1)

    if isconnected():
        result = None
        log.debug('{}', const.ASSET_URL)
        auth = MicroWebCli.AuthBasic(const.USERNAME, const.PASSWORD)
        try:
            result = MicroWebCli.JSONRequest(const.ASSET_URL, o='POST', auth=auth)
        except Exception as e:
            log.error('Asset request error: {}', e)
        if result is not None:
            config.write('asset_id', result['assetId'])
            config.write('lon', result['longitude'])
            config.write('lat', result['latitude'])
        # Read the id back from config so a stored value is still used when
        # the request above failed.
        const.ASSET_ID = config.read('asset_id')
        log.info('Asset ID: {}', const.ASSET_ID)
        if const.ASSET_ID is not None:
            const.MQTT_TOPIC = '%s/assets/%d' % (const.MQTT_SERVER, const.ASSET_ID)
            return True
    else:
        log.warning('Failed to connect')
    return False


def disconnect(keep_wifi=False):
    """Disconnect the active network; WiFi is kept when `keep_wifi` is true."""
    if (config.network_type == const.NETWORK_TYPE_WIFI and not keep_wifi):
        log.info('Connected over WiFi... disconnecting')
        wifi.disconnect()
    if (config.network_type == const.NETWORK_TYPE_LTE):
        log.info('Connected over LTE... disconnecting')
        lte.disconnect()
    config.network_type = None


def network_reconnect():
    """Disconnect, wait a second and connect again; return False on any error."""
    try:
        disconnect()
        utime.sleep(1)
        return connect()
    except Exception as ex:
        log.error('Reconnect error: {}', ex)
        return False


def isconnected():
    """Return True when either WiFi or LTE reports a connection."""
    return wifi.isconnected() or lte.isconnected()


def mqtt_close():
    """Close the MQTT client and mark the session as down."""
    mqtt_client.close()
    config.mqtt_connected = False


def mqtt_disconnect():
    """Disconnect the MQTT client and mark the session as down."""
    mqtt_client.disconnect()
    config.mqtt_connected = False


def temp_loop():
    """Thread body: queue a CPU-temperature sample every second."""
    data = {}
    while True:
        log.info('Current time: {}', utime.localtime())
        temp = machine.temperature()
        log.info('CPU temp: {} degF', temp)
        data['CPU Temperature'] = temp
        data['Current Time'] = utime.time()
        config.enqueue_msg(ujson.dumps(data))
        utime.sleep(1)


# configure the MQTT client
mqtt_client = MQTTClient(const.MODEL, const.MQTT_SERVER, port=const.MQTT_PORT)
mqtt_client.set_callback(sub_cb)

# These assignments rebind the imported class names to instances.
schedule = schedule()
wifi = WiFi(const.WIFI_EXT_ANT)
lte = lte(True, reconnect=network_reconnect)
rtc = rtc()

tz = config.read('timezone')
if tz is not None:
    rtc.setTimezone(tz['offset'])
    rtc.setDST(tz['dst'])
log.info('Timezone: {}', tz)

try:
    lora = NanoGateway(const.LORA_FREQ)
except Exception as ex:
    # Still best-effort, but no longer silent. While the NanoGateway import
    # above stays commented out this reports a NameError on every boot.
    log.warning('LoRa gateway not started: {}', ex)

publish_timeout = 5000
_thread.stack_size(10240)
_thread.start_new_thread(mqtt_loop, (publish_timeout,))
_thread.stack_size(8192)
_thread.start_new_thread(temp_loop, ())

wait = 1
while True:
    reconnect = False
    if not config.update:
        if isconnected():
            if not rtc.synced():
                rtc.update()
            if not config.mqtt_connected:
                try:
                    if not mqtt_client.connect():
                        log.info('MQTT connected')
                        for topic in const.MQTT_SUB:
                            mqtt_client.subscribe(const.MQTT_TOPIC.lower() + "/" + topic)
                        config.mqtt_connected = True
                    else:
                        log.info('MQTT not connected')
                        config.mqtt_connected = False
                except Exception as ex:
                    log.error("MQTT connect error: {}", ex)
                    reconnect = True
                    # if ex.args[0] is 202: #EAI_FAIL
        else:
            reconnect = True

        if reconnect:
            mqtt_close()
            if network_reconnect():
                wait = 1
            else:
                # Back off one second per failed attempt, capped at 30.
                if wait < 30:
                    wait += 1
    else:
        # Update requested: wait for mqtt_loop to drop the MQTT session.
        while config.mqtt_connected:
            utime.sleep_ms(10)
        # ota = WiFiOTA(const.OTA_WIFI_SSID, const.OTA_WIFI_PW, const.OTA_SERVER, const.OTA_PORT)
        ota = lteOTA(const.LTE_BAND, const.LTE_CID, const.LTE_APN, const.OTA_SERVER, const.OTA_PORT)
        if ota.connect():
            result = ota.update()
            # Compare by value: `is 0` tests object identity and only works
            # by accident for small integers.
            if result == 0:
                machine.reset()
            else:
                # NOTE(review): this else is assumed to belong to the result
                # check; it may have belonged to `if ota.connect()` instead.
                config.update = False

    msg_size = config.size_msg()
    log.debug('Queue size: {}', msg_size)
    if msg_size >= const.MAX_QUEUE_SIZE:
        log.warning('MQTT queue is full, restarting connection')
        mqtt_close()
    gc.collect()
    log.info('Free memory: {}', gc.mem_free())
    utime.sleep(wait)
2020-10-16T17-24-8_core_dump.py 2020-10-16T18-20-21_core_dump.py 2020-10-16T20-2-4_core_dump.py
2020-10-16T10-20-14_core_dump.py 2020-10-16T10-23-34_core_dump.py
-
@Xykon @Gijs, I have put those commands into my code and my system still crashes. Here is the most recent crash report it produced.
=============================================================== ==================== ESP32 CORE DUMP START ==================== ================== CURRENT THREAD REGISTERS =================== pc 0x40163f84 0x40163f84 <KISS_Find_channel_direct+768> lbeg 0x400933c0 1074344896 lend 0x400933ee 1074344942 lcount 0xffffffff 4294967295 sar 0x10 16 ps 0x60d20 396576 threadptr <unavailable> br <unavailable> scompare1 <unavailable> acclo <unavailable> acchi <unavailable> m0 <unavailable> m1 <unavailable> m2 <unavailable> m3 <unavailable> expstate <unavailable> f64r_lo <unavailable> f64r_hi <unavailable> f64s <unavailable> fcr <unavailable> fsr <unavailable> a0 0x4018a15c 1075355996 a1 0x3ffbaee0 1073458912 a2 0xb 11 a3 0x3f9400c0 1066664128 a4 0x0 0 a5 0x3fbc23c0 1069294528 a6 0xfffffff4 -12 a7 0x14 20 a8 0x8018402e -2145894354 a9 0x3ffbaec0 1073458880 a10 0x3f9400c0 1066664128 a11 0x0 0 a12 0x0 0 a13 0x0 0 a14 0x3ffeef64 1073672036 a15 0x4 4 ==================== CURRENT THREAD STACK ===================== #0 0x40163f84 in KISS_Find_channel_direct () #1 0x4018a15c in sys_check_timeouts () at /Users/ehlers/pycom/pycom-esp-idf/components/lwip/lwip/src/core/timeouts.c:354 #2 sys_timeouts_mbox_fetch (mbox=0x3ffd1980 <mbox>, msg=0x3ffbaf30) at /Users/ehlers/pycom/pycom-esp-idf/components/lwip/lwip/src/core/timeouts.c:433 #3 0x40182aab in tcpip_thread (arg=<optimized out>) at /Users/ehlers/pycom/pycom-esp-idf/components/lwip/lwip/src/api/tcpip.c:109 ======================== THREADS INFO ========================= Id Target Id Frame * 1 <main task> 0x40163f84 in KISS_Find_channel_direct () 2 process 1 err_to_errno (err=0 '\\000') at /Users/ehlers/pycom/pycom-esp-idf/components/lwip/lwip/src/api/err.c:71 3 process 2 0x402209a6 in esp_pm_impl_waiti () at /Users/ehlers/pycom/pycom-esp-idf/components/esp32/pm_esp32.c:492 4 process 3 0x402209a6 in esp_pm_impl_waiti () at /Users/ehlers/pycom/pycom-esp-idf/components/esp32/pm_esp32.c:492 5 process 4 0x40096b4a in vTaskDelay 
(xTicksToDelay=<optimized out>) at /Users/ehlers/pycom/pycom-esp-idf/components/freertos/tasks.c:1484 6 process 5 0x40096b4a in vTaskDelay (xTicksToDelay=<optimized out>) at /Users/ehlers/pycom/pycom-esp-idf/components/freertos/tasks.c:1484 7 process 6 0x40096b4a in vTaskDelay (xTicksToDelay=<optimized out>) at /Users/ehlers/pycom/pycom-esp-idf/components/freertos/tasks.c:1484 8 process 7 0x40096b4a in vTaskDelay (xTicksToDelay=<optimized out>) at /Users/ehlers/pycom/pycom-esp-idf/components/freertos/tasks.c:1484 9 process 8 0x40095d50 in xQueueGenericReceive (xQueue=0x3ffdfd98, pvBuffer=0x3ffe62e0, xTicksToWait=6000, xJustPeeking=0) at /Users/ehlers/pycom/pycom-esp-idf/components/freertos/queue.c:1592 10 process 9 0x40096b4a in vTaskDelay (xTicksToDelay=<optimized out>) at /Users/ehlers/pycom/pycom-esp-idf/components/freertos/tasks.c:1484 11 process 10 0x40097d31 in prvProcessTimerOrBlockTask (xListWasEmpty=1, xNextExpireTime=<optimized out>) at /Users/ehlers/pycom/pycom-esp-idf/components/freertos/timers.c:589 12 process 11 0x4008162c in esp_crosscore_int_send_yield (core_id=1) at /Users/ehlers/pycom/pycom-esp-idf/components/esp32/crosscore_int.c:112 13 process 12 0x40095d50 in xQueueGenericReceive (xQueue=0x3ffdce88, pvBuffer=0x3ffe23a0, xTicksToWait=4294967295, xJustPeeking=0) at /Users/ehlers/pycom/pycom-esp-idf/components/freertos/queue.c:1592 14 process 13 0x40095d50 in xQueueGenericReceive (xQueue=0x3ffe3e78, pvBuffer=0x3ffd52a4, xTicksToWait=4294967295, xJustPeeking=0) at /Users/ehlers/pycom/pycom-esp-idf/components/freertos/queue.c:1592 15 process 14 0x40095d50 in xQueueGenericReceive (xQueue=0x3ffb7bd8, pvBuffer=0x0, xTicksToWait=4294967295, xJustPeeking=0) at /Users/ehlers/pycom/pycom-esp-idf/components/freertos/queue.c:1592 16 process 15 0x40095d50 in xQueueGenericReceive (xQueue=0x3ffbd460, pvBuffer=0x3ffdac50, xTicksToWait=4294967295, xJustPeeking=0) at /Users/ehlers/pycom/pycom-esp-idf/components/freertos/queue.c:1592 17 process 16 0x4000bff0 in ?? 
() 18 process 17 0x40095d50 in xQueueGenericReceive (xQueue=0x3ffb760c, pvBuffer=0x0, xTicksToWait=4294967295, xJustPeeking=0) at /Users/ehlers/pycom/pycom-esp-idf/components/freertos/queue.c:1592 19 process 18 0x40095d50 in xQueueGenericReceive (xQueue=0x3ffc4040 <s_timer_semaphore_memory>, pvBuffer=0x0, xTicksToWait=4294967295, xJustPeeking=0) at /Users/ehlers/pycom/pycom-esp-idf/components/freertos/queue.c:1592 20 process 19 0x40095d50 in xQueueGenericReceive (xQueue=0x3ffc95c8 <mp_state_ctx+436>, pvBuffer=0x0, xTicksToWait=4294967295, xJustPeeking=0) at /Users/ehlers/pycom/pycom-esp-idf/components/freertos/queue.c:1592 21 process 20 0x40095d50 in xQueueGenericReceive (xQueue=0x3ffc95c8 <mp_state_ctx+436>, pvBuffer=0x0, xTicksToWait=4294967295, xJustPeeking=0) at /Users/ehlers/pycom/pycom-esp-idf/components/freertos/queue.c:1592 22 process 21 0x40095d50 in xQueueGenericReceive (xQueue=0x3ffc95c8 <mp_state_ctx+436>, pvBuffer=0x0, xTicksToWait=4294967295, xJustPeeking=0) at /Users/ehlers/pycom/pycom-esp-idf/components/freertos/queue.c:1592 23 process 22 0x4008162c in esp_crosscore_int_send_yield (core_id=1) at /Users/ehlers/pycom/pycom-esp-idf/components/esp32/crosscore_int.c:112 24 process 23 0x40095d50 in xQueueGenericReceive (xQueue=0x3ffc95c8 <mp_state_ctx+436>, pvBuffer=0x0, xTicksToWait=4294967295, xJustPeeking=0) at /Users/ehlers/pycom/pycom-esp-idf/components/freertos/queue.c:1592 25 process 24 0x40095d50 in xQueueGenericReceive (xQueue=0x3ffc95c8 <mp_state_ctx+436>, pvBuffer=0x0, xTicksToWait=4294967295, xJustPeeking=0) at /Users/ehlers/pycom/pycom-esp-idf/components/freertos/queue.c:1592 26 process 25 0x40095d50 in xQueueGenericReceive (xQueue=0x3ffc95c8 <mp_state_ctx+436>, pvBuffer=0x0, xTicksToWait=4294967295, xJustPeeking=0) at /Users/ehlers/pycom/pycom-esp-idf/components/freertos/queue.c:1592 27 process 26 0x40095d50 in xQueueGenericReceive (xQueue=0x3ffd54b8, pvBuffer=0x3ffd68b0, xTicksToWait=4294967295, xJustPeeking=0) at 
/Users/ehlers/pycom/pycom-esp-idf/components/freertos/queue.c:1592 28 process 27 0x4000bff0 in ?? () ======================= ALL MEMORY REGIONS ======================== Name Address Size Attrs .rtc.text 0x400c0000 0x64 R XA .rtc.data 0x50000000 0x81c RW A .rtc_noinit 0x5000081c 0x0 RW .rtc.force_slow 0x5000081c 0x0 RW .iram0.vectors 0x40080000 0x400 R XA .iram0.text 0x40080400 0x1886c R XA .dram0.data 0x3ffbdb60 0x4960 RW A .noinit 0x3ffc24c0 0x0 RW .flash.rodata 0x3f400020 0x5e864 RW A .flash.text 0x400d0018 0x153c00 R XA .coredump.tasks.data 0x3ffb9fb4 0x16c RW .coredump.tasks.data 0x3ffbae20 0x1a4 RW .coredump.tasks.data 0x3fff2638 0x16c RW .coredump.tasks.data 0x3fff4a20 0x584 RW .coredump.tasks.data 0x3ffbbeb4 0x16c RW .coredump.tasks.data 0x3ffbbd10 0x19c RW .coredump.tasks.data 0x3ffbb940 0x16c RW .coredump.tasks.data 0x3ffbb7a0 0x198 RW .coredump.tasks.data 0x3ffdfb04 0x16c RW .coredump.tasks.data 0x3ffd49b0 0x170 RW .coredump.tasks.data 0x3ffb4d38 0x16c RW .coredump.tasks.data 0x3ffb4b40 0x1f0 RW .coredump.tasks.data 0x3ffdcd18 0x16c RW .coredump.tasks.data 0x3ffdcbb0 0x160 RW .coredump.tasks.data 0x3ffc45ec 0x16c RW .coredump.tasks.data 0x3ffd9870 0x894 RW .coredump.tasks.data 0x3ffe556c 0x16c RW .coredump.tasks.data 0x3ffe61e0 0x8f8 RW .coredump.tasks.data 0x3ffb5e74 0x16c RW .coredump.tasks.data 0x3ffd3d10 0x20c RW .coredump.tasks.data 0x3ffbcb20 0x16c RW .coredump.tasks.data 0x3ffbc9c0 0x158 RW .coredump.tasks.data 0x3ffb5aac 0x16c RW .coredump.tasks.data 0x3ffb58f0 0x1b4 RW .coredump.tasks.data 0x3ffde570 0x16c RW .coredump.tasks.data 0x3ffe2240 0x218 RW .coredump.tasks.data 0x3ffd533c 0x16c RW .coredump.tasks.data 0x3ffd5170 0x1c4 RW .coredump.tasks.data 0x3ffb7c30 0x16c RW .coredump.tasks.data 0x3ffb9cb0 0x188 RW .coredump.tasks.data 0x3ffbb0a4 0x16c RW .coredump.tasks.data 0x3ffdab50 0x1b8 RW .coredump.tasks.data 0x3ffddfe4 0x16c RW .coredump.tasks.data 0x3ffdde40 0x19c RW .coredump.tasks.data 0x3ffb7a68 0x16c RW .coredump.tasks.data 0x3ffb78e0 
0x180 RW .coredump.tasks.data 0x3ffb727c 0x16c RW .coredump.tasks.data 0x3ffb70e0 0x194 RW .coredump.tasks.data 0x3ffe95c8 0x16c RW .coredump.tasks.data 0x3ffea6e0 0x454 RW .coredump.tasks.data 0x3ffec0b0 0x16c RW .coredump.tasks.data 0x3ffed1c0 0x45c RW .coredump.tasks.data 0x3ffe8054 0x16c RW .coredump.tasks.data 0x3ffe9170 0x450 RW .coredump.tasks.data 0x3ffeab3c 0x16c RW .coredump.tasks.data 0x3ffebc30 0x478 RW .coredump.tasks.data 0x3ffe6ae0 0x16c RW .coredump.tasks.data 0x3ffe7bf0 0x45c RW .coredump.tasks.data 0x3fff4fac 0x16c RW .coredump.tasks.data 0x3fff6d50 0x3c8 RW .coredump.tasks.data 0x3ffed624 0x16c RW .coredump.tasks.data 0x3ffee740 0x450 RW .coredump.tasks.data 0x3ffd6954 0x16c RW .coredump.tasks.data 0x3ffd6780 0x1cc RW .coredump.tasks.data 0x3ffbb2c4 0x16c RW .coredump.tasks.data 0x3ffdbb70 0x180 RW ===================== ESP32 CORE DUMP END ===================== =============================================================== Done!
After this crash, it recovered and ran for about an hour before it crashed again, but this time it just hung while trying to write out the core dump. This is the last thing I got from it.
Guru Meditation Error: Core 1 panic'ed (InstrFetchProhibited). Exception was unhandled. Core 1 register dump: PC : 0xffffffff PS : 0x00060c30 A0 : 0x400f801c A1 : 0x3fff6e00 A2 : 0x3fff6e60 A3 : 0x3f970420 A4 : 0x00000003 A5 : 0x00000001 A6 : 0x00000000 A7 : 0x3ffb4cb0 A8 : 0xffffffff A9 : 0x3f40e200 A10 : 0x3fff6e60 A11 : 0x3f45babb A12 : 0x00000001 A13 : 0x00000000 A14 : 0x00000000 A15 : 0x00000000 SAR : 0x0000001e EXCCAUSE: 0x00000014 EXCVADDR: 0xfffffffc LBEG : 0x40093b64 LEND : 0x40093b6f LCOUNT : 0x00000000 ELF file SHA256: ```
-
@ssummers said in FiPy crashing using WiFi:
@Gijs, I am getting an attribute error saying "'module' object has no attribute 'lte_modem_on_boot'". I am currently running 1.20.2.r0.
The correct command is
pycom.lte_modem_en_on_boot(False)
These settings are persistent and the commands need to be executed only once unless the config partition is erased/reset.
-
@Gijs, I am getting an attribute error saying "'module' object has no attribute 'lte_modem_on_boot'". I am currently running 1.20.2.r0.
-
-
Can you try to
import pycom pycom.wifi_on_boot(False) pycom.pybytes_on_boot(False) pycom.lte_modem_on_boot(False)
(I thought there were some more but I cannot think of any at the moment)
Gijs
-
@Cees_Meijer, thank you for the advice. I already disable the heartbeat on boot but I do not use deepsleep. My system has to run 24/7.
-
This could be unrelated to your problem, but I have also been struggling with FiPy crashes lately, and found it to be a power problem in combination with the LED heartbeat. The FiPy crashed after some time (1 to 12 hours), right after waking from deepsleep. I resolved it by disabling the heartbeat in boot.py. Multiple systems have been running flawlessly ever since.
-
To add to this topic, I have switched from WiFi to LTE for internet and so far the system has not crashed since my first post. Also, could the RTC have something to do with this? I am using the default sync time of 1 hour which is about how long my system would run on WiFi before crashing.
firmware version is 1.20.2.r0