diff --git a/sonic-xcvrd/tests/test_xcvrd.py b/sonic-xcvrd/tests/test_xcvrd.py
index 568b19dd2..c585f9af9 100644
--- a/sonic-xcvrd/tests/test_xcvrd.py
+++ b/sonic-xcvrd/tests/test_xcvrd.py
@@ -1892,6 +1892,7 @@ def test_DomInfoUpdateTask_task_worker(self, mock_post_pm_info, mock_update_stat
         task.task_stopping_event.wait = MagicMock(side_effect=[False, True])
         task.get_dom_polling_from_config_db = MagicMock(return_value='enabled')
         task.is_port_in_cmis_terminal_state = MagicMock(return_value=False)
+        task.check_transceiver_temperature = MagicMock()
         mock_detect_error.return_value = True
         task.task_worker()
         assert task.port_mapping.logical_port_list.count('Ethernet0')
@@ -1911,6 +1912,52 @@ def test_DomInfoUpdateTask_task_worker(self, mock_post_pm_info, mock_update_stat
         assert mock_update_status_hw.call_count == 1
         assert mock_post_pm_info.call_count == 1
 
+    @patch('xcvrd.xcvrd_utilities.port_event_helper.PortMapping.logical_port_name_to_physical_port_list', MagicMock(return_value=[0]))
+    @patch('xcvrd.xcvrd._wrapper_get_presence', MagicMock(return_value=True))
+    @pytest.mark.parametrize("dom_info_cache, dom_th_info, expected", [
+        ({0: {'temperature': '75'}},
+         (('temphighalarm', '80'),
+          ('templowalarm', '0'),
+          ('temphighwarning', '70'),
+          ('templowwarning', '0')),
+         3), # TEMP_HIGH_WARNING = 3
+        ({0: {'temperature': '85'}},
+         (('temphighalarm', '80'),
+          ('templowalarm', '0'),
+          ('temphighwarning', '70'),
+          ('templowwarning', '10')),
+         1), # TEMP_HIGH_ALARM = 1
+        ({0: {'temperature': '5'}},
+         (('temphighalarm', '80'),
+          ('templowalarm', '0'),
+          ('temphighwarning', '70'),
+          ('templowwarning', '10')),
+         4), # TEMP_LOW_WARNING = 4
+        ({0: {'temperature': 'N/A'}},
+         (('temphighalarm', '80'),
+          ('templowalarm', '0'),
+          ('temphighwarning', '70'),
+          ('templowwarning', '10')),
+         0), # TEMP_NORMAL = 0 (reading unavailable)
+        ({0: {}},
+         (('temphighalarm', '80'),
+          ('templowalarm', '0'),
+          ('temphighwarning', '70'),
+          ('templowwarning', '10')),
+         0), # TEMP_NORMAL = 0 (reading missing)
+    ])
+    def test_check_transceiver_temperature(self, dom_info_cache, dom_th_info, expected):
+        port_mapping = PortMapping()
+        stop_event = threading.Event()
+        mock_cmis_manager = MagicMock()
+        task = DomInfoUpdateTask(DEFAULT_NAMESPACE, port_mapping, stop_event, mock_cmis_manager)
+        logical_port_name = 'Ethernet0'
+        temperature_status = {}
+        dom_th_tbl = MagicMock()
+        dom_th_tbl.get = MagicMock(return_value=(True, dom_th_info))
+        task.check_transceiver_temperature(logical_port_name, dom_th_tbl, dom_info_cache, temperature_status)
+        assert temperature_status[0] == expected
+
     @patch('xcvrd.xcvrd._wrapper_get_presence', MagicMock(return_value=False))
     @patch('xcvrd.xcvrd.XcvrTableHelper')
     @patch('xcvrd.xcvrd.delete_port_from_status_table_hw')
diff --git a/sonic-xcvrd/xcvrd/xcvrd.py b/sonic-xcvrd/xcvrd/xcvrd.py
index aaecb27ed..a9e196ff2 100644
--- a/sonic-xcvrd/xcvrd/xcvrd.py
+++ b/sonic-xcvrd/xcvrd/xcvrd.py
@@ -1683,6 +1683,7 @@ def task_worker(self):
         transceiver_status_cache = {}
         pm_info_cache = {}
         sel, asic_context = port_event_helper.subscribe_port_config_change(self.namespaces)
+        temperature_status = {}
 
         # Start loop to update dom info in DB periodically
         while not self.task_stopping_event.wait(DOM_INFO_UPDATE_PERIOD_SECS):
@@ -1735,6 +1736,8 @@ def task_worker(self):
                     helper_logger.log_warning("Got exception {} while processing pm info for port {}, ignored".format(repr(e), logical_port_name))
                     continue
 
+                self.check_transceiver_temperature(logical_port_name, self.xcvr_table_helper.get_dom_threshold_tbl(asic_index), dom_info_cache, temperature_status)
+
         helper_logger.log_info("Stop DOM monitoring loop")
 
     def run(self):
@@ -1758,6 +1761,72 @@ def join(self):
         if self.exc:
             raise self.exc
 
+    def check_transceiver_temperature(self, logical_port_name, th_table, dom_info_cache, temperature_status):
+        """
+        Compare cached transceiver temperatures against their thresholds and log the status.
+
+        Args:
+            logical_port_name: logical port whose physical ports are checked
+            th_table: threshold table; get(name) returns (found, field/value pairs)
+            dom_info_cache: dict of physical port index to DOM info dict
+            temperature_status: dict of physical port index to last TEMP_* status, updated in place
+        """
+        TEMP_NORMAL = 0
+        TEMP_HIGH_ALARM = 1
+        TEMP_LOW_ALARM = 2
+        TEMP_HIGH_WARNING = 3
+        TEMP_LOW_WARNING = 4
+
+        TEMP_ERROR_TO_DESCRIPTION_DICT = {
+            TEMP_NORMAL: "temperature normal",
+            TEMP_HIGH_ALARM: "temperature high alarm",
+            TEMP_LOW_ALARM: "temperature low alarm",
+            TEMP_HIGH_WARNING: "temperature high warning",
+            TEMP_LOW_WARNING: "temperature low warning"
+        }
+
+        for physical_port, physical_port_name in get_physical_port_name_dict(logical_port_name, self.port_mapping).items():
+            orig_temp_status = temperature_status.setdefault(physical_port, TEMP_NORMAL)
+
+            dom_info_dict = dom_info_cache.get(physical_port)
+            presence, threshold = th_table.get(physical_port_name)
+            if dom_info_dict is None or not presence:
+                # Nothing to compare against, so forget any previously recorded status
+                temperature_status[physical_port] = TEMP_NORMAL
+                continue
+
+            dom_th_info_dict = dict(threshold)
+            try:
+                temperature = float(dom_info_dict.get("temperature"))
+                temphighalarm = float(dom_th_info_dict.get("temphighalarm"))
+                templowalarm = float(dom_th_info_dict.get("templowalarm"))
+                temphighwarning = float(dom_th_info_dict.get("temphighwarning"))
+                templowwarning = float(dom_th_info_dict.get("templowwarning"))
+            except (TypeError, ValueError):
+                # A missing field (None) or a non-numeric one such as 'N/A' cannot be
+                # compared; report normal rather than raising out of the DOM update loop
+                new_temp_status = TEMP_NORMAL
+            else:
+                if temperature > temphighalarm:
+                    new_temp_status = TEMP_HIGH_ALARM
+                elif temperature > temphighwarning:
+                    new_temp_status = TEMP_HIGH_WARNING
+                elif temperature < templowalarm:
+                    new_temp_status = TEMP_LOW_ALARM
+                elif temperature < templowwarning:
+                    new_temp_status = TEMP_LOW_WARNING
+                else:
+                    new_temp_status = TEMP_NORMAL
+
+            if orig_temp_status != new_temp_status:
+                temperature_status[physical_port] = new_temp_status
+                helper_logger.log_notice("{}: temperature status changed from {} to {}".format(
+                    physical_port_name,
+                    TEMP_ERROR_TO_DESCRIPTION_DICT[orig_temp_status],
+                    TEMP_ERROR_TO_DESCRIPTION_DICT[new_temp_status]))
+            elif new_temp_status != TEMP_NORMAL:
+                helper_logger.log_notice("{}: {}".format(physical_port_name, TEMP_ERROR_TO_DESCRIPTION_DICT[new_temp_status]))
+
     def on_port_config_change(self, port_change_event):
         if port_change_event.event_type == port_event_helper.PortChangeEvent.PORT_REMOVE:
             self.on_remove_logical_port(port_change_event)