From 1c845e0ff87676d93b46ada90f6973122b0fabf3 Mon Sep 17 00:00:00 2001 From: Kevin Ahrendt Date: Wed, 16 Oct 2024 18:47:11 -0400 Subject: [PATCH] [speaker, i2s_audio] I2S Speaker implementation using a ring buffer (#7605) --- CODEOWNERS | 1 + esphome/components/audio/__init__.py | 9 + esphome/components/audio/audio.h | 21 + .../components/i2s_audio/speaker/__init__.py | 3 +- .../i2s_audio/speaker/i2s_audio_speaker.cpp | 587 ++++++++++++------ .../i2s_audio/speaker/i2s_audio_speaker.h | 111 +++- esphome/components/speaker/__init__.py | 25 +- esphome/components/speaker/automation.h | 5 + esphome/components/speaker/speaker.h | 37 +- tests/components/speaker/test.esp32-ard.yaml | 1 + .../components/speaker/test.esp32-c3-ard.yaml | 1 + .../components/speaker/test.esp32-c3-idf.yaml | 1 + tests/components/speaker/test.esp32-idf.yaml | 1 + 13 files changed, 569 insertions(+), 234 deletions(-) create mode 100644 esphome/components/audio/__init__.py create mode 100644 esphome/components/audio/audio.h diff --git a/CODEOWNERS b/CODEOWNERS index d6104c9345fc..7ac6aa2f76ed 100644 --- a/CODEOWNERS +++ b/CODEOWNERS @@ -48,6 +48,7 @@ esphome/components/at581x/* @X-Ryl669 esphome/components/atc_mithermometer/* @ahpohl esphome/components/atm90e26/* @danieltwagner esphome/components/atm90e32/* @circuitsetup @descipher +esphome/components/audio/* @kahrendt esphome/components/audio_dac/* @kbx81 esphome/components/b_parasite/* @rbaron esphome/components/ballu/* @bazuchan diff --git a/esphome/components/audio/__init__.py b/esphome/components/audio/__init__.py new file mode 100644 index 000000000000..4ffdc401dcd3 --- /dev/null +++ b/esphome/components/audio/__init__.py @@ -0,0 +1,9 @@ +import esphome.codegen as cg +import esphome.config_validation as cv + +CODEOWNERS = ["@kahrendt"] +audio_ns = cg.esphome_ns.namespace("audio") + +CONFIG_SCHEMA = cv.All( + cv.Schema({}), +) diff --git a/esphome/components/audio/audio.h b/esphome/components/audio/audio.h new file mode 100644 index 
000000000000..b0968dc8dabc --- /dev/null +++ b/esphome/components/audio/audio.h @@ -0,0 +1,21 @@ +#pragma once + +#include +#include + +namespace esphome { +namespace audio { + +struct AudioStreamInfo { + bool operator==(const AudioStreamInfo &rhs) const { + return (channels == rhs.channels) && (bits_per_sample == rhs.bits_per_sample) && (sample_rate == rhs.sample_rate); + } + bool operator!=(const AudioStreamInfo &rhs) const { return !operator==(rhs); } + size_t get_bytes_per_sample() const { return bits_per_sample / 8; } + uint8_t channels = 1; + uint8_t bits_per_sample = 16; + uint32_t sample_rate = 16000; +}; + +} // namespace audio +} // namespace esphome diff --git a/esphome/components/i2s_audio/speaker/__init__.py b/esphome/components/i2s_audio/speaker/__init__.py index bba886b39b9b..9fdaced64c06 100644 --- a/esphome/components/i2s_audio/speaker/__init__.py +++ b/esphome/components/i2s_audio/speaker/__init__.py @@ -16,6 +16,7 @@ register_i2s_audio_component, ) +AUTO_LOAD = ["audio"] CODEOWNERS = ["@jesserockz"] DEPENDENCIES = ["i2s_audio"] @@ -72,7 +73,7 @@ def validate_esp32_variant(config): .extend( { cv.Optional( - CONF_TIMEOUT, default="100ms" + CONF_TIMEOUT, default="500ms" ): cv.positive_time_period_milliseconds, } ) diff --git a/esphome/components/i2s_audio/speaker/i2s_audio_speaker.cpp b/esphome/components/i2s_audio/speaker/i2s_audio_speaker.cpp index 97c1d86c363f..4fc489d0a313 100644 --- a/esphome/components/i2s_audio/speaker/i2s_audio_speaker.cpp +++ b/esphome/components/i2s_audio/speaker/i2s_audio_speaker.cpp @@ -4,6 +4,8 @@ #include +#include "esphome/components/audio/audio.h" + #include "esphome/core/application.h" #include "esphome/core/hal.h" #include "esphome/core/log.h" @@ -11,186 +13,296 @@ namespace esphome { namespace i2s_audio { -static const size_t BUFFER_COUNT = 20; +static const size_t DMA_BUFFER_SIZE = 512; +static const size_t DMA_BUFFERS_COUNT = 4; +static const size_t FRAMES_IN_ALL_DMA_BUFFERS = DMA_BUFFER_SIZE * 
DMA_BUFFERS_COUNT; +static const size_t RING_BUFFER_SAMPLES = 8192; +static const size_t TASK_DELAY_MS = 10; +static const size_t TASK_STACK_SIZE = 4096; +static const ssize_t TASK_PRIORITY = 23; static const char *const TAG = "i2s_audio.speaker"; +enum SpeakerEventGroupBits : uint32_t { + COMMAND_START = (1 << 0), // Starts the main task purpose + COMMAND_STOP = (1 << 1), // stops the main task + COMMAND_STOP_GRACEFULLY = (1 << 2), // Stops the task once all data has been written + MESSAGE_RING_BUFFER_AVAILABLE_TO_WRITE = (1 << 5), // Locks the ring buffer when not set + STATE_STARTING = (1 << 10), + STATE_RUNNING = (1 << 11), + STATE_STOPPING = (1 << 12), + STATE_STOPPED = (1 << 13), + ERR_TASK_FAILED_TO_START = (1 << 15), + ERR_ESP_INVALID_STATE = (1 << 16), + ERR_ESP_INVALID_ARG = (1 << 17), + ERR_ESP_INVALID_SIZE = (1 << 18), + ERR_ESP_NO_MEM = (1 << 19), + ERR_ESP_FAIL = (1 << 20), + ALL_ERR_ESP_BITS = ERR_ESP_INVALID_STATE | ERR_ESP_INVALID_ARG | ERR_ESP_INVALID_SIZE | ERR_ESP_NO_MEM | ERR_ESP_FAIL, + ALL_BITS = 0x00FFFFFF, // All valid FreeRTOS event group bits +}; + +// Translates a SpeakerEventGroupBits ERR_ESP bit to the corresponding esp_err_t +static esp_err_t err_bit_to_esp_err(uint32_t bit) { + switch (bit) { + case SpeakerEventGroupBits::ERR_ESP_INVALID_STATE: + return ESP_ERR_INVALID_STATE; + case SpeakerEventGroupBits::ERR_ESP_INVALID_ARG: + return ESP_ERR_INVALID_ARG; + case SpeakerEventGroupBits::ERR_ESP_INVALID_SIZE: + return ESP_ERR_INVALID_SIZE; + case SpeakerEventGroupBits::ERR_ESP_NO_MEM: + return ESP_ERR_NO_MEM; + default: + return ESP_FAIL; + } +} + +/// @brief Multiplies the input array of Q15 numbers by a Q15 constant factor +/// +/// Based on `dsps_mulc_s16_ansi` from the esp-dsp library: +/// https://github.com/espressif/esp-dsp/blob/master/modules/math/mulc/fixed/dsps_mulc_s16_ansi.c +/// (accessed on 2024-09-30). 
+/// @param input Array of Q15 numbers +/// @param output Array of Q15 numbers +/// @param len Length of array +/// @param c Q15 constant factor +static void q15_multiplication(const int16_t *input, int16_t *output, size_t len, int16_t c) { + for (int i = 0; i < len; i++) { + int32_t acc = (int32_t) input[i] * (int32_t) c; + output[i] = (int16_t) (acc >> 15); + } +} + +// Lists the Q15 fixed point scaling factor for volume reduction. +// Has 100 values representing silence and a reduction [49, 48.5, ... 0.5, 0] dB. +// dB to PCM scaling factor formula: floating_point_scale_factor = 2^(-db/6.014) +// float to Q15 fixed point formula: q15_scale_factor = floating_point_scale_factor * 2^(15) +static const std::vector Q15_VOLUME_SCALING_FACTORS = { + 0, 116, 122, 130, 137, 146, 154, 163, 173, 183, 194, 206, 218, 231, 244, + 259, 274, 291, 308, 326, 345, 366, 388, 411, 435, 461, 488, 517, 548, 580, + 615, 651, 690, 731, 774, 820, 868, 920, 974, 1032, 1094, 1158, 1227, 1300, 1377, + 1459, 1545, 1637, 1734, 1837, 1946, 2061, 2184, 2313, 2450, 2596, 2750, 2913, 3085, 3269, + 3462, 3668, 3885, 4116, 4360, 4619, 4893, 5183, 5490, 5816, 6161, 6527, 6914, 7324, 7758, + 8218, 8706, 9222, 9770, 10349, 10963, 11613, 12302, 13032, 13805, 14624, 15491, 16410, 17384, 18415, + 19508, 20665, 21891, 23189, 24565, 26022, 27566, 29201, 30933, 32767}; + void I2SAudioSpeaker::setup() { ESP_LOGCONFIG(TAG, "Setting up I2S Audio Speaker..."); - this->buffer_queue_ = xQueueCreate(BUFFER_COUNT, sizeof(DataEvent)); - if (this->buffer_queue_ == nullptr) { - ESP_LOGE(TAG, "Failed to create buffer queue"); - this->mark_failed(); - return; + if (this->event_group_ == nullptr) { + this->event_group_ = xEventGroupCreate(); } - this->event_queue_ = xQueueCreate(BUFFER_COUNT, sizeof(TaskEvent)); - if (this->event_queue_ == nullptr) { - ESP_LOGE(TAG, "Failed to create event queue"); + if (this->event_group_ == nullptr) { + ESP_LOGE(TAG, "Failed to create event group"); this->mark_failed(); return; } } 
-void I2SAudioSpeaker::start() { - if (this->is_failed()) { - ESP_LOGE(TAG, "Cannot start audio, speaker failed to setup"); - return; +void I2SAudioSpeaker::loop() { + uint32_t event_group_bits = xEventGroupGetBits(this->event_group_); + + if (event_group_bits & SpeakerEventGroupBits::ERR_TASK_FAILED_TO_START) { + this->status_set_error("Failed to start speaker task"); } - if (this->task_created_) { - ESP_LOGW(TAG, "Called start while task has been already created."); - return; + + if (event_group_bits & SpeakerEventGroupBits::ALL_ERR_ESP_BITS) { + uint32_t error_bits = event_group_bits & SpeakerEventGroupBits::ALL_ERR_ESP_BITS; + ESP_LOGW(TAG, "Error writing to I2S: %s", esp_err_to_name(err_bit_to_esp_err(error_bits))); + this->status_set_warning(); } - this->state_ = speaker::STATE_STARTING; -} -void I2SAudioSpeaker::start_() { - if (this->task_created_) { - return; + + if (event_group_bits & SpeakerEventGroupBits::STATE_STARTING) { + ESP_LOGD(TAG, "Starting Speaker"); + this->state_ = speaker::STATE_STARTING; + xEventGroupClearBits(this->event_group_, SpeakerEventGroupBits::STATE_STARTING); } - if (!this->parent_->try_lock()) { - return; // Waiting for another i2s component to return lock + if (event_group_bits & SpeakerEventGroupBits::STATE_RUNNING) { + ESP_LOGD(TAG, "Started Speaker"); + this->state_ = speaker::STATE_RUNNING; + xEventGroupClearBits(this->event_group_, SpeakerEventGroupBits::STATE_RUNNING); + this->status_clear_warning(); + this->status_clear_error(); } + if (event_group_bits & SpeakerEventGroupBits::STATE_STOPPING) { + ESP_LOGD(TAG, "Stopping Speaker"); + this->state_ = speaker::STATE_STOPPING; + xEventGroupClearBits(this->event_group_, SpeakerEventGroupBits::STATE_STOPPING); + } + if (event_group_bits & SpeakerEventGroupBits::STATE_STOPPED) { + if (!this->task_created_) { + ESP_LOGD(TAG, "Stopped Speaker"); + this->state_ = speaker::STATE_STOPPED; + xEventGroupClearBits(this->event_group_, SpeakerEventGroupBits::ALL_BITS); + 
this->speaker_task_handle_ = nullptr; + } + } +} - xTaskCreate(I2SAudioSpeaker::player_task, "speaker_task", 8192, (void *) this, 1, &this->player_task_handle_); - this->task_created_ = true; +void I2SAudioSpeaker::set_volume(float volume) { + this->volume_ = volume; + ssize_t decibel_index = remap(volume, 0.0f, 1.0f, 0, Q15_VOLUME_SCALING_FACTORS.size() - 1); + this->q15_volume_factor_ = Q15_VOLUME_SCALING_FACTORS[decibel_index]; } -template const uint8_t *convert_data_format(const a *from, b *to, size_t &bytes, bool repeat) { - if (sizeof(a) == sizeof(b) && !repeat) { - return reinterpret_cast(from); +size_t I2SAudioSpeaker::play(const uint8_t *data, size_t length, TickType_t ticks_to_wait) { + if (this->is_failed()) { + ESP_LOGE(TAG, "Cannot play audio, speaker failed to setup"); + return 0; } - const b *result = to; - for (size_t i = 0; i < bytes; i += sizeof(a)) { - b value = static_cast(*from++) << (sizeof(b) - sizeof(a)) * 8; - *to++ = value; - if (repeat) - *to++ = value; + if (this->state_ != speaker::STATE_RUNNING && this->state_ != speaker::STATE_STARTING) { + this->start(); } - bytes *= (sizeof(b) / sizeof(a)) * (repeat ? 
2 : 1); // NOLINT - return reinterpret_cast(result); -} -void I2SAudioSpeaker::player_task(void *params) { - I2SAudioSpeaker *this_speaker = (I2SAudioSpeaker *) params; + // Wait for the ring buffer to be available + uint32_t event_bits = + xEventGroupWaitBits(this->event_group_, SpeakerEventGroupBits::MESSAGE_RING_BUFFER_AVAILABLE_TO_WRITE, pdFALSE, + pdFALSE, pdMS_TO_TICKS(TASK_DELAY_MS)); - TaskEvent event; - event.type = TaskEventType::STARTING; - xQueueSend(this_speaker->event_queue_, &event, portMAX_DELAY); + if (event_bits & SpeakerEventGroupBits::MESSAGE_RING_BUFFER_AVAILABLE_TO_WRITE) { + // Ring buffer is available to write - i2s_driver_config_t config = { - .mode = (i2s_mode_t) (this_speaker->i2s_mode_ | I2S_MODE_TX), - .sample_rate = this_speaker->sample_rate_, - .bits_per_sample = this_speaker->bits_per_sample_, - .channel_format = this_speaker->channel_, - .communication_format = this_speaker->i2s_comm_fmt_, - .intr_alloc_flags = ESP_INTR_FLAG_LEVEL1, - .dma_buf_count = 8, - .dma_buf_len = 256, - .use_apll = this_speaker->use_apll_, - .tx_desc_auto_clear = true, - .fixed_mclk = 0, - .mclk_multiple = I2S_MCLK_MULTIPLE_256, - .bits_per_chan = this_speaker->bits_per_channel_, - }; -#if SOC_I2S_SUPPORTS_DAC - if (this_speaker->internal_dac_mode_ != I2S_DAC_CHANNEL_DISABLE) { - config.mode = (i2s_mode_t) (config.mode | I2S_MODE_DAC_BUILT_IN); + // Lock the ring buffer, write to it, then unlock it + xEventGroupClearBits(this->event_group_, SpeakerEventGroupBits::MESSAGE_RING_BUFFER_AVAILABLE_TO_WRITE); + size_t bytes_written = this->audio_ring_buffer_->write_without_replacement((void *) data, length, ticks_to_wait); + xEventGroupSetBits(this->event_group_, SpeakerEventGroupBits::MESSAGE_RING_BUFFER_AVAILABLE_TO_WRITE); + + return bytes_written; } -#endif - esp_err_t err = i2s_driver_install(this_speaker->parent_->get_port(), &config, 0, nullptr); - if (err != ESP_OK) { - event.type = TaskEventType::WARNING; - event.err = err; - 
xQueueSend(this_speaker->event_queue_, &event, 0); - event.type = TaskEventType::STOPPED; - xQueueSend(this_speaker->event_queue_, &event, 0); - while (true) { - delay(10); - } + return 0; +} + +bool I2SAudioSpeaker::has_buffered_data() const { + if (this->audio_ring_buffer_ != nullptr) { + return this->audio_ring_buffer_->available() > 0; } + return false; +} -#if SOC_I2S_SUPPORTS_DAC - if (this_speaker->internal_dac_mode_ == I2S_DAC_CHANNEL_DISABLE) { -#endif - i2s_pin_config_t pin_config = this_speaker->parent_->get_pin_config(); - pin_config.data_out_num = this_speaker->dout_pin_; +void I2SAudioSpeaker::speaker_task(void *params) { + I2SAudioSpeaker *this_speaker = (I2SAudioSpeaker *) params; + uint32_t event_group_bits = + xEventGroupWaitBits(this_speaker->event_group_, + SpeakerEventGroupBits::COMMAND_START | SpeakerEventGroupBits::COMMAND_STOP | + SpeakerEventGroupBits::COMMAND_STOP_GRACEFULLY, // Bit message to read + pdTRUE, // Clear the bits on exit + pdFALSE, // Don't wait for all the bits, + portMAX_DELAY); // Block indefinitely until a bit is set + + if (event_group_bits & (SpeakerEventGroupBits::COMMAND_STOP | SpeakerEventGroupBits::COMMAND_STOP_GRACEFULLY)) { + // Received a stop signal before the task was requested to start + this_speaker->delete_task_(0); + } - i2s_set_pin(this_speaker->parent_->get_port(), &pin_config); -#if SOC_I2S_SUPPORTS_DAC + xEventGroupSetBits(this_speaker->event_group_, SpeakerEventGroupBits::STATE_STARTING); + + audio::AudioStreamInfo audio_stream_info = this_speaker->audio_stream_info_; + const ssize_t bytes_per_sample = audio_stream_info.get_bytes_per_sample(); + const uint8_t number_of_channels = audio_stream_info.channels; + + const size_t dma_buffers_size = FRAMES_IN_ALL_DMA_BUFFERS * bytes_per_sample * number_of_channels; + + if (this_speaker->send_esp_err_to_event_group_( + this_speaker->allocate_buffers_(dma_buffers_size, RING_BUFFER_SAMPLES * bytes_per_sample))) { + // Failed to allocate buffers + 
xEventGroupSetBits(this_speaker->event_group_, SpeakerEventGroupBits::ERR_ESP_NO_MEM); + this_speaker->delete_task_(dma_buffers_size); + } + + if (this_speaker->send_esp_err_to_event_group_(this_speaker->start_i2s_driver_())) { + // Failed to start I2S driver + this_speaker->delete_task_(dma_buffers_size); } else { - i2s_set_dac_mode(this_speaker->internal_dac_mode_); + // Ring buffer is allocated, so indicate it can be written to + xEventGroupSetBits(this_speaker->event_group_, SpeakerEventGroupBits::MESSAGE_RING_BUFFER_AVAILABLE_TO_WRITE); } -#endif - DataEvent data_event; + if (!this_speaker->send_esp_err_to_event_group_(this_speaker->reconfigure_i2s_stream_info_(audio_stream_info))) { + // Successfully set the I2S stream info, ready to write audio data to the I2S port - event.type = TaskEventType::STARTED; - xQueueSend(this_speaker->event_queue_, &event, portMAX_DELAY); + xEventGroupSetBits(this_speaker->event_group_, SpeakerEventGroupBits::STATE_RUNNING); - int32_t buffer[BUFFER_SIZE]; + bool stop_gracefully = false; + uint32_t last_data_received_time = millis(); - while (true) { - if (xQueueReceive(this_speaker->buffer_queue_, &data_event, this_speaker->timeout_ / portTICK_PERIOD_MS) != - pdTRUE) { - break; // End of audio from main thread - } - if (data_event.stop) { - // Stop signal from main thread - xQueueReset(this_speaker->buffer_queue_); // Flush queue - break; - } + while ((millis() - last_data_received_time) <= this_speaker->timeout_) { + event_group_bits = xEventGroupGetBits(this_speaker->event_group_); - const uint8_t *data = data_event.data; - size_t remaining = data_event.len; - switch (this_speaker->bits_per_sample_) { - case I2S_BITS_PER_SAMPLE_8BIT: - case I2S_BITS_PER_SAMPLE_16BIT: { - data = convert_data_format(reinterpret_cast(data), reinterpret_cast(buffer), - remaining, this_speaker->channel_ == I2S_CHANNEL_FMT_ALL_LEFT); + if (event_group_bits & SpeakerEventGroupBits::COMMAND_STOP) { break; } - case I2S_BITS_PER_SAMPLE_24BIT: - case 
I2S_BITS_PER_SAMPLE_32BIT: { - data = convert_data_format(reinterpret_cast(data), reinterpret_cast(buffer), - remaining, this_speaker->channel_ == I2S_CHANNEL_FMT_ALL_LEFT); - break; + if (event_group_bits & SpeakerEventGroupBits::COMMAND_STOP_GRACEFULLY) { + stop_gracefully = true; } - } - while (remaining != 0) { - size_t bytes_written; - esp_err_t err = - i2s_write(this_speaker->parent_->get_port(), data, remaining, &bytes_written, (32 / portTICK_PERIOD_MS)); - if (err != ESP_OK) { - event = {.type = TaskEventType::WARNING, .err = err}; - if (xQueueSend(this_speaker->event_queue_, &event, 10 / portTICK_PERIOD_MS) != pdTRUE) { - ESP_LOGW(TAG, "Failed to send WARNING event"); + size_t bytes_to_read = dma_buffers_size; + size_t bytes_read = this_speaker->audio_ring_buffer_->read((void *) this_speaker->data_buffer_, bytes_to_read, + pdMS_TO_TICKS(TASK_DELAY_MS)); + + if (bytes_read > 0) { + last_data_received_time = millis(); + size_t bytes_written = 0; + + if ((audio_stream_info.bits_per_sample == 16) && (this_speaker->q15_volume_factor_ < INT16_MAX)) { + // Scale samples by the volume factor in place + q15_multiplication((int16_t *) this_speaker->data_buffer_, (int16_t *) this_speaker->data_buffer_, + bytes_read / sizeof(int16_t), this_speaker->q15_volume_factor_); + } + + if (audio_stream_info.bits_per_sample == (uint8_t) this_speaker->bits_per_sample_) { + i2s_write(this_speaker->parent_->get_port(), this_speaker->data_buffer_, bytes_read, &bytes_written, + portMAX_DELAY); + } else if (audio_stream_info.bits_per_sample < (uint8_t) this_speaker->bits_per_sample_) { + i2s_write_expand(this_speaker->parent_->get_port(), this_speaker->data_buffer_, bytes_read, + audio_stream_info.bits_per_sample, this_speaker->bits_per_sample_, &bytes_written, + portMAX_DELAY); } - continue; + + if (bytes_written != bytes_read) { + xEventGroupSetBits(this_speaker->event_group_, SpeakerEventGroupBits::ERR_ESP_INVALID_SIZE); + } + + } else { + // No data received + + if 
(stop_gracefully) { + break; + } + + i2s_zero_dma_buffer(this_speaker->parent_->get_port()); } - data += bytes_written; - remaining -= bytes_written; } } - - event.type = TaskEventType::STOPPING; - if (xQueueSend(this_speaker->event_queue_, &event, 10 / portTICK_PERIOD_MS) != pdTRUE) { - ESP_LOGW(TAG, "Failed to send STOPPING event"); - } - i2s_zero_dma_buffer(this_speaker->parent_->get_port()); + xEventGroupSetBits(this_speaker->event_group_, SpeakerEventGroupBits::STATE_STOPPING); + + i2s_stop(this_speaker->parent_->get_port()); i2s_driver_uninstall(this_speaker->parent_->get_port()); - event.type = TaskEventType::STOPPED; - if (xQueueSend(this_speaker->event_queue_, &event, 10 / portTICK_PERIOD_MS) != pdTRUE) { - ESP_LOGW(TAG, "Failed to send STOPPED event"); + this_speaker->parent_->unlock(); + this_speaker->delete_task_(dma_buffers_size); +} + +void I2SAudioSpeaker::start() { + if (this->is_failed()) + return; + if ((this->state_ == speaker::STATE_STARTING) || (this->state_ == speaker::STATE_RUNNING)) + return; + + if (this->speaker_task_handle_ == nullptr) { + xTaskCreate(I2SAudioSpeaker::speaker_task, "speaker_task", TASK_STACK_SIZE, (void *) this, TASK_PRIORITY, + &this->speaker_task_handle_); } - while (true) { - delay(10); + if (this->speaker_task_handle_ != nullptr) { + xEventGroupSetBits(this->event_group_, SpeakerEventGroupBits::COMMAND_START); + this->task_created_ = true; + } else { + xEventGroupSetBits(this->event_group_, SpeakerEventGroupBits::ERR_TASK_FAILED_TO_START); } } @@ -203,92 +315,169 @@ void I2SAudioSpeaker::stop_(bool wait_on_empty) { return; if (this->state_ == speaker::STATE_STOPPED) return; - if (this->state_ == speaker::STATE_STARTING) { - this->state_ = speaker::STATE_STOPPED; - return; - } - this->state_ = speaker::STATE_STOPPING; - DataEvent data; - data.stop = true; + if (wait_on_empty) { - xQueueSend(this->buffer_queue_, &data, portMAX_DELAY); + xEventGroupSetBits(this->event_group_, 
SpeakerEventGroupBits::COMMAND_STOP_GRACEFULLY); } else { - xQueueSendToFront(this->buffer_queue_, &data, portMAX_DELAY); + xEventGroupSetBits(this->event_group_, SpeakerEventGroupBits::COMMAND_STOP); } } -void I2SAudioSpeaker::watch_() { - TaskEvent event; - if (xQueueReceive(this->event_queue_, &event, 0) == pdTRUE) { - switch (event.type) { - case TaskEventType::STARTING: - ESP_LOGD(TAG, "Starting I2S Audio Speaker"); - break; - case TaskEventType::STARTED: - ESP_LOGD(TAG, "Started I2S Audio Speaker"); - this->state_ = speaker::STATE_RUNNING; - this->status_clear_warning(); - break; - case TaskEventType::STOPPING: - ESP_LOGD(TAG, "Stopping I2S Audio Speaker"); - break; - case TaskEventType::STOPPED: - this->state_ = speaker::STATE_STOPPED; - vTaskDelete(this->player_task_handle_); - this->task_created_ = false; - this->player_task_handle_ = nullptr; - this->parent_->unlock(); - xQueueReset(this->buffer_queue_); - ESP_LOGD(TAG, "Stopped I2S Audio Speaker"); - break; - case TaskEventType::WARNING: - ESP_LOGW(TAG, "Error writing to I2S: %s", esp_err_to_name(event.err)); - this->status_set_warning(); - break; - } +bool I2SAudioSpeaker::send_esp_err_to_event_group_(esp_err_t err) { + switch (err) { + case ESP_OK: + return false; + case ESP_ERR_INVALID_STATE: + xEventGroupSetBits(this->event_group_, SpeakerEventGroupBits::ERR_ESP_INVALID_STATE); + return true; + case ESP_ERR_INVALID_ARG: + xEventGroupSetBits(this->event_group_, SpeakerEventGroupBits::ERR_ESP_INVALID_ARG); + return true; + case ESP_ERR_INVALID_SIZE: + xEventGroupSetBits(this->event_group_, SpeakerEventGroupBits::ERR_ESP_INVALID_SIZE); + return true; + case ESP_ERR_NO_MEM: + xEventGroupSetBits(this->event_group_, SpeakerEventGroupBits::ERR_ESP_NO_MEM); + return true; + default: + xEventGroupSetBits(this->event_group_, SpeakerEventGroupBits::ERR_ESP_FAIL); + return true; } } -void I2SAudioSpeaker::loop() { - switch (this->state_) { - case speaker::STATE_STARTING: - this->start_(); - [[fallthrough]]; - 
case speaker::STATE_RUNNING: - case speaker::STATE_STOPPING: - this->watch_(); - break; - case speaker::STATE_STOPPED: - break; +esp_err_t I2SAudioSpeaker::allocate_buffers_(size_t data_buffer_size, size_t ring_buffer_size) { + if (this->data_buffer_ == nullptr) { + // Allocate data buffer for temporarily storing audio from the ring buffer before writing to the I2S bus + ExternalRAMAllocator allocator(ExternalRAMAllocator::ALLOW_FAILURE); + this->data_buffer_ = allocator.allocate(data_buffer_size); } + + if (this->data_buffer_ == nullptr) { + return ESP_ERR_NO_MEM; + } + + if (this->audio_ring_buffer_ == nullptr) { + // Allocate ring buffer + this->audio_ring_buffer_ = RingBuffer::create(ring_buffer_size); + } + + if (this->audio_ring_buffer_ == nullptr) { + return ESP_ERR_NO_MEM; + } + + return ESP_OK; } -size_t I2SAudioSpeaker::play(const uint8_t *data, size_t length) { - if (this->is_failed()) { - ESP_LOGE(TAG, "Cannot play audio, speaker failed to setup"); - return 0; +esp_err_t I2SAudioSpeaker::start_i2s_driver_() { + if (!this->parent_->try_lock()) { + return ESP_ERR_INVALID_STATE; } - if (this->state_ != speaker::STATE_RUNNING && this->state_ != speaker::STATE_STARTING) { - this->start(); + + i2s_driver_config_t config = { + .mode = (i2s_mode_t) (this->i2s_mode_ | I2S_MODE_TX), + .sample_rate = this->sample_rate_, + .bits_per_sample = this->bits_per_sample_, + .channel_format = this->channel_, + .communication_format = this->i2s_comm_fmt_, + .intr_alloc_flags = ESP_INTR_FLAG_LEVEL1, + .dma_buf_count = DMA_BUFFERS_COUNT, + .dma_buf_len = DMA_BUFFER_SIZE, + .use_apll = this->use_apll_, + .tx_desc_auto_clear = true, + .fixed_mclk = I2S_PIN_NO_CHANGE, + .mclk_multiple = I2S_MCLK_MULTIPLE_256, + .bits_per_chan = this->bits_per_channel_, +#if SOC_I2S_SUPPORTS_TDM + .chan_mask = (i2s_channel_t) (I2S_TDM_ACTIVE_CH0 | I2S_TDM_ACTIVE_CH1), + .total_chan = 2, + .left_align = false, + .big_edin = false, + .bit_order_msb = false, + .skip_msk = false, +#endif + }; +#if 
SOC_I2S_SUPPORTS_DAC + if (this->internal_dac_mode_ != I2S_DAC_CHANNEL_DISABLE) { + config.mode = (i2s_mode_t) (config.mode | I2S_MODE_DAC_BUILT_IN); } - size_t remaining = length; - size_t index = 0; - while (remaining > 0) { - DataEvent event; - event.stop = false; - size_t to_send_length = std::min(remaining, BUFFER_SIZE); - event.len = to_send_length; - memcpy(event.data, data + index, to_send_length); - if (xQueueSend(this->buffer_queue_, &event, 0) != pdTRUE) { - return index; - } - remaining -= to_send_length; - index += to_send_length; +#endif + + esp_err_t err = i2s_driver_install(this->parent_->get_port(), &config, 0, nullptr); + if (err != ESP_OK) { + // Failed to install the driver, so unlock the I2S port + this->parent_->unlock(); + return err; + } + +#if SOC_I2S_SUPPORTS_DAC + if (this->internal_dac_mode_ == I2S_DAC_CHANNEL_DISABLE) { +#endif + i2s_pin_config_t pin_config = this->parent_->get_pin_config(); + pin_config.data_out_num = this->dout_pin_; + + err = i2s_set_pin(this->parent_->get_port(), &pin_config); +#if SOC_I2S_SUPPORTS_DAC + } else { + i2s_set_dac_mode(this->internal_dac_mode_); } - return index; +#endif + + if (err != ESP_OK) { + // Failed to set the data out pin, so uninstall the driver and unlock the I2S port + i2s_driver_uninstall(this->parent_->get_port()); + this->parent_->unlock(); + } + + return err; } -bool I2SAudioSpeaker::has_buffered_data() const { return uxQueueMessagesWaiting(this->buffer_queue_) > 0; } +esp_err_t I2SAudioSpeaker::reconfigure_i2s_stream_info_(audio::AudioStreamInfo &audio_stream_info) { + if (this->i2s_mode_ & I2S_MODE_MASTER) { + // ESP controls the I2S bus, so adjust the sample rate and bits per sample to match the incoming audio + this->sample_rate_ = audio_stream_info.sample_rate; + this->bits_per_sample_ = (i2s_bits_per_sample_t) audio_stream_info.bits_per_sample; + } else if (this->sample_rate_ != audio_stream_info.sample_rate) { + // Can't reconfigure I2S bus, so the sample rate must match 
the configured value + return ESP_ERR_INVALID_ARG; + } + + if ((i2s_bits_per_sample_t) audio_stream_info.bits_per_sample > this->bits_per_sample_) { + // Currently can't handle the case when the incoming audio has more bits per sample than the configured value + return ESP_ERR_INVALID_ARG; + } + + if (audio_stream_info.channels == 1) { + return i2s_set_clk(this->parent_->get_port(), this->sample_rate_, this->bits_per_sample_, I2S_CHANNEL_MONO); + } else if (audio_stream_info.channels == 2) { + return i2s_set_clk(this->parent_->get_port(), this->sample_rate_, this->bits_per_sample_, I2S_CHANNEL_STEREO); + } + + return ESP_ERR_INVALID_ARG; +} + +void I2SAudioSpeaker::delete_task_(size_t buffer_size) { + if (this->audio_ring_buffer_ != nullptr) { + xEventGroupWaitBits(this->event_group_, + MESSAGE_RING_BUFFER_AVAILABLE_TO_WRITE, // Bit message to read + pdFALSE, // Don't clear the bits on exit + pdTRUE, // Wait for all the requested bits, + portMAX_DELAY); // Block indefinitely until the ring buffer is unlocked + + this->audio_ring_buffer_.reset(); // Deallocates the ring buffer stored in the unique_ptr + this->audio_ring_buffer_ = nullptr; + } + + if (this->data_buffer_ != nullptr) { + ExternalRAMAllocator allocator(ExternalRAMAllocator::ALLOW_FAILURE); + allocator.deallocate(this->data_buffer_, buffer_size); + this->data_buffer_ = nullptr; + } + + xEventGroupSetBits(this->event_group_, SpeakerEventGroupBits::STATE_STOPPED); + + this->task_created_ = false; + vTaskDelete(nullptr); +} } // namespace i2s_audio } // namespace esphome diff --git a/esphome/components/i2s_audio/speaker/i2s_audio_speaker.h b/esphome/components/i2s_audio/speaker/i2s_audio_speaker.h index 9d1817c86f13..245f97d1e768 100644 --- a/esphome/components/i2s_audio/speaker/i2s_audio_speaker.h +++ b/esphome/components/i2s_audio/speaker/i2s_audio_speaker.h @@ -5,38 +5,21 @@ #include "../i2s_audio.h" #include + +#include #include -#include +#include "esphome/components/audio/audio.h" #include 
"esphome/components/speaker/speaker.h" + #include "esphome/core/component.h" #include "esphome/core/gpio.h" #include "esphome/core/helpers.h" +#include "esphome/core/ring_buffer.h" namespace esphome { namespace i2s_audio { -static const size_t BUFFER_SIZE = 1024; - -enum class TaskEventType : uint8_t { - STARTING = 0, - STARTED, - STOPPING, - STOPPED, - WARNING = 255, -}; - -struct TaskEvent { - TaskEventType type; - esp_err_t err; -}; - -struct DataEvent { - bool stop; - size_t len; - uint8_t data[BUFFER_SIZE]; -}; - class I2SAudioSpeaker : public I2SAudioOut, public speaker::Speaker, public Component { public: float get_setup_priority() const override { return esphome::setup_priority::LATE; } @@ -55,25 +38,89 @@ class I2SAudioSpeaker : public I2SAudioOut, public speaker::Speaker, public Comp void stop() override; void finish() override; - size_t play(const uint8_t *data, size_t length) override; + /// @brief Plays the provided audio data. + /// Starts the speaker task, if necessary. Writes the audio data to the ring buffer. + /// @param data Audio data in the format set by the parent speaker classes ``set_audio_stream_info`` method. + /// @param length The length of the audio data in bytes. + /// @param ticks_to_wait The FreeRTOS ticks to wait before writing as much data as possible to the ring buffer. + /// @return The number of bytes that were actually written to the ring buffer. + size_t play(const uint8_t *data, size_t length, TickType_t ticks_to_wait) override; + size_t play(const uint8_t *data, size_t length) override { return play(data, length, 0); } bool has_buffered_data() const override; + /// @brief Sets the volume of the speaker. It is implemented as a software volume control. + /// Overrides the default setter to convert the floating point volume to a Q15 fixed-point factor. 
+ /// @param volume + void set_volume(float volume) override; + float get_volume() override { return this->volume_; } + protected: - void start_(); + /// @brief Function for the FreeRTOS task handling audio output. + /// After receiving the COMMAND_START signal, allocates space for the buffers, starts the I2S driver, and reads + /// audio from the ring buffer and writes audio to the I2S port. Stops immediately after receiving the COMMAND_STOP + /// signal and stops only after the ring buffer is empty after receiving the COMMAND_STOP_GRACEFULLY signal. Stops if + /// the ring buffer hasn't read data for more than timeout_ milliseconds. When stopping, it deallocates the buffers, + /// stops the I2S driver, unlocks the I2S port, and deletes the task. It communicates the state and any errors via + /// event_group_. + /// @param params I2SAudioSpeaker component + static void speaker_task(void *params); + + /// @brief Sends a stop command to the speaker task via event_group_. + /// @param wait_on_empty If false, sends the COMMAND_STOP signal. If true, sends the COMMAND_STOP_GRACEFULLY signal. void stop_(bool wait_on_empty); - void watch_(); - - static void player_task(void *params); - TaskHandle_t player_task_handle_{nullptr}; - QueueHandle_t buffer_queue_; - QueueHandle_t event_queue_; + /// @brief Sets the corresponding ERR_ESP event group bits. + /// @param err esp_err_t error code. + /// @return True if an ERR_ESP bit is set and false if err == ESP_OK + bool send_esp_err_to_event_group_(esp_err_t err); + + /// @brief Allocates the data buffer and ring buffer + /// @param data_buffer_size Number of bytes to allocate for the data buffer. + /// @param ring_buffer_size Number of bytes to allocate for the ring buffer. + /// @return ESP_ERR_NO_MEM if either buffer fails to allocate + /// ESP_OK if successful + esp_err_t allocate_buffers_(size_t data_buffer_size, size_t ring_buffer_size); + + /// @brief Starts the ESP32 I2S driver. 
+ /// Attempts to lock the I2S port, starts the I2S driver, and sets the data out pin. If it fails, it will unlock + /// the I2S port and uninstall the driver, if necessary. + /// @return ESP_ERR_INVALID_STATE if the I2S port is already locked. + /// ESP_ERR_INVALID_ARG if installing the driver or setting the data out pin fails due to a parameter error. + /// ESP_ERR_NO_MEM if the driver fails to install due to a memory allocation error. + /// ESP_FAIL if setting the data out pin fails due to an IO error + /// ESP_OK if successful + esp_err_t start_i2s_driver_(); + + /// @brief Adjusts the I2S driver configuration to match the incoming audio stream. + /// Modifies I2S driver's sample rate, bits per sample, and number of channel settings. If the I2S is in secondary + /// mode, it only modifies the number of channels. + /// @param audio_stream_info Describes the incoming audio stream + /// @return ESP_ERR_INVALID_ARG if there is a parameter error, if there are more than 2 channels in the stream, or if + /// the audio settings are incompatible with the configuration. + /// ESP_ERR_NO_MEM if the driver fails to reconfigure due to a memory allocation error. + /// ESP_OK if successful. + esp_err_t reconfigure_i2s_stream_info_(audio::AudioStreamInfo &audio_stream_info); + + /// @brief Deletes the speaker's task. + /// Deallocates the data_buffer_ and audio_ring_buffer_, if necessary, and deletes the task. Should only be called by + /// the speaker_task itself. + /// @param buffer_size The allocated size of the data_buffer_.
+ void delete_task_(size_t buffer_size); + + TaskHandle_t speaker_task_handle_{nullptr}; + EventGroupHandle_t event_group_{nullptr}; + + uint8_t *data_buffer_; + std::unique_ptr audio_ring_buffer_; + + uint32_t timeout_; + uint8_t dout_pin_; - uint32_t timeout_{0}; - uint8_t dout_pin_{0}; bool task_created_{false}; + int16_t q15_volume_factor_{INT16_MAX}; + #if SOC_I2S_SUPPORTS_DAC i2s_dac_mode_t internal_dac_mode_{I2S_DAC_CHANNEL_DISABLE}; #endif diff --git a/esphome/components/speaker/__init__.py b/esphome/components/speaker/__init__.py index d28b726d1fd1..1bbc0b02ef51 100644 --- a/esphome/components/speaker/__init__.py +++ b/esphome/components/speaker/__init__.py @@ -2,7 +2,7 @@ from esphome.automation import maybe_simple_id import esphome.codegen as cg import esphome.config_validation as cv -from esphome.const import CONF_DATA, CONF_ID +from esphome.const import CONF_DATA, CONF_ID, CONF_VOLUME from esphome.core import CORE from esphome.coroutine import coroutine_with_priority @@ -23,6 +23,10 @@ FinishAction = speaker_ns.class_( "FinishAction", automation.Action, cg.Parented.template(Speaker) ) +VolumeSetAction = speaker_ns.class_( + "VolumeSetAction", automation.Action, cg.Parented.template(Speaker) +) + IsPlayingCondition = speaker_ns.class_("IsPlayingCondition", automation.Condition) IsStoppedCondition = speaker_ns.class_("IsStoppedCondition", automation.Condition) @@ -90,6 +94,25 @@ async def speaker_play_action(config, action_id, template_arg, args): )(speaker_action) +@automation.register_action( + "speaker.volume_set", + VolumeSetAction, + cv.maybe_simple_value( + { + cv.GenerateID(): cv.use_id(Speaker), + cv.Required(CONF_VOLUME): cv.templatable(cv.percentage), + }, + key=CONF_VOLUME, + ), +) +async def speaker_volume_set_action(config, action_id, template_arg, args): + var = cg.new_Pvariable(action_id, template_arg) + await cg.register_parented(var, config[CONF_ID]) + volume = await cg.templatable(config[CONF_VOLUME], args, float) + 
cg.add(var.set_volume(volume)) + return var + + @coroutine_with_priority(100.0) async def to_code(config): cg.add_global(speaker_ns.using) diff --git a/esphome/components/speaker/automation.h b/esphome/components/speaker/automation.h index 2716fe61002e..9efda011f294 100644 --- a/esphome/components/speaker/automation.h +++ b/esphome/components/speaker/automation.h @@ -34,6 +34,11 @@ template class PlayAction : public Action, public Parente std::vector data_static_{}; }; +template class VolumeSetAction : public Action, public Parented { + TEMPLATABLE_VALUE(float, volume) + void play(Ts... x) override { this->parent_->set_volume(this->volume_.value(x...)); } +}; + template class StopAction : public Action, public Parented { public: void play(Ts... x) override { this->parent_->stop(); } diff --git a/esphome/components/speaker/speaker.h b/esphome/components/speaker/speaker.h index 375ccc4e8c88..9390e4edb72c 100644 --- a/esphome/components/speaker/speaker.h +++ b/esphome/components/speaker/speaker.h @@ -4,6 +4,12 @@ #include #include +#ifdef USE_ESP32 +#include +#endif + +#include "esphome/components/audio/audio.h" + namespace esphome { namespace speaker { @@ -16,14 +22,33 @@ enum State : uint8_t { class Speaker { public: +#ifdef USE_ESP32 + /// @brief Plays the provided audio data. + /// If the speaker component doesn't implement this method, it falls back to the play method without this parameter. + /// @param data Audio data in the format specified by ``set_audio_stream_info`` method. + /// @param length The length of the audio data in bytes. + /// @param ticks_to_wait The FreeRTOS ticks to wait before writing as much data as possible to the ring buffer. + /// @return The number of bytes that were actually written to the speaker's internal buffer. + virtual size_t play(const uint8_t *data, size_t length, TickType_t ticks_to_wait) { + return this->play(data, length); + }; +#endif + + /// @brief Plays the provided audio data. 
+ /// If the audio stream is not the default defined in "esphome/components/audio/audio.h" and the speaker component implements it, + /// then this should be called after calling ``set_audio_stream_info``. + /// @param data Audio data in the format specified by ``set_audio_stream_info`` method. + /// @param length The length of the audio data in bytes. + /// @return The number of bytes that were actually written to the speaker's internal buffer. virtual size_t play(const uint8_t *data, size_t length) = 0; + size_t play(const std::vector &data) { return this->play(data.data(), data.size()); } virtual void start() = 0; virtual void stop() = 0; // In compare between *STOP()* and *FINISH()*; *FINISH()* will stop after emptying the play buffer, // while *STOP()* will break directly. - // When finish() is not implemented on the plateform component it should just do a normal stop. + // When finish() is not implemented on the platform component it should just do a normal stop. virtual void finish() { this->stop(); } virtual bool has_buffered_data() const = 0; @@ -31,8 +56,18 @@ class Speaker { bool is_running() const { return this->state_ == STATE_RUNNING; } bool is_stopped() const { return this->state_ == STATE_STOPPED; } + // Volume control must be implemented by each speaker component, otherwise it will have no effect.
+ virtual void set_volume(float volume) { this->volume_ = volume; }; + virtual float get_volume() { return this->volume_; } + + void set_audio_stream_info(const audio::AudioStreamInfo &audio_stream_info) { + this->audio_stream_info_ = audio_stream_info; + } + protected: State state_{STATE_STOPPED}; + audio::AudioStreamInfo audio_stream_info_; + float volume_{1.0f}; }; } // namespace speaker diff --git a/tests/components/speaker/test.esp32-ard.yaml b/tests/components/speaker/test.esp32-ard.yaml index ab20f36eb630..9a24d00f68ff 100644 --- a/tests/components/speaker/test.esp32-ard.yaml +++ b/tests/components/speaker/test.esp32-ard.yaml @@ -5,6 +5,7 @@ esphome: condition: speaker.is_stopped then: - speaker.play: [0, 1, 2, 3] + - speaker.volume_set: 0.9 - if: condition: speaker.is_playing then: diff --git a/tests/components/speaker/test.esp32-c3-ard.yaml b/tests/components/speaker/test.esp32-c3-ard.yaml index c966f9daa7cc..f28014337cbd 100644 --- a/tests/components/speaker/test.esp32-c3-ard.yaml +++ b/tests/components/speaker/test.esp32-c3-ard.yaml @@ -5,6 +5,7 @@ esphome: condition: speaker.is_stopped then: - speaker.play: [0, 1, 2, 3] + - speaker.volume_set: 0.9 - if: condition: speaker.is_playing then: diff --git a/tests/components/speaker/test.esp32-c3-idf.yaml b/tests/components/speaker/test.esp32-c3-idf.yaml index c966f9daa7cc..f28014337cbd 100644 --- a/tests/components/speaker/test.esp32-c3-idf.yaml +++ b/tests/components/speaker/test.esp32-c3-idf.yaml @@ -5,6 +5,7 @@ esphome: condition: speaker.is_stopped then: - speaker.play: [0, 1, 2, 3] + - speaker.volume_set: 0.9 - if: condition: speaker.is_playing then: diff --git a/tests/components/speaker/test.esp32-idf.yaml b/tests/components/speaker/test.esp32-idf.yaml index ab20f36eb630..9a24d00f68ff 100644 --- a/tests/components/speaker/test.esp32-idf.yaml +++ b/tests/components/speaker/test.esp32-idf.yaml @@ -5,6 +5,7 @@ esphome: condition: speaker.is_stopped then: - speaker.play: [0, 1, 2, 3] + - speaker.volume_set: 
0.9 - if: condition: speaker.is_playing then: