Skip to content

Commit

Permalink
Audio pre-roll and loudness measurement support for USAC (#79)
Browse files Browse the repository at this point in the history
Tests done: Smoke test
  • Loading branch information
akshayragir833 authored Mar 6, 2024
1 parent d0702a8 commit 58381f7
Show file tree
Hide file tree
Showing 34 changed files with 1,420 additions and 242 deletions.
1 change: 1 addition & 0 deletions Android.bp
Original file line number Diff line number Diff line change
Expand Up @@ -430,6 +430,7 @@ cc_library_static {
"encoder/ixheaace_hybrid.c",
"encoder/ixheaace_hybrid_init.c",
"encoder/ixheaace_interface.c",
"encoder/ixheaace_loudness_measurement.c",
"encoder/ixheaace_mdct_480.c",
"encoder/ixheaace_mps_bitstream.c",
"encoder/ixheaace_mps_dct.c",
Expand Down
5 changes: 5 additions & 0 deletions README_enc.md
Original file line number Diff line number Diff line change
Expand Up @@ -52,6 +52,8 @@ The configuration file for DRC is placed in `encoder\test` directory(impd_drc_co
[-esbr_hq:<esbr_hq_flag>]
[-drc:<drc_flag>]
[-inter_tes_enc:<inter_tes_enc_flag>]
[-rap:<random access interval in ms>]
[-stream_id:<stream identifier>]
where,
<paramfile> is the parameter file with multiple commands
Expand Down Expand Up @@ -101,6 +103,9 @@ where,
<esbr_hq_flag> Valid values are 0 (disable high quality eSBR) and 1 (enable high quality eSBR). Default is 0.
<drc_flag> Valid values are 0 (disable DRC encoding) and 1 (enable DRC encoding). Default is 0.
<inter_tes_enc_flag> Valid values are 0 (disable inter-TES encoding) and 1 (enable inter-TES encoding). Default is 0.
<random access interval in ms> is the time interval between audio preroll frames in ms. It is applicable only for AOT 42. Valid values are -1 (audio preroll is sent only at the beginning of the file) and values greater than 1000 ms. Default is -1.
<stream identifier> is the stream ID used to uniquely identify the configuration of a stream within a set of associated streams. It is applicable only for AOT 42. Valid values are 0 to 65535. Any value outside this range is typecast to an unsigned short value. Default is 0.
```
Sample CLI:
```
Expand Down
Binary file modified docs/LIBXAAC-Enc-API.pdf
Binary file not shown.
Binary file modified docs/LIBXAAC-Enc-GSG.pdf
Binary file not shown.
17 changes: 17 additions & 0 deletions encoder/drc_src/impd_drc_api.c
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,7 @@
* Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore
*/

#include <string.h>
#include "ixheaac_type_def.h"
#include "ixheaac_error_standards.h"
#include "ixheaace_error_codes.h"
Expand Down Expand Up @@ -311,6 +312,22 @@ IA_ERRORCODE impd_drc_enc_init(VOID *pstr_drc_state, VOID *ptr_drc_scratch,
return err_code;
}

/*
 * impd_loudness_info_init
 *
 * Prepares the DRC encoder state for writing the loudness info set:
 *   1. attaches bit_buf_base_cfg_ext to the config-extension bit buffer,
 *   2. copies the loudness info set from the input configuration into the
 *      gain encoder state,
 *   3. serializes the loudness info set via
 *      impd_drc_write_measured_loudness_info().
 *
 * pstr_drc_state   Opaque pointer, used here as ia_drc_enc_state *.
 * pstr_inp_config  Input configuration providing str_enc_loudness_info_set.
 *
 * Returns the error code reported by impd_drc_write_measured_loudness_info().
 */
IA_ERRORCODE impd_loudness_info_init(VOID *pstr_drc_state, ia_drc_input_config *pstr_inp_config) {
  ia_drc_enc_state *pstr_state = (ia_drc_enc_state *)pstr_drc_state;

  /* The whole bit_buf_base_cfg_ext storage is handed to the bit buffer; the
   * trailing argument (1) is forwarded unchanged to iusace_create_bit_buffer. */
  iusace_create_bit_buffer(&pstr_state->str_bit_buf_cfg_ext, pstr_state->bit_buf_base_cfg_ext,
                           sizeof(pstr_state->bit_buf_base_cfg_ext), 1);

  /* Take a private copy of the caller's loudness info set */
  memcpy(&pstr_state->str_gain_enc.str_loudness_info_set,
         &pstr_inp_config->str_enc_loudness_info_set, sizeof(ia_drc_loudness_info_set_struct));

  return impd_drc_write_measured_loudness_info(pstr_state);
}

IA_ERRORCODE impd_drc_enc(VOID *pstr_drc_state, FLOAT32 **pptr_input, UWORD32 inp_offset,
WORD32 *ptr_bits_written, VOID *pstr_scratch) {
LOOPIDX i, j, k;
Expand Down
9 changes: 9 additions & 0 deletions encoder/drc_src/impd_drc_api.h
Original file line number Diff line number Diff line change
Expand Up @@ -36,8 +36,17 @@ typedef struct {
ia_drc_uni_drc_gain_ext_struct str_enc_gain_extension;
} ia_drc_input_config;

/* DRC configuration bundle: encoder parameters, uniDrc configuration,
 * loudness info set and gain extension.
 * NOTE(review): presumably an encoder-internal counterpart of
 * ia_drc_input_config - confirm against the callers. */
typedef struct {
ia_drc_enc_params_struct str_enc_params;
ia_drc_uni_drc_config_struct str_uni_drc_config;
ia_drc_loudness_info_set_struct str_enc_loudness_info_set;
ia_drc_uni_drc_gain_ext_struct str_enc_gain_extension;
} ia_drc_internal_config;

/* DRC encoder initialization entry point; takes the DRC state, a scratch
 * pointer and the input configuration, and returns an IA_ERRORCODE.
 * NOTE(review): definition is not fully visible here - confirm details. */
IA_ERRORCODE impd_drc_enc_init(VOID *pstr_drc_state, VOID *ptr_drc_scratch,
ia_drc_input_config *pstr_inp_config);

/* Per-call DRC encode entry point.
 * NOTE(review): presumably reports the number of bits produced through
 * ptr_bits_written - confirm against the definition. */
IA_ERRORCODE impd_drc_enc(VOID *pstr_drc_state, FLOAT32 **pptr_input, UWORD32 inp_offset,
WORD32 *ptr_bits_written, VOID *pstr_scratch);

/* Sets up the config-extension bit buffer in the DRC state, copies
 * pstr_inp_config->str_enc_loudness_info_set into the state and writes the
 * loudness info set; returns the error code of that write. */
IA_ERRORCODE impd_loudness_info_init(VOID *pstr_drc_state, ia_drc_input_config *pstr_inp_config);
6 changes: 6 additions & 0 deletions encoder/drc_src/impd_drc_enc.h
Original file line number Diff line number Diff line change
Expand Up @@ -38,3 +38,9 @@ IA_ERRORCODE impd_drc_write_loudness_info_set_extension(
/* NOTE(review): definition not fully visible here; presumably writes the
 * uniDrc config and reports the bit count via ptr_bit_cnt - confirm. */
IA_ERRORCODE impd_drc_write_uni_drc_config(ia_drc_enc_state *pstr_drc_state, WORD32 *ptr_bit_cnt);

/* NOTE(review): definition not visible here; presumably writes the uniDrc
 * gain payload and reports the bit count via ptr_bit_cnt - confirm. */
VOID impd_drc_write_uni_drc_gain(ia_drc_enc_state *pstr_drc_state, WORD32 *ptr_bit_cnt);

/* Writes the loudness info set into the state's config-extension bit buffer
 * (str_bit_buf_cfg_ext) and, unless a fatal error is reported, stores the
 * resulting bit count in drc_config_ext_data_size_bit. */
IA_ERRORCODE impd_drc_write_measured_loudness_info(ia_drc_enc_state *pstr_drc_state);

/* Writes the loudness info set to the supplied bit buffer; the caller reads
 * the bit count back from *ptr_bit_cnt.
 * NOTE(review): whether *ptr_bit_cnt is accumulated or overwritten is not
 * visible here - confirm against the definition. */
IA_ERRORCODE impd_drc_write_loudness_info_set(ia_drc_enc_state *pstr_drc_state,
ia_bit_buf_struct *it_bit_buf,
WORD32 *ptr_bit_cnt);
14 changes: 14 additions & 0 deletions encoder/drc_src/impd_drc_mux.c
Original file line number Diff line number Diff line change
Expand Up @@ -3005,6 +3005,20 @@ IA_ERRORCODE impd_drc_write_uni_drc_config(ia_drc_enc_state *pstr_drc_state,
return err_code;
}

/*
 * impd_drc_write_measured_loudness_info
 *
 * Serializes the loudness info set into the config-extension bit buffer of
 * the DRC state (str_bit_buf_cfg_ext). Unless the write reports a fatal
 * error, the number of bits produced is stored in
 * drc_config_ext_data_size_bit.
 *
 * pstr_drc_state  DRC encoder state holding the bit buffer and the size field.
 *
 * Returns the error code of impd_drc_write_loudness_info_set() unchanged
 * (fatal or non-fatal).
 */
IA_ERRORCODE impd_drc_write_measured_loudness_info(ia_drc_enc_state *pstr_drc_state) {
  WORD32 num_bits_written = 0;
  IA_ERRORCODE status = impd_drc_write_loudness_info_set(
      pstr_drc_state, &pstr_drc_state->str_bit_buf_cfg_ext, &num_bits_written);

  /* Only a fatal error suppresses the size update; non-fatal codes still
   * record the bit count and are passed on to the caller. */
  if (!(status & IA_FATAL_ERROR)) {
    pstr_drc_state->drc_config_ext_data_size_bit = num_bits_written;
  }

  return status;
}

IA_ERRORCODE impd_drc_enc_initial_gain(const WORD32 gain_coding_profile, FLOAT32 gain_initial,
FLOAT32 *gain_initial_quant, WORD32 *code_size,
WORD32 *code) {
Expand Down
2 changes: 2 additions & 0 deletions encoder/drc_src/impd_drc_uni_drc.h
Original file line number Diff line number Diff line change
Expand Up @@ -130,6 +130,8 @@
/* Lower / upper limits for a loudness "method value".
 * NOTE(review): units presumably dB-based - confirm. */
#define MIN_METHOD_VALUE (-116.0f)
#define MAX_METHOD_VALUE (121.0f)
/* Approximately 20 * log10(FLT_MAX) for IEEE-754 single precision */
#define MAX_FLT_VAL_DB (770.6367883810890080451095799195f)
/* Default method value and default sample peak value (both -31.0).
 * NOTE(review): presumably used when no measured value is supplied - confirm. */
#define DEFAULT_METHOD_VALUE (-31.0f)
#define DEFAULT_SAMPLE_PEAK_VALUE (-31.0f)

typedef struct {
WORD32 level_estim_k_weighting_type;
Expand Down
9 changes: 9 additions & 0 deletions encoder/iusace_cnst.h
Original file line number Diff line number Diff line change
Expand Up @@ -201,3 +201,12 @@
#define USACE_MAX_SCR_SIZE (733836)
#define USACE_SCR_STACK (10 * 1024)
#define MAX_USAC_ESBR_BITRATE (96000)

/* Number of previous output frames retained (first dimension of
 * prev_out_bytes / prev_out_data in ia_usac_data_struct) */
#define MAX_PREROLL_FRAMES (3)
/* Per-channel byte budget used to size each prev_out_data entry
 * (multiplied by IXHEAACE_MAX_CH_IN_BS_ELE) */
#define MAX_OUTPUT_BYTES_PER_CH (768)
/* Largest value representable in 8 bits */
#define MAXIMUM_VALUE_8BIT (255)
/* Random access (audio pre-roll) interval in ms: -1 is the documented default,
 * meaning pre-roll is sent only at the beginning of the file */
#define DEFAULT_RAP_INTERVAL_IN_MS (-1)
/* NOTE(review): README says valid intervals are "greater than 1000 ms";
 * confirm whether exactly 1000 is accepted by the range check */
#define MIN_RAP_INTERVAL_IN_MS (1000)
/* NOTE(review): presumably the byte capacity reserved for the config carried
 * inside a pre-roll extension - confirm against its user */
#define MAX_PREROLL_CONFIG_SIZE (1024)
#define CC_NUM_PREROLL_FRAMES (1)
#define USAC_FIRST_FRAME_FLAG_DEFAULT_VALUE (1)
14 changes: 14 additions & 0 deletions encoder/iusace_config.h
Original file line number Diff line number Diff line change
Expand Up @@ -143,10 +143,13 @@ typedef struct {

/* Extension element types (stored in usac_ext_ele_type), in ascending order */
#define ID_EXT_ELE_FILL (0)
#define ID_EXT_ELE_AUDIOPREROLL (3)
#define ID_EXT_ELE_UNI_DRC (4)

/* Config extension types (stored in usac_config_ext_type), in ascending order */
#define ID_CONFIG_EXT_FILL (0)
#define ID_CONFIG_EXT_DOWNMIX (1)
#define ID_CONFIG_EXT_LOUDNESS_INFO (2)
#define ID_CONFIG_EXT_STREAM_ID (7)

/* Length stored in usac_config_ext_len for the stream-id config extension;
 * presumably in bytes, matching the 16-bit stream_identifier field */
#define CONFIG_EXT_LEN_STREAM_ID (2)

#define NUM_COEFF (1024)

typedef enum {
Expand Down Expand Up @@ -223,6 +226,7 @@ typedef struct {
WORD32 output_channel_pos[BS_MAX_NUM_OUT_CHANNELS];
WORD32 ccfl;
ia_usac_enc_element_config_struct str_usac_element_config[USAC_MAX_ELEMENTS];
UWORD16 stream_identifier;
} ia_usac_config_struct;

typedef struct {
Expand Down Expand Up @@ -272,6 +276,16 @@ typedef struct {
WORD32 drc_frame_size;
ia_drc_input_config str_drc_cfg;
WORD32 use_acelp_only;
WORD32 random_access_interval;
WORD32 preroll_flag;
WORD32 num_preroll_frames;
WORD32 preroll_idx;
WORD32 is_ipf;
WORD32 preroll_frame;
WORD32 is_first_frame;
ia_drc_internal_config str_internal_drc_cfg;
WORD32 use_measured_loudness;
UWORD16 stream_id;
} ia_usac_encoder_config_struct;

typedef struct {
Expand Down
77 changes: 53 additions & 24 deletions encoder/iusace_enc_main.c
Original file line number Diff line number Diff line change
Expand Up @@ -618,7 +618,14 @@ IA_ERRORCODE iusace_enc_init(ia_usac_encoder_config_struct *ptr_usac_config,
p_audio_specific_config->channel_configuration = ptr_usac_config->channels;
}
}

ia_usac_enc_element_config_struct *pstr_usac_elem_config =
&(pstr_asc_usac_config->str_usac_element_config[pstr_asc_usac_config->num_elements]);
pstr_asc_usac_config->usac_element_type[pstr_asc_usac_config->num_elements] = ID_USAC_EXT;
pstr_usac_elem_config->usac_ext_ele_type = ID_EXT_ELE_AUDIOPREROLL;
pstr_usac_elem_config->usac_ext_ele_dflt_len_present = 0;
pstr_usac_elem_config->usac_ext_ele_payload_present = 0;
pstr_usac_elem_config->usac_ext_ele_cfg_len = 0;
pstr_asc_usac_config->num_elements++;
// DRC Config
if (ptr_usac_config->use_drc_element) {
ptr_usac_config->str_drc_cfg.str_uni_drc_config.str_channel_layout.base_ch_count =
Expand All @@ -637,26 +644,40 @@ IA_ERRORCODE iusace_enc_init(ia_usac_encoder_config_struct *ptr_usac_config,

if (ptr_usac_config->use_drc_element) {
ia_usac_enc_element_config_struct *pstr_usac_elem_config =
&(pstr_asc_usac_config->str_usac_element_config[pstr_asc_usac_config->num_elements]);
&(pstr_asc_usac_config->str_usac_element_config[pstr_asc_usac_config->num_elements]);
pstr_asc_usac_config->usac_element_type[pstr_asc_usac_config->num_elements] = ID_USAC_EXT;
pstr_usac_elem_config->usac_ext_ele_type = ID_EXT_ELE_UNI_DRC;
pstr_usac_elem_config->usac_ext_ele_dflt_len_present = 0;
pstr_usac_elem_config->usac_ext_ele_payload_present = 0;
pstr_usac_elem_config->drc_config_data = usac_data->str_drc_state.bit_buf_base_cfg;
pstr_usac_elem_config->usac_ext_ele_cfg_len =
(usac_data->str_drc_state.drc_config_data_size_bit + 7) >> 3;
(usac_data->str_drc_state.drc_config_data_size_bit + 7) >> 3;
pstr_asc_usac_config->num_elements++;
}
} else {
err_code =
impd_loudness_info_init(&usac_data->str_drc_state, &ptr_usac_config->str_drc_cfg);
if (err_code) {
return err_code;
}
}
if (ptr_usac_config->use_drc_element) // For Loudness
{
pstr_asc_usac_config->usac_config_ext_type[pstr_asc_usac_config->num_config_extensions] =
ID_CONFIG_EXT_LOUDNESS_INFO;
pstr_asc_usac_config->usac_config_ext_len[pstr_asc_usac_config->num_config_extensions] =
(usac_data->str_drc_state.drc_config_ext_data_size_bit + 7) >> 3;
pstr_asc_usac_config->usac_config_ext_buf[pstr_asc_usac_config->num_config_extensions] =
usac_data->str_drc_state.bit_buf_base_cfg_ext;
pstr_asc_usac_config->num_config_extensions++;

pstr_asc_usac_config->usac_config_ext_type[pstr_asc_usac_config->num_config_extensions] =
ID_CONFIG_EXT_STREAM_ID;
pstr_asc_usac_config->usac_config_ext_len[pstr_asc_usac_config->num_config_extensions] =
CONFIG_EXT_LEN_STREAM_ID;
pstr_asc_usac_config->num_config_extensions++;
pstr_asc_usac_config->stream_identifier = ptr_usac_config->stream_id;

pstr_asc_usac_config->usac_config_ext_type[pstr_asc_usac_config->num_config_extensions] =
ID_CONFIG_EXT_LOUDNESS_INFO;
pstr_asc_usac_config->usac_config_ext_len[pstr_asc_usac_config->num_config_extensions] =
(usac_data->str_drc_state.drc_config_ext_data_size_bit + 7) >> 3;
pstr_asc_usac_config->usac_config_ext_buf[pstr_asc_usac_config->num_config_extensions] =
usac_data->str_drc_state.bit_buf_base_cfg_ext;
pstr_asc_usac_config->num_config_extensions++;

if (pstr_asc_usac_config->num_config_extensions) {
pstr_asc_usac_config->usac_cfg_ext_present = 1;
}

Expand Down Expand Up @@ -778,7 +799,7 @@ IA_ERRORCODE iusace_enc_init(ia_usac_encoder_config_struct *ptr_usac_config,

for (; elem_idx < pstr_asc_usac_config->num_elements; elem_idx++) {
idx = elem_idx - pstr_asc_usac_config->num_ext_elements;
pstr_asc_usac_config->str_usac_element_config[idx].noise_filling =
pstr_asc_usac_config->str_usac_element_config[elem_idx].noise_filling =
usac_data->noise_filling[idx];
usac_data->channel_elem_type[idx] = pstr_asc_usac_config->usac_element_type[elem_idx];
}
Expand All @@ -792,6 +813,8 @@ IA_ERRORCODE iusace_enc_init(ia_usac_encoder_config_struct *ptr_usac_config,
pstr_usac_elem_config->usac_ext_ele_dflt_len_present = 0;
pstr_usac_elem_config->usac_ext_ele_payload_present = 0;
pstr_asc_usac_config->num_elements++;
ptr_usac_config->num_ext_elements++;
ptr_usac_config->num_elements++;
}

if (ptr_usac_config->codec_mode == USAC_SWITCHED) {
Expand Down Expand Up @@ -1045,7 +1068,7 @@ IA_ERRORCODE ixheaace_usac_encode(FLOAT32 **ptr_input,
WORD32 len_next_high_rate;
WORD8 elem_idx, nr_core_coder_channels = 0, chn = 0;
WORD32 ch_offset = 0;
WORD32 elem_idx_max = ptr_usac_config->num_elements;
WORD32 elem_idx_max = ptr_usac_config->num_elements - ptr_usac_config->num_ext_elements;
WORD32 td_buffer_offset = (TD_BUFFER_OFFSET * ptr_usac_config->ccfl) / FRAME_LEN_LONG;
usac_independency_flg = ptr_usac_data->usac_independency_flag;

Expand All @@ -1063,15 +1086,16 @@ IA_ERRORCODE ixheaace_usac_encode(FLOAT32 **ptr_input,
ptr_usac_data->min_bits_needed = 0;
}

if (ptr_usac_config->use_drc_element == 1) {
elem_idx_max -= 1;
}

num_bits = 0;

iusace_write_bits_buf(pstr_it_bit_buff, usac_independency_flg, 1);
num_bits++;

if (ptr_usac_config->preroll_flag) {
if (ptr_usac_config->iframes_interval != ptr_usac_config->num_preroll_frames) {
iusace_write_bits_buf(pstr_it_bit_buff, usac_independency_flg, 1);
num_bits++;
}
} else {
iusace_write_bits_buf(pstr_it_bit_buff, usac_independency_flg, 1);
num_bits++;
}
for (elem_idx = 0; elem_idx < elem_idx_max; elem_idx++) {
switch (ptr_usac_data->channel_elem_type[elem_idx]) {
case USAC_ELEMENT_TYPE_SCE:
Expand Down Expand Up @@ -1289,7 +1313,12 @@ IA_ERRORCODE ixheaace_usac_encode(FLOAT32 **ptr_input,
}
}
}

if (ptr_usac_config->preroll_flag) {
if (ptr_usac_config->iframes_interval != ptr_usac_config->num_preroll_frames) {
iusace_write_bits_buf(pstr_it_bit_buff, 0, 1); // extension element present
num_bits++;
}
}
if (ptr_usac_config->use_drc_element) {
WORD32 num_bits_ext_elem = 0;
err = iusace_enc_ext_elemts(ID_EXT_ELE_UNI_DRC, ptr_usac_config, pstr_state, pstr_asc,
Expand Down Expand Up @@ -1424,4 +1453,4 @@ IA_ERRORCODE ixheaace_usac_encode(FLOAT32 **ptr_input,
}

return err;
}
}
3 changes: 3 additions & 0 deletions encoder/iusace_main.h
Original file line number Diff line number Diff line change
Expand Up @@ -97,6 +97,9 @@ typedef struct {
WORD32 min_bits_needed;
WORD32 num_drc_bits;
WORD32 use_acelp_only;
WORD32 prev_out_bytes[MAX_PREROLL_FRAMES];
UWORD8 prev_out_data[MAX_PREROLL_FRAMES][MAX_OUTPUT_BYTES_PER_CH * IXHEAACE_MAX_CH_IN_BS_ELE];
UWORD32 stereo_config_index;
} ia_usac_data_struct;

typedef struct {
Expand Down
6 changes: 6 additions & 0 deletions encoder/iusace_psy_utils.c
Original file line number Diff line number Diff line change
Expand Up @@ -160,6 +160,9 @@ static VOID iusace_sfb_init(WORD32 sample_rate, WORD32 block_type, WORD32 *ptr_s
case 8000:
ptr_sfb_params = pstr_sfb_info_tbls[0].cb_offset_long;
break;
default:
ptr_sfb_params = pstr_sfb_info_tbls[8].cb_offset_long;
break;
}
} else {
block_len = ccfl >> 3;
Expand Down Expand Up @@ -202,6 +205,9 @@ static VOID iusace_sfb_init(WORD32 sample_rate, WORD32 block_type, WORD32 *ptr_s
case 8000:
ptr_sfb_params = pstr_sfb_info_tbls[0].cb_offset_short;
break;
default:
ptr_sfb_params = pstr_sfb_info_tbls[8].cb_offset_short;
break;
}
}

Expand Down
2 changes: 1 addition & 1 deletion encoder/iusace_write_bitstream.c
Original file line number Diff line number Diff line change
Expand Up @@ -509,7 +509,7 @@ WORD32 iusace_write_cpe(ia_sfb_params_struct *pstr_sfb_prms, ia_bit_buf_struct *
pstr_sfb_prms->num_window_groups[ch], pstr_sfb_prms->max_sfb_ste);

{
if (ms_mask == 3) {
if ((ms_mask == 3) && (pstr_usac_data->stereo_config_index == 0)) {
bit_count += iusace_write_cplx_pred_data(
it_bit_buf, pstr_sfb_prms->num_window_groups[ch], pstr_sfb_prms->max_sfb_ste,
pstr_usac_data->complex_coef[ch], pstr_usac_data->pred_coef_re[ch],
Expand Down
Loading

0 comments on commit 58381f7

Please sign in to comment.