TT#118800 add support for `channels` mix mode

Change-Id: I00a51c4278ccc8a34c8db271eb08e0a8f48fd28a
pull/1285/head
Richard Fuchs 4 years ago
parent e67e395a69
commit a5d2837615

@ -75,11 +75,9 @@ decode_t *decoder_new(const char *payload_str, const char *format, int ptime, ou
// mono/stereo mixing goes here: out_format.channels = ...
if (outp) {
// if this output has been configured already, re-use the same format
if (outp->encoder->requested_format.format != -1)
out_format = outp->encoder->requested_format;
if (outp->requested_format.format != -1)
out_format = outp->requested_format;
output_config(outp, &out_format, &out_format);
// save the returned sample format so we don't output_config() twice
outp->encoder->requested_format.format = out_format.format;
}
str fmtp;

@ -37,6 +37,7 @@ char *spool_dir = NULL;
char *output_dir = NULL;
static char *output_format = NULL;
int output_mixed;
enum mix_method mix_method;
int output_single;
int output_enabled = 1;
mode_t output_chmod;
@ -175,6 +176,7 @@ static void options(int *argc, char ***argv) {
AUTO_CLEANUP_GBUF(chmod_dir_mode);
AUTO_CLEANUP_GBUF(user_uid);
AUTO_CLEANUP_GBUF(group_gid);
AUTO_CLEANUP_GBUF(mix_method_str);
GOptionEntry e[] = {
{ "table", 't', 0, G_OPTION_ARG_INT, &ktable, "Kernel table rtpengine uses", "INT" },
@ -187,6 +189,7 @@ static void options(int *argc, char ***argv) {
{ "resample-to", 0, 0, G_OPTION_ARG_INT, &resample_audio,"Resample all output audio", "INT" },
{ "mp3-bitrate", 0, 0, G_OPTION_ARG_INT, &mp3_bitrate, "Bits per second for MP3 encoding", "INT" },
{ "output-mixed", 0, 0, G_OPTION_ARG_NONE, &output_mixed, "Mix participating sources into a single output",NULL },
{ "mix-method", 0, 0, G_OPTION_ARG_STRING, &mix_method_str,"How to mix multiple sources", "direct|channels"},
{ "output-single", 0, 0, G_OPTION_ARG_NONE, &output_single, "Create one output file for each source",NULL },
{ "output-chmod", 0, 0, G_OPTION_ARG_STRING, &chmod_mode, "File mode for recordings", "OCTAL" },
{ "output-chmod-dir", 0, 0, G_OPTION_ARG_STRING, &chmod_dir_mode,"Directory mode for recordings", "OCTAL" },
@ -246,6 +249,13 @@ static void options(int *argc, char ***argv) {
else
die("Invalid 'output-storage' option");
if (!mix_method_str || !mix_method_str[0] || !strcmp(mix_method_str, "direct"))
mix_method = MM_DIRECT;
else if (!strcmp(mix_method_str, "channels"))
mix_method = MM_CHANNELS;
else
die("Invalid 'mix-method' option");
if ((output_storage & OUTPUT_STORAGE_FILE) && !strcmp(output_dir, spool_dir))
die("The spool-dir cannot be the same as the output-dir");

@ -12,6 +12,10 @@ enum output_storage_enum {
OUTPUT_STORAGE_DB = 0x2,
OUTPUT_STORAGE_BOTH = 0x3,
};
/* Strategy used to combine several audio sources into one mixed output.
 * MM_DIRECT is deliberately zero so that it is the value of a
 * zero-initialised `mix_method` variable. */
enum mix_method {
	MM_DIRECT   = 0,	/* inputs are fed into an "amix" filter (one combined stream) */
	MM_CHANNELS = 1,	/* inputs are fed into an "amerge" filter; output channel
				 * count is the input channel count times MIX_NUM_INPUTS */
};
extern int ktable;
extern int num_threads;
@ -19,6 +23,7 @@ extern enum output_storage_enum output_storage;
extern char *spool_dir;
extern char *output_dir;
extern int output_mixed;
extern enum mix_method mix_method;
extern int output_single;
extern int output_enabled;
extern mode_t output_chmod;

@ -74,6 +74,8 @@ static void meta_stream_interface(metafile_t *mf, unsigned long snum, char *cont
pthread_mutex_lock(&mf->mix_lock);
if (!mf->mix) {
mf->mix_out = output_new(output_dir, mf->parent, "mix");
if (mix_method == MM_CHANNELS)
mf->mix_out->channel_mult = MIX_NUM_INPUTS;
mf->mix = mix_new();
db_do_stream(mf, mf->mix_out, "mixed", NULL, 0);
}

@ -12,20 +12,21 @@
#include "log.h"
#include "output.h"
#include "resample.h"
#include "main.h"
#define NUM_INPUTS 4
struct mix_s {
format_t format;
format_t in_format,
out_format;
AVFilterGraph *graph;
AVFilterContext *src_ctxs[NUM_INPUTS];
uint64_t pts_offs[NUM_INPUTS]; // initialized at first input seen
uint64_t in_pts[NUM_INPUTS]; // running counter of next expected adjusted pts
struct timeval last_use[NUM_INPUTS]; // to recycle old mix inputs
void *input_ref[NUM_INPUTS]; // to avoid collisions in case of idx re-use
AVFilterContext *src_ctxs[MIX_NUM_INPUTS];
uint64_t pts_offs[MIX_NUM_INPUTS]; // initialized at first input seen
uint64_t in_pts[MIX_NUM_INPUTS]; // running counter of next expected adjusted pts
struct timeval last_use[MIX_NUM_INPUTS]; // to recycle old mix inputs
void *input_ref[MIX_NUM_INPUTS]; // to avoid collisions in case of idx re-use
uint64_t channel_layout[MIX_NUM_INPUTS];
AVFilterContext *amix_ctx;
AVFilterContext *sink_ctx;
unsigned int next_idx;
@ -48,7 +49,7 @@ static void mix_shutdown(mix_t *mix) {
avfilter_free(mix->sink_ctx);
mix->sink_ctx = NULL;
for (unsigned int i = 0; i < NUM_INPUTS; i++) {
for (unsigned int i = 0; i < MIX_NUM_INPUTS; i++) {
if (mix->src_ctxs[i])
avfilter_free(mix->src_ctxs[i]);
mix->src_ctxs[i] = NULL;
@ -57,7 +58,8 @@ static void mix_shutdown(mix_t *mix) {
resample_shutdown(&mix->resample);
avfilter_graph_free(&mix->graph);
format_init(&mix->format);
format_init(&mix->in_format);
format_init(&mix->out_format);
}
@ -80,7 +82,7 @@ static void mix_input_reset(mix_t *mix, unsigned int idx) {
unsigned int mix_get_index(mix_t *mix, void *ptr) {
unsigned int next = mix->next_idx++;
if (next < NUM_INPUTS) {
if (next < MIX_NUM_INPUTS) {
// must be unused
mix->input_ref[next] = ptr;
return next;
@ -89,7 +91,7 @@ unsigned int mix_get_index(mix_t *mix, void *ptr) {
// too many inputs - find one to re-use
struct timeval earliest = {0,};
next = 0;
for (unsigned int i = 0; i < NUM_INPUTS; i++) {
for (unsigned int i = 0; i < MIX_NUM_INPUTS; i++) {
if (earliest.tv_sec == 0 || timeval_cmp(&earliest, &mix->last_use[i]) > 0) {
next = i;
earliest = mix->last_use[i];
@ -107,12 +109,12 @@ int mix_config(mix_t *mix, const format_t *format) {
const char *err;
char args[512];
if (format_eq(format, &mix->format))
if (format_eq(format, &mix->in_format))
return 0;
mix_shutdown(mix);
mix->format = *format;
mix->in_format = *format;
// filter graph
err = "failed to alloc filter graph";
@ -123,13 +125,17 @@ int mix_config(mix_t *mix, const format_t *format) {
mix->graph->thread_type = 0;
// amix
err = "no amix filter available";
const AVFilter *flt = avfilter_get_by_name("amix");
err = "no amix/amerge filter available";
const AVFilter *flt = NULL;
if (mix_method == MM_DIRECT)
flt = avfilter_get_by_name("amix");
else if (mix_method == MM_CHANNELS)
flt = avfilter_get_by_name("amerge");
if (!flt)
goto err;
snprintf(args, sizeof(args), "inputs=%lu", (unsigned long) NUM_INPUTS);
err = "failed to create amix filter context";
snprintf(args, sizeof(args), "inputs=%lu", (unsigned long) MIX_NUM_INPUTS);
err = "failed to create amix/amerge filter context";
if (avfilter_graph_create_filter(&mix->amix_ctx, flt, NULL, args, NULL, mix->graph))
goto err;
@ -139,14 +145,27 @@ int mix_config(mix_t *mix, const format_t *format) {
if (!flt)
goto err;
for (unsigned int i = 0; i < NUM_INPUTS; i++) {
uint64_t channel_layout = av_get_default_channel_layout(mix->in_format.channels);
uint64_t ext_layout = av_get_default_channel_layout(mix->in_format.channels * MIX_NUM_INPUTS);
for (unsigned int i = 0; i < MIX_NUM_INPUTS; i++) {
dbg("init input ctx %i", i);
uint64_t ch_layout = channel_layout;
if (mix_method == MM_CHANNELS) {
ch_layout = 0;
for (int ch = 0; ch < mix->in_format.channels; ch++)
ch_layout |= av_channel_layout_extract_channel(ext_layout,
i * mix->in_format.channels + ch);
}
snprintf(args, sizeof(args), "time_base=%d/%d:sample_rate=%d:sample_fmt=%s:"
"channel_layout=0x%" PRIx64,
1, mix->format.clockrate, mix->format.clockrate,
av_get_sample_fmt_name(mix->format.format),
av_get_default_channel_layout(mix->format.channels));
1, mix->in_format.clockrate, mix->in_format.clockrate,
av_get_sample_fmt_name(mix->in_format.format),
ch_layout);
mix->channel_layout[i] = ch_layout;
err = "failed to create abuffer filter context";
if (avfilter_graph_create_filter(&mix->src_ctxs[i], flt, NULL, args, NULL, mix->graph))
@ -176,6 +195,10 @@ int mix_config(mix_t *mix, const format_t *format) {
if (avfilter_graph_config(mix->graph, NULL))
goto err;
mix->out_format = mix->in_format;
if (mix_method == MM_CHANNELS)
mix->out_format.channels *= MIX_NUM_INPUTS;
return 0;
err:
@ -187,10 +210,11 @@ err:
mix_t *mix_new() {
mix_t *mix = g_slice_alloc0(sizeof(*mix));
format_init(&mix->format);
format_init(&mix->in_format);
format_init(&mix->out_format);
mix->sink_frame = av_frame_alloc();
for (unsigned int i = 0; i < NUM_INPUTS; i++)
for (unsigned int i = 0; i < MIX_NUM_INPUTS; i++)
mix->pts_offs[i] = (uint64_t) -1LL;
return mix;
@ -198,10 +222,10 @@ mix_t *mix_new() {
static void mix_silence_fill_idx_upto(mix_t *mix, unsigned int idx, uint64_t upto) {
unsigned int silence_samples = mix->format.clockrate / 100;
unsigned int silence_samples = mix->in_format.clockrate / 100;
while (mix->in_pts[idx] < upto) {
if (G_UNLIKELY(upto - mix->in_pts[idx] > mix->format.clockrate * 30)) {
if (G_UNLIKELY(upto - mix->in_pts[idx] > mix->in_format.clockrate * 30)) {
ilog(LOG_WARN, "More than 30 seconds of silence needed to fill mix buffer, resetting");
mix->in_pts[idx] = upto;
break;
@ -209,16 +233,16 @@ static void mix_silence_fill_idx_upto(mix_t *mix, unsigned int idx, uint64_t upt
if (G_UNLIKELY(!mix->silence_frame)) {
mix->silence_frame = av_frame_alloc();
mix->silence_frame->format = mix->format.format;
mix->silence_frame->format = mix->in_format.format;
mix->silence_frame->channel_layout =
av_get_default_channel_layout(mix->format.channels);
av_get_default_channel_layout(mix->in_format.channels);
mix->silence_frame->nb_samples = silence_samples;
mix->silence_frame->sample_rate = mix->format.clockrate;
mix->silence_frame->sample_rate = mix->in_format.clockrate;
if (av_frame_get_buffer(mix->silence_frame, 0) < 0) {
ilog(LOG_ERR, "Failed to get silence frame buffers");
return;
}
int planes = av_sample_fmt_is_planar(mix->silence_frame->format) ? mix->format.channels : 1;
int planes = av_sample_fmt_is_planar(mix->silence_frame->format) ? mix->in_format.channels : 1;
for (int i = 0; i < planes; i++)
memset(mix->silence_frame->extended_data[i], 0, mix->silence_frame->linesize[0]);
}
@ -231,6 +255,7 @@ static void mix_silence_fill_idx_upto(mix_t *mix, unsigned int idx, uint64_t upt
mix->silence_frame->nb_samples = MIN(silence_samples, upto - mix->in_pts[idx]);
mix->in_pts[idx] += mix->silence_frame->nb_samples;
mix->silence_frame->channel_layout = mix->channel_layout[idx];
if (av_buffersrc_write_frame(mix->src_ctxs[idx], mix->silence_frame))
ilog(LOG_WARN, "Failed to write silence frame to buffer");
}
@ -238,14 +263,14 @@ static void mix_silence_fill_idx_upto(mix_t *mix, unsigned int idx, uint64_t upt
static void mix_silence_fill(mix_t *mix) {
if (mix->out_pts < mix->format.clockrate)
if (mix->out_pts < mix->in_format.clockrate)
return;
for (unsigned int i = 0; i < NUM_INPUTS; i++) {
for (unsigned int i = 0; i < MIX_NUM_INPUTS; i++) {
// check the pts of each input and give them max 0.5 second of delay.
// if they fall behind too much, fill input with silence. otherwise
// output stalls and won't produce media
mix_silence_fill_idx_upto(mix, i, mix->out_pts - mix->format.clockrate / 2);
mix_silence_fill_idx_upto(mix, i, mix->out_pts - mix->in_format.clockrate / 2);
}
}
@ -254,7 +279,7 @@ int mix_add(mix_t *mix, AVFrame *frame, unsigned int idx, void *ptr, output_t *o
const char *err;
err = "index out of range";
if (idx >= NUM_INPUTS)
if (idx >= MIX_NUM_INPUTS)
goto err;
err = "mixer not initialized";
@ -291,6 +316,7 @@ int mix_add(mix_t *mix, AVFrame *frame, unsigned int idx, void *ptr, output_t *o
uint64_t next_pts = frame->pts + frame->nb_samples;
frame->channel_layout = mix->channel_layout[idx];
err = "failed to add frame to mixer";
if (av_buffersrc_add_frame(mix->src_ctxs[idx], frame))
goto err;
@ -314,7 +340,7 @@ int mix_add(mix_t *mix, AVFrame *frame, unsigned int idx, void *ptr, output_t *o
else
goto err;
}
frame = resample_frame(&mix->resample, mix->sink_frame, &mix->format);
frame = resample_frame(&mix->resample, mix->sink_frame, &mix->out_format);
ret = output_add(output, frame);

@ -5,6 +5,9 @@
#include <libavutil/frame.h>
#define MIX_NUM_INPUTS 4
mix_t *mix_new(void);
void mix_destroy(mix_t *mix);

@ -165,6 +165,9 @@ done:;
ret->full_filename = g_strdup_printf("%s/%s", path, f->str);
ret->file_format = output_file_format;
ret->encoder = encoder_new();
ret->channel_mult = 1;
ret->requested_format.format = -1;
ret->actual_format.format = -1;
create_parent_dirs(ret->full_filename);
@ -178,12 +181,16 @@ int output_config(output_t *output, const format_t *requested_format, format_t *
const char *err;
int av_ret = 0;
format_t req_fmt = *requested_format;
// if we've already done this and don't care about the sample format,
// restore the already determined sample format
if (req_fmt.format == -1 && output->requested_format.format != -1)
req_fmt.format = output->requested_format.format;
// anything to do?
if (G_LIKELY(format_eq(requested_format, &output->encoder->requested_format))) {
if (actual_format)
*actual_format = output->encoder->actual_format;
if (G_LIKELY(format_eq(&req_fmt, &output->requested_format)))
goto done;
}
output_shutdown(output);
@ -196,9 +203,19 @@ int output_config(output_t *output, const format_t *requested_format, format_t *
if (!output->fmtctx->oformat)
goto err;
if (encoder_config(output->encoder, output_codec, mp3_bitrate, 0, requested_format, actual_format))
// mask the channel multiplier from external view
output->requested_format = *requested_format;
req_fmt.channels *= output->channel_mult;
if (encoder_config(output->encoder, output_codec, mp3_bitrate, 0, &req_fmt, &output->actual_format))
goto err;
if (output->actual_format.channels == req_fmt.channels)
output->actual_format.channels /= output->channel_mult;
// save the sample format
if (requested_format->format == -1)
output->requested_format.format = output->actual_format.format;
err = "failed to alloc output stream";
output->avst = avformat_new_stream(output->fmtctx, output->encoder->u.avc.codec);
if (!output->avst)
@ -244,6 +261,8 @@ got_fn:
db_config_stream(output);
done:
if (actual_format)
*actual_format = output->actual_format;
return 0;
err:

@ -235,6 +235,20 @@ stream is produced. Audio mixing takes RTP timestamping into account, so gaps
and pauses in the RTP media are reflected in the output audio to keep the
multiple audio sources in sync.
=item B<--mix-method=>B<direct>|B<channels>
Selects the method used to mix multiple audio inputs into a single output file
for B<mixed> output. The default is B<direct>, which mixes all audio inputs
directly together and produces a mixed output file in the same format that an
audio file from a single input (B<output-single>) would have.
The B<channels> mixing method puts each audio input into its own audio channel
in the output file, thereby producing a multi-channel output file. Up to four
separate RTP SSRCs are supported for a mixed output, which means that if each
input is mono audio, the mixed output file contains 4 audio channels.
This mixing method requires an output file format which supports this kind of
multi-channel audio (e.g. B<wav>).
=item B<--output-chmod=>I<INT>
Change the file permissions of recording files to the given mode. Must be given

@ -146,10 +146,13 @@ struct output_s {
*filename; // path + filename + suffix
const char *file_format;
unsigned long long db_id;
unsigned int channel_mult;
AVFormatContext *fmtctx;
AVStream *avst;
encoder_t *encoder;
format_t requested_format,
actual_format;
};

Loading…
Cancel
Save