TT#118800 add support for `channels` mix mode

Change-Id: I00a51c4278ccc8a34c8db271eb08e0a8f48fd28a
4 years ago · a5d2837615
parent e67e395a69
commit a5d2837615
9 changed files with 124 additions and 44 deletions
--- a/recording-daemon/decoder.c
+++ b/recording-daemon/decoder.c
@ -75,11 +75,9 @@ decode_t *decoder_new(const char *payload_str, const char *format, int ptime, ou
 	// mono/stereo mixing goes here: out_format.channels = ...
 	if (outp) {
 		// if this output has been configured already, re-use the same format
-		if (outp->encoder->requested_format.format != -1)
-			out_format = outp->encoder->requested_format;
+		if (outp->requested_format.format != -1)
+			out_format = outp->requested_format;
 		output_config(outp, &out_format, &out_format);
-		// save the returned sample format so we don't output_config() twice
-		outp->encoder->requested_format.format = out_format.format;
 	}

 	str fmtp;
--- a/recording-daemon/main.c
+++ b/recording-daemon/main.c
@ -37,6 +37,7 @@ char *spool_dir = NULL;
 char *output_dir = NULL;
 static char *output_format = NULL;
 int output_mixed;
+enum mix_method mix_method;
 int output_single;
 int output_enabled = 1;
 mode_t output_chmod;
@ -175,6 +176,7 @@ static void options(int *argc, char ***argv) {
 	AUTO_CLEANUP_GBUF(chmod_dir_mode);
 	AUTO_CLEANUP_GBUF(user_uid);
 	AUTO_CLEANUP_GBUF(group_gid);
+	AUTO_CLEANUP_GBUF(mix_method_str);

 	GOptionEntry e[] = {
 		{ "table",		't', 0, G_OPTION_ARG_INT,	&ktable,	"Kernel table rtpengine uses",		"INT"		},
@ -187,6 +189,7 @@ static void options(int *argc, char ***argv) {
 		{ "resample-to",	0,   0, G_OPTION_ARG_INT,	&resample_audio,"Resample all output audio",		"INT"		},
 		{ "mp3-bitrate",	0,   0, G_OPTION_ARG_INT,	&mp3_bitrate,	"Bits per second for MP3 encoding",	"INT"		},
 		{ "output-mixed",	0,   0, G_OPTION_ARG_NONE,	&output_mixed,	"Mix participating sources into a single output",NULL	},
+		{ "mix-method",		0,   0, G_OPTION_ARG_STRING,	&mix_method_str,"How to mix multiple sources",		"direct|channels"},
 		{ "output-single",	0,   0, G_OPTION_ARG_NONE,	&output_single,	"Create one output file for each source",NULL		},
 		{ "output-chmod",	0,   0, G_OPTION_ARG_STRING,	&chmod_mode,	"File mode for recordings",		"OCTAL"		},
 		{ "output-chmod-dir",	0,   0, G_OPTION_ARG_STRING,	&chmod_dir_mode,"Directory mode for recordings",	"OCTAL"		},
@ -246,6 +249,13 @@ static void options(int *argc, char ***argv) {
 	else
 		die("Invalid 'output-storage' option");

+	if (!mix_method_str || !mix_method_str[0] || !strcmp(mix_method_str, "direct"))
+		mix_method = MM_DIRECT;
+	else if (!strcmp(mix_method_str, "channels"))
+		mix_method = MM_CHANNELS;
+	else
+		die("Invalid 'mix-method' option");
+
 	if ((output_storage & OUTPUT_STORAGE_FILE) && !strcmp(output_dir, spool_dir))
 		die("The spool-dir cannot be the same as the output-dir");

--- a/recording-daemon/main.h
+++ b/recording-daemon/main.h
@ -12,6 +12,10 @@ enum output_storage_enum {
 	OUTPUT_STORAGE_DB = 0x2,
 	OUTPUT_STORAGE_BOTH = 0x3,
 };
+enum mix_method {
+	MM_DIRECT = 0,
+	MM_CHANNELS,
+};

 extern int ktable;
 extern int num_threads;
@ -19,6 +23,7 @@ extern enum output_storage_enum output_storage;
 extern char *spool_dir;
 extern char *output_dir;
 extern int output_mixed;
+extern enum mix_method mix_method;
 extern int output_single;
 extern int output_enabled;
 extern mode_t output_chmod;
--- a/recording-daemon/metafile.c
+++ b/recording-daemon/metafile.c
@ -74,6 +74,8 @@ static void meta_stream_interface(metafile_t *mf, unsigned long snum, char *cont
 		pthread_mutex_lock(&mf->mix_lock);
 		if (!mf->mix) {
 			mf->mix_out = output_new(output_dir, mf->parent, "mix");
+			if (mix_method == MM_CHANNELS)
+				mf->mix_out->channel_mult = MIX_NUM_INPUTS;
 			mf->mix = mix_new();
 			db_do_stream(mf, mf->mix_out, "mixed", NULL, 0);
 		}
--- a/recording-daemon/mix.c
+++ b/recording-daemon/mix.c
@ -12,20 +12,21 @@
 #include "log.h"
 #include "output.h"
 #include "resample.h"
+#include "main.h"


-#define NUM_INPUTS 4
-

 struct mix_s {
-	format_t format;
+	format_t in_format,
+		 out_format;

 	AVFilterGraph *graph;
-	AVFilterContext *src_ctxs[NUM_INPUTS];
-	uint64_t pts_offs[NUM_INPUTS]; // initialized at first input seen
-	uint64_t in_pts[NUM_INPUTS]; // running counter of next expected adjusted pts
-	struct timeval last_use[NUM_INPUTS]; // to recycle old mix inputs
-	void *input_ref[NUM_INPUTS]; // to avoid collisions in case of idx re-use
+	AVFilterContext *src_ctxs[MIX_NUM_INPUTS];
+	uint64_t pts_offs[MIX_NUM_INPUTS]; // initialized at first input seen
+	uint64_t in_pts[MIX_NUM_INPUTS]; // running counter of next expected adjusted pts
+	struct timeval last_use[MIX_NUM_INPUTS]; // to recycle old mix inputs
+	void *input_ref[MIX_NUM_INPUTS]; // to avoid collisions in case of idx re-use
+	uint64_t channel_layout[MIX_NUM_INPUTS];
 	AVFilterContext *amix_ctx;
 	AVFilterContext *sink_ctx;
 	unsigned int next_idx;
@ -48,7 +49,7 @@ static void mix_shutdown(mix_t *mix) {
 		avfilter_free(mix->sink_ctx);
 	mix->sink_ctx = NULL;

-	for (unsigned int i = 0; i < NUM_INPUTS; i++) {
+	for (unsigned int i = 0; i < MIX_NUM_INPUTS; i++) {
 		if (mix->src_ctxs[i])
 			avfilter_free(mix->src_ctxs[i]);
 		mix->src_ctxs[i] = NULL;
@ -57,7 +58,8 @@ static void mix_shutdown(mix_t *mix) {
 	resample_shutdown(&mix->resample);
 	avfilter_graph_free(&mix->graph);

-	format_init(&mix->format);
+	format_init(&mix->in_format);
+	format_init(&mix->out_format);
 }


@ -80,7 +82,7 @@ static void mix_input_reset(mix_t *mix, unsigned int idx) {

 unsigned int mix_get_index(mix_t *mix, void *ptr) {
 	unsigned int next = mix->next_idx++;
-	if (next < NUM_INPUTS) {
+	if (next < MIX_NUM_INPUTS) {
 		// must be unused
 		mix->input_ref[next] = ptr;
 		return next;
@ -89,7 +91,7 @@ unsigned int mix_get_index(mix_t *mix, void *ptr) {
 	// too many inputs - find one to re-use
 	struct timeval earliest = {0,};
 	next = 0;
-	for (unsigned int i = 0; i < NUM_INPUTS; i++) {
+	for (unsigned int i = 0; i < MIX_NUM_INPUTS; i++) {
 		if (earliest.tv_sec == 0 || timeval_cmp(&earliest, &mix->last_use[i]) > 0) {
 			next = i;
 			earliest = mix->last_use[i];
@ -107,12 +109,12 @@ int mix_config(mix_t *mix, const format_t *format) {
 	const char *err;
 	char args[512];

-	if (format_eq(format, &mix->format))
+	if (format_eq(format, &mix->in_format))
 		return 0;

 	mix_shutdown(mix);

-	mix->format = *format;
+	mix->in_format = *format;

 	// filter graph
 	err = "failed to alloc filter graph";
@ -123,13 +125,17 @@ int mix_config(mix_t *mix, const format_t *format) {
 	mix->graph->thread_type = 0;

 	// amix
-	err = "no amix filter available";
-	const AVFilter *flt = avfilter_get_by_name("amix");
+	err = "no amix/amerge filter available";
+	const AVFilter *flt = NULL;
+	if (mix_method == MM_DIRECT)
+		flt = avfilter_get_by_name("amix");
+	else if (mix_method == MM_CHANNELS)
+		flt = avfilter_get_by_name("amerge");
 	if (!flt)
 		goto err;

-	snprintf(args, sizeof(args), "inputs=%lu", (unsigned long) NUM_INPUTS);
-	err = "failed to create amix filter context";
+	snprintf(args, sizeof(args), "inputs=%lu", (unsigned long) MIX_NUM_INPUTS);
+	err = "failed to create amix/amerge filter context";
 	if (avfilter_graph_create_filter(&mix->amix_ctx, flt, NULL, args, NULL, mix->graph))
 		goto err;

@ -139,14 +145,27 @@ int mix_config(mix_t *mix, const format_t *format) {
 	if (!flt)
 		goto err;

-	for (unsigned int i = 0; i < NUM_INPUTS; i++) {
+	uint64_t channel_layout = av_get_default_channel_layout(mix->in_format.channels);
+	uint64_t ext_layout = av_get_default_channel_layout(mix->in_format.channels * MIX_NUM_INPUTS);
+
+	for (unsigned int i = 0; i < MIX_NUM_INPUTS; i++) {
 		dbg("init input ctx %i", i);

+		uint64_t ch_layout = channel_layout;
+
+		if (mix_method == MM_CHANNELS) {
+			ch_layout = 0;
+			for (int ch = 0; ch < mix->in_format.channels; ch++)
+				ch_layout |= av_channel_layout_extract_channel(ext_layout,
+						i * mix->in_format.channels + ch);
+		}
+
 		snprintf(args, sizeof(args), "time_base=%d/%d:sample_rate=%d:sample_fmt=%s:"
 				"channel_layout=0x%" PRIx64,
-				1, mix->format.clockrate, mix->format.clockrate,
-				av_get_sample_fmt_name(mix->format.format),
-				av_get_default_channel_layout(mix->format.channels));
+				1, mix->in_format.clockrate, mix->in_format.clockrate,
+				av_get_sample_fmt_name(mix->in_format.format),
+				ch_layout);
+		mix->channel_layout[i] = ch_layout;

 		err = "failed to create abuffer filter context";
 		if (avfilter_graph_create_filter(&mix->src_ctxs[i], flt, NULL, args, NULL, mix->graph))
@ -176,6 +195,10 @@ int mix_config(mix_t *mix, const format_t *format) {
 	if (avfilter_graph_config(mix->graph, NULL))
 		goto err;

+	mix->out_format = mix->in_format;
+	if (mix_method == MM_CHANNELS)
+		mix->out_format.channels *= MIX_NUM_INPUTS;
+
 	return 0;

 err:
@ -187,10 +210,11 @@ err:

 mix_t *mix_new() {
 	mix_t *mix = g_slice_alloc0(sizeof(*mix));
-	format_init(&mix->format);
+	format_init(&mix->in_format);
+	format_init(&mix->out_format);
 	mix->sink_frame = av_frame_alloc();

-	for (unsigned int i = 0; i < NUM_INPUTS; i++)
+	for (unsigned int i = 0; i < MIX_NUM_INPUTS; i++)
 		mix->pts_offs[i] = (uint64_t) -1LL;

 	return mix;
@ -198,10 +222,10 @@ mix_t *mix_new() {


 static void mix_silence_fill_idx_upto(mix_t *mix, unsigned int idx, uint64_t upto) {
-	unsigned int silence_samples = mix->format.clockrate / 100;
+	unsigned int silence_samples = mix->in_format.clockrate / 100;

 	while (mix->in_pts[idx] < upto) {
-		if (G_UNLIKELY(upto - mix->in_pts[idx] > mix->format.clockrate * 30)) {
+		if (G_UNLIKELY(upto - mix->in_pts[idx] > mix->in_format.clockrate * 30)) {
 			ilog(LOG_WARN, "More than 30 seconds of silence needed to fill mix buffer, resetting");
 			mix->in_pts[idx] = upto;
 			break;
@ -209,16 +233,16 @@ static void mix_silence_fill_idx_upto(mix_t *mix, unsigned int idx, uint64_t upt

 		if (G_UNLIKELY(!mix->silence_frame)) {
 			mix->silence_frame = av_frame_alloc();
-			mix->silence_frame->format = mix->format.format;
+			mix->silence_frame->format = mix->in_format.format;
 			mix->silence_frame->channel_layout =
-				av_get_default_channel_layout(mix->format.channels);
+				av_get_default_channel_layout(mix->in_format.channels);
 			mix->silence_frame->nb_samples = silence_samples;
-			mix->silence_frame->sample_rate = mix->format.clockrate;
+			mix->silence_frame->sample_rate = mix->in_format.clockrate;
 			if (av_frame_get_buffer(mix->silence_frame, 0) < 0) {
 				ilog(LOG_ERR, "Failed to get silence frame buffers");
 				return;
 			}
-			int planes = av_sample_fmt_is_planar(mix->silence_frame->format) ? mix->format.channels : 1;
+			int planes = av_sample_fmt_is_planar(mix->silence_frame->format) ? mix->in_format.channels : 1;
 			for (int i = 0; i < planes; i++)
 				memset(mix->silence_frame->extended_data[i], 0, mix->silence_frame->linesize[0]);
 		}
@ -231,6 +255,7 @@ static void mix_silence_fill_idx_upto(mix_t *mix, unsigned int idx, uint64_t upt
 		mix->silence_frame->nb_samples = MIN(silence_samples, upto - mix->in_pts[idx]);
 		mix->in_pts[idx] += mix->silence_frame->nb_samples;

+		mix->silence_frame->channel_layout = mix->channel_layout[idx];
 		if (av_buffersrc_write_frame(mix->src_ctxs[idx], mix->silence_frame))
 			ilog(LOG_WARN, "Failed to write silence frame to buffer");
 	}
@ -238,14 +263,14 @@ static void mix_silence_fill_idx_upto(mix_t *mix, unsigned int idx, uint64_t upt


 static void mix_silence_fill(mix_t *mix) {
-	if (mix->out_pts < mix->format.clockrate)
+	if (mix->out_pts < mix->in_format.clockrate)
 		return;

-	for (unsigned int i = 0; i < NUM_INPUTS; i++) {
+	for (unsigned int i = 0; i < MIX_NUM_INPUTS; i++) {
 		// check the pts of each input and give them max 0.5 second of delay.
 		// if they fall behind too much, fill input with silence. otherwise
 		// output stalls and won't produce media
-		mix_silence_fill_idx_upto(mix, i, mix->out_pts - mix->format.clockrate / 2);
+		mix_silence_fill_idx_upto(mix, i, mix->out_pts - mix->in_format.clockrate / 2);
 	}
 }

@ -254,7 +279,7 @@ int mix_add(mix_t *mix, AVFrame *frame, unsigned int idx, void *ptr, output_t *o
 	const char *err;

 	err = "index out of range";
-	if (idx >= NUM_INPUTS)
+	if (idx >= MIX_NUM_INPUTS)
 		goto err;

 	err = "mixer not initialized";
@ -291,6 +316,7 @@ int mix_add(mix_t *mix, AVFrame *frame, unsigned int idx, void *ptr, output_t *o

 	uint64_t next_pts = frame->pts + frame->nb_samples;

+	frame->channel_layout = mix->channel_layout[idx];
 	err = "failed to add frame to mixer";
 	if (av_buffersrc_add_frame(mix->src_ctxs[idx], frame))
 		goto err;
@ -314,7 +340,7 @@ int mix_add(mix_t *mix, AVFrame *frame, unsigned int idx, void *ptr, output_t *o
 			else
 				goto err;
 		}
-		frame = resample_frame(&mix->resample, mix->sink_frame, &mix->format);
+		frame = resample_frame(&mix->resample, mix->sink_frame, &mix->out_format);

 		ret = output_add(output, frame);

--- a/recording-daemon/mix.h
+++ b/recording-daemon/mix.h
@ -5,6 +5,9 @@
 #include <libavutil/frame.h>


+#define MIX_NUM_INPUTS 4
+
+
 mix_t *mix_new(void);
 void mix_destroy(mix_t *mix);

--- a/recording-daemon/output.c
+++ b/recording-daemon/output.c
@ -165,6 +165,9 @@ done:;
 	ret->full_filename = g_strdup_printf("%s/%s", path, f->str);
 	ret->file_format = output_file_format;
 	ret->encoder = encoder_new();
+	ret->channel_mult = 1;
+	ret->requested_format.format = -1;
+	ret->actual_format.format = -1;

 	create_parent_dirs(ret->full_filename);

@ -178,12 +181,16 @@ int output_config(output_t *output, const format_t *requested_format, format_t *
 	const char *err;
 	int av_ret = 0;

+	format_t req_fmt = *requested_format;
+
+	// if we've already done this and don't care about the sample format,
+	// restore the already determined sample format
+	if (req_fmt.format == -1 && output->requested_format.format != -1)
+		req_fmt.format = output->requested_format.format;
+
 	// anything to do?
-	if (G_LIKELY(format_eq(requested_format, &output->encoder->requested_format))) {
-		if (actual_format)
-			*actual_format = output->encoder->actual_format;
+	if (G_LIKELY(format_eq(&req_fmt, &output->requested_format)))
 		goto done;
-	}

 	output_shutdown(output);

@ -196,9 +203,19 @@ int output_config(output_t *output, const format_t *requested_format, format_t *
 	if (!output->fmtctx->oformat)
 		goto err;

-	if (encoder_config(output->encoder, output_codec, mp3_bitrate, 0, requested_format, actual_format))
+	// mask the channel multiplier from external view
+	output->requested_format = *requested_format;
+	req_fmt.channels *= output->channel_mult;
+
+	if (encoder_config(output->encoder, output_codec, mp3_bitrate, 0, &req_fmt, &output->actual_format))
 		goto err;

+	if (output->actual_format.channels == req_fmt.channels)
+		output->actual_format.channels /= output->channel_mult;
+	// save the sample format
+	if (requested_format->format == -1)
+		output->requested_format.format = output->actual_format.format;
+
 	err = "failed to alloc output stream";
 	output->avst = avformat_new_stream(output->fmtctx, output->encoder->u.avc.codec);
 	if (!output->avst)
@ -244,6 +261,8 @@ got_fn:

 	db_config_stream(output);
 done:
+	if (actual_format)
+		*actual_format = output->actual_format;
 	return 0;

 err:
--- a/recording-daemon/rtpengine-recording.pod
+++ b/recording-daemon/rtpengine-recording.pod
@ -235,6 +235,20 @@ stream is produced. Audio mixing takes RTP timestamping into account, so gaps
 and pauses in the RTP media are reflected in the output audio to keep the
 multiple audio sources in sync.

+=item B<--mix-method=>B<direct>|B<channels>
+
+Selects the method used to mix multiple audio inputs into a single output file
+for B<mixed> output. The default is B<direct>, which mixes all audio inputs
+together directly, producing a mixed output file in the same format that an
+audio file from a single input (B<output-single>) would have.
+
+The B<channels> mixing method puts each audio input into its own audio channel
+in the output file, therefore producing a multi-channel output file. Up to four
+separate RTP SSRCs are supported for a mixed output, which means that if each
+input is mono audio, then the mixed output file would contain 4 audio channels.
+This mixing method requires an output file format which supports these kinds of
+multi-channel audio formats (e.g. B<wav>).
+
 =item B<--output-chmod=>I<INT>

 Change the file permissions of recording files to the given mode. Must be given
--- a/recording-daemon/types.h
+++ b/recording-daemon/types.h
@ -146,10 +146,13 @@ struct output_s {
 		*filename; // path + filename + suffix
 	const char *file_format;
 	unsigned long long db_id;
+	unsigned int channel_mult;

 	AVFormatContext *fmtctx;
 	AVStream *avst;
 	encoder_t *encoder;
+	format_t requested_format,
+		 actual_format;
 };