@@ -41,6 +41,8 @@
# include "asterisk/utils.h"
# include "asterisk/pbx.h"
# include "asterisk/timing.h"
# include "asterisk/rtp_engine.h"
# include "asterisk/format_cache.h"
# include "asterisk/abstract_jb.h"
# include "fixedjitterbuf.h"
@@ -53,6 +55,9 @@ enum {
	JB_CREATED = (1 << 2)
};
/*! The maximum size we allow the early frame buffer to get */
#define MAXIMUM_EARLY_FRAME_COUNT 200
/* Implementation functions */
/* fixed */
@@ -568,6 +573,8 @@ int ast_jb_read_conf(struct ast_jb_conf *conf, const char *varname, const char *value)
}
	} else if (!strcasecmp(name, AST_JB_CONF_LOG)) {
		ast_set2_flag(conf, ast_true(value), AST_JB_LOG);
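	/* With the standard "jb" option prefix this corresponds to a "jbsyncvideo"
	 * configuration option.
	 */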
	} else if (!strcasecmp(name, AST_JB_CONF_SYNC_VIDEO)) {
		ast_set2_flag(conf, ast_true(value), AST_JB_SYNC_VIDEO);
	} else {
		return -1;
	}
@@ -832,6 +839,11 @@ static int jb_is_late_adaptive(void *jb, long ts)
#define DEFAULT_RESYNC 1000
#define DEFAULT_TYPE AST_JB_FIXED
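/*! \brief Mapping between an RTP timestamp and the NTP wall clock time from an RTCP SR
 * report, used to derive an NTP timestamp for each frame in a stream.
 */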
struct jb_stream_sync {
	unsigned int timestamp;
	struct timeval ntp;
};
struct jb_framedata {
	const struct ast_jb_impl *jb_impl;
	struct ast_jb_conf jb_conf;
@@ -841,11 +853,21 @@ struct jb_framedata {
	int timer_interval; /* ms between deliveries */
	int timer_fd;
	int first;
	int audio_stream_id; /* Stream position of the audio stream, used to match RTCP SR reports */
	struct jb_stream_sync audio_stream_sync; /* RTP/NTP timestamp mapping for the audio stream */
	int video_stream_id; /* Stream position of the video stream */
	struct jb_stream_sync video_stream_sync; /* RTP/NTP timestamp mapping for the video stream */
	AST_LIST_HEAD_NOLOCK(, ast_frame) early_frames; /* Video frames held until the audio catches up */
	unsigned int early_frame_count; /* Number of frames in the early frame buffer */
	struct timeval last_audio_ntp_timestamp; /* NTP timestamp of the last audio frame given to the core */
	int audio_flowing; /* Whether audio frames have started flowing to the core */
	void *jb_obj;
};
static void jb_framedata_destroy(struct jb_framedata *framedata)
{
	struct ast_frame *frame;

	if (framedata->timer) {
		ast_timer_close(framedata->timer);
		framedata->timer = NULL;
@@ -859,11 +881,15 @@ static void jb_framedata_destroy(struct jb_framedata *framedata)
		framedata->jb_obj = NULL;
	}
	ao2_cleanup(framedata->last_format);
	while ((frame = AST_LIST_REMOVE_HEAD(&framedata->early_frames, frame_list))) {
		ast_frfree(frame);
	}
	ast_free(framedata);
}
void ast_jb_conf_default(struct ast_jb_conf *conf)
{
	ast_clear_flag(conf, AST_FLAGS_ALL);
	conf->max_size = DEFAULT_SIZE;
	conf->resync_threshold = DEFAULT_RESYNC;
	ast_copy_string(conf->impl, "fixed", sizeof(conf->impl));
@@ -886,6 +912,44 @@ static void hook_destroy_cb(void *framedata)
	jb_framedata_destroy((struct jb_framedata *) framedata);
}
static struct timeval jitterbuffer_frame_get_ntp_timestamp(const struct jb_stream_sync *stream_sync, const struct ast_frame *frame)
{
	int timestamp_diff;
	unsigned int rate;
	/* It's possible for us to receive frames before we receive the information allowing
	 * us to do NTP/RTP timestamp calculations. Since that information isn't available we
	 * can't generate a timestamp, so we return an empty one.
	 */
	if (ast_tvzero(stream_sync->ntp)) {
		return ast_tv(0, 0);
	}
	/* Convert the Asterisk timestamp into an RTP timestamp. From the difference we can
	 * determine how many samples are in the frame and how much time has elapsed since the
	 * synchronization RTP and NTP timestamps were received, giving us the NTP timestamp
	 * for this frame.
	 */
	if (frame->frametype == AST_FRAME_VOICE) {
		rate = ast_rtp_get_rate(frame->subclass.format);
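		/* For example, an 8000 Hz codec has 8 samples per millisecond, so a frame->ts
		 * of 20 (ms) corresponds to an RTP timestamp offset of 20 * 8 = 160 samples.
		 */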
		timestamp_diff = (frame->ts * (rate / 1000)) - stream_sync->timestamp;
	} else {
		/* Video is special - internally we reference it as 1000 to preserve the RTP timestamp,
		 * but it is actually 90000, which is why we can directly subtract the timestamps.
		 */
		rate = 90000;
		timestamp_diff = frame->ts - stream_sync->timestamp;
	}
	if (timestamp_diff < 0) {
		/* It's possible for us to be asked for an NTP timestamp from before our latest
		 * RTCP SR report. To handle this we subtract so we go back in time.
		 */
		return ast_tvsub(stream_sync->ntp, ast_samp2tv(abs(timestamp_diff), rate));
	} else {
		return ast_tvadd(stream_sync->ntp, ast_samp2tv(timestamp_diff, rate));
	}
}
static struct ast_frame *hook_event_cb(struct ast_channel *chan, struct ast_frame *frame, enum ast_framehook_event event, void *data)
{
	struct jb_framedata *framedata = data;
@@ -928,6 +992,77 @@ static struct ast_frame *hook_event_cb(struct ast_channel *chan, struct ast_frame *frame, enum ast_framehook_event event, void *data)
		return frame;
	}
	if (ast_test_flag(&framedata->jb_conf, AST_JB_SYNC_VIDEO)) {
		if (frame->frametype == AST_FRAME_VOICE) {
			/* Store the stream identifier for the audio stream so we can associate the incoming
			 * RTCP SR with the correct stream sync structure.
			 */
			framedata->audio_stream_id = frame->stream_num;
		} else if (frame->frametype == AST_FRAME_RTCP && frame->subclass.integer == AST_RTP_RTCP_SR) {
			struct ast_rtp_rtcp_report *rtcp_report = frame->data.ptr;
			struct jb_stream_sync *stream_sync = NULL;

			/* Determine which stream this RTCP report is in regards to */
			if (framedata->audio_stream_id == frame->stream_num) {
				stream_sync = &framedata->audio_stream_sync;
			} else if (framedata->video_stream_id == frame->stream_num) {
				stream_sync = &framedata->video_stream_sync;
			}

			if (stream_sync) {
				/* Store the RTP and NTP timestamp mapping so we can derive an NTP timestamp for each frame */
				stream_sync->timestamp = rtcp_report->sender_information.rtp_timestamp;
				stream_sync->ntp = rtcp_report->sender_information.ntp_timestamp;
			}
		} else if (frame->frametype == AST_FRAME_VIDEO) {
			/* If a video frame is late according to the audio timestamp, don't stash it away; just
			 * return it. If however it is ahead, we keep it until such time as the audio catches up.
			 */
			struct ast_frame *jbframe;

			framedata->video_stream_id = frame->stream_num;

			/* If no timing information is available we can't store this away, so just let it through now */
			if (!ast_test_flag(frame, AST_FRFLAG_HAS_TIMING_INFO)) {
				return frame;
			}

			/* To ensure that the video starts when the audio starts, we only start allowing frames
			 * through once audio starts flowing.
			 */
			if (framedata->audio_flowing) {
				struct timeval video_timestamp;

				video_timestamp = jitterbuffer_frame_get_ntp_timestamp(&framedata->video_stream_sync, frame);
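				/* A non-negative difference means the video frame's NTP timestamp is at or before
				 * the last audio timestamp handed to the core, so it is not early and can go
				 * through immediately.
				 */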
				if (ast_tvdiff_ms(framedata->last_audio_ntp_timestamp, video_timestamp) >= 0) {
					return frame;
				}
			}
			/* To prevent the early frame buffer from growing uncontrolled, we impose a maximum
			 * count it can reach. If this is reached we drop a video frame, which should cause the
			 * receiver to ask for a new key frame.
			 */
			if (framedata->early_frame_count == MAXIMUM_EARLY_FRAME_COUNT) {
				jbframe = AST_LIST_REMOVE_HEAD(&framedata->early_frames, frame_list);
				framedata->early_frame_count--;
				ast_frfree(jbframe);
			}

			jbframe = ast_frisolate(frame);
			if (!jbframe) {
				/* If we can't isolate the frame, the safest thing we can do is return it, even if
				 * the A/V sync may be off.
				 */
				return frame;
			}

			AST_LIST_INSERT_TAIL(&framedata->early_frames, jbframe, frame_list);
			framedata->early_frame_count++;
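			/* Returning the null frame tells the core that nothing is available right now; the
			 * buffered video will be released later alongside audio.
			 */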
			return &ast_null_frame;
		}
	}
	now_tv = ast_tvnow();
	now = ast_tvdiff_ms(now_tv, framedata->start_tv);
@@ -1022,6 +1157,8 @@ static struct ast_frame *hook_event_cb(struct ast_channel *chan, struct ast_frame *frame, enum ast_framehook_event event, void *data)
}
	if (frame->frametype == AST_FRAME_CONTROL) {
		struct ast_frame *early_frame;

		switch (frame->subclass.integer) {
		case AST_CONTROL_HOLD:
		case AST_CONTROL_UNHOLD:
@@ -1029,12 +1166,50 @@ static struct ast_frame *hook_event_cb(struct ast_channel *chan, struct ast_frame *frame, enum ast_framehook_event event, void *data)
		case AST_CONTROL_SRCUPDATE:
		case AST_CONTROL_SRCCHANGE:
			framedata->jb_impl->force_resync(framedata->jb_obj);
			/* Since we are resyncing, go ahead and clear out the video frames too */
			while ((early_frame = AST_LIST_REMOVE_HEAD(&framedata->early_frames, frame_list))) {
				ast_frfree(early_frame);
			}
			framedata->audio_flowing = 0;
			framedata->early_frame_count = 0;
			break;
		default:
			break;
		}
	}
	/* If a voice frame is being passed through, see if we need to add any additional frames to it */
	if (ast_test_flag(&framedata->jb_conf, AST_JB_SYNC_VIDEO) && frame->frametype == AST_FRAME_VOICE) {
		AST_LIST_HEAD_NOLOCK(, ast_frame) additional_frames;
		struct ast_frame *early_frame;

		/* We store the last NTP timestamp for the audio given to the core so that subsequent frames
		 * which are late can be passed through immediately (this will occur for the video frames
		 * returned here).
		 */
		framedata->last_audio_ntp_timestamp = jitterbuffer_frame_get_ntp_timestamp(&framedata->audio_stream_sync, frame);
		framedata->audio_flowing = 1;

		AST_LIST_HEAD_INIT_NOLOCK(&additional_frames);

		AST_LIST_TRAVERSE_SAFE_BEGIN(&framedata->early_frames, early_frame, frame_list) {
			struct timeval early_timestamp = jitterbuffer_frame_get_ntp_timestamp(&framedata->video_stream_sync, early_frame);
			int diff = ast_tvdiff_ms(framedata->last_audio_ntp_timestamp, early_timestamp);

			/* If this frame is from the past, we need to include it with the audio frame that is
			 * going out.
			 */
			if (diff >= 0) {
				AST_LIST_REMOVE_CURRENT(frame_list);
				framedata->early_frame_count--;
				AST_LIST_INSERT_TAIL(&additional_frames, early_frame, frame_list);
			}
		}
		AST_LIST_TRAVERSE_SAFE_END;

		/* Append any additional frames we may want to include (such as video) by chaining them onto
		 * the voice frame via its frame_list linkage.
		 */
		AST_LIST_NEXT(frame, frame_list) = AST_LIST_FIRST(&additional_frames);
	}
	return frame;
}
@@ -1066,6 +1241,9 @@ static int jb_framedata_init(struct jb_framedata *framedata, struct ast_jb_conf *jb_conf)
		return -1;
	}
	framedata->audio_stream_id = -1;
	framedata->video_stream_id = -1;
	AST_LIST_HEAD_INIT_NOLOCK(&framedata->early_frames);
	framedata->timer_fd = ast_timer_fd(framedata->timer);
	framedata->timer_interval = DEFAULT_TIMER_INTERVAL;
	ast_timer_set_rate(framedata->timer, 1000 / framedata->timer_interval);