diff --git a/daemon/.ycm_extra_conf.py b/daemon/.ycm_extra_conf.py
index c2fe8c69e..eb01d3643 100644
--- a/daemon/.ycm_extra_conf.py
+++ b/daemon/.ycm_extra_conf.py
@@ -46,6 +46,7 @@ flags = [
     '-DWITH_TRANSCODING',
     '-DHAVE_BCG729',
     '-DHAVE_MQTT',
+    '-DHAVE_CUDECS',
     '-D__csh_lookup(x)=str_hash(x)',
     '-DCSH_LOOKUP(x)=' + csh_lookup_str,
     '-O2',
diff --git a/daemon/Makefile b/daemon/Makefile
index bb57894fd..636c0dce0 100644
--- a/daemon/Makefile
+++ b/daemon/Makefile
@@ -75,6 +75,7 @@ endif
 
 ifeq ($(with_transcoding),yes)
 include ../lib/g729.Makefile
+include ../lib/cudecs.Makefile
 endif
 
 include ../lib/mqtt.Makefile
diff --git a/daemon/codec.c b/daemon/codec.c
index 39d0b101d..edfac2cd6 100644
--- a/daemon/codec.c
+++ b/daemon/codec.c
@@ -173,6 +173,7 @@ struct codec_ssrc_handler {
 	struct codec_handler *handler;
 	decoder_t *decoder;
 	encoder_t *encoder;
+	codec_chain_t *chain;
 	format_t encoder_format;
 	int bitrate;
 	int ptime;
@@ -3523,6 +3524,8 @@ static void __delay_buffer_free(void *p) {
 	mutex_destroy(&dbuf->lock);
 }
 static void __dtx_setup(struct codec_ssrc_handler *ch) {
+	if (!ch->decoder)
+		return;
 	if (!decoder_has_dtx(ch->decoder))
 		return;
 
@@ -3765,6 +3768,21 @@ static struct ssrc_entry *__ssrc_handler_transcode_new(void *p) {
 		.channels = h->dest_pt.channels,
 		.format = -1,
 	};
+
+	// see if there's a complete codec chain usable for this
+	if (!h->pcm_dtmf_detect)
+		ch->chain = codec_chain_new(h->source_pt.codec_def, &dec_format,
+				h->dest_pt.codec_def, &enc_format,
+				ch->bitrate, ch->ptime);
+
+	if (ch->chain) {
+		ilogs(codec, LOG_DEBUG, "Using codec chain to transcode from " STR_FORMAT " to " STR_FORMAT,
+				STR_FMT(&h->source_pt.encoding_with_params),
+				STR_FMT(&h->dest_pt.encoding_with_params));
+
+		return &ch->h;
+	}
+
 	ch->encoder = encoder_new();
 	if (!ch->encoder)
 		goto err;
@@ -4096,10 +4114,20 @@ static int __rtp_decode(struct codec_ssrc_handler *ch, struct codec_ssrc_handler
 		struct transcode_packet *packet, struct media_packet *mp)
 {
 	int ret = 0;
-	if (packet)
-		ret = decoder_input_data_ptime(ch->decoder, packet->payload, packet->ts, &mp->ptime,
-				ch->handler->packet_decoded,
-				ch, mp);
+	if (packet) {
+		if (ch->chain) {
+			static const struct fraction chain_fact = {1,1};
+			AVPacket *pkt = codec_chain_input_data(ch->chain, packet->payload, packet->ts);
+			assert(pkt != NULL);
+			packet_encoded_packetize(pkt, ch, mp, packetizer_passthrough, NULL, &chain_fact,
+					packet_encoded_tx);
+			av_packet_unref(pkt);
+		}
+		else
+			ret = decoder_input_data_ptime(ch->decoder, packet->payload, packet->ts, &mp->ptime,
+					ch->handler->packet_decoded,
+					ch, mp);
+	}
 	__buffer_delay_seq(input_ch->handler->delay_buffer, mp, -1);
 	return ret;
 }
@@ -4111,7 +4139,7 @@ static int packet_decode(struct codec_ssrc_handler *ch, struct codec_ssrc_handle
 	if (!ch->csch.first_ts)
 		ch->csch.first_ts = packet->ts;
 
-	if (ch->decoder->def->dtmf) {
+	if (ch->decoder && ch->decoder->def->dtmf) {
 		if (packet_dtmf_event(ch, input_ch, packet, mp) == -1)
 			goto out;
 	}
diff --git a/lib/.ycm_extra_conf.py b/lib/.ycm_extra_conf.py
index bc17c71a0..79674acb1 100644
--- a/lib/.ycm_extra_conf.py
+++ b/lib/.ycm_extra_conf.py
@@ -42,6 +42,7 @@ flags = [
     '-DRE_PLUGIN_DIR="/usr/lib/rtpengine"',
     '-DWITH_IPTABLES_OPTION',
     '-DHAVE_BCG729',
+    '-DHAVE_CUDECS',
     '-D__csh_lookup(x)=str_hash(x)',
     '-DCSH_LOOKUP(x)=' + csh_lookup_str,
      '-O2',
diff --git a/lib/auxlib.c b/lib/auxlib.c
index cbdb72bbf..07f07aa29 100644
--- a/lib/auxlib.c
+++ b/lib/auxlib.c
@@ -215,6 +215,9 @@ void config_load(int *argc, char ***argv, GOptionEntry *app_entries, const char
 		{ "thread-stack",	0,0,	G_OPTION_ARG_INT,	&rtpe_common_config_ptr->thread_stack,	"Thread stack size in kB",		"INT"		},
 		{ "poller-size",	0,0,	G_OPTION_ARG_INT,	&rtpe_common_config_ptr->poller_size,	"Max poller items per iteration",	"INT"		},
 		{ "evs-lib-path",	0,0,	G_OPTION_ARG_FILENAME,	&rtpe_common_config_ptr->evs_lib_path,	"Location of .so for 3GPP EVS codec",	"FILE"		},
+#ifdef HAVE_CUDECS
+		{ "cudecs",		0,0,	G_OPTION_ARG_NONE,	&rtpe_common_config_ptr->cudecs,	"Enable usage of CUDA codecs",		NULL		},
+#endif
 		{ NULL, }
 	};
 #undef ll
diff --git a/lib/auxlib.h b/lib/auxlib.h
index e03439236..ab685ecf5 100644
--- a/lib/auxlib.h
+++ b/lib/auxlib.h
@@ -36,6 +36,7 @@ struct rtpengine_common_config {
 	int poller_size;
 	int max_log_line_length;
 	char *evs_lib_path;
+	int cudecs;
 };
 
 extern struct rtpengine_common_config *rtpe_common_config_ptr;
diff --git a/lib/codeclib.c b/lib/codeclib.c
index 3154a545c..43259fcf0 100644
--- a/lib/codeclib.c
+++ b/lib/codeclib.c
@@ -11,6 +11,11 @@
 #include <bcg729/decoder.h>
 #endif
 #include <opus.h>
+#ifdef HAVE_CUDECS
+#include <cudecs/g711opus.h>
+#include <cudecs/gpu-utils.h>
+#include <cudecs/gpu-opus.h>
+#endif
 #include "str.h"
 #include "log.h"
 #include "loglib.h"
@@ -32,7 +37,6 @@
 
 
 
-static packetizer_f packetizer_passthrough; // pass frames as they arrive in AVPackets
 static packetizer_f packetizer_samplestream; // flat stream of samples
 static packetizer_f packetizer_amr;
 
@@ -139,6 +143,34 @@ static select_encoder_format_f evs_select_encoder_format;
 
 
 
+struct codec_chain_s { // state of one direct transcoding chain; has no members unless HAVE_CUDECS is defined
+#ifdef HAVE_CUDECS
+	union { // per-direction state; codec_chain_new() fills the member that matches ->run
+		struct {
+			gpu_pcmu2opus_runner *runner; // points at the file-level runner, not created per chain
+			gpu_float2opus *enc; // created per chain via gpu_float2opus_new()
+		} pcmu2opus;
+		struct {
+			gpu_pcma2opus_runner *runner;
+			gpu_float2opus *enc;
+		} pcma2opus;
+		struct {
+			gpu_opus2pcmu_runner *runner;
+			gpu_opus2float *dec; // created per chain via gpu_opus2float_new()
+		} opus2pcmu;
+		struct {
+			gpu_opus2pcma_runner *runner;
+			gpu_opus2float *dec;
+		} opus2pcma;
+	} u;
+	AVPacket *avpkt; // allocated once in codec_chain_new(), refilled by every codec_chain_input_data() call
+	int (*run)(codec_chain_t *c, const str *data, unsigned long ts, AVPacket *); // direction-specific worker
+#endif
+};
+
+
+
+
 
 static const codec_type_t codec_type_avcodec = {
 	.def_init = avc_def_init,
@@ -239,6 +271,14 @@ static const codec_type_t codec_type_bcg729 = {
 #endif
 
 
+#ifdef HAVE_CUDECS
+static gpu_pcma2opus_runner *pcma2opus_runner; // the four runners are created in codeclib_init() when the cudecs option is set
+static gpu_pcmu2opus_runner *pcmu2opus_runner; // codec_chain_new() refuses the matching direction while its runner is NULL
+static gpu_opus2pcmu_runner *opus2pcmu_runner;
+static gpu_opus2pcma_runner *opus2pcma_runner;
+#endif
+
+
 
 static struct codec_def_s __codec_defs[] = {
 	{
@@ -1223,6 +1263,31 @@ void codeclib_init(int print) {
 	codecs_ht = g_hash_table_new(str_case_hash, str_case_equal);
 	codecs_ht_by_av = g_hash_table_new(g_direct_hash, g_direct_equal);
 
+#ifdef HAVE_CUDECS
+	if (rtpe_common_config_ptr->cudecs) {
+		if (!gpu_init())
+			die("Failed to initialise CUDA codecs");
+
+		pcma2opus_runner = gpu_pcma2opus_runner_new(4, 3000, 160);
+		if (!pcma2opus_runner)
+			die("Failed to initialise GPU pcma2opus");
+
+		pcmu2opus_runner = gpu_pcmu2opus_runner_new(4, 3000, 160);
+		if (!pcmu2opus_runner)
+			die("Failed to initialise GPU pcmu2opus");
+
+		opus2pcmu_runner = gpu_opus2pcmu_runner_new(4, 3000, 160);
+		if (!opus2pcmu_runner)
+			die("Failed to initialise GPU opus2pcmu");
+
+		opus2pcma_runner = gpu_opus2pcma_runner_new(4, 3000, 160);
+		if (!opus2pcma_runner)
+			die("Failed to initialise GPU opus2pcma");
+
+		ilog(LOG_DEBUG, "CUDA codecs initialised");
+	}
+#endif
+
 	for (int i = 0; i < G_N_ELEMENTS(__codec_defs); i++) {
 		// add to hash table
 		struct codec_def_s *def = &__codec_defs[i];
@@ -1771,7 +1836,7 @@ int encoder_input_fifo(encoder_t *enc, AVFrame *frame,
 }
 
 
-static int packetizer_passthrough(AVPacket *pkt, GString *buf, str *output, encoder_t *enc) {
+int packetizer_passthrough(AVPacket *pkt, GString *buf, str *output, encoder_t *enc) {
 	if (!pkt)
 		return -1;
 	if (output->len < pkt->size) {
@@ -4589,3 +4654,171 @@ static void evs_def_init(struct codec_def_s *def) {
 static int evs_dtx(decoder_t *dec, GQueue *out, int ptime) {
 	return 0;
 }
+
+
+
+
+
+
+#ifdef HAVE_CUDECS
+int codec_chain_pcmu2opus_run(codec_chain_t *c, const str *data, unsigned long ts, AVPacket *pkt) {
+	// PCMU in, Opus out: timestamp and duration are scaled up by 6 (8 kHz -> 48 kHz)
+	unsigned char *in = (unsigned char *) data->s;
+	ssize_t ret = gpu_pcmu2opus_runner_do(c->u.pcmu2opus.runner, c->u.pcmu2opus.enc,
+			in, data->len, pkt->data, pkt->size);
+	assert(ret > 0);
+
+	pkt->pts = ts * 6L;
+	pkt->duration = data->len * 6L;
+	pkt->size = ret; // shrink the packet to what the runner returned
+	return 0;
+}
+
+int codec_chain_pcma2opus_run(codec_chain_t *c, const str *data, unsigned long ts, AVPacket *pkt) {
+	// PCMA in, Opus out: timestamp and duration are scaled up by 6 (8 kHz -> 48 kHz)
+	unsigned char *in = (unsigned char *) data->s;
+	ssize_t ret = gpu_pcma2opus_runner_do(c->u.pcma2opus.runner, c->u.pcma2opus.enc,
+			in, data->len, pkt->data, pkt->size);
+	assert(ret > 0);
+
+	pkt->pts = ts * 6L;
+	pkt->duration = data->len * 6L;
+	pkt->size = ret; // shrink the packet to what the runner returned
+	return 0;
+}
+
+int codec_chain_opus2pcmu_run(codec_chain_t *c, const str *data, unsigned long ts, AVPacket *pkt) {
+	// Opus in, PCMU out: the timestamp is scaled down by 6 (48 kHz -> 8 kHz)
+	unsigned char *in = (unsigned char *) data->s;
+	ssize_t ret = gpu_opus2pcmu_runner_do(c->u.opus2pcmu.runner, c->u.opus2pcmu.dec,
+			in, data->len, pkt->data, pkt->size);
+	assert(ret > 0);
+
+	pkt->pts = ts / 6L;
+	pkt->duration = ret; // duration is taken from the returned length
+	pkt->size = ret;
+	return 0;
+}
+
+int codec_chain_opus2pcma_run(codec_chain_t *c, const str *data, unsigned long ts, AVPacket *pkt) {
+	// Opus in, PCMA out: the timestamp is scaled down by 6 (48 kHz -> 8 kHz)
+	unsigned char *in = (unsigned char *) data->s;
+	ssize_t ret = gpu_opus2pcma_runner_do(c->u.opus2pcma.runner, c->u.opus2pcma.dec,
+			in, data->len, pkt->data, pkt->size);
+	assert(ret > 0);
+
+	pkt->pts = ts / 6L;
+	pkt->duration = ret; // duration is taken from the returned length
+	pkt->size = ret;
+	return 0;
+}
+#endif
+
+
+// Returns a new chain for a supported PCMA/PCMU <-> opus pair, or NULL if the pair/format is unsupported (always NULL without HAVE_CUDECS)
+codec_chain_t *codec_chain_new(codec_def_t *src, format_t *src_format, codec_def_t *dst,
+		format_t *dst_format, int bitrate, int ptime) // ptime is not read by any branch below
+{
+#ifdef HAVE_CUDECS
+	if (!strcmp(dst->rtpname, "opus") && !strcmp(src->rtpname, "PCMA")) { // PCMA -> opus
+		if (src_format->clockrate != 8000)
+			return NULL;
+		if (src_format->channels != 1)
+			return NULL;
+		if (dst_format->channels != 2) // NOTE(review): the PCMU -> opus branch requires 1 channel here; confirm which is intended
+			return NULL;
+		if (dst_format->clockrate != 48000)
+			return NULL;
+
+		if (!pcma2opus_runner) // only set by codeclib_init() when the cudecs option is enabled
+			return NULL;
+
+		codec_chain_t *ret = g_slice_alloc0(sizeof(*ret));
+		ret->u.pcma2opus.enc = gpu_float2opus_new(bitrate); // NOTE(review): result is not checked
+		ret->u.pcma2opus.runner = pcma2opus_runner;
+		ret->avpkt = av_packet_alloc(); // NOTE(review): result is not checked
+		ret->run = codec_chain_pcma2opus_run;
+
+		return ret;
+	}
+	else if (!strcmp(dst->rtpname, "opus") && !strcmp(src->rtpname, "PCMU")) { // PCMU -> opus
+		if (src_format->clockrate != 8000)
+			return NULL;
+		if (src_format->channels != 1)
+			return NULL;
+		if (dst_format->channels != 1) // NOTE(review): the PCMA -> opus branch requires 2 channels here
+			return NULL;
+		if (dst_format->clockrate != 48000)
+			return NULL;
+
+		if (!pcmu2opus_runner)
+			return NULL;
+
+		codec_chain_t *ret = g_slice_alloc0(sizeof(*ret));
+		ret->u.pcmu2opus.enc = gpu_float2opus_new(bitrate); // NOTE(review): result is not checked
+		ret->u.pcmu2opus.runner = pcmu2opus_runner;
+		ret->avpkt = av_packet_alloc(); // NOTE(review): result is not checked
+		ret->run = codec_chain_pcmu2opus_run;
+
+		return ret;
+	}
+	else if (!strcmp(dst->rtpname, "PCMU") && !strcmp(src->rtpname, "opus")) { // opus -> PCMU
+		if (dst_format->clockrate != 8000)
+			return NULL;
+		if (dst_format->channels != 1)
+			return NULL;
+		if (src_format->channels != 1) // NOTE(review): the opus -> PCMA branch requires 2 channels here
+			return NULL;
+		if (src_format->clockrate != 48000)
+			return NULL;
+
+		if (!opus2pcmu_runner)
+			return NULL;
+
+		codec_chain_t *ret = g_slice_alloc0(sizeof(*ret));
+		ret->u.opus2pcmu.dec = gpu_opus2float_new(); // NOTE(review): result is not checked
+		ret->u.opus2pcmu.runner = opus2pcmu_runner;
+		ret->avpkt = av_packet_alloc(); // NOTE(review): result is not checked
+		ret->run = codec_chain_opus2pcmu_run;
+
+		return ret;
+	}
+	else if (!strcmp(dst->rtpname, "PCMA") && !strcmp(src->rtpname, "opus")) { // opus -> PCMA
+		if (dst_format->clockrate != 8000)
+			return NULL;
+		if (dst_format->channels != 1)
+			return NULL;
+		if (src_format->channels != 2) // NOTE(review): the opus -> PCMU branch requires 1 channel here
+			return NULL;
+		if (src_format->clockrate != 48000)
+			return NULL;
+
+		if (!opus2pcma_runner)
+			return NULL;
+
+		codec_chain_t *ret = g_slice_alloc0(sizeof(*ret));
+		ret->u.opus2pcma.dec = gpu_opus2float_new(); // NOTE(review): result is not checked
+		ret->u.opus2pcma.runner = opus2pcma_runner;
+		ret->avpkt = av_packet_alloc(); // NOTE(review): result is not checked
+		ret->run = codec_chain_opus2pcma_run;
+
+		return ret;
+	}
+#endif
+
+	return NULL; // no direct chain for this codec pair
+}
+
+// Runs one payload through the chain; returns the chain-owned packet, or NULL on allocation failure / without HAVE_CUDECS
+AVPacket *codec_chain_input_data(codec_chain_t *c, const str *data, unsigned long ts) {
+#ifdef HAVE_CUDECS
+	av_packet_unref(c->avpkt); // av_new_packet() does not release a payload left over from the previous call
+	if (av_new_packet(c->avpkt, MAX_OPUS_FRAME_SIZE * MAX_OPUS_FRAMES_PER_PACKET + MAX_OPUS_HEADER_SIZE) < 0)
+		return NULL; // no output buffer: do not hand an empty packet to the runner
+	int ret = c->run(c, data, ts, c->avpkt);
+	assert(ret == 0);
+	return c->avpkt;
+#else
+	return NULL;
+#endif
+}
diff --git a/lib/codeclib.h b/lib/codeclib.h
index 47a1bb8f8..4ee4219ca 100644
--- a/lib/codeclib.h
+++ b/lib/codeclib.h
@@ -95,6 +95,7 @@ typedef struct seq_packet_s seq_packet_t;
 typedef union codec_options_u codec_options_t;
 typedef struct encoder_callback_s encoder_callback_t;
 typedef struct dtx_method_s dtx_method_t;
+typedef struct codec_chain_s codec_chain_t;
 
 typedef int packetizer_f(AVPacket *, GString *, str *, encoder_t *);
 typedef void format_init_f(struct rtp_payload_type *);
@@ -415,6 +416,8 @@ int packet_sequencer_next_ok(packet_sequencer_t *ps);
 void *packet_sequencer_force_next_packet(packet_sequencer_t *ps);
 int packet_sequencer_insert(packet_sequencer_t *ps, seq_packet_t *);
 
+packetizer_f packetizer_passthrough; // pass frames as they arrive in AVPackets
+
 
 void frame_fill_tone_samples(enum AVSampleFormat fmt, void *samples, unsigned int offset, unsigned int num,
 		unsigned int freq, unsigned int volume, unsigned int sample_rate, unsigned int channels);
@@ -422,6 +425,11 @@ void frame_fill_dtmf_samples(enum AVSampleFormat fmt, void *samples, unsigned in
 		unsigned int event, unsigned int volume, unsigned int sample_rate, unsigned int channels);
 
 
+codec_chain_t *codec_chain_new(codec_def_t *src, format_t *src_format, codec_def_t *dst,
+		format_t *dst_format, int bitrate, int ptime); // NULL if no direct chain exists for this codec pair
+AVPacket *codec_chain_input_data(codec_chain_t *c, const str *data, unsigned long ts); // NULL when built without HAVE_CUDECS
+
+
 #include "auxlib.h"
 
 
diff --git a/lib/cudecs.Makefile b/lib/cudecs.Makefile
new file mode 100644
index 000000000..5d021e691
--- /dev/null
+++ b/lib/cudecs.Makefile
@@ -0,0 +1,9 @@
+ifneq (,$(filter pkg.ngcp-rtpengine.cudecs,${DEB_BUILD_PROFILES})) # only when this build profile is listed
+ifneq (,$(wildcard $(CUDECS_HOME)/usr/include/cudecs/g711opus.h)) # header found under $(CUDECS_HOME)
+CFLAGS+=	-DHAVE_CUDECS -I$(CUDECS_HOME)/usr/include
+LDLIBS+=	-L$(CUDECS_HOME)/usr/lib -lcudecs
+else ifneq (,$(wildcard /usr/include/cudecs/g711opus.h)) # otherwise fall back to the header in /usr/include
+CFLAGS+=	-DHAVE_CUDECS
+LDLIBS+=	-lcudecs
+endif # header lookup
+endif # build profile
diff --git a/perf-tester/Makefile b/perf-tester/Makefile
index c667ce802..b9df02d07 100644
--- a/perf-tester/Makefile
+++ b/perf-tester/Makefile
@@ -37,6 +37,7 @@ LDLIBS += $(shell pkg-config --libs ncursesw)
 LDLIBS += $(shell pkg-config --libs openssl)
 
 include ../lib/g729.Makefile
+include ../lib/cudecs.Makefile
 
 SRCS = main.c log.c
 LIBSRCS = codeclib.strhash.c loglib.c auxlib.c resample.c str.c dtmflib.c rtplib.c poller.c ssllib.c
diff --git a/perf-tester/main.c b/perf-tester/main.c
index f0aed481b..86a5801d5 100644
--- a/perf-tester/main.c
+++ b/perf-tester/main.c
@@ -45,6 +45,7 @@ struct stream {
 	unsigned long long output_ts;
 	decoder_t *decoder;
 	encoder_t *encoder;
+	codec_chain_t *chain;
 	struct testparams in_params;
 	struct testparams out_params;
 	uint fixture_idx;
@@ -325,7 +326,13 @@ static void readable(int fd, void *o, uintptr_t x) {
 
 			str frame;
 			str_init_len(&frame, (char *) data->data, data->size);
-			decoder_input_data(s->decoder, &frame, s->input_ts, got_frame, s, NULL);
+
+			if (!s->chain)
+				decoder_input_data(s->decoder, &frame, s->input_ts, got_frame, s, NULL);
+			else {
+				AVPacket *pkt = codec_chain_input_data(s->chain, &frame, s->input_ts);
+				got_packet_pkt(s, pkt);
+			}
 
 			s->input_ts += data->duration;
 
@@ -437,15 +444,19 @@ static void new_stream_params(
 
 	format_t actual_enc_format;
 
-	s->encoder = encoder_new();
-	int res = encoder_config_fmtp(s->encoder, out_def, bitrate, 20, &dec_format, &enc_format,
-			&actual_enc_format,
-			NULL, NULL, NULL);
-	assert(res == 0); // TODO: handle failures gracefully
+	s->chain = codec_chain_new(in_def, &dec_format, out_def, &enc_format, bitrate, 20);
 
-	s->decoder = decoder_new_fmtp(in_def, dec_format.clockrate, dec_format.channels, 20,
-			&actual_enc_format, NULL, NULL, NULL); // TODO: support different options (fmtp etc)
-	assert(s->decoder != NULL); // TODO: handle failures gracefully
+	if (!s->chain) {
+		s->encoder = encoder_new();
+		int res = encoder_config_fmtp(s->encoder, out_def, bitrate, 20, &dec_format, &enc_format,
+				&actual_enc_format,
+				NULL, NULL, NULL);
+		assert(res == 0); // TODO: handle failures gracefully
+
+		s->decoder = decoder_new_fmtp(in_def, dec_format.clockrate, dec_format.channels, 20,
+				&actual_enc_format, NULL, NULL, NULL); // TODO: support different options (fmtp etc)
+		assert(s->decoder != NULL); // TODO: handle failures gracefully
+	}
 
 	// arm timer
 	struct itimerspec timer = {
@@ -458,7 +469,7 @@ static void new_stream_params(
 			(ssl_random() % ptime) * 1000,
 		},
 	};
-	res = timerfd_settime(s->timer_fd, 0, &timer, NULL);
+	int res = timerfd_settime(s->timer_fd, 0, &timer, NULL);
 	if (res != 0)
 		abort();