From 2fa121c0d90745fcbd714c20f0cbf9ae2f1e574a Mon Sep 17 00:00:00 2001 From: Richard Fuchs Date: Tue, 12 Sep 2023 10:06:58 -0400 Subject: [PATCH] MT#54294 add GPU support Change-Id: I035eff68a4448ad1b2f5b4520943626e1723fce7 --- daemon/.ycm_extra_conf.py | 1 + daemon/Makefile | 1 + daemon/codec.c | 38 +++++- lib/.ycm_extra_conf.py | 1 + lib/auxlib.c | 3 + lib/auxlib.h | 1 + lib/codeclib.c | 237 +++++++++++++++++++++++++++++++++++++- lib/codeclib.h | 8 ++ lib/cudecs.Makefile | 9 ++ perf-tester/Makefile | 1 + perf-tester/main.c | 31 +++-- 11 files changed, 314 insertions(+), 17 deletions(-) create mode 100644 lib/cudecs.Makefile diff --git a/daemon/.ycm_extra_conf.py b/daemon/.ycm_extra_conf.py index c2fe8c69e..eb01d3643 100644 --- a/daemon/.ycm_extra_conf.py +++ b/daemon/.ycm_extra_conf.py @@ -46,6 +46,7 @@ flags = [ '-DWITH_TRANSCODING', '-DHAVE_BCG729', '-DHAVE_MQTT', + '-DHAVE_CUDECS', '-D__csh_lookup(x)=str_hash(x)', '-DCSH_LOOKUP(x)=' + csh_lookup_str, '-O2', diff --git a/daemon/Makefile b/daemon/Makefile index bb57894fd..636c0dce0 100644 --- a/daemon/Makefile +++ b/daemon/Makefile @@ -75,6 +75,7 @@ endif ifeq ($(with_transcoding),yes) include ../lib/g729.Makefile +include ../lib/cudecs.Makefile endif include ../lib/mqtt.Makefile diff --git a/daemon/codec.c b/daemon/codec.c index 39d0b101d..edfac2cd6 100644 --- a/daemon/codec.c +++ b/daemon/codec.c @@ -173,6 +173,7 @@ struct codec_ssrc_handler { struct codec_handler *handler; decoder_t *decoder; encoder_t *encoder; + codec_chain_t *chain; format_t encoder_format; int bitrate; int ptime; @@ -3523,6 +3524,8 @@ static void __delay_buffer_free(void *p) { mutex_destroy(&dbuf->lock); } static void __dtx_setup(struct codec_ssrc_handler *ch) { + if (!ch->decoder) + return; if (!decoder_has_dtx(ch->decoder)) return; @@ -3765,6 +3768,21 @@ static struct ssrc_entry *__ssrc_handler_transcode_new(void *p) { .channels = h->dest_pt.channels, .format = -1, }; + + // see if there's a complete codec chain usable for this + if (!h->pcm_dtmf_detect) + ch->chain = codec_chain_new(h->source_pt.codec_def, &dec_format, + h->dest_pt.codec_def, &enc_format, + ch->bitrate, ch->ptime); + + if (ch->chain) { + ilogs(codec, LOG_DEBUG, "Using codec chain to transcode from " STR_FORMAT " to " STR_FORMAT, + STR_FMT(&h->source_pt.encoding_with_params), + STR_FMT(&h->dest_pt.encoding_with_params)); + + return &ch->h; + } + ch->encoder = encoder_new(); if (!ch->encoder) goto err; @@ -4096,10 +4114,20 @@ static int __rtp_decode(struct codec_ssrc_handler *ch, struct codec_ssrc_handler struct transcode_packet *packet, struct media_packet *mp) { int ret = 0; - if (packet) - ret = decoder_input_data_ptime(ch->decoder, packet->payload, packet->ts, &mp->ptime, - ch->handler->packet_decoded, - ch, mp); + if (packet) { + if (ch->chain) { + static const struct fraction chain_fact = {1,1}; + AVPacket *pkt = codec_chain_input_data(ch->chain, packet->payload, packet->ts); + assert(pkt != NULL); + packet_encoded_packetize(pkt, ch, mp, packetizer_passthrough, NULL, &chain_fact, + packet_encoded_tx); + av_packet_unref(pkt); + } + else + ret = decoder_input_data_ptime(ch->decoder, packet->payload, packet->ts, &mp->ptime, + ch->handler->packet_decoded, + ch, mp); + } __buffer_delay_seq(input_ch->handler->delay_buffer, mp, -1); return ret; } @@ -4111,7 +4139,7 @@ static int packet_decode(struct codec_ssrc_handler *ch, struct codec_ssrc_handle if (!ch->csch.first_ts) ch->csch.first_ts = packet->ts; - if (ch->decoder->def->dtmf) { + if (ch->decoder && ch->decoder->def->dtmf) { if (packet_dtmf_event(ch, input_ch, packet, mp) == -1) goto out; } diff --git a/lib/.ycm_extra_conf.py b/lib/.ycm_extra_conf.py index bc17c71a0..79674acb1 100644 --- a/lib/.ycm_extra_conf.py +++ b/lib/.ycm_extra_conf.py @@ -42,6 +42,7 @@ flags = [ '-DRE_PLUGIN_DIR="/usr/lib/rtpengine"', '-DWITH_IPTABLES_OPTION', '-DHAVE_BCG729', + '-DHAVE_CUDECS', '-D__csh_lookup(x)=str_hash(x)', '-DCSH_LOOKUP(x)=' + csh_lookup_str, '-O2', diff --git a/lib/auxlib.c b/lib/auxlib.c index cbdb72bbf..07f07aa29 100644 --- a/lib/auxlib.c +++ b/lib/auxlib.c @@ -215,6 +215,9 @@ void config_load(int *argc, char ***argv, GOptionEntry *app_entries, const char { "thread-stack", 0,0, G_OPTION_ARG_INT, &rtpe_common_config_ptr->thread_stack, "Thread stack size in kB", "INT" }, { "poller-size", 0,0, G_OPTION_ARG_INT, &rtpe_common_config_ptr->poller_size, "Max poller items per iteration", "INT" }, { "evs-lib-path", 0,0, G_OPTION_ARG_FILENAME, &rtpe_common_config_ptr->evs_lib_path, "Location of .so for 3GPP EVS codec", "FILE" }, +#ifdef HAVE_CUDECS + { "cudecs", 0,0, G_OPTION_ARG_NONE, &rtpe_common_config_ptr->cudecs, "Enable usage of CUDA codecs", NULL }, +#endif { NULL, } }; #undef ll diff --git a/lib/auxlib.h b/lib/auxlib.h index e03439236..ab685ecf5 100644 --- a/lib/auxlib.h +++ b/lib/auxlib.h @@ -36,6 +36,7 @@ struct rtpengine_common_config { int poller_size; int max_log_line_length; char *evs_lib_path; + int cudecs; }; extern struct rtpengine_common_config *rtpe_common_config_ptr; diff --git a/lib/codeclib.c b/lib/codeclib.c index 3154a545c..43259fcf0 100644 --- a/lib/codeclib.c +++ b/lib/codeclib.c @@ -11,6 +11,11 @@ #include #endif #include +#ifdef HAVE_CUDECS +#include +#include +#include +#endif #include "str.h" #include "log.h" #include "loglib.h" @@ -32,7 +37,6 @@ -static packetizer_f packetizer_passthrough; // pass frames as they arrive in AVPackets static packetizer_f packetizer_samplestream; // flat stream of samples static packetizer_f packetizer_amr; @@ -139,6 +143,34 @@ static select_encoder_format_f evs_select_encoder_format; +struct codec_chain_s { +#ifdef HAVE_CUDECS + union { + struct { + gpu_pcmu2opus_runner *runner; + gpu_float2opus *enc; + } pcmu2opus; + struct { + gpu_pcma2opus_runner *runner; + gpu_float2opus *enc; + } pcma2opus; + struct { + gpu_opus2pcmu_runner *runner; + gpu_opus2float *dec; + } opus2pcmu; + struct { + gpu_opus2pcma_runner *runner; + gpu_opus2float *dec; + } opus2pcma; + } u; + AVPacket *avpkt; + int (*run)(codec_chain_t *c, const str *data, unsigned long ts, AVPacket *); +#endif +}; + + + + static const codec_type_t codec_type_avcodec = { .def_init = avc_def_init, @@ -239,6 +271,14 @@ static const codec_type_t codec_type_bcg729 = { #endif +#ifdef HAVE_CUDECS +static gpu_pcma2opus_runner *pcma2opus_runner; +static gpu_pcmu2opus_runner *pcmu2opus_runner; +static gpu_opus2pcmu_runner *opus2pcmu_runner; +static gpu_opus2pcma_runner *opus2pcma_runner; +#endif + + static struct codec_def_s __codec_defs[] = { { @@ -1223,6 +1263,31 @@ void codeclib_init(int print) { codecs_ht = g_hash_table_new(str_case_hash, str_case_equal); codecs_ht_by_av = g_hash_table_new(g_direct_hash, g_direct_equal); +#ifdef HAVE_CUDECS + if (rtpe_common_config_ptr->cudecs) { + if (!gpu_init()) + die("Failed to initialise CUDA codecs"); + + pcma2opus_runner = gpu_pcma2opus_runner_new(4, 3000, 160); + if (!pcma2opus_runner) + die("Failed to initialise GPU pcma2opus"); + + pcmu2opus_runner = gpu_pcmu2opus_runner_new(4, 3000, 160); + if (!pcmu2opus_runner) + die("Failed to initialise GPU pcmu2opus"); + + opus2pcmu_runner = gpu_opus2pcmu_runner_new(4, 3000, 160); + if (!opus2pcmu_runner) + die("Failed to initialise GPU opus2pcmu"); + + opus2pcma_runner = gpu_opus2pcma_runner_new(4, 3000, 160); + if (!opus2pcma_runner) + die("Failed to initialise GPU opus2pcma"); + + ilog(LOG_DEBUG, "CUDA codecs initialised"); + } +#endif + for (int i = 0; i < G_N_ELEMENTS(__codec_defs); i++) { // add to hash table struct codec_def_s *def = &__codec_defs[i]; @@ -1771,7 +1836,7 @@ int encoder_input_fifo(encoder_t *enc, AVFrame *frame, } -static int packetizer_passthrough(AVPacket *pkt, GString *buf, str *output, encoder_t *enc) { +int packetizer_passthrough(AVPacket *pkt, GString *buf, str *output, encoder_t *enc) { if (!pkt) return -1; if (output->len < pkt->size) { @@ -4589,3 +4654,171 @@ static void evs_def_init(struct codec_def_s *def) { static int evs_dtx(decoder_t *dec, GQueue *out, int ptime) { return 0; } + + + + + + +#ifdef HAVE_CUDECS +int codec_chain_pcmu2opus_run(codec_chain_t *c, const str *data, unsigned long ts, AVPacket *pkt) { + ssize_t ret = gpu_pcmu2opus_runner_do(c->u.pcmu2opus.runner, c->u.pcmu2opus.enc, + (unsigned char *) data->s, data->len, + pkt->data, pkt->size); + assert(ret > 0); + + pkt->size = ret; + pkt->duration = data->len * 6L; + pkt->pts = ts * 6L; + + return 0; +} + +int codec_chain_pcma2opus_run(codec_chain_t *c, const str *data, unsigned long ts, AVPacket *pkt) { + ssize_t ret = gpu_pcma2opus_runner_do(c->u.pcma2opus.runner, c->u.pcma2opus.enc, + (unsigned char *) data->s, data->len, + pkt->data, pkt->size); + assert(ret > 0); + + pkt->size = ret; + pkt->duration = data->len * 6L; + pkt->pts = ts * 6L; + + return 0; +} + +int codec_chain_opus2pcmu_run(codec_chain_t *c, const str *data, unsigned long ts, AVPacket *pkt) { + ssize_t ret = gpu_opus2pcmu_runner_do(c->u.opus2pcmu.runner, c->u.opus2pcmu.dec, + (unsigned char *) data->s, data->len, + pkt->data, pkt->size); + assert(ret > 0); + + pkt->size = ret; + pkt->duration = ret; + pkt->pts = ts / 6L; + + return 0; +} + +int codec_chain_opus2pcma_run(codec_chain_t *c, const str *data, unsigned long ts, AVPacket *pkt) { + ssize_t ret = gpu_opus2pcma_runner_do(c->u.opus2pcma.runner, c->u.opus2pcma.dec, + (unsigned char *) data->s, data->len, + pkt->data, pkt->size); + assert(ret > 0); + + pkt->size = ret; + pkt->duration = ret; + pkt->pts = ts / 6L; + + return 0; +} +#endif + + + +codec_chain_t *codec_chain_new(codec_def_t *src, format_t *src_format, codec_def_t *dst, + format_t *dst_format, int bitrate, int ptime) +{ +#ifdef HAVE_CUDECS + if (!strcmp(dst->rtpname, "opus") && !strcmp(src->rtpname, "PCMA")) { + if (src_format->clockrate != 8000) + return NULL; + if (src_format->channels != 1) + return NULL; + if (dst_format->channels != 2) + return NULL; + if (dst_format->clockrate != 48000) + return NULL; + + if (!pcma2opus_runner) + return NULL; + + codec_chain_t *ret = g_slice_alloc0(sizeof(*ret)); + ret->u.pcma2opus.enc = gpu_float2opus_new(bitrate); + ret->u.pcma2opus.runner = pcma2opus_runner; + ret->avpkt = av_packet_alloc(); + ret->run = codec_chain_pcma2opus_run; + + return ret; + } + else if (!strcmp(dst->rtpname, "opus") && !strcmp(src->rtpname, "PCMU")) { + if (src_format->clockrate != 8000) + return NULL; + if (src_format->channels != 1) + return NULL; + if (dst_format->channels != 1) + return NULL; + if (dst_format->clockrate != 48000) + return NULL; + + if (!pcmu2opus_runner) + return NULL; + + codec_chain_t *ret = g_slice_alloc0(sizeof(*ret)); + ret->u.pcmu2opus.enc = gpu_float2opus_new(bitrate); + ret->u.pcmu2opus.runner = pcmu2opus_runner; + ret->avpkt = av_packet_alloc(); + ret->run = codec_chain_pcmu2opus_run; + + return ret; + } + else if (!strcmp(dst->rtpname, "PCMU") && !strcmp(src->rtpname, "opus")) { + if (dst_format->clockrate != 8000) + return NULL; + if (dst_format->channels != 1) + return NULL; + if (src_format->channels != 1) + return NULL; + if (src_format->clockrate != 48000) + return NULL; + + if (!opus2pcmu_runner) + return NULL; + + codec_chain_t *ret = g_slice_alloc0(sizeof(*ret)); + ret->u.opus2pcmu.dec = gpu_opus2float_new(); + ret->u.opus2pcmu.runner = opus2pcmu_runner; + ret->avpkt = av_packet_alloc(); + ret->run = codec_chain_opus2pcmu_run; + + return ret; + } + else if (!strcmp(dst->rtpname, "PCMA") && !strcmp(src->rtpname, "opus")) { + if (dst_format->clockrate != 8000) + return NULL; + if (dst_format->channels != 1) + return NULL; + if (src_format->channels != 2) + return NULL; + if (src_format->clockrate != 48000) + return NULL; + + if (!opus2pcma_runner) + return NULL; + + codec_chain_t *ret = g_slice_alloc0(sizeof(*ret)); + ret->u.opus2pcma.dec = gpu_opus2float_new(); + ret->u.opus2pcma.runner = opus2pcma_runner; + ret->avpkt = av_packet_alloc(); + ret->run = codec_chain_opus2pcma_run; + + return ret; + } +#endif + + return NULL; +} + +AVPacket *codec_chain_input_data(codec_chain_t *c, const str *data, unsigned long ts) { +#ifdef HAVE_CUDECS + av_new_packet(c->avpkt, MAX_OPUS_FRAME_SIZE * MAX_OPUS_FRAMES_PER_PACKET + MAX_OPUS_HEADER_SIZE); + + int ret = c->run(c, data, ts, c->avpkt); + assert(ret == 0); + + return c->avpkt; + +#else + return NULL; +#endif +} diff --git a/lib/codeclib.h b/lib/codeclib.h index 47a1bb8f8..4ee4219ca 100644 --- a/lib/codeclib.h +++ b/lib/codeclib.h @@ -95,6 +95,7 @@ typedef struct seq_packet_s seq_packet_t; typedef union codec_options_u codec_options_t; typedef struct encoder_callback_s encoder_callback_t; typedef struct dtx_method_s dtx_method_t; +typedef struct codec_chain_s codec_chain_t; typedef int packetizer_f(AVPacket *, GString *, str *, encoder_t *); typedef void format_init_f(struct rtp_payload_type *); @@ -415,6 +416,8 @@ int packet_sequencer_next_ok(packet_sequencer_t *ps); void *packet_sequencer_force_next_packet(packet_sequencer_t *ps); int packet_sequencer_insert(packet_sequencer_t *ps, seq_packet_t *); +packetizer_f packetizer_passthrough; // pass frames as they arrive in AVPackets + void frame_fill_tone_samples(enum AVSampleFormat fmt, void *samples, unsigned int offset, unsigned int num, unsigned int freq, unsigned int volume, unsigned int sample_rate, unsigned int channels); @@ -422,6 +425,11 @@ void frame_fill_dtmf_samples(enum AVSampleFormat fmt, void *samples, unsigned in unsigned int event, unsigned int volume, unsigned int sample_rate, unsigned int channels); +codec_chain_t *codec_chain_new(codec_def_t *src, format_t *src_format, codec_def_t *dst, + format_t *dst_format, int bitrate, int ptime); +AVPacket *codec_chain_input_data(codec_chain_t *c, const str *data, unsigned long ts); + + #include "auxlib.h" diff --git a/lib/cudecs.Makefile b/lib/cudecs.Makefile new file mode 100644 index 000000000..5d021e691 --- /dev/null +++ b/lib/cudecs.Makefile @@ -0,0 +1,9 @@ +ifneq (,$(filter pkg.ngcp-rtpengine.cudecs,${DEB_BUILD_PROFILES})) +ifneq (,$(wildcard $(CUDECS_HOME)/usr/include/cudecs/g711opus.h)) +CFLAGS+= -DHAVE_CUDECS -I$(CUDECS_HOME)/usr/include +LDLIBS+= -L$(CUDECS_HOME)/usr/lib -lcudecs +else ifneq (,$(wildcard /usr/include/cudecs/g711opus.h)) +CFLAGS+= -DHAVE_CUDECS +LDLIBS+= -lcudecs +endif +endif diff --git a/perf-tester/Makefile b/perf-tester/Makefile index c667ce802..b9df02d07 100644 --- a/perf-tester/Makefile +++ b/perf-tester/Makefile @@ -37,6 +37,7 @@ LDLIBS += $(shell pkg-config --libs ncursesw) LDLIBS += $(shell pkg-config --libs openssl) include ../lib/g729.Makefile +include ../lib/cudecs.Makefile SRCS = main.c log.c LIBSRCS = codeclib.strhash.c loglib.c auxlib.c resample.c str.c dtmflib.c rtplib.c poller.c ssllib.c diff --git a/perf-tester/main.c b/perf-tester/main.c index f0aed481b..86a5801d5 100644 --- a/perf-tester/main.c +++ b/perf-tester/main.c @@ -45,6 +45,7 @@ struct stream { unsigned long long output_ts; decoder_t *decoder; encoder_t *encoder; + codec_chain_t *chain; struct testparams in_params; struct testparams out_params; uint fixture_idx; @@ -325,7 +326,13 @@ static void readable(int fd, void *o, uintptr_t x) { str frame; str_init_len(&frame, (char *) data->data, data->size); - decoder_input_data(s->decoder, &frame, s->input_ts, got_frame, s, NULL); + + if (!s->chain) + decoder_input_data(s->decoder, &frame, s->input_ts, got_frame, s, NULL); + else { + AVPacket *pkt = codec_chain_input_data(s->chain, &frame, s->input_ts); + got_packet_pkt(s, pkt); + } s->input_ts += data->duration; @@ -437,15 +444,19 @@ static void new_stream_params( format_t actual_enc_format; - s->encoder = encoder_new(); - int res = encoder_config_fmtp(s->encoder, out_def, bitrate, 20, &dec_format, &enc_format, - &actual_enc_format, - NULL, NULL, NULL); - assert(res == 0); // TODO: handle failures gracefully + s->chain = codec_chain_new(in_def, &dec_format, out_def, &enc_format, bitrate, 20); - s->decoder = decoder_new_fmtp(in_def, dec_format.clockrate, dec_format.channels, 20, - &actual_enc_format, NULL, NULL, NULL); // TODO: support different options (fmtp etc) - assert(s->decoder != NULL); // TODO: handle failures gracefully + if (!s->chain) { + s->encoder = encoder_new(); + int res = encoder_config_fmtp(s->encoder, out_def, bitrate, 20, &dec_format, &enc_format, + &actual_enc_format, + NULL, NULL, NULL); + assert(res == 0); // TODO: handle failures gracefully + + s->decoder = decoder_new_fmtp(in_def, dec_format.clockrate, dec_format.channels, 20, + &actual_enc_format, NULL, NULL, NULL); // TODO: support different options (fmtp etc) + assert(s->decoder != NULL); // TODO: handle failures gracefully + } // arm timer struct itimerspec timer = { @@ -458,7 +469,7 @@ static void new_stream_params( (ssl_random() % ptime) * 1000, }, }; - res = timerfd_settime(s->timer_fd, 0, &timer, NULL); + int res = timerfd_settime(s->timer_fd, 0, &timer, NULL); if (res != 0) abort();