From 7eab841093d3a91a38f9836b1c54199821a35429 Mon Sep 17 00:00:00 2001 From: George Joseph Date: Thu, 13 Feb 2020 12:39:58 -0700 Subject: [PATCH] res_pjsip_outbound_registration: Fix SRV failover on timeout In order to retry outbound registrations for some situations, we need access to the tdata from the original request. For instance, for 401/407 responses we need it to properly construct the subsequent request with the authentication. We also need it if we're iterating over a DNS SRV response record set so we can skip entries we've already tried. We've been getting the tdata from the server response rdata and transaction but that only works for the failures where there was actually a response (4XX, 5XX, etc). For timeouts there's no response and therefore no rdata or transaction from which to get the tdata. When processing a single A/AAAA record for a server, this wasn't an issue as we just retried that same server after the retry timer expired. If we got an SRV record set for the server though, without the state from the tdata, we just kept trying the first entry in the set repeatedly instead of skipping to the next one in the list. * Added a "last_tdata" member to the client state structure to keep track of the sent tdata. * Updated registration_client_send() to save the tdata it used into the client_state. * Updated sip_outbound_registration_response_cb() to use the tdata saved in client_state when we don't get a response from the server. We still use the tdata from the transaction when we DO get a response from the server so we can properly handle 4XX responses where our new request depends on it. General note on timeouts: Although res_pjsip_outbound_registration skips to the next record immediately when a timeout occurs during SRV set traversal, it's pjproject that determines how long to wait before a timeout is declared. As with other SIP message types, pjproject will continue trying the same server at an interval specified by "timer_t1" until "timer_b" expires. Both of those timers are set in the pjsip.conf "system" section. ASTERISK-28746 Change-Id: I199b8274392d17661dd3ce3b4d69a3968368fa06 --- res/res_pjsip_outbound_registration.c | 49 +++++++++++++++++++++++++-- 1 file changed, 47 insertions(+), 2 deletions(-) diff --git a/res/res_pjsip_outbound_registration.c b/res/res_pjsip_outbound_registration.c index 9ea6054212..4d0e38e612 100644 --- a/res/res_pjsip_outbound_registration.c +++ b/res/res_pjsip_outbound_registration.c @@ -334,6 +334,14 @@ struct sip_outbound_registration_client_state { * module unload. */ pjsip_regc *client; + /*! + * \brief Last tdata sent + * We need the original tdata to resend a request on auth failure + * or timeout. On an auth failure, we use the original tdata + * to initialize the new tdata for the authorized response. On a timeout + * we need it to skip failed SRV entries if any. + */ + pjsip_tx_data *last_tdata; /*! \brief Timer entry for retrying on temporal responses */ pj_timer_entry timer; /*! \brief Optional line parameter placed into Contact */ @@ -543,6 +551,13 @@ static pj_status_t registration_client_send(struct sip_outbound_registration_cli /* Due to the message going out the callback may now be invoked, so bump the count */ ao2_ref(client_state, +1); + /* + * We also bump tdata in expectation of saving it to client_state->last_tdata. + * We have to do it BEFORE pjsip_regc_send because if it succeeds, it decrements + * the ref count on its own. + */ + pjsip_tx_data_add_ref(tdata); + /* * Set the transport in case transports were reloaded. * When pjproject removes the extraneous error messages produced, @@ -552,13 +567,26 @@ static pj_status_t registration_client_send(struct sip_outbound_registration_cli pjsip_regc_set_transport(client_state->client, &selector); status = pjsip_regc_send(client_state->client, tdata); - /* If the attempt to send the message failed and the callback was not invoked we need to - * drop the reference we just added + /* + * If the attempt to send the message failed and the callback was not invoked we need to + * drop the references we just added */ if ((status != PJ_SUCCESS) && !(*callback_invoked)) { + pjsip_tx_data_dec_ref(tdata); ao2_ref(client_state, -1); + return status; } + /* + * Decref the old last_data before replacing it. + * BTW, it's quite possible that last_data == tdata + * if we're trying successive servers in an SRV set. + */ + if (client_state->last_tdata) { + pjsip_tx_data_dec_ref(client_state->last_tdata); + } + client_state->last_tdata = tdata; + return status; } @@ -1077,11 +1105,25 @@ static void sip_outbound_registration_response_cb(struct pjsip_regc_cbparam *par retry_after = pjsip_msg_find_hdr(param->rdata->msg_info.msg, PJSIP_H_RETRY_AFTER, NULL); response->retry_after = retry_after ? retry_after->ivalue : 0; + + /* + * If we got a response from the server, we have to use the tdata + * from the transaction, not the tdata saved when we sent the + * request. If we use the saved tdata, we won't process responses + * like 423 Interval Too Brief correctly and we'll wind up sending + * the bad Expires value again. + */ + pjsip_tx_data_dec_ref(client_state->last_tdata); + tsx = pjsip_rdata_get_tsx(param->rdata); response->old_request = tsx->last_tx; pjsip_tx_data_add_ref(response->old_request); pjsip_rx_data_clone(param->rdata, 0, &response->rdata); + } else { + /* old_request steals the reference */ + response->old_request = client_state->last_tdata; } + client_state->last_tdata = NULL; /* * Transfer response reference to serializer task so the @@ -1127,6 +1169,9 @@ static void sip_outbound_registration_client_state_destroy(void *obj) ast_taskprocessor_unreference(client_state->serializer); ast_free(client_state->transport_name); ast_free(client_state->registration_name); + if (client_state->last_tdata) { + pjsip_tx_data_dec_ref(client_state->last_tdata); + } } /*! \brief Allocator function for registration state */