Add a canary process for high priority mode (asterisk -p), to ensure that if
Asterisk goes into a busy loop, the machine will be recoverable.  We'd still
need to do a restart to put Asterisk back into high priority mode, but at
least a reboot won't be required. (Closes issue #11559)


git-svn-id: https://origsvn.digium.com/svn/asterisk/trunk@93804 65c4cc65-6c06-0410-ace0-fbb531ad65f3
1.6.0
Tilghman Lesher 18 years ago
parent 7a40251f08
commit 53436f42f4

@ -233,6 +233,8 @@ static char *_argv[256];
static int shuttingdown; static int shuttingdown;
static int restartnow; static int restartnow;
static pthread_t consolethread = AST_PTHREADT_NULL; static pthread_t consolethread = AST_PTHREADT_NULL;
/* Result of the fork() that launches the canary; canary_exit() only signals it when > 0 */
static int canary_pid = 0;
/* Path of the file the canary touches; built in main() from ast_config_AST_RUN_DIR and stat()ed by canary_thread() */
static char canary_filename[128];
static char randompool[256]; static char randompool[256];
@ -2625,6 +2627,35 @@ static void *monitor_sig_flags(void *unused)
return NULL; return NULL;
} }
/*!
 * \brief Watchdog thread that monitors the canary's heartbeat file.
 *
 * After an initial grace period, checks once a minute whether the
 * modification time of canary_filename is more than 60 seconds in the past.
 * If it is, or if the file cannot be stat()ed at all, a warning is logged,
 * ast_set_priority(0) is called and this thread terminates.
 *
 * \param unused Ignored.
 *
 * \return Never returns normally; the thread ends through pthread_exit().
 */
static void *canary_thread(void *unused)
{
	struct stat canary_stat;
	struct timeval now;
	int stat_failed;

	/* Give the canary time to sing */
	sleep(120);

	for (;;) {
		/*
		 * If stat() fails, canary_stat is left unset and must not be
		 * inspected; a missing or unreadable file counts as a dead canary.
		 */
		stat_failed = stat(canary_filename, &canary_stat) != 0;
		now = ast_tvnow();
		if (stat_failed || now.tv_sec > canary_stat.st_mtime + 60) {
			ast_log(LOG_WARNING, "Canary is dead!!! Reducing priority\n");
			ast_set_priority(0);
			pthread_exit(NULL);
		}

		/* Check the canary once a minute */
		sleep(60);
	}
}
/*!
 * \brief Exit hook, registered with libc's atexit(3), that kills the canary.
 *
 * Sends SIGKILL to canary_pid, but only when that value is a positive PID.
 */
static void canary_exit(void)
{
	if (canary_pid <= 0) {
		/* No canary to signal */
		return;
	}

	kill(canary_pid, SIGKILL);
}
int main(int argc, char *argv[]) int main(int argc, char *argv[])
{ {
int c; int c;
@ -2808,10 +2839,49 @@ int main(int argc, char *argv[])
if ((!runuser) && !ast_strlen_zero(ast_config_AST_RUN_USER)) if ((!runuser) && !ast_strlen_zero(ast_config_AST_RUN_USER))
runuser = ast_config_AST_RUN_USER; runuser = ast_config_AST_RUN_USER;
/* Must install this signal handler up here to ensure that if the canary
* fails to execute that it doesn't kill the Asterisk process.
*/
signal(SIGCHLD, child_handler);
#ifndef __CYGWIN__ #ifndef __CYGWIN__
if (isroot) if (isroot) {
ast_set_priority(ast_opt_high_priority); ast_set_priority(ast_opt_high_priority);
if (ast_opt_high_priority) {
snprintf(canary_filename, sizeof(canary_filename), "%s/alt.asterisk.canary.tweet.tweet.tweet", ast_config_AST_RUN_DIR);
canary_pid = fork();
if (canary_pid == 0) {
char canary_binary[128], *lastslash;
int fd;
/* Reset signal handler */
signal(SIGCHLD, SIG_DFL);
for (fd = 0; fd < 100; fd++)
close(fd);
execlp("astcanary", "astcanary", canary_filename, NULL);
/* If not found, try the same path as used to execute asterisk */
ast_copy_string(canary_binary, argv[0], sizeof(canary_binary));
if ((lastslash = strrchr(canary_binary, '/'))) {
ast_copy_string(lastslash + 1, "astcanary", sizeof(canary_binary) + canary_binary - (lastslash + 1));
execl(canary_binary, "astcanary", canary_filename, NULL);
}
/* Should never happen */
_exit(1);
} else if (canary_pid > 0) {
pthread_t dont_care;
ast_pthread_create_detached(&dont_care, NULL, canary_thread, NULL);
}
/* Kill the canary when we exit */
atexit(canary_exit);
}
}
if (isroot && rungroup) { if (isroot && rungroup) {
struct group *gr; struct group *gr;
@ -2975,7 +3045,6 @@ int main(int argc, char *argv[])
signal(SIGINT, __quit_handler); signal(SIGINT, __quit_handler);
signal(SIGTERM, __quit_handler); signal(SIGTERM, __quit_handler);
signal(SIGHUP, hup_handler); signal(SIGHUP, hup_handler);
signal(SIGCHLD, child_handler);
signal(SIGPIPE, SIG_IGN); signal(SIGPIPE, SIG_IGN);
/* ensure that the random number generators are seeded with a different value every time /* ensure that the random number generators are seeded with a different value every time

@ -17,7 +17,7 @@ ASTTOPDIR?=..
.PHONY: clean all uninstall .PHONY: clean all uninstall
# to get check_expr, add it to the ALL_UTILS list # to get check_expr, add it to the ALL_UTILS list
ALL_UTILS:=astman smsq stereorize streamplayer aelparse muted check_expr conf2ael hashtest2 hashtest ALL_UTILS:=astman smsq stereorize streamplayer aelparse muted check_expr conf2ael hashtest2 hashtest astcanary
UTILS:=$(ALL_UTILS) UTILS:=$(ALL_UTILS)
LIBS += $(BKTR_LIB) # astobj2 with devmode uses backtrace LIBS += $(BKTR_LIB) # astobj2 with devmode uses backtrace

@ -0,0 +1,84 @@
/*
* Asterisk -- An open source telephony toolkit.
*
* Copyright (C) 2007, Digium, Inc.
*
* Tilghman Lesher <tlesher AT digium DOT com>
*
* See http://www.asterisk.org for more information about
* the Asterisk project. Please do not directly contact
* any of the maintainers of this project for assistance;
* the project provides a web site, mailing lists and IRC
* channels for your use.
*
* This program is free software, distributed under the terms of
* the GNU General Public License Version 2. See the LICENSE file
* at the top of the source tree.
*/
#include <sys/types.h>
#include <sys/stat.h>
#include <sys/time.h>
#include <sys/resource.h>
#include <utime.h>
#include <fcntl.h>
#include <unistd.h>
#include <stdlib.h>
/*!\brief
* At one time, canaries were carried along with coal miners down
* into a mine. Their purpose was to alert the miners when they
* had drilled into a pocket of methane gas or another noxious
* substance. The canary, being the most sensitive animal, would
* immediately fall over. Seeing this, the miners could take
* action to escape the mine, seeing an imminent danger.
*
* This process serves a similar purpose, though with the realtime
* priority being the reason. When a thread starts running away
* with the processor, it is typically difficult to tell what
* thread caused the problem, as the machine acts as if it is
* locked up (in fact, what has happened is that Asterisk runs at
* a higher priority than even the login shell, so the runaway
* thread hogs all available CPU time).
*
* If that happens, this canary process will cease to get any
* process time, which we can monitor with a realtime thread in
* Asterisk. Should that happen, that monitoring thread may take
* immediate action to slow down Asterisk to regular priority,
* thus allowing an administrator to login to the system and
* restart Asterisk or perhaps take another course of action
* (such as retrieving a backtrace to let the developers know
* what precisely went wrong).
*
* Note that according to POSIX.1, all threads inside a single
* process must share the same priority, so when the monitoring
* thread deprioritizes itself, it deprioritizes all threads at
* the same time. This is also why this canary must exist as a
* completely separate process and not simply as a thread within
* Asterisk itself.
*/
/*!
 * \brief Entry point of the canary: keep refreshing the heartbeat file.
 *
 * Resets the process priority to 0, then loops forever updating the
 * modification time of the file named by argv[1] every 5 seconds. If the
 * update fails, the file is (re)created and the update is retried at once.
 *
 * \param argc Argument count; must be at least 2.
 * \param argv argv[1] is the path of the file to touch.
 *
 * \return Does not return while the file can be updated. Exits with status 1
 *         when no path was supplied or the file cannot be created.
 */
int main(int argc, char *argv[])
{
	int fd;

	/* Without a path argument argv[1] is NULL and must not reach utime()/open() */
	if (argc < 2) {
		exit(1);
	}

	/* Run at normal priority */
	setpriority(PRIO_PROCESS, 0, 0);

	for (;;) {
		/* Update the modification times (checked from Asterisk) */
		if (utime(argv[1], NULL)) {
			/*
			 * Recreate the file if it doesn't exist.  O_CREAT requires an
			 * explicit mode argument; omitting it yields arbitrary permissions.
			 */
			if ((fd = open(argv[1], O_RDWR | O_TRUNC | O_CREAT, 0644)) > -1) {
				close(fd);
			} else {
				exit(1);
			}
			continue;
		}

		/* Run occasionally */
		sleep(5);
	}

	/* Never reached */
	return 0;
}
Loading…
Cancel
Save