Merge pull request #340 from kiwix/perf_kiwix_serve

Perf kiwix serve
pull/9/head
Kelson 10 years ago committed by GitHub
commit a53dea6d1e

@ -144,79 +144,162 @@ bool isVerbose() {
static Bytef *compr = (Bytef *)malloc(COMPRESSOR_BUFFER_SIZE);
static uLongf comprLen;
static int accessHandlerCallback(void *cls,
struct MHD_Connection * connection,
const char * url,
const char * method,
const char * version,
const char * upload_data,
size_t * upload_data_size,
void ** ptr) {
/* Unexpected method */
if (0 != strcmp(method, "GET") && 0 != strcmp(method, "POST"))
return MHD_NO;
/*
 * Deflate `content` in place when it qualifies for compression.
 *
 * content  - payload; overwritten with the compressed bytes on success
 * mimeType - MIME type, used to decide whether the payload is text-like
 *            ("text/", "application/javascript" or "application/json")
 *
 * Returns true when `content` now holds compressed data, false when it was
 * not replaced.
 *
 * NOTE(review): several statements below use `ptr`, `url` and `connection`,
 * none of which are parameters of this function; they look like lines of the
 * request handler mixed into this body -- confirm against the real file.
 */
static
bool compress_content(string &content,
const string &mimeType)
{
/* Compute the length */
unsigned int contentLength = content.size();
/* The first time only the headers are valid, do not respond in the first round... */
/* NOTE(review): `ptr` is not declared in this function -- see header note. */
static int dummy;
if (&dummy != *ptr) {
*ptr = &dummy;
return MHD_YES;
}
/* Should be deflate */
/* Only payloads strictly between the minimum size and the compressor buffer
   size, with a text-like MIME type, are candidates. */
bool deflated =
contentLength > KIWIX_MIN_CONTENT_SIZE_TO_DEFLATE &&
contentLength < COMPRESSOR_BUFFER_SIZE &&
(mimeType.find("text/") != string::npos ||
mimeType.find("application/javascript") != string::npos ||
mimeType.find("application/json") != string::npos);
/* Debug */
/* NOTE(review): `url` is not declared in this function -- see header note. */
if (isVerbose()) {
std::cout << "Requesting " << url << std::endl;
/* Compress the content if necessary */
if (deflated) {
/* The file-level buffer `compr` / `comprLen` is used while holding compressorLock */
pthread_mutex_lock(&compressorLock);
comprLen = COMPRESSOR_BUFFER_SIZE;
compress(compr, &comprLen, (const Bytef*)(content.data()), contentLength);
/* Keep the compressed form only when it is actually smaller than the input */
if (comprLen > 2 && comprLen < contentLength) {
/* /!\ Internet Explorer has a bug with deflate compression.
It can not handle the first two bytes (compression headers)
We need to chunk them off (move the content 2bytes)
It has no incidence on other browsers
See http://www.subbu.org/blog/2008/03/ie7-deflate-or-not and comments */
/* NOTE(review): this moves the file-level `compr` pointer itself, so the
   2-byte shift appears to accumulate on every compressed response, and
   comprLen is not reduced to match the skipped bytes -- confirm this is
   intended. */
compr += 2;
content = string((char *)compr, comprLen);
contentLength = comprLen;
} else {
deflated = false;
}
/* Check if the response can be compressed */
/* NOTE(review): `connection` is not declared in this function -- see header note. */
const string acceptEncodingHeaderValue = MHD_lookup_connection_value(connection, MHD_HEADER_KIND, MHD_HTTP_HEADER_ACCEPT_ENCODING) ?
MHD_lookup_connection_value(connection, MHD_HEADER_KIND, MHD_HTTP_HEADER_ACCEPT_ENCODING) : "";
const bool acceptEncodingDeflate = !acceptEncodingHeaderValue.empty() && acceptEncodingHeaderValue.find("deflate") != string::npos;
pthread_mutex_unlock(&compressorLock);
}
return deflated;
}
/* Check if range is requested */
const string acceptRangeHeaderValue = MHD_lookup_connection_value(connection, MHD_HEADER_KIND, MHD_HTTP_HEADER_RANGE) ?
MHD_lookup_connection_value(connection, MHD_HEADER_KIND, MHD_HTTP_HEADER_RANGE) : "";
const bool acceptRange = !acceptRangeHeaderValue.empty();
/* Prepare the variables */
struct MHD_Response *response;
std::string content;
std::string mimeType;
std::string httpRedirection;
unsigned int contentLength = 0;
bool cacheEnabled = true;
int httpResponseCode = MHD_HTTP_OK;
std::string urlStr = string(url);
/*
 * Create an MHD response for an in-memory buffer and attach the common
 * HTTP headers.
 *
 * data            - start of the body bytes
 * length          - number of body bytes
 * httpRedirection - when non-empty, sent as the Location header
 * mimeType        - value of the Content-Type header
 * deflated        - when true, Vary and Content-Encoding: deflate are added
 * cacheEnabled    - selects between the long max-age and the no-cache
 *                   Cache-Control value
 *
 * Returns the response object produced by MHD_create_response_from_data.
 *
 * NOTE(review): the statements that compute `humanReadableBookId` from
 * `urlStr`, `url` and `connection` use names that are not parameters of this
 * function; they look like lines of the request handler mixed into this
 * body, and they make the brace nesting below ambiguous -- confirm against
 * the real file.
 */
static
struct MHD_Response* build_response(const void* data,
unsigned int length,
const std::string& httpRedirection,
const std::string& mimeType,
bool deflated,
bool cacheEnabled)
{
/* Create the response */
/* NOTE(review): the MHD_NO / MHD_YES arguments are presumably must_free /
   must_copy -- confirm against the libmicrohttpd documentation. */
struct MHD_Response * response = MHD_create_response_from_data(length,
const_cast<void*>(data),
MHD_NO,
MHD_YES);
/* Get searcher and reader */
std::string humanReadableBookId = "";
if (!(urlStr.size() > 5 && urlStr.substr(0, 6) == "/skin/")) {
if (!strcmp(url, "/search") || !strcmp(url, "/suggest") || !strcmp(url, "/random")) {
const char* tmpGetValue = MHD_lookup_connection_value(connection, MHD_GET_ARGUMENT_KIND, "content");
humanReadableBookId = (tmpGetValue != NULL ? string(tmpGetValue) : "");
/* Make a redirection if necessary otherwise send the content */
if (!httpRedirection.empty()) {
MHD_add_response_header(response, MHD_HTTP_HEADER_LOCATION, httpRedirection.c_str());
} else {
humanReadableBookId = urlStr.substr(1, urlStr.find("/", 1) != string::npos ?
urlStr.find("/", 1) - 1 : urlStr.size() - 2);
if (!humanReadableBookId.empty()) {
urlStr = urlStr.substr(urlStr.find("/", 1) != string::npos ?
urlStr.find("/", 1) : humanReadableBookId.size());
/* Add if necessary the content-encoding */
if (deflated) {
MHD_add_response_header(response, MHD_HTTP_HEADER_VARY, "Accept-Encoding");
MHD_add_response_header(response, MHD_HTTP_HEADER_CONTENT_ENCODING, "deflate");
}
/* Tell the client that byte ranges are accepted */
MHD_add_response_header(response, MHD_HTTP_HEADER_ACCEPT_RANGES, "bytes");
/* Specify the mime type */
MHD_add_response_header(response, MHD_HTTP_HEADER_CONTENT_TYPE, mimeType.c_str());
}
/* Force to close the connection - cf. 100% CPU usage with v. 4.4 (in Lucid) */
//MHD_add_response_header(response, MHD_HTTP_HEADER_CONNECTION, "close");
/* Allow cross-domain requests */
//MHD_add_response_header(response, MHD_HTTP_HEADER_ACCESS_CONTROL_ALLOW_ORIGIN, "*");
/* The header name is spelled out as a literal; the macro form is commented out above */
MHD_add_response_header(response, "Access-Control-Allow-Origin", "*");
if (cacheEnabled) { /* Force cache */
MHD_add_response_header(response, MHD_HTTP_HEADER_CACHE_CONTROL, "max-age=2723040, public");
} else { /* Prevent cache (for random page) */
MHD_add_response_header(response, MHD_HTTP_HEADER_CACHE_CONTROL, "no-cache, no-store, must-revalidate");
}
return response;
}
pthread_mutex_lock(&mapLock);
kiwix::Searcher *searcher = searchers.find(humanReadableBookId) != searchers.end() ?
searchers.find(humanReadableBookId)->second : NULL;
kiwix::Reader *reader = readers.find(humanReadableBookId) != readers.end() ?
readers.find(humanReadableBookId)->second : NULL;
if (reader == NULL) {
humanReadableBookId="";
/*
 * Content-reader callback: copy a slice of a zim::Blob into the buffer
 * supplied by the caller.
 *
 * cls - the zim::Blob to read from, passed as an opaque pointer
 * pos - byte offset inside the blob at which to start copying
 * buf - destination buffer
 * max - maximum number of bytes that may be written to `buf`
 *
 * Returns the number of bytes copied, or MHD_CONTENT_READER_END_WITH_ERROR
 * when no byte can be copied.
 *
 * NOTE(review): the lines using `mapLock`, `url` and `reader` refer to names
 * that are not parameters of this function; they look like lines of the
 * request handler mixed into this body -- confirm against the real file.
 */
ssize_t callback_reader_from_blob(void *cls,
uint64_t pos,
char *buf,
size_t max)
{
zim::Blob* blob = static_cast<zim::Blob*>(cls);
/* Blob access is done while holding readerLock */
pthread_mutex_lock(&readerLock);
/* Copy at most `max` bytes, and no more than what remains after `pos` */
size_t max_size_to_set = min(max, blob->size()-pos);
/* NOTE(review): max_size_to_set is a size_t, so `<= 0` is only true for 0;
   if `pos` could exceed blob->size() the subtraction above would presumably
   wrap instead of going negative -- confirm the type of blob->size(). */
if (max_size_to_set <= 0)
{
pthread_mutex_unlock(&readerLock);
return MHD_CONTENT_READER_END_WITH_ERROR;
}
pthread_mutex_unlock(&mapLock);
/* Get suggestions */
if (!strcmp(url, "/suggest") && reader != NULL) {
memcpy(buf, blob->data()+pos, max_size_to_set);
pthread_mutex_unlock(&readerLock);
return max_size_to_set;
}
/*
 * Destroy the zim::Blob passed as an opaque pointer.
 *
 * cls - pointer to a heap-allocated zim::Blob; it is deleted while holding
 *       readerLock.
 */
void callback_free_blob(void *cls)
{
  pthread_mutex_lock(&readerLock);
  delete static_cast<zim::Blob*>(cls);
  pthread_mutex_unlock(&readerLock);
}
/*
 * Build an MHD response that streams a zim::Blob through
 * callback_reader_from_blob instead of copying it into a string first.
 *
 * blob     - blob to serve; a heap copy of it is handed to MHD as the
 *            callback context and released by callback_free_blob
 * mimeType - value of the Content-Type header
 *
 * Returns the new response, or NULL when MHD could not create it. In that
 * case the heap copy of the blob is released here, because MHD never
 * received the free callback.
 */
static
struct MHD_Response* build_callback_response_from_blob(zim::Blob& blob,
                                                       const std::string& mimeType)
{
  pthread_mutex_lock(&readerLock);
  zim::Blob* p_blob = new zim::Blob(blob);
  struct MHD_Response * response = MHD_create_response_from_callback(blob.size(),
                                                                     16384,
                                                                     callback_reader_from_blob,
                                                                     p_blob,
                                                                     callback_free_blob);
  if (response == NULL) {
    /* Creation failed: callback_free_blob will never run, so free the copy
       ourselves (still under readerLock, as callback_free_blob does). */
    delete p_blob;
  }
  pthread_mutex_unlock(&readerLock);

  if (response == NULL) {
    return NULL;
  }

  /* Tell the client that byte ranges are accepted */
  MHD_add_response_header(response, MHD_HTTP_HEADER_ACCEPT_RANGES, "bytes");

  /* Specify the mime type */
  MHD_add_response_header(response, MHD_HTTP_HEADER_CONTENT_TYPE, mimeType.c_str());

  /* Allow cross-domain requests */
  //MHD_add_response_header(response, MHD_HTTP_HEADER_ACCESS_CONTROL_ALLOW_ORIGIN, "*");
  MHD_add_response_header(response, "Access-Control-Allow-Origin", "*");

  /* Blob content is always served with the long-lived cache policy */
  MHD_add_response_header(response, MHD_HTTP_HEADER_CACHE_CONTROL, "max-age=2723040, public");
  return response;
}
static
struct MHD_Response* handle_suggest(struct MHD_Connection * connection,
int& httpResponseCode,
kiwix::Reader *reader,
kiwix::Searcher *searcher,
const std::string& urlStr,
const std::string& humanReadableBookId,
bool acceptEncodingDeflate)
{
std::string content;
std::string mimeType;
unsigned int maxSuggestionCount = 10;
unsigned int suggestionCount = 0;
std::string suggestion;
@ -246,16 +329,37 @@ static int accessHandlerCallback(void *cls,
content += "]";
mimeType = "application/json; charset=utf-8";
}
bool deflated = acceptEncodingDeflate && compress_content(content, mimeType);
return build_response(content.data(), content.size(), "", mimeType, deflated, true);
}
/* Get static skin stuff */
else if (urlStr.substr(0, 6) == "/skin/") {
content = getResourceAsString(urlStr.substr(6));
mimeType = getMimeTypeForFile(urlStr);
}
/*
 * Serve a static skin resource.
 *
 * urlStr                - request path; the first 6 characters are dropped
 *                         to obtain the resource name, and the full path is
 *                         used to pick the MIME type
 * acceptEncodingDeflate - whether compression may be attempted
 *
 * The remaining parameters are not used by this handler.
 * Returns the response produced by build_response, with caching enabled.
 */
static
struct MHD_Response* handle_skin(struct MHD_Connection * connection,
                                 int& httpResponseCode,
                                 kiwix::Reader *reader,
                                 kiwix::Searcher *searcher,
                                 const std::string& urlStr,
                                 const std::string& humanReadableBookId,
                                 bool acceptEncodingDeflate)
{
  std::string body = getResourceAsString(urlStr.substr(6));
  std::string type = getMimeTypeForFile(urlStr);

  /* Only try to compress when the client accepts deflate */
  bool deflated = false;
  if (acceptEncodingDeflate) {
    deflated = compress_content(body, type);
  }

  return build_response(body.data(), body.size(), "", type, deflated, true);
}
/* Display the search results */
else if (!strcmp(url, "/search")) {
static
struct MHD_Response* handle_search(struct MHD_Connection * connection,
int& httpResponseCode,
kiwix::Reader *reader,
kiwix::Searcher *searcher,
const std::string& urlStr,
const std::string& humanReadableBookId,
bool acceptEncodingDeflate)
{
std::string content;
std::string mimeType;
std::string httpRedirection;
/* Retrieve the pattern to search */
const char* pattern = MHD_lookup_connection_value(connection, MHD_GET_ARGUMENT_KIND, "pattern");
@ -277,6 +381,8 @@ static int accessHandlerCallback(void *cls,
/* If article found then redirect directly to it */
if (!patternCorrespondingUrl.empty()) {
httpRedirection = "/" + humanReadableBookId + "/" + patternCorrespondingUrl;
httpResponseCode = MHD_HTTP_FOUND;
return build_response("", 0, httpRedirection, "", false, true);
}
}
@ -302,48 +408,115 @@ static int accessHandlerCallback(void *cls,
}
mimeType = "text/html; charset=utf-8";
}
/* Display a random article */
else if (!strcmp(url, "/random")) {
cacheEnabled = false;
introduceTaskbar(content, humanReadableBookId);
bool deflated = acceptEncodingDeflate && compress_content(content, mimeType);
return build_response(content.data(), content.size(), httpRedirection, mimeType, deflated, true);
}
/*
 * Answer a request for a random page with a redirection.
 *
 * httpResponseCode    - set to MHD_HTTP_FOUND
 * reader              - when non-NULL, asked for a random page URL while
 *                       holding readerLock
 * humanReadableBookId - book prefix used when building the redirect target
 *
 * The remaining parameters are not used in the visible body.
 * Returns the response produced by build_response with an empty body and
 * caching disabled; the redirect target stays empty when `reader` is NULL.
 *
 * NOTE(review): the body contains one more closing brace than opening
 * braces; the extra `}` looks like a line of the previous handler mixed into
 * this body -- confirm against the real file.
 */
static
struct MHD_Response* handle_random(struct MHD_Connection * connection,
int& httpResponseCode,
kiwix::Reader *reader,
kiwix::Searcher *searcher,
const std::string& urlStr,
const std::string& humanReadableBookId,
bool acceptEncodingDeflate)
{
std::string httpRedirection;
/* NOTE(review): this local is never read; build_response below receives a
   literal `false` instead. */
bool cacheEnabled = false;
httpResponseCode = MHD_HTTP_FOUND;
if (reader != NULL) {
pthread_mutex_lock(&readerLock);
std::string randomUrl = reader->getRandomPageUrl();
pthread_mutex_unlock(&readerLock);
/* Redirect target is "/<book id>/<url-encoded random page url>" */
httpRedirection = "/" + humanReadableBookId + "/" + kiwix::urlEncode(randomUrl);
}
}
return build_response("", 0, httpRedirection, "", false, false);
}
/* Display the content of a ZIM content (article, image, ...) */
else if (reader != NULL) {
static
struct MHD_Response* handle_content(struct MHD_Connection * connection,
int& httpResponseCode,
kiwix::Reader *reader,
kiwix::Searcher *searcher,
const std::string& urlStr,
const std::string& humanReadableBookId,
bool acceptEncodingDeflate)
{
std::string baseUrl;
std::string content;
std::string mimeType;
unsigned int contentLength;
try {
bool found = false;
zim::Article article;
pthread_mutex_lock(&readerLock);
bool found = reader->getContentByDecodedUrl(urlStr, content, contentLength, mimeType, baseUrl);
pthread_mutex_unlock(&readerLock);
try {
found = reader->getArticleObjectByDecodedUrl(urlStr, article);
if (found) {
if (isVerbose()) {
cout << "Found " << urlStr << endl;
cout << "content size: " << contentLength << endl;
cout << "mimeType: " << mimeType << endl;
/* If redirect */
unsigned int loopCounter = 0;
while (article.isRedirect() && loopCounter++<42) {
article = article.getRedirectArticle();
}
} else {
/* To many loop */
if (loopCounter == 42)
found = false;
}
} catch (const std::exception& e) {
std::cerr << e.what() << std::endl;
found = false;
}
pthread_mutex_unlock(&readerLock);
if (!found) {
if (isVerbose())
cout << "Failed to find " << urlStr << endl;
content = "<!DOCTYPE html>\n<html><head><meta content=\"text/html;charset=UTF-8\" http-equiv=\"content-type\" /><title>Content not found</title></head><body><h1>Not Found</h1><p>The requested URL \"" + urlStr + "\" was not found on this server.</p></body></html>";
mimeType = "text/html";
httpResponseCode = MHD_HTTP_NOT_FOUND;
introduceTaskbar(content, humanReadableBookId);
bool deflated = acceptEncodingDeflate && compress_content(content, mimeType);
return build_response(content.data(), content.size(), "", mimeType, deflated, false);
}
} catch (const std::exception& e) {
std::cerr << e.what() << std::endl;
try {
pthread_mutex_lock(&readerLock);
mimeType = article.getMimeType();
pthread_mutex_unlock(&readerLock);
} catch (exception &e) {
mimeType = "application/octet-stream";
}
if (isVerbose()) {
cout << "Found " << urlStr << endl;
cout << "mimeType: " << mimeType << endl;
}
pthread_mutex_lock(&readerLock);
zim::Blob raw_content = article.getData();
pthread_mutex_unlock(&readerLock);
if (mimeType.find("text/") != string::npos ||
mimeType.find("application/javascript") != string::npos ||
mimeType.find("application/json") != string::npos)
{
pthread_mutex_lock(&readerLock);
content = string(raw_content.data(), raw_content.size());
pthread_mutex_unlock(&readerLock);
/* Special rewrite URL in case of ZIM file use intern *asbolute* url like /A/Kiwix */
if (mimeType.find("text/html") != string::npos) {
if (content.find("<body") == std::string::npos &&
content.find("<BODY") == std::string::npos) {
content = "<html><head><title>" + article.getTitle() + "</title><meta http-equiv=\"Content-Type\" content=\"text/html; charset=utf-8\" /></head><body>" + content + "</body></html>";
}
baseUrl = "/" + std::string(1, article.getNamespace()) + "/" + article.getUrl();
content = replaceRegex(content, "$1$2" + humanReadableBookId + "/$3/",
"(href|src)(=[\"|\']{0,1}/)([A-Z|\\-])/");
content = replaceRegex(content, "$1$2" + humanReadableBookId + "/$3/",
@ -351,100 +524,171 @@ static int accessHandlerCallback(void *cls,
content = replaceRegex(content,
"<head><base href=\"/" + humanReadableBookId + baseUrl + "\" />",
"<head>");
introduceTaskbar(content, humanReadableBookId);
} else if (mimeType.find("text/css") != string::npos) {
content = replaceRegex(content, "$1$2" + humanReadableBookId + "/$3/",
"(url|URL)(\\([\"|\']{0,1}/)([A-Z|\\-])/");
}
bool deflated = acceptEncodingDeflate && compress_content(content, mimeType);
return build_response(content.data(), content.size(), "", mimeType, deflated, true);
}
else
{
return build_callback_response_from_blob(raw_content, mimeType);
}
}
/* Display the global Welcome page */
else {
/*
 * Serve the global welcome page.
 *
 * acceptEncodingDeflate - whether compression may be attempted
 *
 * The welcome HTML is copied from `welcomeHTML` while holding welcomeLock
 * and returned through build_response as "text/html; charset=utf-8" with
 * caching enabled.
 *
 * NOTE(review): this body contains statements that use `content`,
 * `mimeType` and `contentLength` before (or without) a declaration in this
 * function, a second declaration of `deflated`, and an unmatched `}`; they
 * look like lines of the previous handler mixed into this body -- confirm
 * against the real file.
 */
static
struct MHD_Response* handle_default(struct MHD_Connection * connection,
int& httpResponseCode,
kiwix::Reader *reader,
kiwix::Searcher *searcher,
const std::string& urlStr,
const std::string& humanReadableBookId,
bool acceptEncodingDeflate)
{
pthread_mutex_lock(&welcomeLock);
content = welcomeHTML;
std::string content = welcomeHTML;
pthread_mutex_unlock(&welcomeLock);
mimeType = "text/html; charset=utf-8";
}
/* Introduce Taskbar */
if (!humanReadableBookId.empty() && mimeType.find("text/html") != string::npos) {
introduceTaskbar(content, humanReadableBookId);
}
/* Compute the length */
contentLength = content.size();
std::string mimeType = "text/html; charset=utf-8";
/* Should be deflate */
bool deflated =
contentLength > KIWIX_MIN_CONTENT_SIZE_TO_DEFLATE &&
contentLength < COMPRESSOR_BUFFER_SIZE &&
acceptEncodingDeflate &&
(mimeType.find("text/") != string::npos ||
mimeType.find("application/javascript") != string::npos ||
mimeType.find("application/json") != string::npos);
/* Compression is attempted only when the client accepts deflate */
bool deflated = acceptEncodingDeflate && compress_content(content, mimeType);
return build_response(content.data(), content.size(), "", mimeType, deflated, true);
}
/* Compress the content if necessary */
if (deflated) {
pthread_mutex_lock(&compressorLock);
comprLen = COMPRESSOR_BUFFER_SIZE;
compress(compr, &comprLen, (const Bytef*)(content.data()), contentLength);
static int accessHandlerCallback(void *cls,
struct MHD_Connection * connection,
const char * url,
const char * method,
const char * version,
const char * upload_data,
size_t * upload_data_size,
void ** ptr)
{
/* Unexpected method */
if (0 != strcmp(method, "GET") && 0 != strcmp(method, "POST"))
return MHD_NO;
if (comprLen > 2 && comprLen < contentLength) {
/* The first time only the headers are valid, do not respond in the first round... */
static int dummy;
if (&dummy != *ptr) {
*ptr = &dummy;
return MHD_YES;
}
/* /!\ Internet Explorer has a bug with deflate compression.
It can not handle the first two bytes (compression headers)
We need to chunk them off (move the content 2bytes)
It has no incidence on other browsers
See http://www.subbu.org/blog/2008/03/ie7-deflate-or-not and comments */
compr += 2;
/* clear context pointer */
*ptr = NULL;
content = string((char *)compr, comprLen);
contentLength = comprLen;
} else {
deflated = false;
/* Debug */
if (isVerbose()) {
std::cout << "Requesting " << url << std::endl;
}
pthread_mutex_unlock(&compressorLock);
}
/* Check if the response can be compressed */
const char* acceptEncodingHeaderValue = MHD_lookup_connection_value(connection, MHD_HEADER_KIND, MHD_HTTP_HEADER_ACCEPT_ENCODING);
const bool acceptEncodingDeflate = acceptEncodingHeaderValue && string(acceptEncodingHeaderValue).find("deflate") != string::npos;
/* Create the response */
response = MHD_create_response_from_data(contentLength,
(void *)content.data(),
MHD_NO,
MHD_YES);
/* Check if range is requested */
const char* acceptRangeHeaderValue = MHD_lookup_connection_value(connection, MHD_HEADER_KIND, MHD_HTTP_HEADER_RANGE);
const bool acceptRange = acceptRangeHeaderValue != NULL;
/* Make a redirection if necessary otherwise send the content */
if (!httpRedirection.empty()) {
MHD_add_response_header(response, MHD_HTTP_HEADER_LOCATION, httpRedirection.c_str());
httpResponseCode = MHD_HTTP_FOUND;
/* Prepare the variables */
struct MHD_Response *response;
int httpResponseCode = MHD_HTTP_OK;
std::string urlStr = string(url);
/* Get searcher and reader */
std::string humanReadableBookId = "";
if (!(urlStr.size() > 5 && urlStr.substr(0, 6) == "/skin/")) {
if (!strcmp(url, "/search") || !strcmp(url, "/suggest") || !strcmp(url, "/random")) {
const char* tmpGetValue = MHD_lookup_connection_value(connection, MHD_GET_ARGUMENT_KIND, "content");
humanReadableBookId = (tmpGetValue != NULL ? string(tmpGetValue) : "");
} else {
humanReadableBookId = urlStr.substr(1, urlStr.find("/", 1) != string::npos ?
urlStr.find("/", 1) - 1 : urlStr.size() - 2);
if (!humanReadableBookId.empty()) {
urlStr = urlStr.substr(urlStr.find("/", 1) != string::npos ?
urlStr.find("/", 1) : humanReadableBookId.size());
}
}
}
/* Add if necessary the content-encoding */
if (deflated) {
MHD_add_response_header(response, MHD_HTTP_HEADER_VARY, "Accept-Encoding");
MHD_add_response_header(response, MHD_HTTP_HEADER_CONTENT_ENCODING, "deflate");
pthread_mutex_lock(&mapLock);
kiwix::Searcher *searcher = searchers.find(humanReadableBookId) != searchers.end() ?
searchers.find(humanReadableBookId)->second : NULL;
kiwix::Reader *reader = readers.find(humanReadableBookId) != readers.end() ?
readers.find(humanReadableBookId)->second : NULL;
if (reader == NULL) {
humanReadableBookId="";
}
pthread_mutex_unlock(&mapLock);
/* Tell the client that byte ranges are accepted */
MHD_add_response_header(response, MHD_HTTP_HEADER_ACCEPT_RANGES, "bytes");
/* Get suggestions */
if (!strcmp(url, "/suggest") && reader != NULL) {
response = handle_suggest(connection,
httpResponseCode,
reader,
searcher,
urlStr,
humanReadableBookId,
acceptEncodingDeflate);
}
/* Specify the mime type */
MHD_add_response_header(response, MHD_HTTP_HEADER_CONTENT_TYPE, mimeType.c_str());
/* Get static skin stuff */
else if (urlStr.substr(0, 6) == "/skin/") {
response = handle_skin(connection,
httpResponseCode,
reader,
searcher,
urlStr,
humanReadableBookId,
acceptEncodingDeflate);
}
/* clear context pointer */
*ptr = NULL;
/* Display the search restults */
else if (!strcmp(url, "/search")) {
response = handle_search(connection,
httpResponseCode,
reader,
searcher,
urlStr,
humanReadableBookId,
acceptEncodingDeflate);
}
/* Force to close the connection - cf. 100% CPU usage with v. 4.4 (in Lucid) */
//MHD_add_response_header(response, MHD_HTTP_HEADER_CONNECTION, "close");
/* Display a random article */
else if (!strcmp(url, "/random")) {
response = handle_random(connection,
httpResponseCode,
reader,
searcher,
urlStr,
humanReadableBookId,
acceptEncodingDeflate);
}
/* Allow cross-domain requests */
//MHD_add_response_header(response, MHD_HTTP_HEADER_ACCESS_CONTROL_ALLOW_ORIGIN, "*");
MHD_add_response_header(response, "Access-Control-Allow-Origin", "*");
/* Display the content of a ZIM content (article, image, ...) */
else if (reader != NULL) {
response = handle_content(connection,
httpResponseCode,
reader,
searcher,
urlStr,
humanReadableBookId,
acceptEncodingDeflate);
}
if (cacheEnabled) { /* Force cache */
MHD_add_response_header(response, MHD_HTTP_HEADER_CACHE_CONTROL, "max-age=2723040, public");
} else { /* Prevent cache (for random page) */
MHD_add_response_header(response, MHD_HTTP_HEADER_CACHE_CONTROL, "no-cache, no-store, must-revalidate");
/* Display the global Welcome page */
else {
response = handle_default(connection,
httpResponseCode,
reader,
searcher,
urlStr,
humanReadableBookId,
acceptEncodingDeflate);
}
/* Queue the response */

Loading…
Cancel
Save