+ improvement of kiwix-serve to be able to correctly deal with many files at the same time

pull/9/head
kelson42 14 years ago
parent f213d0273d
commit d2b8ebc68d

@ -43,6 +43,7 @@ typedef int off_t;
#include <getopt.h> #include <getopt.h>
#include <iostream> #include <iostream>
#include <string> #include <string>
#include <map>
#include <fstream> #include <fstream>
#include <iostream> #include <iostream>
#include <sstream> #include <sstream>
@ -60,6 +61,8 @@ typedef int off_t;
using namespace std; using namespace std;
static string welcomeHTML;
static const string HTMLScripts = " \ static const string HTMLScripts = " \
<style type=\"text/css\"> \n \ <style type=\"text/css\"> \n \
\n \ \n \
@ -179,12 +182,13 @@ string urlEncode(const string &c) {
} }
static bool verboseFlag = false; static bool verboseFlag = false;
static kiwix::Reader* reader; static std::map<std::string, kiwix::Reader*> readers;
static kiwix::Searcher* searcher; static std::map<std::string, kiwix::Searcher*> searchers;
static pthread_mutex_t readerLock = PTHREAD_MUTEX_INITIALIZER; static pthread_mutex_t readerLock = PTHREAD_MUTEX_INITIALIZER;
static pthread_mutex_t mapLock = PTHREAD_MUTEX_INITIALIZER;
static pthread_mutex_t welcomeLock = PTHREAD_MUTEX_INITIALIZER;
static pthread_mutex_t searcherLock = PTHREAD_MUTEX_INITIALIZER; static pthread_mutex_t searcherLock = PTHREAD_MUTEX_INITIALIZER;
static pthread_mutex_t compressorLock = PTHREAD_MUTEX_INITIALIZER; static pthread_mutex_t compressorLock = PTHREAD_MUTEX_INITIALIZER;
static bool hasSearchIndex = false;
/* For compression */ /* For compression */
#define COMPRESSOR_BUFFER_SIZE 5000000 #define COMPRESSOR_BUFFER_SIZE 5000000
@ -199,7 +203,6 @@ static int accessHandlerCallback(void *cls,
const char * upload_data, const char * upload_data,
size_t * upload_data_size, size_t * upload_data_size,
void ** ptr) { void ** ptr) {
/* Unexpected method */ /* Unexpected method */
if (0 != strcmp(method, "GET")) if (0 != strcmp(method, "GET"))
return MHD_NO; return MHD_NO;
@ -214,7 +217,7 @@ static int accessHandlerCallback(void *cls,
/* Check if the response can be compressed */ /* Check if the response can be compressed */
const string acceptEncodingHeaderValue = MHD_lookup_connection_value(connection, MHD_HEADER_KIND, MHD_HTTP_HEADER_ACCEPT_ENCODING) ? const string acceptEncodingHeaderValue = MHD_lookup_connection_value(connection, MHD_HEADER_KIND, MHD_HTTP_HEADER_ACCEPT_ENCODING) ?
MHD_lookup_connection_value(connection, MHD_HEADER_KIND, MHD_HTTP_HEADER_ACCEPT_ENCODING) : ""; MHD_lookup_connection_value(connection, MHD_HEADER_KIND, MHD_HTTP_HEADER_ACCEPT_ENCODING) : "";
const bool acceptEncodingDeflate = (!acceptEncodingHeaderValue.empty() && acceptEncodingHeaderValue.find("deflate") != string::npos ? true : false ); const bool acceptEncodingDeflate = !acceptEncodingHeaderValue.empty() && acceptEncodingHeaderValue.find("deflate") != string::npos;
/* Prepare the variables */ /* Prepare the variables */
struct MHD_Response *response; struct MHD_Response *response;
@ -223,8 +226,24 @@ static int accessHandlerCallback(void *cls,
unsigned int contentLength = 0; unsigned int contentLength = 0;
bool found = true; bool found = true;
int httpResponseCode = MHD_HTTP_OK; int httpResponseCode = MHD_HTTP_OK;
std::string urlStr = string(url);
/* Get searcher and reader */
std::string humanReadableBookId = urlStr.substr(1, urlStr.find("/", 1) != string::npos ? urlStr.find("/", 1) - 1 : urlStr.size() - 2);
pthread_mutex_lock(&mapLock);
kiwix::Searcher *searcher = searchers.find(humanReadableBookId) != searchers.end() ?
searchers.find(humanReadableBookId)->second : NULL;
kiwix::Reader *reader = readers.find(humanReadableBookId) != readers.end() ?
readers.find(humanReadableBookId)->second : NULL;
pthread_mutex_unlock(&mapLock);
if (!strcmp(url, "/search") && hasSearchIndex) { if (!humanReadableBookId.empty()) {
urlStr = urlStr.substr(urlStr.find("/", 1) != string::npos ? urlStr.find("/", 1) : humanReadableBookId.size());
}
/* Display the search results */
if (!strcmp(url, "/search") && searcher != NULL) {
if (searcher != NULL) {
const char* pattern = MHD_lookup_connection_value(connection, MHD_GET_ARGUMENT_KIND, "pattern"); const char* pattern = MHD_lookup_connection_value(connection, MHD_GET_ARGUMENT_KIND, "pattern");
const char* start = MHD_lookup_connection_value(connection, MHD_GET_ARGUMENT_KIND, "start"); const char* start = MHD_lookup_connection_value(connection, MHD_GET_ARGUMENT_KIND, "start");
const char* end = MHD_lookup_connection_value(connection, MHD_GET_ARGUMENT_KIND, "end"); const char* end = MHD_lookup_connection_value(connection, MHD_GET_ARGUMENT_KIND, "end");
@ -237,12 +256,8 @@ static int accessHandlerCallback(void *cls,
if (end != NULL) if (end != NULL)
endNumber = atoi(end); endNumber = atoi(end);
std::string urlStr; /* Get the results */
std::string titleStr;
/* Mutex lock */
pthread_mutex_lock(&searcherLock); pthread_mutex_lock(&searcherLock);
try { try {
std::string patternString = string(pattern); std::string patternString = string(pattern);
searcher->search(patternString, startNumber, endNumber, verboseFlag); searcher->search(patternString, startNumber, endNumber, verboseFlag);
@ -251,26 +266,18 @@ static int accessHandlerCallback(void *cls,
} catch (const std::exception& e) { } catch (const std::exception& e) {
std::cerr << e.what() << std::endl; std::cerr << e.what() << std::endl;
} }
pthread_mutex_unlock(&searcherLock);
content += "</body></html>\n"; content += "</body></html>\n";
mimeType = "text/html; charset=utf-8"; mimeType = "text/html; charset=utf-8";
/* Mutex unlock */
pthread_mutex_unlock(&searcherLock);
} else { } else {
content = "<html><head><title>Error</title></head><body><h1>Unable to find a full text search index for this content.</h1></body></html>";
}
}
/* urlstr */ /* Display the content of a ZIM article */
std::string urlStr = string(url); else if (reader != NULL) {
/* Mutex Lock */
pthread_mutex_lock(&readerLock); pthread_mutex_lock(&readerLock);
/* Load the article from the ZIM file */
if (verboseFlag)
cout << "Loading '" << urlStr << "'... " << endl;
try { try {
found = reader->getContentByUrl(urlStr, content, contentLength, mimeType); found = reader->getContentByUrl(urlStr, content, contentLength, mimeType);
@ -291,16 +298,26 @@ static int accessHandlerCallback(void *cls,
} catch (const std::exception& e) { } catch (const std::exception& e) {
std::cerr << e.what() << std::endl; std::cerr << e.what() << std::endl;
} }
/* Mutex unlock */
pthread_mutex_unlock(&readerLock); pthread_mutex_unlock(&readerLock);
}
/* Rewrite the content (add the search box) */ /* Rewrite the content (add the search box) */
if (hasSearchIndex && mimeType.find("text/html") != string::npos) { if (mimeType.find("text/html") != string::npos) {
/* Special URL rewrite in case the ZIM file uses internal *absolute* URLs like /A/Kiwix */
replaceRegex(content, "$1=\"/" + humanReadableBookId + "/$3/", "(href|src)(=\"/)([A-Z|\-])/");
if (searcher != NULL) {
appendToFirstOccurence(content, "<head>", HTMLScripts); appendToFirstOccurence(content, "<head>", HTMLScripts);
appendToFirstOccurence(content, "<body[^>]*>", HTMLDiv); appendToFirstOccurence(content, "<body[^>]*>", HTMLDiv);
} }
}
}
/* Display the global Welcome page */
else {
pthread_mutex_lock(&welcomeLock);
content = welcomeHTML;
pthread_mutex_unlock(&welcomeLock);
}
/* Compute the length */ /* Compute the length */
contentLength = content.size(); contentLength = content.size();
@ -351,6 +368,7 @@ int main(int argc, char **argv) {
struct MHD_Daemon *daemon; struct MHD_Daemon *daemon;
string zimPath; string zimPath;
string libraryPath; string libraryPath;
string templatePath;
string indexPath; string indexPath;
string rootPath; string rootPath;
int serverPort = 80; int serverPort = 80;
@ -423,9 +441,7 @@ int main(int argc, char **argv) {
exit(1); exit(1);
} }
void *page = NULL; /* Setup the library manager and get the list of books */
/* Setup the library manager */
if (libraryFlag) { if (libraryFlag) {
try { try {
libraryManager.readFile(libraryPath, true); libraryManager.readFile(libraryPath, true);
@ -434,25 +450,11 @@ int main(int argc, char **argv) {
exit(1); exit(1);
} }
/* Get a ZIM file path */ /* Check if the library is not empty (or only remote books)*/
/* TODO: This currently work only with one content in the library */ if (libraryManager.getBookCount(true, false)==0) {
kiwix::Book currentBook; cerr << "The XML library file '" << libraryPath << "' is empty (or has only remote books)." << endl;
vector<string> booksIds = libraryManager.getBooksIds();
vector<string>::iterator itr;
for ( itr = booksIds.begin(); itr != booksIds.end(); ++itr ) {
libraryManager.getBookById(*itr, currentBook);
cout << currentBook.getHumanReadableIdFromPath() << endl;
}
if (libraryManager.getCurrentBook(currentBook)) {
zimPath = currentBook.path;
indexPath = currentBook.indexPath;
} else {
cerr << "The XML library file '" << libraryPath << "' is empty." << endl;
exit(1); exit(1);
} }
} else { } else {
if (!libraryManager.addBookFromPath(zimPath, zimPath, "", false)) { if (!libraryManager.addBookFromPath(zimPath, zimPath, "", false)) {
cerr << "Unable to add the ZIM file '" << libraryPath << "' to the internal library." << endl; cerr << "Unable to add the ZIM file '" << libraryPath << "' to the internal library." << endl;
@ -460,40 +462,7 @@ int main(int argc, char **argv) {
} }
} }
/* Instanciate the ZIM file handler */ /* Try to load the result template */
try {
reader = new kiwix::Reader(zimPath);
} catch (...) {
cerr << "Unable to open the ZIM file '" << zimPath << "'." << endl;
exit(1);
}
/* Instanciate the ZIM index (if necessary) */
if (indexPath != "") {
/* Try with the XapianSearcher */
try {
searcher = new kiwix::XapianSearcher(indexPath);
hasSearchIndex = true;
} catch (...) {
cerr << "Unable to open the search index '" << zimPath << "' with the XapianSearcher." << endl;
}
#ifndef _WIN32
/* Try with the CluceneSearcher */
if (!hasSearchIndex) {
try {
searcher = new kiwix::CluceneSearcher(indexPath);
hasSearchIndex = true;
} catch (...) {
cerr << "Unable to open the search index '" << zimPath << "' with the CluceneSearcher." << endl;
exit(1);
}
}
#endif
/* searcher configuration */
if (hasSearchIndex) {
/* Change the current dir to binary dir */ /* Change the current dir to binary dir */
/* Non portable linux solution */ /* Non portable linux solution */
@ -503,7 +472,6 @@ int main(int argc, char **argv) {
chdir(removeLastPathElement(rootPath).c_str()); chdir(removeLastPathElement(rootPath).c_str());
#endif #endif
/* Try to load the result template */
try { try {
#ifdef _WIN32 #ifdef _WIN32
@ -517,9 +485,8 @@ int main(int argc, char **argv) {
vector<string>::const_iterator templatePathsIt; vector<string>::const_iterator templatePathsIt;
bool templateFound = false; bool templateFound = false;
for(templatePathsIt=templatePaths.begin(); !templateFound && templatePathsIt != templatePaths.end(); templatePathsIt++) { for(templatePathsIt=templatePaths.begin(); !templateFound && templatePathsIt != templatePaths.end(); templatePathsIt++) {
string templatePath = computeAbsolutePath(removeLastPathElement(rootPath), *templatePathsIt); templatePath = computeAbsolutePath(removeLastPathElement(rootPath), *templatePathsIt);
if (fileExists(templatePath)) { if (fileExists(templatePath)) {
searcher->setResultTemplatePath(templatePath);
templateFound = true; templateFound = true;
} }
} }
@ -531,13 +498,69 @@ int main(int argc, char **argv) {
exit(1); exit(1);
} }
/* Instantiate the readers and searchers and build the corresponding maps */
vector<string> booksIds = libraryManager.getBooksIds();
vector<string>::iterator itr;
kiwix::Book currentBook;
for ( itr = booksIds.begin(); itr != booksIds.end(); ++itr ) {
libraryManager.getBookById(*itr, currentBook);
string humanReadableId = currentBook.getHumanReadableIdFromPath();
zimPath = currentBook.path;
indexPath = currentBook.indexPath;
/* Instantiate the ZIM file handler */
kiwix::Reader *reader = NULL;
try {
reader = new kiwix::Reader(zimPath);
} catch (...) {
cerr << "Unable to open the ZIM file '" << zimPath << "'." << endl;
exit(1);
}
readers[humanReadableId] = reader;
/* Instantiate the ZIM index (if necessary) */
kiwix::Searcher *searcher = NULL;
if (indexPath != "") {
bool hasSearchIndex = false;
/* Try with the XapianSearcher */
try {
searcher = new kiwix::XapianSearcher(indexPath);
hasSearchIndex = true;
} catch (...) {
cerr << "Unable to open the search index '" << zimPath << "' with the XapianSearcher." << endl;
}
#ifndef _WIN32
/* Try with the CluceneSearcher */
if (!hasSearchIndex) {
try {
searcher = new kiwix::CluceneSearcher(indexPath);
} catch (...) {
cerr << "Unable to open the search index '" << zimPath << "' with the CluceneSearcher." << endl;
exit(1);
}
}
#endif
searcher->setProtocolPrefix("/"); searcher->setProtocolPrefix("/");
searcher->setSearchProtocolPrefix("/search?"); searcher->setSearchProtocolPrefix("/search?");
searcher->setResultTemplatePath(templatePath);
searchers[humanReadableId] = searcher;
}
} }
} else { /* Compute the Welcome HTML */
hasSearchIndex = false; welcomeHTML = "<html><head><title>Welcome to Kiwix Server</title></head><body><ul>";
for ( itr = booksIds.begin(); itr != booksIds.end(); ++itr ) {
libraryManager.getBookById(*itr, currentBook);
string humanReadableId = currentBook.getHumanReadableIdFromPath();
welcomeHTML += "<p>";
welcomeHTML += "<h1><a href='/" + humanReadableId + "/'>" + currentBook.title + "</a></h1>";
welcomeHTML += "</p><hr/>";
} }
welcomeHTML += "</ol></body></html>";
#ifndef _WIN32 #ifndef _WIN32
/* Fork if necessary */ /* Fork if necessary */
@ -559,10 +582,13 @@ int main(int argc, char **argv) {
/* Mutex init */ /* Mutex init */
pthread_mutex_init(&readerLock, NULL); pthread_mutex_init(&readerLock, NULL);
pthread_mutex_init(&mapLock, NULL);
pthread_mutex_init(&welcomeLock, NULL);
pthread_mutex_init(&searcherLock, NULL); pthread_mutex_init(&searcherLock, NULL);
pthread_mutex_init(&compressorLock, NULL); pthread_mutex_init(&compressorLock, NULL);
/* Start the HTTP daemon */ /* Start the HTTP daemon */
void *page = NULL;
daemon = MHD_start_daemon(MHD_USE_SELECT_INTERNALLY, daemon = MHD_start_daemon(MHD_USE_SELECT_INTERNALLY,
serverPort, serverPort,
NULL, NULL,
@ -579,6 +605,7 @@ int main(int argc, char **argv) {
/* Run endless */ /* Run endless */
bool waiting = true; bool waiting = true;
do { do {
cout << "Waiting a request" << endl;
if (PPID > 0) { if (PPID > 0) {
#ifdef _WIN32 #ifdef _WIN32

Loading…
Cancel
Save