forked from kiwix/kiwix-tools
You can not select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
1195 lines
38 KiB
1195 lines
38 KiB
/*
|
|
* Copyright 2009-2016 Emmanuel Engelhart <kelson@kiwix.org>
|
|
*
|
|
* This program is free software; you can redistribute it and/or modify
|
|
* it under the terms of the GNU General Public License as published by
|
|
* the Free Software Foundation; either version 3 of the License, or
|
|
* any later version.
|
|
*
|
|
* This program is distributed in the hope that it will be useful,
|
|
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
|
* GNU General Public License for more details.
|
|
*
|
|
* You should have received a copy of the GNU General Public License
|
|
* along with this program; if not, write to the Free Software
|
|
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,
|
|
* MA 02110-1301, USA.
|
|
*/
|
|
|
|
#define KIWIX_MIN_CONTENT_SIZE_TO_DEFLATE 100
|
|
|
|
#ifdef __APPLE__
|
|
#import <sys/sysctl.h>
|
|
#import <sys/types.h>
|
|
#define MIBSIZE 4
|
|
#endif
|
|
|
|
#ifdef _WIN32
|
|
|
|
#if !defined(__MINGW32__) && (_MSC_VER < 1600)
|
|
#include "stdint4win.h"
|
|
#endif
|
|
#include <winsock2.h>
|
|
#include <ws2tcpip.h> // otherwise socklen_t is not a recognized type
|
|
//#include <Windows.h> // otherwise int is not a recognized type
|
|
// typedef int off_t;
|
|
// typedef SSIZE_T ssize_t;
|
|
typedef UINT64 uint64_t;
|
|
typedef UINT16 uint16_t;
|
|
extern "C" {
|
|
#include <microhttpd.h>
|
|
}
|
|
|
|
#endif
|
|
|
|
#include <getopt.h>
|
|
#include <kiwix/common/otherTools.h>
|
|
#include <kiwix/common/pathTools.h>
|
|
#include <kiwix/common/regexTools.h>
|
|
#include <kiwix/common/stringTools.h>
|
|
#include <kiwix/manager.h>
|
|
#include <kiwix/reader.h>
|
|
#include <kiwix/searcher.h>
|
|
#include <pthread.h>
|
|
#include <stdio.h>
|
|
#include <stdlib.h>
|
|
#include <string.h>
|
|
#include <zim/article.h>
|
|
#include <zim/file.h>
|
|
#include <zim/fileiterator.h>
|
|
#include <zim/zim.h>
|
|
#include <zlib.h>
|
|
#include <atomic>
|
|
#include <fstream>
|
|
#include <iostream>
|
|
#include <iostream>
|
|
#include <map>
|
|
#include <sstream>
|
|
#include <thread>
|
|
#include <string>
|
|
#include <vector>
|
|
#include "server-resources.h"
|
|
|
|
#ifndef _WIN32
|
|
#include <arpa/inet.h>
|
|
#include <ifaddrs.h>
|
|
#include <microhttpd.h>
|
|
#include <netdb.h>
|
|
#include <stdint.h>
|
|
#include <sys/socket.h>
|
|
#include <unistd.h>
|
|
#endif
|
|
|
|
#include "request_context.h"
|
|
|
|
#ifdef interface
|
|
#undef interface
|
|
#endif
|
|
|
|
using namespace std;
|
|
|
|
static bool noLibraryButtonFlag = false;
|
|
static bool noSearchBarFlag = false;
|
|
static string welcomeHTML;
|
|
static std::atomic_bool isVerbose(false);
|
|
static std::string rootLocation = "";
|
|
static std::map<std::string, std::string> extMimeTypes;
|
|
static std::map<std::string, kiwix::Reader*> readers;
|
|
static std::map<std::string, kiwix::Searcher*> searchers;
|
|
static kiwix::Searcher* globalSearcher = nullptr;
|
|
static pthread_mutex_t searchLock = PTHREAD_MUTEX_INITIALIZER;
|
|
static pthread_mutex_t compressorLock = PTHREAD_MUTEX_INITIALIZER;
|
|
static pthread_mutex_t regexLock = PTHREAD_MUTEX_INITIALIZER;
|
|
|
|
/* Try to get the mimeType from the file extension */
|
|
static std::string getMimeTypeForFile(const std::string& filename)
|
|
{
|
|
std::string mimeType = "text/plain";
|
|
unsigned int pos = filename.find_last_of(".");
|
|
|
|
if (pos != std::string::npos) {
|
|
std::string extension = filename.substr(pos + 1);
|
|
|
|
auto it = extMimeTypes.find(extension);
|
|
if (it != extMimeTypes.end()) {
|
|
mimeType = it->second;
|
|
} else {
|
|
it = extMimeTypes.find(kiwix::lcAll(extension));
|
|
if (it != extMimeTypes.end()) {
|
|
mimeType = it->second;
|
|
}
|
|
}
|
|
}
|
|
|
|
return mimeType;
|
|
}
|
|
|
|
|
|
/*
 * Tell whether `base` begins with the characters of `start`.
 * An empty `start` is a prefix of every string.
 */
static bool startswith(const std::string& base, const std::string& start)
{
  if (start.size() > base.size()) {
    return false;
  }
  return base.compare(0, start.size(), start) == 0;
}
|
|
|
|
|
|
/*
 * Inject the kiwix-serve taskbar markup into an HTML document, in place.
 *
 * content             - HTML page to modify.
 * humanReadableBookId - id of the book being served; when empty the global
 *                       taskbar resource is used instead of the per-book one.
 *
 * Nothing is changed when the search bar has been disabled
 * (noSearchBarFlag). regexLock is held for the whole rewrite.
 * NOTE(review): replaceRegex() is called as (text, replacement, pattern)
 * everywhere in this file - confirm against regexTools.h.
 */
void introduceTaskbar(string& content, const string& humanReadableBookId)
{
  pthread_mutex_lock(&regexLock);
  if (!noSearchBarFlag) {
    /* Head part: shared includes, plus a style rule hiding the library
       button when it has been disabled. */
    content = appendToFirstOccurence(
        content,
        "<head>",
        RESOURCE::include_html_part + (noLibraryButtonFlag
          ? "<style>#kiwix_serve_taskbar_library_button { display: none }</style>"
          : "")
    );

    /* Body part: global taskbar, or the per-book one with its
       __CONTENT__ placeholder filled in. */
    if (humanReadableBookId.empty()) {
      content = appendToFirstOccurence(
          content,
          "<body[^>]*>",
          RESOURCE::global_taskbar_html_part);
    } else {
      content = appendToFirstOccurence(
          content,
          "<body[^>]*>",
          replaceRegex(
              RESOURCE::taskbar_html_part,
              humanReadableBookId,
              "__CONTENT__"));
    }

    /* Resolve the remaining placeholders. */
    content = replaceRegex(content, rootLocation, "__ROOT_LOCATION__");
    content = replaceRegex(content, replaceRegex(humanReadableBookId, "%26", "&"), "__CONTENT_ESCAPED__");
  }
  pthread_mutex_unlock(&regexLock);
}
|
|
|
|
/*
 * Deflate `content` in place when that is worthwhile.
 *
 * content  - payload; replaced by the compressed bytes on success.
 * mimeType - only "text/...", "application/javascript" and
 *            "application/json" payloads are considered.
 *
 * Returns true when `content` now holds deflate data, false when it was left
 * untouched (wrong type, KIWIX_MIN_CONTENT_SIZE_TO_DEFLATE bytes or fewer,
 * zlib error, or the result would not be smaller).
 *
 * The scratch buffer is shared between calls and protected by
 * compressorLock.
 */
static bool compress_content(string& content, const string& mimeType)
{
  static std::vector<Bytef> compr_buffer;

  /* Only textual payloads are compressed. */
  const bool compressible
      = mimeType.find("text/") != string::npos
        || mimeType.find("application/javascript") != string::npos
        || mimeType.find("application/json") != string::npos;
  if (!compressible) {
    return false;
  }

  /* If the content is too short, no need to compress it */
  const uLong contentLength = static_cast<uLong>(content.size());
  if (contentLength <= KIWIX_MIN_CONTENT_SIZE_TO_DEFLATE) {
    return false;
  }

  const uLong bufferBound = compressBound(contentLength);

  pthread_mutex_lock(&compressorLock);

  /* The buffer must really contain bufferBound elements: reserve() only
     changes the capacity, and indexing an empty vector is undefined. */
  if (compr_buffer.size() < bufferBound) {
    compr_buffer.resize(bufferBound);
  }

  uLongf comprLen = static_cast<uLongf>(compr_buffer.size());
  const int err = compress(compr_buffer.data(),
                           &comprLen,
                           reinterpret_cast<const Bytef*>(content.data()),
                           contentLength);

  bool deflated = false;
  if (err == Z_OK && comprLen > 2 && comprLen < (contentLength + 2)) {
    /* /!\ Internet Explorer has a bug with deflate compression.
       It can not handle the first two bytes (compression headers)
       We need to chunk them off (move the content 2bytes)
       It has no incidence on other browsers
       See http://www.subbu.org/blog/2008/03/ie7-deflate-or-not and comments */
    content.assign(reinterpret_cast<const char*>(compr_buffer.data()) + 2,
                   comprLen - 2);
    deflated = true;
  }

  pthread_mutex_unlock(&compressorLock);
  return deflated;
}
|
|
|
|
|
|
static struct MHD_Response* build_response(const void* data,
|
|
unsigned int length,
|
|
const std::string& httpRedirection,
|
|
const std::string& mimeType,
|
|
bool deflated,
|
|
bool cacheEnabled)
|
|
{
|
|
/* Create the response */
|
|
struct MHD_Response* response = MHD_create_response_from_data(
|
|
length, const_cast<void*>(data), MHD_NO, MHD_YES);
|
|
|
|
/* Make a redirection if necessary otherwise send the content */
|
|
if (!httpRedirection.empty()) {
|
|
MHD_add_response_header(
|
|
response, MHD_HTTP_HEADER_LOCATION, httpRedirection.c_str());
|
|
} else {
|
|
/* Add if necessary the content-encoding */
|
|
if (deflated) {
|
|
MHD_add_response_header(
|
|
response, MHD_HTTP_HEADER_VARY, "Accept-Encoding");
|
|
MHD_add_response_header(
|
|
response, MHD_HTTP_HEADER_CONTENT_ENCODING, "deflate");
|
|
}
|
|
|
|
/* Tell the client that byte ranges are accepted */
|
|
MHD_add_response_header(response, MHD_HTTP_HEADER_ACCEPT_RANGES, "bytes");
|
|
|
|
/* Specify the mime type */
|
|
MHD_add_response_header(
|
|
response, MHD_HTTP_HEADER_CONTENT_TYPE, mimeType.c_str());
|
|
}
|
|
|
|
/* Force to close the connection - cf. 100% CPU usage with v. 4.4 (in Lucid)
|
|
*/
|
|
// MHD_add_response_header(response, MHD_HTTP_HEADER_CONNECTION, "close");
|
|
|
|
/* Allow cross-domain requests */
|
|
// MHD_add_response_header(response,
|
|
// MHD_HTTP_HEADER_ACCESS_CONTROL_ALLOW_ORIGIN, "*");
|
|
MHD_add_response_header(response, "Access-Control-Allow-Origin", "*");
|
|
|
|
if (cacheEnabled) { /* Force cache */
|
|
MHD_add_response_header(
|
|
response, MHD_HTTP_HEADER_CACHE_CONTROL, "max-age=2723040, public");
|
|
} else { /* Prevent cache (for random page) */
|
|
MHD_add_response_header(response,
|
|
MHD_HTTP_HEADER_CACHE_CONTROL,
|
|
"no-cache, no-store, must-revalidate");
|
|
}
|
|
return response;
|
|
}
|
|
|
|
|
|
/*
 * Build the "Not Found" page for `request` and set its status code to
 * MHD_HTTP_NOT_FOUND.
 *
 * request             - current request; its full URL is quoted in the page.
 * humanReadableBookId - forwarded to introduceTaskbar(); may be empty.
 *
 * The URL comes straight from the client, so it is passed through
 * kiwix::encodeDiples() before being embedded in the HTML, the same way
 * handle_search() treats its search pattern.
 */
static struct MHD_Response* build_404(RequestContext* request,
                                      const std::string& humanReadableBookId) {
  std::string requestedUrl = request->get_full_url();

  std::string content
      = "<!DOCTYPE html>\n<html><head><meta "
        "content=\"text/html;charset=UTF-8\" http-equiv=\"content-type\" "
        "/><title>Content not found</title></head><body><h1>Not "
        "Found</h1><p>The requested URL \""
        + kiwix::encodeDiples(requestedUrl)
        + "\" was not found on this server.</p></body></html>";

  auto mimeType = "text/html";
  request->httpResponseCode = MHD_HTTP_NOT_FOUND;
  introduceTaskbar(content, humanReadableBookId);

  bool deflated
      = request->can_compress() && compress_content(content, mimeType);
  return build_response(
      content.data(), content.size(), "", mimeType, deflated, false);
}
|
|
|
|
/*
 * Build the response serving the welcome page (a copy of the file-level
 * `welcomeHTML`), deflated when the client accepts it. The response is
 * created with caching disabled.
 */
static struct MHD_Response* build_homepage(RequestContext* request)
{
  const std::string mimeType("text/html; charset=utf-8");

  /* Work on a copy: compression rewrites the buffer in place. */
  std::string body(welcomeHTML);

  bool deflated = false;
  if (request->can_compress()) {
    deflated = compress_content(body, mimeType);
  }

  return build_response(
      body.data(), body.size(), "", mimeType, deflated, false);
}
|
|
|
|
struct RunningResponse {
|
|
zim::Article* article;
|
|
int range_start;
|
|
|
|
RunningResponse(zim::Article* article,
|
|
int range_start) :
|
|
article(article),
|
|
range_start(range_start)
|
|
{}
|
|
|
|
~RunningResponse() {
|
|
delete article;
|
|
}
|
|
};
|
|
|
|
|
|
ssize_t callback_reader_from_article(void* cls,
|
|
uint64_t pos,
|
|
char* buf,
|
|
size_t max)
|
|
{
|
|
RunningResponse* response = static_cast<RunningResponse*>(cls);
|
|
|
|
size_t max_size_to_set = min<size_t>(
|
|
max,
|
|
response->article->getArticleSize() - pos - response->range_start);
|
|
|
|
if (max_size_to_set <= 0) {
|
|
return MHD_CONTENT_READER_END_WITH_ERROR;
|
|
}
|
|
|
|
zim::Blob blob = response->article->getData(response->range_start+pos, max_size_to_set);
|
|
memcpy(buf, blob.data(), max_size_to_set);
|
|
return max_size_to_set;
|
|
}
|
|
|
|
void callback_free_article(void* cls)
|
|
{
|
|
RunningResponse* response = static_cast<RunningResponse*>(cls);
|
|
delete response;
|
|
}
|
|
|
|
/*
 * Build a streaming response serving `range_len` bytes of `article`,
 * starting at `range_start`.
 *
 * article     - article to stream; a heap copy is made and owned by the
 *               response's RunningResponse.
 * range_start - offset of the first byte to send.
 * range_len   - number of bytes to send.
 * mimeType    - value of the Content-Type header.
 *
 * The body size given to libmicrohttpd is the length of the range, not the
 * size of the whole article, so the reader callback is not asked for bytes
 * beyond the range announced in Content-Length / Content-Range.
 */
static struct MHD_Response* build_callback_response_from_article(
    zim::Article& article, int range_start, int range_len, const std::string& mimeType)
{
  RunningResponse* run_response =
      new RunningResponse(new zim::Article(article), range_start);

  /* A negative length (start past the end) must not turn into a huge
     unsigned size. */
  const uint64_t responseSize = range_len > 0 ? range_len : 0;

  struct MHD_Response* response
      = MHD_create_response_from_callback(responseSize,
                                          16384,
                                          callback_reader_from_article,
                                          run_response,
                                          callback_free_article);

  /* Tell the client that byte ranges are accepted */
  MHD_add_response_header(response, MHD_HTTP_HEADER_ACCEPT_RANGES, "bytes");

  std::ostringstream oss;
  oss << "bytes " << range_start << "-" << range_start + range_len - 1 << "/" << article.getArticleSize();
  MHD_add_response_header(response, MHD_HTTP_HEADER_CONTENT_RANGE, oss.str().c_str());

  MHD_add_response_header(response, MHD_HTTP_HEADER_CONTENT_LENGTH,
                          std::to_string(range_len).c_str());

  /* Specify the mime type */
  MHD_add_response_header(
      response, MHD_HTTP_HEADER_CONTENT_TYPE, mimeType.c_str());

  /* Allow cross-domain requests. The literal header name is used rather
     than the MHD_HTTP_HEADER_ACCESS_CONTROL_ALLOW_ORIGIN macro. */
  MHD_add_response_header(response, "Access-Control-Allow-Origin", "*");

  MHD_add_response_header(
      response, MHD_HTTP_HEADER_CACHE_CONTROL, "max-age=2723040, public");

  return response;
}
|
|
|
|
std::pair<kiwix::Reader*, kiwix::Searcher*>
|
|
get_from_humanReadableBookId(const std::string& humanReadableBookId) {
|
|
kiwix::Searcher* searcher
|
|
= searchers.find(humanReadableBookId) != searchers.end()
|
|
? searchers.find(humanReadableBookId)->second
|
|
: globalSearcher;
|
|
kiwix::Reader* reader = readers.find(humanReadableBookId) != readers.end()
|
|
? readers.find(humanReadableBookId)->second
|
|
: NULL;
|
|
return std::pair<kiwix::Reader*, kiwix::Searcher*>(reader, searcher);
|
|
}
|
|
|
|
/*
 * Handle the suggestion endpoint: answer with a JSON array of title
 * suggestions for the "term" argument within the book named by "content".
 *
 * When a searcher is available a last entry proposing a full-text search
 * for the term is appended. The homepage is returned when either argument
 * is missing.
 *
 * Both the suggestions and the client-supplied term are escaped (backslash
 * and double quote) before being placed inside JSON string literals.
 */
static struct MHD_Response* handle_suggest(RequestContext* request)
{
  if (isVerbose.load()) {
    printf("** running handle_suggest\n");
  }

  std::string content;
  std::string mimeType;
  unsigned int maxSuggestionCount = 10;
  unsigned int suggestionCount = 0;
  std::string suggestion;

  std::string humanReadableBookId;
  std::string term;
  try {
    humanReadableBookId = request->get_argument("content");
    term = request->get_argument("term");
  } catch (const std::out_of_range&) {
    return build_homepage(request);
  }

  if (isVerbose.load()) {
    printf("Searching suggestions for: \"%s\"\n", term.c_str());
  }

  auto reader_searcher = get_from_humanReadableBookId(humanReadableBookId);
  auto reader = reader_searcher.first;
  auto searcher = reader_searcher.second;

  pthread_mutex_lock(&searchLock);
  /* Get the suggestions */
  content = "[";
  if (reader != nullptr) {
    reader->searchSuggestionsSmart(term, maxSuggestionCount);
    while (reader->getNextSuggestion(suggestion)) {
      /* Backslashes first, otherwise the ones added for the quotes would
         be escaped a second time. */
      kiwix::stringReplacement(suggestion, "\\", "\\\\");
      kiwix::stringReplacement(suggestion, "\"", "\\\"");
      content += (content == "[" ? "" : ",");
      content += "{\"value\":\"" + suggestion + "\",\"label\":\"" + suggestion
                 + "\"}";
      suggestionCount++;
    }
  }
  pthread_mutex_unlock(&searchLock);

  /* Propose the fulltext search if possible */
  if (searcher != NULL) {
    /* The term comes from the client: escape it like the suggestions. */
    std::string escapedTerm = term;
    kiwix::stringReplacement(escapedTerm, "\\", "\\\\");
    kiwix::stringReplacement(escapedTerm, "\"", "\\\"");

    content += (suggestionCount == 0 ? "" : ",");
    content += "{\"value\":\"" + escapedTerm
               + " \", \"label\":\"containing '" + escapedTerm
               + "'...\"}";
  }

  content += "]";
  mimeType = "application/json; charset=utf-8";
  bool deflated = request->can_compress() && compress_content(content, mimeType);
  return build_response(
      content.data(), content.size(), "", mimeType, deflated, true);
}
|
|
|
|
/*
 * Serve an embedded skin resource. The resource name is the request URL
 * with its first six characters removed (the dispatcher only routes URLs
 * starting with "/skin/" here). Unknown resources produce the 404 page.
 */
static struct MHD_Response* handle_skin(RequestContext* request)
{
  if (isVerbose.load()) {
    printf("** running handle_skin\n");
  }

  auto resourceName = request->get_url().substr(6);

  std::string body;
  try {
    body = getResource(resourceName);
  } catch (const ResourceNotFound& e) {
    return build_404(request, "");
  }

  std::string mimeType = getMimeTypeForFile(resourceName);

  bool deflated = false;
  if (request->can_compress()) {
    deflated = compress_content(body, mimeType);
  }

  return build_response(
      body.data(), body.size(), "", mimeType, deflated, true);
}
|
|
|
|
/*
 * Handle the search endpoint.
 *
 * Arguments read from the request: "content" (book id) and "pattern" are
 * mandatory (the homepage is returned otherwise); "latitude", "longitude"
 * and "distance" together enable a geo search; "start" and "end" bound the
 * result window (defaults 0 and 25).
 *
 * Flow: if a title variant of the pattern maps to an article, redirect to it
 * (302). Otherwise run the full-text or geo search when a searcher exists,
 * or answer with a 404 page explaining that no full-text search is
 * available. The resulting HTML gets the taskbar and is optionally deflated.
 */
static struct MHD_Response* handle_search(RequestContext* request)
{
  if (isVerbose.load()) {
    printf("** running handle_search\n");
  }

  std::string content;
  std::string mimeType;
  std::string httpRedirection;

  std::string humanReadableBookId;
  std::string patternString;
  try {
    humanReadableBookId = request->get_argument("content");
    patternString = request->get_argument("pattern");
  } catch (const std::out_of_range&) {
    return build_homepage(request);
  }

  /* Retrieve the geo search arguments. They are zero-initialised so that
     a partially parsed query never leaves indeterminate values behind. */
  bool has_geo_query = false;
  float latitude = 0;
  float longitude = 0;
  float distance = 0;
  try {
    latitude = request->get_argument<float>("latitude");
    longitude = request->get_argument<float>("longitude");
    distance = request->get_argument<float>("distance");
    has_geo_query = true;
  } catch(const std::out_of_range&) {}
    catch(const std::invalid_argument&) {}

  /* Search results for searches from the welcome page should not
     be cached
  */
  auto reader_searcher = get_from_humanReadableBookId(humanReadableBookId);
  auto reader = reader_searcher.first;
  auto searcher = reader_searcher.second;
  bool cacheEnabled = !(searcher == globalSearcher);

  /* Try first to load directly the article */
  if (reader != nullptr) {
    std::string patternCorrespondingUrl;
    auto variants = reader->getTitleVariants(patternString);
    auto variantsItr = variants.begin();

    while (patternCorrespondingUrl.empty() && variantsItr != variants.end()) {
      reader->getPageUrlFromTitle(*variantsItr, patternCorrespondingUrl);
      variantsItr++;
    }

    /* If article found then redirect directly to it */
    if (!patternCorrespondingUrl.empty()) {
      httpRedirection
          = rootLocation + "/" + humanReadableBookId + "/" + patternCorrespondingUrl;
      request->httpResponseCode = MHD_HTTP_FOUND;
      return build_response("", 0, httpRedirection, "", false, true);
    }
  }

  /* Make the search */
  if (searcher != nullptr &&
      (!patternString.empty() || has_geo_query)) {
    /* The window bounds are parsed as unsigned, so store them as such. */
    unsigned int start = 0;
    try {
      start = request->get_argument<unsigned int>("start");
    } catch (const std::exception&) {}
    unsigned int end = 25;
    try {
      end = request->get_argument<unsigned int>("end");
    } catch (const std::exception&) {}

    /* Get the results */
    pthread_mutex_lock(&searchLock);
    try {
      if (patternString.empty()) {
        /* Only reachable with has_geo_query == true. */
        searcher->geo_search(latitude, longitude, distance,
                             start, end, isVerbose.load());
      } else {
        searcher->search(patternString,
                         start, end, isVerbose.load());
      }
      content = searcher->getHtml();
    } catch (const std::exception& e) {
      std::cerr << e.what() << std::endl;
    }
    pthread_mutex_unlock(&searchLock);
  } else {
    content = "<!DOCTYPE html>\n<html><head><meta content=\"text/html;charset=UTF-8\" http-equiv=\"content-type\" /><title>Fulltext search unavailable</title></head><body><h1>Not Found</h1><p>There is no article with the title <b>\"" + kiwix::encodeDiples(patternString) + "\"</b> and the fulltext search engine is not available for this content.</p></body></html>";
    request->httpResponseCode = MHD_HTTP_NOT_FOUND;
  }

  mimeType = "text/html; charset=utf-8";

  introduceTaskbar(content, humanReadableBookId);

  bool deflated = request->can_compress() && compress_content(content, mimeType);
  return build_response(content.data(),
                        content.size(),
                        httpRedirection,
                        mimeType,
                        deflated,
                        cacheEnabled);
}
|
|
|
|
/*
 * Handle the random endpoint: redirect (302) to a random page of the book
 * named by the "content" argument.
 *
 * The homepage is returned when the argument is missing or no reader is
 * registered for that book. The redirect status is only set once a
 * redirection is actually produced, so those homepage fallbacks keep the
 * status chosen by the caller instead of being sent as a 302 without a
 * Location header.
 */
static struct MHD_Response* handle_random(RequestContext* request)
{
  if (isVerbose.load()) {
    printf("** running handle_random\n");
  }

  std::string humanReadableBookId;
  try {
    humanReadableBookId = request->get_argument("content");
  } catch (const std::out_of_range&) {
    return build_homepage(request);
  }

  auto reader = get_from_humanReadableBookId(humanReadableBookId).first;
  if (reader == nullptr) {
    return build_homepage(request);
  }

  std::string randomUrl = reader->getRandomPageUrl();
  std::string httpRedirection
      = rootLocation + "/" + humanReadableBookId + "/" + kiwix::urlEncode(randomUrl);

  request->httpResponseCode = MHD_HTTP_FOUND;
  return build_response("", 0, httpRedirection, "", false, false);
}
|
|
|
|
/*
 * Serve an article of a book. The first URL part names the book; the rest
 * of the URL is the article to look up.
 *
 * - Unknown book (or no URL part): the homepage is returned.
 * - Article not found, lookup error, or redirect chain longer than 42 hops:
 *   the 404 page is returned.
 * - Text, JavaScript and JSON payloads are loaded in memory; HTML and CSS
 *   get their absolute internal links rewritten under
 *   rootLocation/bookId, HTML additionally gets a <base> tag and the
 *   taskbar; the result is optionally deflated.
 * - Anything else is streamed through the callback response, honouring the
 *   request range.
 */
static struct MHD_Response* handle_content(RequestContext* request)
{
  if (isVerbose.load()) {
    printf("** running handle_content\n");
  }

  std::string baseUrl;
  std::string content;
  std::string mimeType;

  bool found = false;
  zim::Article article;

  std::string humanReadableBookId;
  try {
    humanReadableBookId = request->get_url_part(0);
  } catch (const std::out_of_range& e) {
    return build_homepage(request);
  }

  auto reader = get_from_humanReadableBookId(humanReadableBookId).first;
  if (reader == nullptr) {
    return build_homepage(request);
  }

  auto urlStr = request->get_url().substr(humanReadableBookId.size()+1);

  try {
    found = reader->getArticleObjectByDecodedUrl(urlStr, article);

    if (found) {
      /* Follow redirects, at most 42 of them */
      unsigned int loopCounter = 0;
      while (article.isRedirect() && loopCounter < 42) {
        article = article.getRedirectArticle();
        ++loopCounter;
      }

      /* Too many hops: still a redirect once the budget is spent. Testing
         the article itself avoids rejecting a chain that resolved on its
         very last allowed hop. */
      if (article.isRedirect())
        found = false;
    }
  } catch (const std::exception& e) {
    std::cerr << e.what() << std::endl;
    found = false;
  }

  if (!found) {
    if (isVerbose.load())
      printf("Failed to find %s\n", urlStr.c_str());

    return build_404(request, humanReadableBookId);
  }

  try {
    mimeType = article.getMimeType();
  } catch (exception& e) {
    mimeType = "application/octet-stream";
  }

  if (isVerbose.load()) {
    printf("Found %s\n", urlStr.c_str());
    printf("mimeType: %s\n", mimeType.c_str());
  }

  if (mimeType.find("text/") != string::npos
      || mimeType.find("application/javascript") != string::npos
      || mimeType.find("application/json") != string::npos) {
    zim::Blob raw_content = article.getData();
    content = string(raw_content.data(), raw_content.size());

    /* Special rewrite URL in case of ZIM file use intern *absolute* url like
     * /A/Kiwix */
    if (mimeType.find("text/html") != string::npos) {
      baseUrl = "/" + std::string(1, article.getNamespace()) + "/"
                + article.getUrl();
      pthread_mutex_lock(&regexLock);
      content = replaceRegex(content,
                             "$1$2" + rootLocation + "/" + humanReadableBookId + "/$3/",
                             "(href|src)(=[\"|\']{0,1})/([A-Z|\\-])/");
      content = replaceRegex(content,
                             "$1$2" + rootLocation + "/" + humanReadableBookId + "/$3/",
                             "(@import[ ]+)([\"|\']{0,1})/([A-Z|\\-])/");
      content = replaceRegex(
          content,
          "<head><base href=\"" + rootLocation + "/" + humanReadableBookId + baseUrl + "\" />",
          "<head>");
      pthread_mutex_unlock(&regexLock);
      introduceTaskbar(content, humanReadableBookId);
    } else if (mimeType.find("text/css") != string::npos) {
      pthread_mutex_lock(&regexLock);
      content = replaceRegex(content,
                             "$1$2" + rootLocation + "/" + humanReadableBookId + "/$3/",
                             "(url|URL)(\\([\"|\']{0,1})/([A-Z|\\-])/");
      pthread_mutex_unlock(&regexLock);
    }

    bool deflated
        = request->can_compress() && compress_content(content, mimeType);
    return build_response(
        content.data(), content.size(), "", mimeType, deflated, true);
  } else {
    /* Binary content is streamed; a range end of -1 means "to the end of
       the article". */
    int range_len;
    if (request->get_range().second == -1) {
      range_len = article.getArticleSize() - request->get_range().first;
    } else {
      range_len = request->get_range().second - request->get_range().first;
    }
    return build_callback_response_from_article(
        article,
        request->get_range().first,
        range_len,
        mimeType);
  }
}
|
|
|
|
int print_key_value (void *cls, enum MHD_ValueKind kind,
|
|
const char *key, const char *value)
|
|
{
|
|
printf (" - %s: '%s'\n", key, value);
|
|
return MHD_YES;
|
|
}
|
|
|
|
static int accessHandlerCallback(void* cls,
|
|
struct MHD_Connection* connection,
|
|
const char* url,
|
|
const char* method,
|
|
const char* version,
|
|
const char* upload_data,
|
|
size_t* upload_data_size,
|
|
void** ptr)
|
|
{
|
|
RequestContext request(connection, rootLocation, url, method, version);
|
|
/* Unexpected method */
|
|
if (request.get_method() != RequestMethod::GET && request.get_method() != RequestMethod::POST) {
|
|
return MHD_NO;
|
|
}
|
|
|
|
if (isVerbose.load()) {
|
|
printf("======================\n");
|
|
request.print_debug_info();
|
|
}
|
|
|
|
/* Prepare the variables */
|
|
struct MHD_Response* response;
|
|
request.httpResponseCode = request.has_range() ? MHD_HTTP_PARTIAL_CONTENT : MHD_HTTP_OK;
|
|
|
|
if (! request.is_valid_url()) {
|
|
response = build_homepage(&request);
|
|
} else {
|
|
if (startswith(request.get_url(), "/skin/")) {
|
|
response = handle_skin(&request);
|
|
} else if (request.get_url() == "/search") {
|
|
response = handle_search(&request);
|
|
} else if (request.get_url() == "/suggest") {
|
|
response = handle_suggest(&request);
|
|
} else if (request.get_url() == "/random") {
|
|
response = handle_random(&request);
|
|
} else {
|
|
response = handle_content(&request);
|
|
}
|
|
}
|
|
|
|
/* Queue the response */
|
|
if (isVerbose.load()) {
|
|
printf("Response :\n");
|
|
printf("httpResponseCode : %d\n", request.httpResponseCode);
|
|
printf("headers :\n");
|
|
MHD_get_response_headers(response, print_key_value, nullptr);
|
|
printf("----------------------\n");
|
|
}
|
|
int ret = MHD_queue_response(connection, request.httpResponseCode, response);
|
|
MHD_destroy_response(response);
|
|
|
|
return ret;
|
|
}
|
|
|
|
int main(int argc, char** argv)
|
|
{
|
|
struct MHD_Daemon* daemon;
|
|
vector<string> zimPathes;
|
|
string libraryPath;
|
|
string indexPath;
|
|
string rootPath;
|
|
string interface;
|
|
int serverPort = 80;
|
|
int daemonFlag = false;
|
|
int libraryFlag = false;
|
|
string PPIDString;
|
|
unsigned int PPID = 0;
|
|
unsigned int nb_threads = std::thread::hardware_concurrency();
|
|
kiwix::Manager libraryManager;
|
|
|
|
static struct option long_options[]
|
|
= {{"daemon", no_argument, 0, 'd'},
|
|
{"verbose", no_argument, 0, 'v'},
|
|
{"library", no_argument, 0, 'l'},
|
|
{"nolibrarybutton", no_argument, 0, 'm'},
|
|
{"nosearchbar", no_argument, 0, 'n'},
|
|
{"index", required_argument, 0, 'i'},
|
|
{"attachToProcess", required_argument, 0, 'a'},
|
|
{"port", required_argument, 0, 'p'},
|
|
{"interface", required_argument, 0, 'f'},
|
|
{"threads", required_argument, 0, 't'},
|
|
{"urlRootLocation", required_argument, 0, 'r'},
|
|
{0, 0, 0, 0}};
|
|
|
|
/* Argument parsing */
|
|
while (true) {
|
|
int option_index = 0;
|
|
int c
|
|
= getopt_long(argc, argv, "mndvli:a:p:f:t:r:", long_options, &option_index);
|
|
|
|
if (c != -1) {
|
|
switch (c) {
|
|
case 'd':
|
|
daemonFlag = true;
|
|
break;
|
|
case 'v':
|
|
isVerbose.store(true);
|
|
break;
|
|
case 'l':
|
|
libraryFlag = true;
|
|
break;
|
|
case 'n':
|
|
noSearchBarFlag = true;
|
|
break;
|
|
case 'm':
|
|
noLibraryButtonFlag = true;
|
|
break;
|
|
case 'i':
|
|
indexPath = optarg;
|
|
break;
|
|
case 'p':
|
|
serverPort = atoi(optarg);
|
|
break;
|
|
case 'a':
|
|
PPIDString = string(optarg);
|
|
PPID = atoi(optarg);
|
|
break;
|
|
case 'f':
|
|
interface = string(optarg);
|
|
break;
|
|
case 't':
|
|
nb_threads = atoi(optarg);
|
|
break;
|
|
case 'r':
|
|
rootLocation = string(optarg);
|
|
|
|
/* prepend prefix "/" if not provided*/
|
|
if (rootLocation[0] != '/'){
|
|
rootLocation = "/" + rootLocation;
|
|
}
|
|
|
|
/* remove the trailing slash if provided*/
|
|
if (rootLocation.back() == '/'){
|
|
rootLocation.erase(rootLocation.size() - 1);
|
|
}
|
|
|
|
break;
|
|
}
|
|
} else {
|
|
if (optind <= argc) {
|
|
if (libraryFlag) {
|
|
libraryPath = argv[optind++];
|
|
} else {
|
|
while (optind < argc)
|
|
zimPathes.push_back(std::string(argv[optind++]));
|
|
}
|
|
break;
|
|
}
|
|
}
|
|
}
|
|
|
|
/* Print usage)) if necessary */
|
|
if (zimPathes.empty() && libraryPath.empty()) {
|
|
cerr << "Usage: kiwix-serve [--index=INDEX_PATH] [--port=PORT] [--verbose] "
|
|
"[--nosearchbar] [--nolibrarybutton] [--daemon] "
|
|
"[--attachToProcess=PID] [--interface=IF_NAME] "
|
|
"[--urlRootLocation=/URL_ROOT] "
|
|
"[--threads=NB_THREAD(" << nb_threads << ")] ZIM_PATH+"
|
|
<< endl;
|
|
cerr << " kiwix-serve --library [--port=PORT] [--verbose] [--daemon] "
|
|
"[--nosearchbar] [--nolibrarybutton] [--attachToProcess=PID] "
|
|
"[--interface=IF_NAME] [--urlRootLocation=/URL_ROOT] "
|
|
"[--threads=NB_THREAD(" << nb_threads << ")] LIBRARY_PATH "
|
|
<< endl;
|
|
cerr << "\n If you set more than one ZIM_PATH, you cannot set a "
|
|
"INDEX_PATH."
|
|
<< endl;
|
|
exit(1);
|
|
}
|
|
|
|
if ((zimPathes.size() > 1) && !indexPath.empty()) {
|
|
cerr << "You cannot set a indexPath if you also set several zimPathes";
|
|
exit(1);
|
|
}
|
|
|
|
/* Setup the library manager and get the list of books */
|
|
if (libraryFlag) {
|
|
vector<string> libraryPaths = kiwix::split(libraryPath, ";");
|
|
vector<string>::iterator itr;
|
|
|
|
for (itr = libraryPaths.begin(); itr != libraryPaths.end(); ++itr) {
|
|
if (!itr->empty()) {
|
|
bool retVal = false;
|
|
|
|
try {
|
|
string libraryPath
|
|
= isRelativePath(*itr)
|
|
? computeAbsolutePath(getCurrentDirectory(), *itr)
|
|
: *itr;
|
|
retVal = libraryManager.readFile(libraryPath, true);
|
|
} catch (...) {
|
|
retVal = false;
|
|
}
|
|
|
|
if (!retVal) {
|
|
cerr << "Unable to open the XML library file '" << *itr << "'."
|
|
<< endl;
|
|
exit(1);
|
|
}
|
|
}
|
|
}
|
|
|
|
/* Check if the library is not empty (or only remote books)*/
|
|
if (libraryManager.getBookCount(true, false) == 0) {
|
|
cerr << "The XML library file '" << libraryPath
|
|
<< "' is empty (or has only remote books)." << endl;
|
|
}
|
|
} else {
|
|
std::vector<std::string>::iterator it;
|
|
for (it = zimPathes.begin(); it != zimPathes.end(); it++) {
|
|
if (!libraryManager.addBookFromPath(*it, *it, "", false)) {
|
|
cerr << "Unable to add the ZIM file '" << *it
|
|
<< "' to the internal library." << endl;
|
|
exit(1);
|
|
}
|
|
}
|
|
if (!indexPath.empty()) {
|
|
libraryManager.setBookIndex(libraryManager.getBooksIds()[0], indexPath);
|
|
}
|
|
}
|
|
|
|
/* Instance the readers and searcher and build the corresponding maps */
|
|
vector<string> booksIds = libraryManager.getBooksIds();
|
|
vector<string>::iterator itr;
|
|
kiwix::Book currentBook;
|
|
globalSearcher = new kiwix::Searcher();
|
|
globalSearcher->setProtocolPrefix(rootLocation + "/");
|
|
globalSearcher->setSearchProtocolPrefix(rootLocation + "/" + "search?");
|
|
for (itr = booksIds.begin(); itr != booksIds.end(); ++itr) {
|
|
bool zimFileOk = false;
|
|
libraryManager.getBookById(*itr, currentBook);
|
|
std::string zimPath = currentBook.pathAbsolute;
|
|
|
|
if (!zimPath.empty()) {
|
|
indexPath = currentBook.indexPathAbsolute;
|
|
|
|
/* Instanciate the ZIM file handler */
|
|
kiwix::Reader* reader = NULL;
|
|
try {
|
|
reader = new kiwix::Reader(zimPath);
|
|
zimFileOk = true;
|
|
} catch (...) {
|
|
cerr << "Unable to open the ZIM file '" << zimPath << "'." << endl;
|
|
}
|
|
|
|
if (zimFileOk) {
|
|
string humanReadableId = currentBook.getHumanReadableIdFromPath();
|
|
readers[humanReadableId] = reader;
|
|
|
|
if ( reader->hasFulltextIndex()) {
|
|
kiwix::Searcher* searcher = new kiwix::Searcher();
|
|
searcher->setProtocolPrefix(rootLocation + "/");
|
|
searcher->setSearchProtocolPrefix(rootLocation + "/" + "search?");
|
|
searcher->add_reader(reader, humanReadableId);
|
|
globalSearcher->add_reader(reader, humanReadableId);
|
|
searchers[humanReadableId] = searcher;
|
|
} else if ( !indexPath.empty() ) {
|
|
try {
|
|
kiwix::Searcher* searcher = new kiwix::Searcher(indexPath, reader, humanReadableId);
|
|
searcher->setProtocolPrefix(rootLocation + "/");
|
|
searcher->setSearchProtocolPrefix(rootLocation + "/" + "search?");
|
|
searchers[humanReadableId] = searcher;
|
|
} catch (...) {
|
|
cerr << "Unable to open the search index '" << indexPath << "'."
|
|
<< endl;
|
|
searchers[humanReadableId] = nullptr;
|
|
}
|
|
} else {
|
|
searchers[humanReadableId] = nullptr;
|
|
}
|
|
}
|
|
}
|
|
}
|
|
|
|
/* Compute the Welcome HTML */
|
|
string welcomeBooksHtml
|
|
= ""
|
|
"<div class='book__list'>";
|
|
for (itr = booksIds.begin(); itr != booksIds.end(); ++itr) {
|
|
libraryManager.getBookById(*itr, currentBook);
|
|
|
|
if (!currentBook.path.empty()
|
|
&& readers.find(currentBook.getHumanReadableIdFromPath())
|
|
!= readers.end()) {
|
|
welcomeBooksHtml += ""
|
|
"<a href='" + rootLocation + "/" + currentBook.getHumanReadableIdFromPath() + "/'>"
|
|
"<div class='book'>"
|
|
"<div class='book__background' style='background-image: url(data:" + currentBook.faviconMimeType+ ";base64," + currentBook.favicon + ");'>"
|
|
"<div class='book__title' title='" + currentBook.title + "'>" + currentBook.title + "</div>"
|
|
"<div class='book__description' title='" + currentBook.description + "'>" + currentBook.description + "</div>"
|
|
"<div class='book__info'>"
|
|
"" + kiwix::beautifyInteger(atoi(currentBook.articleCount.c_str())) + " articles, " + kiwix::beautifyInteger(atoi(currentBook.mediaCount.c_str())) + " medias"
|
|
"</div>"
|
|
"</div>"
|
|
"</div>"
|
|
"</a>";
|
|
}
|
|
}
|
|
welcomeBooksHtml += ""
|
|
"</div>";
|
|
|
|
pthread_mutex_lock(®exLock);
|
|
welcomeHTML
|
|
= replaceRegex(RESOURCE::home_html_tmpl, welcomeBooksHtml, "__BOOKS__");
|
|
pthread_mutex_unlock(®exLock);
|
|
|
|
introduceTaskbar(welcomeHTML, "");
|
|
|
|
#ifndef _WIN32
|
|
/* Fork if necessary */
|
|
if (daemonFlag) {
|
|
pid_t pid;
|
|
|
|
/* Fork off the parent process */
|
|
pid = fork();
|
|
if (pid < 0) {
|
|
exit(1);
|
|
}
|
|
|
|
/* If we got a good PID, then
|
|
we can exit the parent process. */
|
|
if (pid > 0) {
|
|
exit(0);
|
|
}
|
|
}
|
|
#endif
|
|
|
|
/* Mutex init */
|
|
pthread_mutex_init(&searchLock, NULL);
|
|
pthread_mutex_init(&compressorLock, NULL);
|
|
pthread_mutex_init(&regexLock, NULL);
|
|
|
|
/* Hard coded mimetypes */
|
|
extMimeTypes["html"] = "text/html";
|
|
extMimeTypes["htm"] = "text/html";
|
|
extMimeTypes["png"] = "image/png";
|
|
extMimeTypes["tiff"] = "image/tiff";
|
|
extMimeTypes["tif"] = "image/tiff";
|
|
extMimeTypes["jpeg"] = "image/jpeg";
|
|
extMimeTypes["jpg"] = "image/jpeg";
|
|
extMimeTypes["gif"] = "image/gif";
|
|
extMimeTypes["svg"] = "image/svg+xml";
|
|
extMimeTypes["txt"] = "text/plain";
|
|
extMimeTypes["xml"] = "text/xml";
|
|
extMimeTypes["pdf"] = "application/pdf";
|
|
extMimeTypes["ogg"] = "application/ogg";
|
|
extMimeTypes["js"] = "application/javascript";
|
|
extMimeTypes["css"] = "text/css";
|
|
extMimeTypes["otf"] = "application/vnd.ms-opentype";
|
|
extMimeTypes["ttf"] = "application/font-ttf";
|
|
extMimeTypes["woff"] = "application/font-woff";
|
|
extMimeTypes["vtt"] = "text/vtt";
|
|
|
|
/* Start the HTTP daemon */
|
|
void* page = NULL;
|
|
if (interface.length() > 0) {
|
|
#ifndef _WIN32
|
|
|
|
/* TBD IPv6 support */
|
|
struct sockaddr_in sockAddr;
|
|
struct ifaddrs *ifaddr, *ifa;
|
|
int family, n;
|
|
|
|
/* Search all available interfaces */
|
|
if (getifaddrs(&ifaddr) == -1) {
|
|
cerr << "Getifaddrs() failed while searching for '" << interface << "'"
|
|
<< endl;
|
|
exit(1);
|
|
}
|
|
|
|
/* Init 'sockAddr' with zeros */
|
|
memset(&sockAddr, 0, sizeof(sockAddr));
|
|
|
|
/* Try to find interfaces in the list of available interfaces */
|
|
for (ifa = ifaddr, n = 0; ifa != NULL; ifa = ifa->ifa_next, n++) {
|
|
/* Ignore if no IP attributed to the interface */
|
|
if (ifa->ifa_addr == NULL)
|
|
continue;
|
|
|
|
/* Check if the interface is the right one */
|
|
family = ifa->ifa_addr->sa_family;
|
|
if (family == AF_INET) {
|
|
if (strcasecmp(ifa->ifa_name, interface.c_str()) == 0) {
|
|
sockAddr.sin_family = family;
|
|
sockAddr.sin_port = htons(serverPort);
|
|
sockAddr.sin_addr.s_addr
|
|
= ((struct sockaddr_in*)ifa->ifa_addr)->sin_addr.s_addr;
|
|
break;
|
|
}
|
|
}
|
|
}
|
|
|
|
/* Free 'ifaddr' */
|
|
freeifaddrs(ifaddr);
|
|
|
|
/* Dies if interface was not found in the list */
|
|
if (sockAddr.sin_family == 0) {
|
|
cerr << "Unable to find interface '" << interface << "'" << endl;
|
|
exit(1);
|
|
}
|
|
|
|
daemon = MHD_start_daemon(MHD_USE_POLL_INTERNALLY,
|
|
serverPort,
|
|
NULL,
|
|
NULL,
|
|
&accessHandlerCallback,
|
|
page,
|
|
MHD_OPTION_SOCK_ADDR, &sockAddr,
|
|
MHD_OPTION_THREAD_POOL_SIZE, nb_threads,
|
|
MHD_OPTION_END);
|
|
#else
|
|
cerr << "Setting 'interface' not yet implemented for Windows" << endl;
|
|
exit(1);
|
|
#endif
|
|
|
|
} else {
|
|
daemon = MHD_start_daemon(MHD_USE_POLL_INTERNALLY,
|
|
serverPort,
|
|
NULL,
|
|
NULL,
|
|
&accessHandlerCallback,
|
|
page,
|
|
MHD_OPTION_THREAD_POOL_SIZE, nb_threads,
|
|
MHD_OPTION_END);
|
|
}
|
|
|
|
if (daemon == NULL) {
|
|
cerr << "Unable to instantiate the HTTP daemon. The port " << serverPort
|
|
<< " is maybe already occupied or need more permissions to be open. "
|
|
"Please try as root or with a port number higher or equal to 1024."
|
|
<< endl;
|
|
exit(1);
|
|
}
|
|
|
|
/* Run endlessly (or, when PPID > 0, until the process identified by PPID dies) */
|
|
bool waiting = true;
|
|
do {
|
|
if (PPID > 0) {
|
|
#ifdef _WIN32
|
|
HANDLE process = OpenProcess(SYNCHRONIZE, FALSE, PPID);
|
|
DWORD ret = WaitForSingleObject(process, 0);
|
|
CloseHandle(process);
|
|
if (ret == WAIT_TIMEOUT) {
|
|
#elif __APPLE__
|
|
int mib[MIBSIZE];
|
|
struct kinfo_proc kp;
|
|
size_t len = sizeof(kp);
|
|
|
|
mib[0] = CTL_KERN;
|
|
mib[1] = KERN_PROC;
|
|
mib[2] = KERN_PROC_PID;
|
|
mib[3] = PPID;
|
|
|
|
int ret = sysctl(mib, MIBSIZE, &kp, &len, NULL, 0);
|
|
if (ret != -1 && len > 0) {
|
|
#else /* Linux & co */
|
|
string procPath = "/proc/" + string(PPIDString);
|
|
if (access(procPath.c_str(), F_OK) != -1) {
|
|
#endif
|
|
} else {
|
|
waiting = false;
|
|
}
|
|
}
|
|
|
|
kiwix::sleep(1000);
|
|
} while (waiting);
|
|
|
|
delete globalSearcher;
|
|
|
|
/* Stop the daemon */
|
|
MHD_stop_daemon(daemon);
|
|
|
|
/* Mutex destroy */
|
|
pthread_mutex_destroy(&searchLock);
|
|
pthread_mutex_destroy(&compressorLock);
|
|
pthread_mutex_destroy(&regexLock);
|
|
exit(0);
|
|
}
|