removed old zimlib tree

pull/9/head
renaud gaudin 13 years ago
parent e027ec4e8d
commit 0b0d2fc4d0

@ -1,97 +0,0 @@
/*
* Copyright (C) 2006 Tommi Maekitalo
*
* This program is free software; you can redistribute it and/or
* modify it under the terms of the GNU General Public License as
* published by the Free Software Foundation; either version 2 of the
* License, or (at your option) any later version.
*
* This program is distributed in the hope that it will be useful, but
* is provided AS IS, WITHOUT ANY WARRANTY; without even the implied
* warranty of MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE, and
* NON-INFRINGEMENT. See the GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software
* Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
*
*/
#ifndef ZIM_ARTICLE_H
#define ZIM_ARTICLE_H
#include <string>
#include <zim/zim.h>
#include <zim/dirent.h>
#include <zim/file.h>
#include <limits>
#include <iosfwd>
namespace zim
{
/// Handle to one directory entry of a zim File, addressed by its index.
/// Most accessors fetch the Dirent from the file and forward to it.
class Article
{
  private:
    File file;      // file the entry is looked up in
    size_type idx;  // index of the entry; the maximum value marks "no article"

  public:
    /// Creates an article for which good() returns false.
    Article()
      : idx(std::numeric_limits<size_type>::max())
    { }

    /// Creates an article referring to entry idx_ of file_.
    Article(const File& file_, size_type idx_)
      : file(file_),
        idx(idx_)
    { }

    /// Fetches the directory entry from the file.
    Dirent getDirent() const
    {
      // File::getDirent is not declared const, hence the cast.
      File& mutableFile = const_cast<File&>(file);
      return mutableFile.getDirent(idx);
    }

    std::string getParameter() const
    {
      return getDirent().getParameter();
    }

    std::string getTitle() const
    {
      return getDirent().getTitle();
    }

    std::string getUrl() const
    {
      return getDirent().getUrl();
    }

    std::string getLongUrl() const
    {
      return getDirent().getLongUrl();
    }

    /// Returns the numeric mime type stored in the directory entry.
    uint16_t getLibraryMimeType() const
    {
      return getDirent().getMimeType();
    }

    /// Returns the mime type string the file associates with the numeric
    /// mime type of this entry.
    const std::string& getMimeType() const
    {
      return file.getMimeType(getLibraryMimeType());
    }

    bool isRedirect() const
    {
      return getDirent().isRedirect();
    }

    char getNamespace() const
    {
      return getDirent().getNamespace();
    }

    size_type getRedirectIndex() const
    {
      return getDirent().getRedirectIndex();
    }

    /// Returns the article of the same file the redirect index points to.
    Article getRedirectArticle() const
    {
      return Article(file, getRedirectIndex());
    }

    size_type getArticleSize() const;

    /// Orders articles by namespace first and by title second.
    bool operator< (const Article& a) const
    {
      if (getNamespace() < a.getNamespace())
        return true;
      if (getNamespace() == a.getNamespace())
        return getTitle() < a.getTitle();
      return false;
    }

    /// Returns the cluster the directory entry refers to.
    Cluster getCluster() const
    {
      return file.getCluster(getDirent().getClusterNumber());
    }

    /// Returns the blob of the article; an empty Blob for redirects.
    Blob getData() const
    {
      const Dirent dirent = getDirent();
      if (dirent.isRedirect())
        return Blob();
      return const_cast<File&>(file).getBlob(dirent.getClusterNumber(), dirent.getBlobNumber());
    }

    std::string getPage(bool layout = true, unsigned maxRecurse = 10);
    void getPage(std::ostream&, bool layout = true, unsigned maxRecurse = 10);

    const File& getFile() const  { return file; }
    File& getFile()              { return file; }

    size_type getIndex() const   { return idx; }

    /// True unless the index holds the "no article" marker.
    bool good() const
    {
      return idx != std::numeric_limits<size_type>::max();
    }
};
}
#endif // ZIM_ARTICLE_H

@ -1,48 +0,0 @@
/*
* Copyright (C) 2007 Tommi Maekitalo
*
* This program is free software; you can redistribute it and/or
* modify it under the terms of the GNU General Public License as
* published by the Free Software Foundation; either version 2 of the
* License, or (at your option) any later version.
*
* This program is distributed in the hope that it will be useful, but
* is provided AS IS, WITHOUT ANY WARRANTY; without even the implied
* warranty of MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE, and
* NON-INFRINGEMENT. See the GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software
* Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
*
*/
#ifndef ZIM_ARTICLESEARCH_H
#define ZIM_ARTICLESEARCH_H
#include <vector>
#include <zim/file.h>
#include <zim/fileiterator.h>
#include <zim/article.h>
namespace zim
{
/// Searches articles of a zim File; the search itself is implemented
/// outside of this header.
class ArticleSearch
{
  public:
    /// Collection type returned by search().
    typedef std::vector<Article> Results;

  private:
    File articleFile;    // file handed to the constructor
    std::string titles;  // starts out empty; not touched in this header

  public:
    /// Creates a search object operating on articleFile_.
    explicit ArticleSearch(const File& articleFile_)
      : articleFile(articleFile_),
        titles()
    { }

    /// Returns the articles matching expr.
    Results search(const std::string& expr);
};
}
#endif // ZIM_ARTICLESEARCH_H

@ -1,64 +0,0 @@
/*
* Copyright (C) 2009 Tommi Maekitalo
*
* This program is free software; you can redistribute it and/or
* modify it under the terms of the GNU General Public License as
* published by the Free Software Foundation; either version 2 of the
* License, or (at your option) any later version.
*
* This program is distributed in the hope that it will be useful, but
* is provided AS IS, WITHOUT ANY WARRANTY; without even the implied
* warranty of MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE, and
* NON-INFRINGEMENT. See the GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software
* Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
*
*/
#ifndef ZIM_BLOB_H
#define ZIM_BLOB_H
#include <iostream>
#include <zim/cluster.h>
namespace zim
{
/// Non-owning view onto a range of bytes, optionally tied to a ClusterImpl.
class Blob
{
    const char* _data;               // first byte, or 0 for an empty blob
    unsigned _size;                  // number of bytes
    SmartPtr<ClusterImpl> _cluster;  // presumably keeps the cluster holding the bytes alive - confirm

  public:
    /// Creates an empty blob: data() is 0 and size() is 0.
    Blob()
      : _data(0),
        _size(0)
    { }

    /// Creates a blob over size bytes starting at data, without a cluster.
    Blob(const char* data, unsigned size)
      : _data(data),
        _size(size)
    { }

    /// Creates a blob over size bytes starting at data and remembers cluster.
    Blob(ClusterImpl* cluster, const char* data, unsigned size)
      : _data(data),
        _size(size),
        _cluster(cluster)
    { }

    /// Pointer to the first byte.
    const char* data() const
    {
      return _data;
    }

    /// Pointer one past the last byte.
    const char* end() const
    {
      return _data + _size;
    }

    /// Number of bytes.
    unsigned size() const
    {
      return _size;
    }
};
/// Writes the raw bytes of blob to out; nothing is written when the blob
/// has no data pointer.
inline std::ostream& operator<< (std::ostream& out, const Blob& blob)
{
  const char* bytes = blob.data();
  if (!bytes)
    return out;

  out.write(bytes, blob.size());
  return out;
}
}
#endif // ZIM_BLOB_H

@ -1,353 +0,0 @@
/*
* Copyright (C) 2008 Tommi Maekitalo
*
* This program is free software; you can redistribute it and/or
* modify it under the terms of the GNU General Public License as
* published by the Free Software Foundation; either version 2 of the
* License, or (at your option) any later version.
*
* This program is distributed in the hope that it will be useful, but
* is provided AS IS, WITHOUT ANY WARRANTY; without even the implied
* warranty of MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE, and
* NON-INFRINGEMENT. See the GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software
* Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
*
*/
#ifndef ZIM_CACHE_H
#define ZIM_CACHE_H
#include <map>
#include <limits>
#include <iostream>
#ifdef _WIN32
#define NOMINMAX
# include <windows.h>
#undef NOMINMAX
#undef max
#endif
namespace zim
{
/**
   Implements a bounded container for caching elements.

   The cache stores key-value-pairs in a std::map, so the key type needs an
   ordering usable by std::map (operator< by default). There are 2 main
   operations for accessing the cache: put and get. Put takes a key and a
   value and stores the element. Get takes a key and optionally a default
   value. If the key is found, the cached value is returned, the passed
   default otherwise. By default the default value is constructed with the
   empty ctor of the value-type.

   The cache has a maximum size (always rounded up to an even number), after
   which key-value-pairs are dropped when a new item is put into the cache.

   Every element is either a "winner" or a "looser" and carries a serial
   number telling when it was touched last:

     - while less than half of the capacity is used, new elements become
       winners
     - further new elements become loosers
     - when the cache is full, the oldest looser is dropped to make room
     - when a looser is found by get or put, it is promoted to a winner and
       the oldest winner is demoted to a looser in exchange

   Elements which are fetched more than once therefore stay in the winner
   half, while the looser half holds elements which are either new or were
   pushed out of the winner half by other elements.

   Copying elements (both key and value) must be possible and should be
   cheap, since get returns values by copy.
 */
template <typename Key, typename Value>
class Cache
{
    struct Data
    {
      bool winner;      // true while the element belongs to the winner half
      unsigned serial;  // value of the serial counter when last touched
      Value value;

      // Initialise every member so a default constructed Data never carries
      // indeterminate values.
      Data()
        : winner(false),
          serial(0),
          value()
      { }

      Data(bool winner_, unsigned serial_, const Value& value_)
        : winner(winner_),
          serial(serial_),
          value(value_)
      { }
    };

    typedef std::map<Key, Data> DataType;
    DataType data;
    typename DataType::size_type maxElements;
    unsigned serial;
    unsigned hits;
    unsigned misses;

    // Returns the next serial number. When the counter would overflow, the
    // serials of all elements are reset to 0 and counting restarts at 1.
    unsigned _nextSerial()
    {
      if (serial == std::numeric_limits<unsigned>::max())
      {
        for (typename DataType::iterator it = data.begin(); it != data.end(); ++it)
          it->second.serial = 0;
        serial = 1;
      }

      return serial++;
    }

    // Returns the element of the requested group (winner or looser) with the
    // smallest serial. If the group is empty, the first element of the map
    // is returned; if the map is empty, data.end() is returned.
    typename DataType::iterator _getOldest(bool winner)
    {
      typename DataType::iterator foundElement = data.begin();
      if (foundElement == data.end())
        return foundElement;

      typename DataType::iterator it = foundElement;
      for (++it; it != data.end(); ++it)
        if (it->second.winner == winner
          && (foundElement->second.winner != winner || it->second.serial < foundElement->second.serial))
            foundElement = it;

      return foundElement;
    }

    // Like _getOldest, but looks for the largest serial.
    typename DataType::iterator _getNewest(bool winner)
    {
      typename DataType::iterator foundElement = data.begin();
      if (foundElement == data.end())
        return foundElement;

      typename DataType::iterator it = foundElement;
      for (++it; it != data.end(); ++it)
        if (it->second.winner == winner
          && (foundElement->second.winner != winner || it->second.serial > foundElement->second.serial))
            foundElement = it;

      return foundElement;
    }

    // Counts the elements currently flagged as winner.
    typename DataType::size_type _countWinners() const
    {
      typename DataType::size_type count = 0;
      for (typename DataType::const_iterator it = data.begin(); it != data.end(); ++it)
        if (it->second.winner)
          ++count;
      return count;
    }

    // Drops the oldest looser (or the first element if there is no looser).
    // Does nothing on an empty cache.
    void _dropLooser()
    {
      typename DataType::iterator it = _getOldest(false);
      if (it != data.end())
        data.erase(it);
    }

    // Turns the oldest winner into a looser and gives it a fresh serial, so
    // it becomes the newest looser. Does nothing on an empty cache.
    void _makeLooser()
    {
      typename DataType::iterator it = _getOldest(true);
      if (it == data.end())
        return;

      it->second.winner = false;
      it->second.serial = _nextSerial();
    }

    // Turns the newest looser into a winner. Has no effect when there is no
    // looser, since _getNewest then yields a winner or data.end().
    void _promoteNewestLooser()
    {
      typename DataType::iterator it = _getNewest(false);
      if (it != data.end())
        it->second.winner = true;
    }

    // Marks an element as just used: refreshes its serial and, if it was a
    // looser, swaps it into the winner half.
    void _touch(typename DataType::iterator it)
    {
      it->second.serial = _nextSerial();

      if (!it->second.winner)
      {
        // move element to the winner part
        it->second.winner = true;
        _makeLooser();
      }
    }

  public:
    typedef typename DataType::size_type size_type;
    typedef Value value_type;

    /// creates a cache holding at most maxElements_ elements; odd values
    /// are rounded up to the next even number
    explicit Cache(size_type maxElements_)
      : maxElements(maxElements_ + (maxElements_ & 1)),
        serial(0),
        hits(0),
        misses(0)
    { }

    /// returns the number of elements currently in the cache
    size_type size() const        { return data.size(); }

    /// returns the maximum number of elements in the cache
    size_type getMaxElements() const      { return maxElements; }

    /// changes the maximum number of elements (rounded up to an even
    /// number). Surplus elements are dropped, loosers first and oldest
    /// first. Afterwards the winner half is rebalanced, so that it holds
    /// half of the capacity or all elements, whichever is smaller.
    void setMaxElements(size_type maxElements_)
    {
      maxElements = maxElements_ + (maxElements_ & 1);

      // size() > maxElements implies a non-empty map, so each round erases
      // exactly one element and the loop terminates.
      while (data.size() > maxElements)
        _dropLooser();

      const size_type half = maxElements / 2;
      const size_type wantedWinners = data.size() < half ? data.size() : half;
      size_type numWinners = _countWinners();

      while (numWinners > wantedWinners)
      {
        _makeLooser();
        --numWinners;
      }

      while (numWinners < wantedWinners)
      {
        _promoteNewestLooser();
        ++numWinners;
      }
    }

    /// removes a element from the cache and returns true, if found
    bool erase(const Key& key)
    {
      typename DataType::iterator it = data.find(key);
      if (it == data.end())
        return false;

      const bool wasWinner = it->second.winner;
      data.erase(it);

      // keep the winner half filled
      if (wasWinner)
        _promoteNewestLooser();

      return true;
    }

    /// clears the cache. The hit and miss counters are reset only when
    /// stats is true.
    void clear(bool stats = false)
    {
      data.clear();
      if (stats)
        hits = misses = 0;
    }

    /// puts a new element in the cache. If the key is already found in the
    /// cache, the element is marked as just used (and promoted to a winner);
    /// the stored value is left unchanged. A cache with a maximum size of 0
    /// stores nothing.
    void put(const Key& key, const Value& value)
    {
      typename DataType::iterator it = data.find(key);
      if (it != data.end())
      {
        // element found
        _touch(it);
        return;
      }

      if (maxElements == 0)
        return;

      if (data.size() >= maxElements)
        _dropLooser();

      // new elements are winners only while the winner half is not filled
      const bool asWinner = data.size() < maxElements / 2;
      data.insert(typename DataType::value_type(key,
                    Data(asWinner, _nextSerial(), value)));
    }

    /// puts a new element into the winner half of the cache. If the key is
    /// already found in the cache, the element is marked as just used (and
    /// promoted to a winner); the stored value is left unchanged. This
    /// method overrides the need, that a element needs a hit to become a
    /// winner. A cache with a maximum size of 0 stores nothing.
    void put_top(const Key& key, const Value& value)
    {
      typename DataType::iterator it = data.find(key);
      if (it != data.end())
      {
        // element found
        _touch(it);
        return;
      }

      if (maxElements == 0)
        return;

      if (data.size() >= maxElements)
        _dropLooser();

      // make room in the winner half for the new element
      if (data.size() >= maxElements / 2)
        _makeLooser();

      data.insert(typename DataType::value_type(key,
                    Data(true, _nextSerial(), value)));
    }

    /// returns a pointer to the cached value or 0 if the key is not found.
    /// A found element is marked as just used. Updates the hit and miss
    /// counters. The pointer refers to an element of the internal map.
    Value* getptr(const Key& key)
    {
      typename DataType::iterator it = data.find(key);
      if (it == data.end())
      {
        ++misses;
        return 0;
      }

      ++hits;
      _touch(it);
      return &it->second.value;
    }

    /// returns a pair of values - a flag, if the value was found and the
    /// value if found or the passed default otherwise. If the value is
    /// found it is a cache hit and the element is marked as just used.
    std::pair<bool, Value> getx(const Key& key, Value def = Value())
    {
      Value* v = getptr(key);
      return v ? std::pair<bool, Value>(true, *v)
               : std::pair<bool, Value>(false, def);
    }

    /// returns the value to a key or the passed default value if not found.
    /// If the value is found it is a cache hit and the element is marked as
    /// just used.
    Value get(const Key& key, Value def = Value())
    {
      return getx(key, def).second;
    }

    /// returns the number of hits.
    unsigned getHits() const    { return hits; }
    /// returns the number of misses.
    unsigned getMisses() const  { return misses; }
    /// returns the cache hit ratio between 0 and 1.
    double hitRatio() const     { return hits+misses > 0 ? static_cast<double>(hits)/static_cast<double>(hits+misses) : 0; }
    /// returns the ratio, between held elements and maximum elements.
    double fillfactor() const   { return static_cast<double>(data.size()) / static_cast<double>(maxElements); }

    /*
    void dump(std::ostream& out) const
    {
      out << "cache max size=" << maxElements << " current size=" << size() << '\n';
      for (typename DataType::const_iterator it = data.begin(); it != data.end(); ++it)
      {
        out << "\tkey=\"" << it->first << "\" value=\"" << it->second.value << "\" serial=" << it->second.serial << " winner=" << it->second.winner << '\n';
      }
      out << "--------\n";
    }
    */
};
}
#endif // ZIM_CACHE_H

@ -1,107 +0,0 @@
/*
* Copyright (C) 2009 Tommi Maekitalo
*
* This program is free software; you can redistribute it and/or
* modify it under the terms of the GNU General Public License as
* published by the Free Software Foundation; either version 2 of the
* License, or (at your option) any later version.
*
* This program is distributed in the hope that it will be useful, but
* is provided AS IS, WITHOUT ANY WARRANTY; without even the implied
* warranty of MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE, and
* NON-INFRINGEMENT. See the GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software
* Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
*
*/
#ifndef ZIM_CLUSTER_H
#define ZIM_CLUSTER_H
#include <zim/zim.h>
#include <zim/refcounted.h>
#include <zim/smartptr.h>
#include <iosfwd>
#include <vector>
namespace zim
{
class Blob;
class Cluster;
/// Reference counted storage of a cluster: one byte buffer holding all blobs
/// plus an offset table delimiting them.
class ClusterImpl : public RefCounted
{
    friend std::istream& operator>> (std::istream& in, ClusterImpl& blobImpl);
    friend std::ostream& operator<< (std::ostream& out, const ClusterImpl& blobImpl);

    typedef std::vector<size_type> Offsets;
    typedef std::vector<char> Data;

    CompressionType compression;
    Offsets offsets;  // blob n spans offsets[n] up to offsets[n+1] within data
    Data data;        // bytes of all blobs

    void read(std::istream& in);
    void write(std::ostream& out) const;

  public:
    ClusterImpl();

    void setCompression(CompressionType c)
    {
      compression = c;
    }

    CompressionType getCompression() const
    {
      return compression;
    }

    /// True for the zip, bzip2 and lzma compression types.
    bool isCompressed() const
    {
      if (compression == zimcompZip)
        return true;
      if (compression == zimcompBzip2)
        return true;
      return compression == zimcompLzma;
    }

    /// Number of blobs: one less than the number of offsets.
    /// NOTE(review): wraps around if offsets is empty - presumably the
    /// constructor seeds the table with one entry; confirm.
    size_type getCount() const
    {
      return offsets.size() - 1;
    }

    /// Pointer to the first byte of blob n.
    const char* getData(unsigned n) const
    {
      return &data[ offsets[n] ];
    }

    /// Size of blob n in bytes.
    size_type getSize(unsigned n) const
    {
      return offsets[n+1] - offsets[n];
    }

    /// Size of the offset table plus size of the byte buffer.
    size_type getSize() const
    {
      return offsets.size() * sizeof(size_type) + data.size();
    }

    Blob getBlob(size_type n) const;
    void clear();

    void addBlob(const Blob& blob);
    void addBlob(const char* data, unsigned size);
};
class Cluster
{
friend std::istream& operator>> (std::istream& in, Cluster& blob);
friend std::ostream& operator<< (std::ostream& out, const Cluster& blob);
SmartPtr<ClusterImpl> impl;
ClusterImpl* getImpl();
public:
Cluster();
void setCompression(CompressionType c) { getImpl()->setCompression(c); }
CompressionType getCompression() const { return impl ? impl->getCompression() : zimcompNone; }
bool isCompressed() const
{ return impl && (impl->getCompression() == zimcompZip
|| impl->getCompression() == zimcompBzip2
|| impl->getCompression() == zimcompLzma); }
const char* getBlobPtr(size_type n) const { return impl->getData(n); }
size_type getBlobSize(size_type n) const { return impl->getSize(n); }
Blob getBlob(size_type n) const;
size_type count() const { return impl ? impl->getCount() : 0; }
size_type size() const { return impl ? impl->getSize() : 0; }
void clear() { impl = 0; }
void addBlob(const char* data, unsigned size) { getImpl()->addBlob(data, size); }
void addBlob(const Blob& blob) { getImpl()->addBlob(blob); }
operator bool() const { return impl; }
};
std::istream& operator>> (std::istream& in, ClusterImpl& blobImpl);
std::istream& operator>> (std::istream& in, Cluster& blob);
std::ostream& operator<< (std::ostream& out, const ClusterImpl& blobImpl);
std::ostream& operator<< (std::ostream& out, const Cluster& blob);
}
#endif // ZIM_CLUSTER_H

@ -1,125 +0,0 @@
/*
* Copyright (C) 2006 Tommi Maekitalo
*
* This program is free software; you can redistribute it and/or
* modify it under the terms of the GNU General Public License as
* published by the Free Software Foundation; either version 2 of the
* License, or (at your option) any later version.
*
* This program is distributed in the hope that it will be useful, but
* is provided AS IS, WITHOUT ANY WARRANTY; without even the implied
* warranty of MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE, and
* NON-INFRINGEMENT. See the GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software
* Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
*
*/
#ifndef ZIM_DIRENT_H
#define ZIM_DIRENT_H
#include <string>
#include <zim/zim.h>
#include <limits>
namespace zim
{
/// Directory entry: describes either an article (mime type, cluster and blob
/// number) or a redirect (index of the target entry), together with
/// namespace, url, title and parameter.
class Dirent
{
    bool redirect;
    uint16_t mimeType;

    size_type version;

    size_type clusterNumber;  // only used when redirect is false
    size_type blobNumber;     // only used when redirect is false

    size_type redirectIndex;  // only used when redirect is true

    char ns;
    std::string title;
    std::string url;
    std::string parameter;

  public:
    /// Creates an article entry with all numbers zeroed and empty strings.
    Dirent()
      : redirect(false),
        mimeType(0),
        version(0),
        clusterNumber(0),
        blobNumber(0),
        redirectIndex(0),
        ns('\0')
    {}

    bool isRedirect() const       { return redirect; }
    uint16_t getMimeType() const  { return mimeType; }

    size_type getVersion() const  { return version; }
    void setVersion(size_type v)  { version = v; }

    /// Cluster number of an article, 0 for redirects.
    size_type getClusterNumber() const
    {
      if (isRedirect())
        return 0;
      return clusterNumber;
    }

    /// Blob number of an article, 0 for redirects.
    size_type getBlobNumber() const
    {
      if (isRedirect())
        return 0;
      return blobNumber;
    }

    /// Sets cluster and blob number; the redirect flag is not changed.
    void setCluster(size_type clusterNumber_, size_type blobNumber_)
    {
      clusterNumber = clusterNumber_;
      blobNumber = blobNumber_;
    }

    /// Target index of a redirect, 0 for articles.
    size_type getRedirectIndex() const
    {
      if (isRedirect())
        return redirectIndex;
      return 0;
    }

    char getNamespace() const     { return ns; }

    /// Returns the title, falling back to the url when the title is empty.
    const std::string& getTitle() const
    {
      if (title.empty())
        return url;
      return title;
    }

    const std::string& getUrl() const        { return url; }
    std::string getLongUrl() const;
    const std::string& getParameter() const  { return parameter; }

    /// Computes the size of the entry: a fixed part (12 for redirects, 16
    /// otherwise), url, parameter and 2 extra bytes, plus the title when it
    /// differs from the url.
    /// NOTE(review): the constants presumably mirror the serialized layout
    /// written by operator<< - confirm.
    unsigned getDirentSize() const
    {
      const unsigned fixedPart = isRedirect() ? 12 : 16;
      unsigned total = fixedPart + url.size() + parameter.size() + 2;
      if (title != url)
        total += title.size();
      return total;
    }

    void setTitle(const std::string& title_)
    {
      title = title_;
    }

    /// Sets namespace and url together.
    void setUrl(char ns_, const std::string& url_)
    {
      ns = ns_;
      url = url_;
    }

    void setParameter(const std::string& parameter_)
    {
      parameter = parameter_;
    }

    /// Turns the entry into a redirect to idx: the mime type is set to the
    /// maximum uint16_t value and cluster and blob number are zeroed.
    void setRedirect(size_type idx)
    {
      redirect = true;
      redirectIndex = idx;
      mimeType = std::numeric_limits<uint16_t>::max();
      clusterNumber = 0;
      blobNumber = 0;
    }

    /// Turns the entry into an article with the given mime type and location.
    void setArticle(uint16_t mimeType_, size_type clusterNumber_, size_type blobNumber_)
    {
      redirect = false;
      mimeType = mimeType_;
      clusterNumber = clusterNumber_;
      blobNumber = blobNumber_;
    }
};
std::ostream& operator<< (std::ostream& out, const Dirent& fh);
std::istream& operator>> (std::istream& in, Dirent& fh);
}
#endif // ZIM_DIRENT_H

@ -1,123 +0,0 @@
/*
* Copyright (C) 2006 Tommi Maekitalo
*
* This program is free software; you can redistribute it and/or
* modify it under the terms of the GNU General Public License as
* published by the Free Software Foundation; either version 2 of the
* License, or (at your option) any later version.
*
* This program is distributed in the hope that it will be useful, but
* is provided AS IS, WITHOUT ANY WARRANTY; without even the implied
* warranty of MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE, and
* NON-INFRINGEMENT. See the GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software
* Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
*
*/
#ifndef ENDIAN_H
#define ENDIAN_H
#include <algorithm>
#include <iostream>
#include <zim/zim.h>
namespace zim
{
/// Tells whether the machine stores the most significant byte of an int at
/// the lowest address (big-endian, e.g. PowerPC).
inline bool isBigEndian()
{
  const int probe = 1;
  // On a big-endian machine the byte at the lowest address of 1 is 0.
  const int8_t lowestAddressByte = *reinterpret_cast<const int8_t*>(&probe);
  return lowestAddressByte == 0;
}
/// Tells whether the machine stores the least significant byte of an int at
/// the lowest address (little-endian, e.g. x86).
inline bool isLittleEndian()
{
  const int probe = 1;
  // On a little-endian machine the byte at the lowest address of 1 is 1.
  const int8_t lowestAddressByte = *reinterpret_cast<const int8_t*>(&probe);
  return lowestAddressByte == 1;
}
////////////////////////////////////////////////////////////////////////
/// Writes the sizeof(T) bytes of d to dst in little-endian order.
/// The bytes are reversed when bigEndian is true and copied unchanged
/// otherwise; bigEndian defaults to the byte order of the machine.
/// dst must have room for sizeof(T) bytes.
template <typename T>
void toLittleEndian(const T& d, char* dst, bool bigEndian = isBigEndian())
{
  const char* const first = reinterpret_cast<const char*>(&d);
  const char* const last = first + sizeof(T);

  if (bigEndian)
    std::reverse_copy(first, last, dst);
  else
    std::copy(first, last, dst);
}
/// Reads a value of type T stored in little-endian order at ptr.
/// The bytes are reversed when bigEndian is true and taken unchanged
/// otherwise; bigEndian defaults to the byte order of the machine.
///
/// The value is always assembled byte by byte through char pointers, so ptr
/// may point into a plain byte buffer and does not have to be aligned for T.
/// T has to be default constructible and copyable byte by byte.
template <typename T>
T fromLittleEndian(const T* ptr, bool bigEndian = isBigEndian())
{
  const char* const first = reinterpret_cast<const char*>(ptr);
  const char* const last = first + sizeof(T);

  T ret;
  if (bigEndian)
    std::reverse_copy(first, last, reinterpret_cast<char*>(&ret));
  else
    std::copy(first, last, reinterpret_cast<char*>(&ret));
  return ret;
}
////////////////////////////////////////////////////////////////////////
/// Writes the sizeof(T) bytes of d to dst in big-endian order.
/// The bytes are copied unchanged when bigEndian is true and reversed
/// otherwise; bigEndian defaults to the byte order of the machine.
/// dst must have room for sizeof(T) bytes.
template <typename T>
void toBigEndian(const T& d, char* dst, bool bigEndian = isBigEndian())
{
  const char* const first = reinterpret_cast<const char*>(&d);
  const char* const last = first + sizeof(T);

  if (!bigEndian)
    std::reverse_copy(first, last, dst);
  else
    std::copy(first, last, dst);
}
/// Reads a value of type T stored in big-endian order at ptr.
/// The bytes are taken unchanged when bigEndian is true and reversed
/// otherwise; bigEndian defaults to the byte order of the machine.
///
/// The value is always assembled byte by byte through char pointers, so ptr
/// may point into a plain byte buffer and does not have to be aligned for T.
/// T has to be default constructible and copyable byte by byte.
template <typename T>
T fromBigEndian(const T* ptr, bool bigEndian = isBigEndian())
{
  const char* const first = reinterpret_cast<const char*>(ptr);
  const char* const last = first + sizeof(T);

  T ret;
  if (bigEndian)
    std::copy(first, last, reinterpret_cast<char*>(&ret));
  else
    std::reverse_copy(first, last, reinterpret_cast<char*>(&ret));
  return ret;
}
}
#endif // ENDIAN_H

@ -1,38 +0,0 @@
/*
* Copyright (C) 2006 Tommi Maekitalo
*
* This program is free software; you can redistribute it and/or
* modify it under the terms of the GNU General Public License as
* published by the Free Software Foundation; either version 2 of the
* License, or (at your option) any later version.
*
* This program is distributed in the hope that it will be useful, but
* is provided AS IS, WITHOUT ANY WARRANTY; without even the implied
* warranty of MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE, and
* NON-INFRINGEMENT. See the GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software
* Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
*
*/
#ifndef ZIM_ERROR_H
#define ZIM_ERROR_H
#include <stdexcept>
namespace zim
{
/// Exception type for errors in the format of a zim file.
/// Behaves like std::runtime_error; what() returns the message passed in.
class ZimFileFormatError : public std::runtime_error
{
  public:
    /// Creates the exception with msg as its description.
    explicit ZimFileFormatError(const std::string& msg)
      : std::runtime_error(msg)
    {
    }
};
}
#endif // ZIM_ERROR_H

@ -1,103 +0,0 @@
/*
* Copyright (C) 2006,2009 Tommi Maekitalo
*
* This program is free software; you can redistribute it and/or
* modify it under the terms of the GNU General Public License as
* published by the Free Software Foundation; either version 2 of the
* License, or (at your option) any later version.
*
* This program is distributed in the hope that it will be useful, but
* is provided AS IS, WITHOUT ANY WARRANTY; without even the implied
* warranty of MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE, and
* NON-INFRINGEMENT. See the GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software
* Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
*
*/
#ifndef ZIM_FILE_H
#define ZIM_FILE_H
#include <string>
#include <iterator>
#include <zim/zim.h>
#include <zim/fileimpl.h>
#include <zim/blob.h>
#include <zim/smartptr.h>
namespace zim
{
class Article;
/// Handle to a zim file. All work is delegated to a shared FileImpl.
/// A default constructed File has no implementation: good() returns false
/// and the delegating members dereference the implementation unchecked.
class File
{
    SmartPtr<FileImpl> impl;

  public:
    /// Creates a handle without implementation.
    File()
    { }

    /// Creates a handle with a new FileImpl built from fname.
    explicit File(const std::string& fname)
      : impl(new FileImpl(fname.c_str()))
    { }

    const std::string& getFilename() const
    {
      return impl->getFilename();
    }

    const Fileheader& getFileheader() const
    {
      return impl->getFileheader();
    }

    offset_type getFilesize() const
    {
      return impl->getFilesize();
    }

    Dirent getDirent(size_type idx)
    {
      return impl->getDirent(idx);
    }

    Dirent getDirentByTitle(size_type idx)
    {
      return impl->getDirentByTitle(idx);
    }

    size_type getCountArticles() const
    {
      return impl->getCountArticles();
    }

    Article getArticle(size_type idx) const;
    Article getArticle(char ns, const std::string& url);
    Article getArticleByUrl(const std::string& url);
    Article getArticleByTitle(size_type idx);
    Article getArticleByTitle(char ns, const std::string& title);

    Cluster getCluster(size_type idx) const
    {
      return impl->getCluster(idx);
    }

    size_type getCountClusters() const
    {
      return impl->getCountClusters();
    }

    offset_type getClusterOffset(size_type idx) const
    {
      return impl->getClusterOffset(idx);
    }

    /// Fetches cluster clusterIdx and returns its blob blobIdx.
    Blob getBlob(size_type clusterIdx, size_type blobIdx)
    {
      return getCluster(clusterIdx).getBlob(blobIdx);
    }

    size_type getNamespaceBeginOffset(char ch)
    {
      return impl->getNamespaceBeginOffset(ch);
    }

    size_type getNamespaceEndOffset(char ch)
    {
      return impl->getNamespaceEndOffset(ch);
    }

    /// Difference between end and begin offset of namespace ns.
    size_type getNamespaceCount(char ns)
    {
      return getNamespaceEndOffset(ns) - getNamespaceBeginOffset(ns);
    }

    std::string getNamespaces()
    {
      return impl->getNamespaces();
    }

    bool hasNamespace(char ch);

    class const_iterator;

    const_iterator begin();
    const_iterator beginByTitle();
    const_iterator end();
    std::pair<bool, const_iterator> findx(char ns, const std::string& url);
    std::pair<bool, const_iterator> findx(const std::string& url);
    std::pair<bool, const_iterator> findxByTitle(char ns, const std::string& title);
    const_iterator findByTitle(char ns, const std::string& title);
    const_iterator find(char ns, const std::string& url);
    const_iterator find(const std::string& url);

    /// True if the handle has an implementation.
    bool good() const
    {
      return impl.getPointer() != 0;
    }

    time_t getMTime() const
    {
      return impl->getMTime();
    }

    const std::string& getMimeType(uint16_t idx) const
    {
      return impl->getMimeType(idx);
    }

    std::string getChecksum()
    {
      return impl->getChecksum();
    }

    bool verify()
    {
      return impl->verify();
    }
};
std::string urldecode(const std::string& url);
}
#endif // ZIM_FILE_H

@ -1,108 +0,0 @@
/*
* Copyright (C) 2008 Tommi Maekitalo
*
* This program is free software; you can redistribute it and/or
* modify it under the terms of the GNU General Public License as
* published by the Free Software Foundation; either version 2 of the
* License, or (at your option) any later version.
*
* This program is distributed in the hope that it will be useful, but
* is provided AS IS, WITHOUT ANY WARRANTY; without even the implied
* warranty of MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE, and
* NON-INFRINGEMENT. See the GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software
* Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
*
*/
#ifndef ZIM_FILEHEADER_H
#define ZIM_FILEHEADER_H
#include <zim/zim.h>
#include <zim/endian.h>
#include <zim/uuid.h>
#include <iosfwd>
#include <limits>
#ifdef _WIN32
#define NOMINMAX
# include <windows.h>
#undef NOMINMAX
#undef max
#endif
namespace zim
{
class Fileheader
{
public:
static const size_type zimMagic;
static const size_type zimVersion;
static const size_type size;
private:
Uuid uuid;
size_type articleCount;
offset_type titleIdxPos;
offset_type urlPtrPos;
offset_type mimeListPos;
size_type blobCount;
offset_type blobPtrPos;
size_type mainPage;
size_type layoutPage;
offset_type checksumPos;
public:
Fileheader()
: articleCount(0),
titleIdxPos(0),
urlPtrPos(0),
blobCount(0),
blobPtrPos(0),
mainPage(std::numeric_limits<size_type>::max()),
layoutPage(std::numeric_limits<size_type>::max()),
checksumPos(std::numeric_limits<offset_type>::max())
{}
const Uuid& getUuid() const { return uuid; }
void setUuid(const Uuid& uuid_) { uuid = uuid_; }
size_type getArticleCount() const { return articleCount; }
void setArticleCount(size_type s) { articleCount = s; }
offset_type getTitleIdxPos() const { return titleIdxPos; }
void setTitleIdxPos(offset_type p) { titleIdxPos = p; }
offset_type getUrlPtrPos() const { return urlPtrPos; }
void setUrlPtrPos(offset_type p) { urlPtrPos = p; }
offset_type getMimeListPos() const { return mimeListPos; }
void setMimeListPos(offset_type p) { mimeListPos = p; }
size_type getClusterCount() const { return blobCount; }
void setClusterCount(size_type s) { blobCount = s; }
offset_type getClusterPtrPos() const { return blobPtrPos; }
void setClusterPtrPos(offset_type p) { blobPtrPos = p; }
bool hasMainPage() const { return mainPage != std::numeric_limits<size_type>::max(); }
size_type getMainPage() const { return mainPage; }
void setMainPage(size_type s) { mainPage = s; }
bool hasLayoutPage() const { return layoutPage != std::numeric_limits<size_type>::max(); }
size_type getLayoutPage() const { return layoutPage; }
void setLayoutPage(size_type s) { layoutPage = s; }
bool hasChecksum() const { return getMimeListPos() >= 80; }
offset_type getChecksumPos() const { return hasChecksum() ? checksumPos : 0; }
void setChecksumPos(offset_type p) { checksumPos = p; }
};
std::ostream& operator<< (std::ostream& out, const Fileheader& fh);
std::istream& operator>> (std::istream& in, Fileheader& fh);
}
#endif // ZIM_FILEHEADER_H

@ -1,90 +0,0 @@
/*
* Copyright (C) 2006 Tommi Maekitalo
*
* This program is free software; you can redistribute it and/or
* modify it under the terms of the GNU General Public License as
* published by the Free Software Foundation; either version 2 of the
* License, or (at your option) any later version.
*
* This program is distributed in the hope that it will be useful, but
* is provided AS IS, WITHOUT ANY WARRANTY; without even the implied
* warranty of MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE, and
* NON-INFRINGEMENT. See the GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software
* Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
*
*/
#ifndef ZIM_FILEIMPL_H
#define ZIM_FILEIMPL_H
#include <string>
#include <vector>
#include <map>
#include <zim/fstream.h>
#include <zim/refcounted.h>
#include <zim/zim.h>
#include <zim/fileheader.h>
#include <zim/cache.h>
#include <zim/dirent.h>
#include <zim/cluster.h>
namespace zim
{
// Reference-counted (derives from RefCounted) object bundling the state kept
// for one ZIM file: the input stream, the file header, the file name and
// caches for dirents, clusters and namespace boundaries.
// Most member functions are only declared here and defined out of line.
class FileImpl : public RefCounted
{
ifstream zimFile;
Fileheader header;
std::string filename;
// Cache of Dirent objects keyed by size_type, and of Cluster objects keyed by offset_type.
Cache<size_type, Dirent> direntCache;
Cache<offset_type, Cluster> clusterCache;
// Per-namespace-character caches of begin and end offsets.
typedef std::map<char, size_type> NamespaceCache;
NamespaceCache namespaceBeginCache;
NamespaceCache namespaceEndCache;
std::string namespaces;
// MIME type strings; looked up by index through getMimeType().
typedef std::vector<std::string> MimeTypes;
MimeTypes mimeTypes;
// NOTE(review): presumably reads the idx-th entry of the pointer table located
// at ptrOffset - confirm in the implementation.
offset_type getOffset(offset_type ptrOffset, size_type idx);
public:
explicit FileImpl(const char* fname);
// The following accessors simply forward to the stream or the header.
time_t getMTime() const { return zimFile.getMTime(); }
const std::string& getFilename() const { return filename; }
const Fileheader& getFileheader() const { return header; }
offset_type getFilesize() const { return zimFile.fsize(); }
Dirent getDirent(size_type idx);
Dirent getDirentByTitle(size_type idx);
size_type getIndexByTitle(size_type idx);
size_type getCountArticles() const { return header.getArticleCount(); }
Cluster getCluster(size_type idx);
size_type getCountClusters() const { return header.getClusterCount(); }
// Looks up entry idx via getOffset(), using the header's cluster pointer position.
offset_type getClusterOffset(size_type idx) { return getOffset(header.getClusterPtrPos(), idx); }
size_type getNamespaceBeginOffset(char ch);
size_type getNamespaceEndOffset(char ch);
// Number of entries in namespace ns, computed as end offset minus begin offset.
size_type getNamespaceCount(char ns)
{ return getNamespaceEndOffset(ns) - getNamespaceBeginOffset(ns); }
std::string getNamespaces();
bool hasNamespace(char ch);
const std::string& getMimeType(uint16_t idx) const;
std::string getChecksum();
bool verify();
};
}
#endif // ZIM_FILEIMPL_H

@ -1,107 +0,0 @@
/*
* Copyright (C) 2006 Tommi Maekitalo
*
* This program is free software; you can redistribute it and/or
* modify it under the terms of the GNU General Public License as
* published by the Free Software Foundation; either version 2 of the
* License, or (at your option) any later version.
*
* This program is distributed in the hope that it will be useful, but
* is provided AS IS, WITHOUT ANY WARRANTY; without even the implied
* warranty of MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE, and
* NON-INFRINGEMENT. See the GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software
* Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
*
*/
#ifndef ZIM_FILEITERATOR_H
#define ZIM_FILEITERATOR_H
#include <iterator>
#include <zim/article.h>
namespace zim
{
/// Bidirectional iterator over the articles of a File.
///
/// The iterator only stores an index; the Article it refers to is fetched
/// lazily on dereference and cached until the iterator is moved. The mode
/// selects whether the index is resolved with File::getArticle()
/// (UrlIterator) or File::getArticleByTitle() (ArticleIterator).
class File::const_iterator : public std::iterator<std::bidirectional_iterator_tag, Article>
{
  public:
    enum Mode {
      UrlIterator,
      ArticleIterator
    };

  private:
    File* file;
    size_type idx;
    mutable Article article;   // cache filled by operator*, reset on every move
    Mode mode;

    /// An iterator without a file, or one whose index is not below the
    /// article count, counts as an end iterator.
    bool is_end() const
    {
      if (file == 0)
        return true;
      return idx >= file->getCountArticles();
    }

  public:
    explicit const_iterator(File* file_ = 0, size_type idx_ = 0, Mode mode_ = UrlIterator)
      : file(file_), idx(idx_), mode(mode_)
    { }

    size_type getIndex() const
    { return idx; }

    /// Dereferences the stored file pointer without checking it.
    const File& getFile() const
    { return *file; }

    /// Any two end iterators compare equal; otherwise file and index must match.
    bool operator== (const const_iterator& it) const
    {
      if (is_end() && it.is_end())
        return true;
      return file == it.file && idx == it.idx;
    }

    bool operator!= (const const_iterator& it) const
    { return !(*this == it); }

    const_iterator& operator++()
    {
      ++idx;
      article = Article();   // drop the cached article
      return *this;
    }

    const_iterator operator++(int)
    {
      const_iterator previous(*this);
      ++*this;
      return previous;
    }

    const_iterator& operator--()
    {
      --idx;
      article = Article();   // drop the cached article
      return *this;
    }

    const_iterator operator--(int)
    {
      const_iterator previous(*this);
      --*this;
      return previous;
    }

    /// Loads the article for the current index on first use and returns the cached copy.
    const Article& operator*() const
    {
      if (!article.good())
      {
        if (mode == UrlIterator)
          article = file->getArticle(idx);
        else
          article = file->getArticleByTitle(idx);
      }
      return article;
    }

    pointer operator->() const
    {
      operator*();   // make sure the cache is populated
      return &article;
    }
};
}
#endif // ZIM_FILEITERATOR_H

@ -1,103 +0,0 @@
/*
* Copyright (C) 2010 Tommi Maekitalo
*
* This program is free software; you can redistribute it and/or
* modify it under the terms of the GNU General Public License as
* published by the Free Software Foundation; either version 2 of the
* License, or (at your option) any later version.
*
* This program is distributed in the hope that it will be useful, but
* is provided AS IS, WITHOUT ANY WARRANTY; without even the implied
* warranty of MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE, and
* NON-INFRINGEMENT. See the GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software
* Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
*
*/
#ifndef ZIM_FSTREAM_H
#define ZIM_FSTREAM_H
#include <iostream>
#include <vector>
#include <zim/zim.h>
#include <zim/smartptr.h>
#include <zim/cache.h>
#include <zim/refcounted.h>
namespace zim
{
class streambuf : public std::streambuf
{
struct FileInfo : public RefCounted
{
std::string fname;
zim::offset_type fsize;
FileInfo() { }
FileInfo(const std::string& fname_, int fd);
};
struct OpenfileInfo : public RefCounted
{
std::string fname;
int fd;
explicit OpenfileInfo(const std::string& fname);
~OpenfileInfo();
};
typedef SmartPtr<FileInfo> FileInfoPtr;
typedef std::vector<FileInfoPtr> FilesType;
typedef SmartPtr<OpenfileInfo> OpenfileInfoPtr;
typedef Cache<std::string, OpenfileInfoPtr> OpenFilesCacheType;
std::vector<char> buffer;
FilesType files;
OpenFilesCacheType openFilesCache;
OpenfileInfoPtr currentFile;
zim::offset_type currentPos;
std::streambuf::int_type overflow(std::streambuf::int_type ch);
std::streambuf::int_type underflow();
int sync();
void setCurrentFile(const std::string& fname, zim::offset_type off);
mutable time_t mtime;
public:
streambuf(const std::string& fname, unsigned bufsize, unsigned openFilesCache);
void seekg(zim::offset_type off);
void setBufsize(unsigned s)
{ buffer.resize(s); }
zim::offset_type fsize() const;
time_t getMTime() const;
};
/// Input stream that owns a zim::streambuf and installs it as its buffer.
///
/// The base std::istream is first constructed without a buffer because the
/// member buffer does not exist yet at that point; init() attaches it once
/// it has been constructed.
class ifstream : public std::istream
{
    streambuf myStreambuf;

  public:
    explicit ifstream(const std::string& fname, unsigned bufsize = 8192, unsigned openFilesCache = 5)
      : std::istream(0),
        myStreambuf(fname, bufsize, openFilesCache)
    {
      this->init(&myStreambuf);
    }

    // The remaining members forward to the owned stream buffer.

    void seekg(zim::offset_type off)
    {
      myStreambuf.seekg(off);
    }

    void setBufsize(unsigned s)
    {
      myStreambuf.setBufsize(s);
    }

    zim::offset_type fsize() const
    {
      return myStreambuf.fsize();
    }

    time_t getMTime() const
    {
      return myStreambuf.getMTime();
    }
};
}
#endif // ZIM_FSTREAM_H

@ -1,72 +0,0 @@
/*
* Copyright (C) 2007 Tommi Maekitalo
*
* This program is free software; you can redistribute it and/or
* modify it under the terms of the GNU General Public License as
* published by the Free Software Foundation; either version 2 of the
* License, or (at your option) any later version.
*
* This program is distributed in the hope that it will be useful, but
* is provided AS IS, WITHOUT ANY WARRANTY; without even the implied
* warranty of MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE, and
* NON-INFRINGEMENT. See the GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software
* Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
*
*/
#ifndef ZIM_INDEXARTICLE_H
#define ZIM_INDEXARTICLE_H
#include <zim/article.h>
#include <vector>
namespace zim
{
/// Article that additionally exposes index entries grouped into four categories.
///
/// Every accessor calls readEntries() first; the reading itself is
/// implemented out of line.
class IndexArticle : public Article
{
  public:
    /// One index entry: an index value and a position value.
    struct Entry
    {
      unsigned index;
      unsigned pos;
    };
    typedef std::vector<Entry> EntriesType;

  private:
    EntriesType entries[4];
    bool categoriesRead;

    void readEntries();
    void readEntriesZ();  // directmedia style zint-compression
    void readEntriesB();  // article compressed style

    static bool noOffset;

  public:
    IndexArticle(const Article& article)
      : Article(article), categoriesRead(false)
    { }

    /// Number of entries in category cat; cat is used as an unchecked array index.
    unsigned getCategoryCount(unsigned cat)
    {
      readEntries();
      return entries[cat].size();
    }

    /// Entries of category cat; cat is used as an unchecked array index.
    const EntriesType& getCategory(unsigned cat)
    {
      readEntries();
      return entries[cat];
    }

    /// Sum of the entry counts of all four categories.
    unsigned getTotalCount()
    {
      readEntries();
      unsigned total = 0;
      for (const EntriesType* bucket = entries; bucket != entries + 4; ++bucket)
        total += bucket->size();
      return total;
    }

    static void setNoOffset(bool sw = true)
    { noOffset = sw; }

    static bool getNoOffset()
    { return noOffset; }
};
}
#endif // ZIM_INDEXARTICLE_H

@ -1,94 +0,0 @@
/*
* Copyright (C) 2009 Tommi Maekitalo
*
* This program is free software; you can redistribute it and/or
* modify it under the terms of the GNU General Public License as
* published by the Free Software Foundation; either version 2 of the
* License, or (at your option) any later version.
*
* This program is distributed in the hope that it will be useful, but
* is provided AS IS, WITHOUT ANY WARRANTY; without even the implied
* warranty of MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE, and
* NON-INFRINGEMENT. See the GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software
* Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
*
*/
#ifndef ZIM_LZMASTREAM_H
#define ZIM_LZMASTREAM_H
#include <iostream>
#include <stdexcept>
#include <lzma.h>
#include <vector>
namespace zim
{
/// Runtime error that carries an lzma_ret code in addition to its message.
class LzmaError : public std::runtime_error
{
    lzma_ret code;

  public:
    LzmaError(lzma_ret ret_, const std::string& msg)
      : std::runtime_error(msg), code(ret_)
    { }

    /// The lzma_ret value passed at construction.
    lzma_ret getRetcode() const
    {
      return code;
    }
};
// Stream buffer holding an lzma_stream, an output buffer and a pointer to a
// sink stream buffer. Construction, destruction and the std::streambuf
// virtuals are defined out of line.
// NOTE(review): presumably compresses written data and forwards it to the sink - confirm in the .cpp.
class LzmaStreamBuf : public std::streambuf
{
lzma_stream stream;
std::vector<char_type> obuffer;
std::streambuf* sink;
public:
// Defaults: preset 3 combined with LZMA_PRESET_EXTREME, CRC32 check, buffer size 8192.
LzmaStreamBuf(std::streambuf* sink_,
uint32_t preset = 3 | LZMA_PRESET_EXTREME,
lzma_check check = LZMA_CHECK_CRC32 /* LZMA_CHECK_NONE */,
unsigned bufsize = 8192);
~LzmaStreamBuf();
/// see std::streambuf
int_type overflow(int_type c);
/// see std::streambuf
int_type underflow();
/// see std::streambuf
int sync();
/// end stream
int end();
// Replaces the sink pointer; the previous sink is not touched here.
void setSink(std::streambuf* sink_) { sink = sink_; }
};
/// Output stream that owns an LzmaStreamBuf and installs it as its buffer.
///
/// The sink can be given either as a raw stream buffer or as an ostream,
/// whose rdbuf() is then used.
class LzmaStream : public std::ostream
{
    LzmaStreamBuf streambuf;

  public:
    explicit LzmaStream(std::streambuf* sink,
                        uint32_t preset = 3 | LZMA_PRESET_EXTREME,
                        lzma_check check = LZMA_CHECK_CRC32 /* LZMA_CHECK_NONE */,
                        unsigned bufsize = 8192)
      : std::ostream(0),
        streambuf(sink, preset, check, bufsize)
    {
      this->init(&streambuf);
    }

    explicit LzmaStream(std::ostream& sink,
                        uint32_t preset = 3 | LZMA_PRESET_EXTREME,
                        lzma_check check = LZMA_CHECK_CRC32 /* LZMA_CHECK_NONE */,
                        unsigned bufsize = 8192)
      : std::ostream(0),
        streambuf(sink.rdbuf(), preset, check, bufsize)
    {
      this->init(&streambuf);
    }

    /// Defined out of line.
    void end();

    /// Redirects the owned buffer to another sink.
    void setSink(std::streambuf* sink)
    {
      streambuf.setSink(sink);
    }

    /// Same as above, using the stream buffer of the given ostream.
    void setSink(std::ostream& sink)
    {
      setSink(sink.rdbuf());
    }
};
}
#endif // ZIM_LZMASTREAM_H

@ -1,45 +0,0 @@
/*
* Copyright (C) 2006 Tommi Maekitalo
*
* This library is free software; you can redistribute it and/or
* modify it under the terms of the GNU Lesser General Public
* License as published by the Free Software Foundation; either
* version 2.1 of the License, or (at your option) any later version.
*
* As a special exception, you may use this file as part of a free
* software library without restriction. Specifically, if other files
* instantiate templates or use macros or inline functions from this
* file, or you compile this file and link it with other files to
* produce an executable, this file does not by itself cause the
* resulting executable to be covered by the GNU General Public
* License. This exception does not however invalidate any other
* reasons why the executable file might be covered by the GNU Library
* General Public License.
*
* This library is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public
* License along with this library; if not, write to the Free Software
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
*/
#ifndef ZIM_NONCOPYABLE_H
#define ZIM_NONCOPYABLE_H
namespace zim
{
/// Base class for types that must not be copied.
///
/// The copy constructor and the copy assignment operator are private and
/// intentionally left without a definition.
class NonCopyable
{
  public:
    NonCopyable()
    { }

  private:
    NonCopyable(const NonCopyable&);             // no implementation
    NonCopyable& operator=(const NonCopyable&);  // no implementation
};
}
#endif // ZIM_NONCOPYABLE_H

@ -1,59 +0,0 @@
/*
* Copyright (C) 2005 Tommi Maekitalo
*
* This library is free software; you can redistribute it and/or
* modify it under the terms of the GNU Lesser General Public
* License as published by the Free Software Foundation; either
* version 2.1 of the License, or (at your option) any later version.
*
* As a special exception, you may use this file as part of a free
* software library without restriction. Specifically, if other files
* instantiate templates or use macros or inline functions from this
* file, or you compile this file and link it with other files to
* produce an executable, this file does not by itself cause the
* resulting executable to be covered by the GNU General Public
* License. This exception does not however invalidate any other
* reasons why the executable file might be covered by the GNU Library
* General Public License.
*
* This library is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public
* License along with this library; if not, write to the Free Software
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
*/
#ifndef ZIM_REFCOUNTED_H
#define ZIM_REFCOUNTED_H
#include <zim/noncopyable.h>
namespace zim
{
/// Base class providing a reference counter for use with SmartPtr-like holders.
///
/// The counter is a plain unsigned; release() deletes the object when the
/// counter reaches zero after being decremented.
class RefCounted : private NonCopyable
{
    unsigned rc;

  public:
    RefCounted()
      : rc(0)
    { }

    explicit RefCounted(unsigned refs_)
      : rc(refs_)
    { }

    virtual ~RefCounted()
    { }

    /// Increments the counter and returns the new value.
    virtual unsigned addRef()
    {
      rc += 1;
      return rc;
    }

    /// Decrements the counter; the object destroys itself when it hits zero.
    virtual void release()
    {
      rc -= 1;
      if (rc == 0)
        delete this;
    }

    /// Current counter value.
    unsigned refs() const
    {
      return rc;
    }
};
}
#endif // ZIM_REFCOUNTED_H

@ -1,122 +0,0 @@
/*
* Copyright (C) 2007 Tommi Maekitalo
*
* This program is free software; you can redistribute it and/or
* modify it under the terms of the GNU General Public License as
* published by the Free Software Foundation; either version 2 of the
* License, or (at your option) any later version.
*
* This program is distributed in the hope that it will be useful, but
* is provided AS IS, WITHOUT ANY WARRANTY; without even the implied
* warranty of MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE, and
* NON-INFRINGEMENT. See the GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software
* Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
*
*/
#ifndef ZIM_SEARCH_H
#define ZIM_SEARCH_H
#include <zim/article.h>
#include <vector>
#include <map>
namespace zim
{
// One search hit: an Article together with a priority value and the words
// recorded for it via foundWord(). getPriority() and foundWord() are defined
// out of line.
class SearchResult
{
Article article;
// mutable so that the const getPriority() can modify it.
// NOTE(review): presumably a lazily computed cache - confirm in the .cpp.
mutable double priority;
// Per-word bookkeeping; a fresh entry starts with count 0 and addweight 1.
struct WordAttr
{
unsigned count;
unsigned addweight;
WordAttr() : count(0), addweight(1) { }
};
typedef std::map<std::string, WordAttr> WordListType; // map word => count and addweight
typedef std::map<size_type, std::string> PosListType; // map position => word
WordListType wordList;
PosListType posList;
public:
SearchResult() : priority(0) { }
// The unsigned priority_ argument is converted to the double member.
explicit SearchResult(const Article& article_, unsigned priority_ = 0)
: article(article_),
priority(priority_)
{ }
const Article& getArticle() const { return article; }
double getPriority() const;
void foundWord(const std::string& word, size_type pos, unsigned addweight);
// Number of distinct words / positions recorded in the two maps.
unsigned getCountWords() const { return wordList.size(); }
unsigned getCountPositions() const { return posList.size(); }
};
// Search front end working on an index File and an article File.
// search() and find() are defined out of line; the weights and the search
// limit are class-wide static settings with plain getters and setters.
class Search
{
public:
// Result list: a vector of SearchResult plus the expression string that was set for it.
class Results : public std::vector<SearchResult>
{
std::string expr;
public:
void setExpression(const std::string& e)
{ expr = e; }
const std::string& getExpression() const
{ return expr; }
};
private:
// NOTE(review): presumably tuning factors for result ranking - their exact
// meaning is not visible in this header; confirm in the .cpp.
static double weightOcc;
static double weightOccOff;
static double weightPlus;
static double weightDist;
static double weightPos;
static double weightPosRel;
static double weightDistinctWords;
static unsigned searchLimit;
File indexfile;
File articlefile;
public:
Search()
{ }
// Uses the same file as both index file and article file.
explicit Search(const File& zimfile)
: indexfile(zimfile),
articlefile(zimfile)
{ }
// Note the parameter order: article file first, index file second.
Search(const File& articlefile_, const File& indexfile_)
: indexfile(indexfile_),
articlefile(articlefile_)
{ }
void search(Results& results, const std::string& expr);
// Both find() overloads default their limit to the static searchLimit.
void find(Results& results, char ns, const std::string& praefix, unsigned limit = searchLimit);
void find(Results& results, char ns, const std::string& begin, const std::string& end, unsigned limit = searchLimit);
// Getters for the static settings.
static double getWeightOcc() { return weightOcc; }
static double getWeightOccOff() { return weightOccOff; }
static double getWeightPlus() { return weightPlus; }
static double getWeightDist() { return weightDist; }
static double getWeightPos() { return weightPos; }
static double getWeightPosRel() { return weightPosRel; }
static double getWeightDistinctWords() { return weightDistinctWords; }
static unsigned getSearchLimit() { return searchLimit; }
// Setters for the static settings; they affect every Search instance.
static void setWeightOcc(double v) { weightOcc = v; }
static void setWeightOccOff(double v) { weightOccOff = v; }
static void setWeightPlus(double v) { weightPlus = v; }
static void setWeightDist(double v) { weightDist = v; }
static void setWeightPos(double v) { weightPos = v; }
static void setWeightPosRel(double v) { weightPosRel = v; }
static void setWeightDistinctWords(double v) { weightDistinctWords = v; }
static void setSearchLimit(unsigned v) { searchLimit = v; }
};
}
#endif // ZIM_SEARCH_H

@ -1,87 +0,0 @@
/*
* Copyright (C) 2005 Tommi Maekitalo
*
* This library is free software; you can redistribute it and/or
* modify it under the terms of the GNU Lesser General Public
* License as published by the Free Software Foundation; either
* version 2.1 of the License, or (at your option) any later version.
*
* As a special exception, you may use this file as part of a free
* software library without restriction. Specifically, if other files
* instantiate templates or use macros or inline functions from this
* file, or you compile this file and link it with other files to
* produce an executable, this file does not by itself cause the
* resulting executable to be covered by the GNU General Public
* License. This exception does not however invalidate any other
* reasons why the executable file might be covered by the GNU Library
* General Public License.
*
* This library is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public
* License along with this library; if not, write to the Free Software
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
*/
#ifndef ZIM_SMARTPTR_H
#define ZIM_SMARTPTR_H
namespace zim
{
/// Intrusive reference-counting smart pointer.
///
/// objectType must provide addRef() and release(); SmartPtr calls addRef()
/// whenever it starts holding a non-null object and release() whenever it
/// stops holding one.
template <typename objectType>
class SmartPtr
{
    objectType* object;

  public:
    /// Creates an empty (null) pointer.
    SmartPtr()
      : object(0)
    {}

    /// Takes a reference on ptr if it is not null.
    SmartPtr(objectType* ptr)
      : object(ptr)
    { if (object) object->addRef(); }

    /// Shares the object held by ptr.
    SmartPtr(const SmartPtr& ptr)
      : object(ptr.object)
    { if (object) object->addRef(); }

    ~SmartPtr()
    { if (object) object->release(); }

    /// Replaces the held object with the one held by ptr.
    ///
    /// The new object is referenced before the old one is released. Releasing
    /// the old object may destroy it, and with it the SmartPtr that ptr refers
    /// to (e.g. `p = p->next`), so ptr must not be touched after the release
    /// and the new object must already be kept alive by then.
    SmartPtr& operator= (const SmartPtr& ptr)
    {
      if (object != ptr.object)
      {
        objectType* incoming = ptr.object;
        if (incoming)
          incoming->addRef();

        objectType* outgoing = object;
        object = incoming;

        if (outgoing)
          outgoing->release();
      }
      return *this;
    }

    /// The object can be dereferenced like the held object
    objectType* operator->() const { return object; }
    /// The object can be dereferenced like the held object
    objectType& operator*() const { return *object; }

    // Comparisons against a raw pointer compare the held address.
    bool operator== (const objectType* p) const { return object == p; }
    bool operator!= (const objectType* p) const { return object != p; }
    bool operator< (const objectType* p) const { return object < p; }

    // Null checks.
    bool operator! () const { return object == 0; }
    operator bool () const { return object != 0; }

    // Access to the raw pointer; the reference count is not changed.
    objectType* getPointer() { return object; }
    const objectType* getPointer() const { return object; }
    operator objectType* () { return object; }
    operator const objectType* () const { return object; }
};
}
#endif // ZIM_SMARTPTR_H

@ -1,83 +0,0 @@
/*
* Copyright (C) 2009 Tommi Maekitalo
*
* This program is free software; you can redistribute it and/or
* modify it under the terms of the GNU General Public License as
* published by the Free Software Foundation; either version 2 of the
* License, or (at your option) any later version.
*
* This program is distributed in the hope that it will be useful, but
* is provided AS IS, WITHOUT ANY WARRANTY; without even the implied
* warranty of MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE, and
* NON-INFRINGEMENT. See the GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software
* Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
*
*/
#ifndef ZIM_TEMPLATE_H
#define ZIM_TEMPLATE_H
#include <string>
namespace zim
{
/// Character-driven parser that reports what it finds to an Event handler.
///
/// Input is fed one character at a time (or as a whole string) and dispatched
/// to the current state function through a member-function pointer. The state
/// functions and flush() are defined out of line.
class TemplateParser
{
  public:
    /// Callback interface implemented by users of the parser.
    class Event
    {
      public:
        /// Virtual so that handlers can be destroyed safely through an Event pointer.
        virtual ~Event() { }
        virtual void onData(const std::string& data) = 0;
        virtual void onToken(const std::string& token) = 0;
        virtual void onLink(char ns, const std::string& url) = 0;
    };

  private:
    Event* event;
    std::string data;
    std::string::size_type save;
    std::string::size_type token;
    std::string::size_type token_e;
    char ns;

    typedef void (TemplateParser::*state_type)(char);
    state_type state;

    void state_data(char ch);
    void state_lt(char ch);
    void state_token0(char ch);
    void state_token(char ch);
    void state_token_end(char ch);
    void state_link0(char ch);
    void state_link(char ch);
    void state_title(char ch);
    void state_title_end(char ch);

  public:
    /// Starts in the data state. The position markers and the namespace
    /// character are zero-initialized so that no member is left indeterminate.
    explicit TemplateParser(Event* ev)
      : event(ev),
        save(0),
        token(0),
        token_e(0),
        ns('\0'),
        state(&TemplateParser::state_data)
    { }

    /// Feeds a single character to the current state function.
    void parse(char ch)
    {
      (this->*state)(ch);
    }

    /// Feeds every character of s, in order.
    void parse(const std::string& s)
    {
      for (std::string::const_iterator ch = s.begin(); ch != s.end(); ++ch)
        parse(*ch);
    }

    void flush();
};
}
#endif // ZIM_TEMPLATE_H

@ -1,95 +0,0 @@
/*
* Copyright (C) 2008 Tommi Maekitalo
*
* This program is free software; you can redistribute it and/or
* modify it under the terms of the GNU General Public License as
* published by the Free Software Foundation; either version 2 of the
* License, or (at your option) any later version.
*
* This program is distributed in the hope that it will be useful, but
* is provided AS IS, WITHOUT ANY WARRANTY; without even the implied
* warranty of MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE, and
* NON-INFRINGEMENT. See the GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software
* Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
*
*/
#include <locale>
#include <zim/zim.h>
namespace zim
{
// Case conversion and classification for 32-bit character values.
// tolower(), toupper() and ctypeMask() are defined out of line; every
// predicate below tests one std::ctype_base bit in the mask that
// ctypeMask() returns for the character.
uint32_t tolower(uint32_t ucs);
uint32_t toupper(uint32_t ucs);
std::ctype_base::mask ctypeMask(uint32_t ch);

inline bool isalpha(uint32_t ch)
{
  return (ctypeMask(ch) & std::ctype_base::alpha) != 0;
}

inline bool isalnum(uint32_t ch)
{
  return (ctypeMask(ch) & std::ctype_base::alnum) != 0;
}

inline bool ispunct(uint32_t ch)
{
  return (ctypeMask(ch) & std::ctype_base::punct) != 0;
}

inline bool iscntrl(uint32_t ch)
{
  return (ctypeMask(ch) & std::ctype_base::cntrl) != 0;
}

inline bool isdigit(uint32_t ch)
{
  return (ctypeMask(ch) & std::ctype_base::digit) != 0;
}

inline bool isxdigit(uint32_t ch)
{
  return (ctypeMask(ch) & std::ctype_base::xdigit) != 0;
}

inline bool isgraph(uint32_t ch)
{
  return (ctypeMask(ch) & std::ctype_base::graph) != 0;
}

inline bool islower(uint32_t ch)
{
  return (ctypeMask(ch) & std::ctype_base::lower) != 0;
}

inline bool isupper(uint32_t ch)
{
  return (ctypeMask(ch) & std::ctype_base::upper) != 0;
}

inline bool isprint(uint32_t ch)
{
  return (ctypeMask(ch) & std::ctype_base::print) != 0;
}

inline bool isspace(uint32_t ch)
{
  return (ctypeMask(ch) & std::ctype_base::space) != 0;
}
}

@ -1,91 +0,0 @@
/*
* Copyright (C) 2009 Tommi Maekitalo
*
* This library is free software; you can redistribute it and/or
* modify it under the terms of the GNU Lesser General Public
* License as published by the Free Software Foundation; either
* version 2.1 of the License, or (at your option) any later version.
*
* This library is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public
* License along with this library; if not, write to the Free Software
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
*
*/
#ifndef ZIM_UNLZMASTREAM_H
#define ZIM_UNLZMASTREAM_H
#include <iostream>
#include <stdexcept>
#include <lzma.h>
namespace zim
{
/// Runtime error that carries an lzma_ret code in addition to its message.
class UnlzmaError : public std::runtime_error
{
    lzma_ret code;

  public:
    UnlzmaError(lzma_ret ret_, const std::string& msg)
      : std::runtime_error(msg), code(ret_)
    { }

    /// The lzma_ret value passed at construction.
    lzma_ret getRetcode() const
    {
      return code;
    }
};
/// Stream buffer holding an lzma_stream and one I/O buffer that is split
/// into two halves: the first half serves as input buffer, the second as
/// output buffer. Construction, destruction and the std::streambuf virtuals
/// are defined out of line.
class UnlzmaStreamBuf : public std::streambuf
{
    lzma_stream stream;
    char_type* iobuffer;
    unsigned bufsize;
    std::streambuf* sinksource;

    /// Start of the input half (the beginning of the buffer).
    char_type* ibuffer()
    {
      return iobuffer;
    }

    /// Size of the input half: half of the total buffer size, rounded down.
    std::streamsize ibuffer_size()
    {
      return bufsize / 2;
    }

    /// Start of the output half, directly behind the input half.
    char_type* obuffer()
    {
      char_type* const base = iobuffer;
      return base + ibuffer_size();
    }

    /// Size of the output half: half of the total buffer size, rounded down.
    std::streamsize obuffer_size()
    {
      return bufsize / 2;
    }

  public:
    explicit UnlzmaStreamBuf(std::streambuf* sinksource_, unsigned bufsize = 8192);
    ~UnlzmaStreamBuf();

    /// see std::streambuf
    int_type overflow(int_type c);
    /// see std::streambuf
    int_type underflow();
    /// see std::streambuf
    int sync();

    /// Replaces the stream buffer used as sink/source.
    void setSinksource(std::streambuf* sinksource_)
    {
      sinksource = sinksource_;
    }
};
/// Bidirectional stream that owns an UnlzmaStreamBuf and installs it as its
/// buffer. The sink/source can be given as a raw stream buffer or as any
/// std::ios, whose rdbuf() is then used.
class UnlzmaStream : public std::iostream
{
    UnlzmaStreamBuf streambuf;

  public:
    explicit UnlzmaStream(std::streambuf* sinksource, unsigned bufsize = 8192)
      : std::iostream(0),
        streambuf(sinksource, bufsize)
    {
      this->init(&streambuf);
    }

    explicit UnlzmaStream(std::ios& sinksource, unsigned bufsize = 8192)
      : std::iostream(0),
        streambuf(sinksource.rdbuf(), bufsize)
    {
      this->init(&streambuf);
    }

    /// Redirects the owned buffer to another sink/source.
    void setSinksource(std::streambuf* sinksource)
    {
      streambuf.setSinksource(sinksource);
    }

    // The overloads below all hand the rdbuf() of the given stream to the
    // pointer overload above.

    void setSinksource(std::ios& sinksource)
    {
      setSinksource(sinksource.rdbuf());
    }

    void setSink(std::ostream& sink)
    {
      setSinksource(sink.rdbuf());
    }

    void setSource(std::istream& source)
    {
      setSinksource(source.rdbuf());
    }
};
}
#endif // ZIM_UNLZMASTREAM_H

@ -1,54 +0,0 @@
/*
* Copyright (C) 2009 Tommi Maekitalo
*
* This program is free software; you can redistribute it and/or
* modify it under the terms of the GNU General Public License as
* published by the Free Software Foundation; either version 2 of the
* License, or (at your option) any later version.
*
* This program is distributed in the hope that it will be useful, but
* is provided AS IS, WITHOUT ANY WARRANTY; without even the implied
* warranty of MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE, and
* NON-INFRINGEMENT. See the GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software
* Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
*
*/
#ifndef ZIM_UUID_H
#define ZIM_UUID_H
#include <iosfwd>
#include <algorithm>
#include <cstring>
namespace zim
{
/// 16-byte identifier stored as a plain char array.
struct Uuid
{
    /// Creates an all-zero identifier.
    Uuid()
    {
      std::fill(data, data + 16, '\0');
    }

    /// Copies 16 bytes from uuid; the caller must provide at least that many.
    Uuid(const char uuid[16])
    {
      for (unsigned i = 0; i < 16; ++i)
        data[i] = uuid[i];
    }

    /// Defined out of line.
    static Uuid generate();

    /// Two identifiers are equal when all 16 bytes match.
    bool operator== (const Uuid& other) const
    {
      return std::memcmp(data, other.data, 16) == 0;
    }

    /// Always 16.
    unsigned size() const
    {
      return static_cast<unsigned>(sizeof(data));
    }

    char data[16];
};
// Stream insertion for Uuid; defined out of line, so the output format is not visible here.
std::ostream& operator<< (std::ostream& out, const Uuid& uuid);
}
#endif // ZIM_UUID_H

@ -1,130 +0,0 @@
/*
* Copyright (C) 2006 Tommi Maekitalo
*
* This program is free software; you can redistribute it and/or
* modify it under the terms of the GNU General Public License as
* published by the Free Software Foundation; either version 2 of the
* License, or (at your option) any later version.
*
* This program is distributed in the hope that it will be useful, but
* is provided AS IS, WITHOUT ANY WARRANTY; without even the implied
* warranty of MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE, and
* NON-INFRINGEMENT. See the GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software
* Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
*
*/
#ifndef ZIM_ZIM_H
#define ZIM_ZIM_H
#include <limits.h>
// Fixed-width integer types, the library's size/offset types, the
// compression enumeration and the HTML template MIME string.
//
// For each width the typedefs are chosen from the limits in <limits.h>.
// When no built-in type matches, the namespace is closed, <stdint.h> is
// included at global scope and the namespace is reopened, so that the
// global fixed-width names are found by ordinary lookup from inside zim.
namespace zim
{
  // define 8 bit integer types
  //
  // int8_t is explicitly `signed char`: the signedness of plain `char` is
  // implementation-defined, and it is unsigned on several common targets.
  typedef unsigned char uint8_t;
  typedef signed char int8_t;

  // define 16 bit integer types
  //
#if USHRT_MAX == 0xffff
  typedef unsigned short uint16_t;
  typedef short int16_t;
#elif UINT_MAX == 0xffff
  typedef unsigned int uint16_t;
  typedef int int16_t;
#elif ULONG_MAX == 0xffff
  typedef unsigned long uint16_t;
  typedef long int16_t;
#else
}
#include <stdint.h>
namespace zim
{
#endif

  // define 32 bit integer types
  //
#if USHRT_MAX == 0xffffffffUL
  typedef unsigned short uint32_t;
  typedef short int32_t;
#elif UINT_MAX == 0xffffffffUL
  typedef unsigned int uint32_t;
  typedef int int32_t;
#elif ULONG_MAX == 0xffffffffUL
  typedef unsigned long uint32_t;
  typedef long int32_t;
#else
}
#include <stdint.h>
namespace zim
{
#endif

  // define 64 bit integer types
  //
#if UINT_MAX == 18446744073709551615ULL
  typedef unsigned int uint64_t;
  typedef int int64_t;
#elif ULONG_MAX == 18446744073709551615ULL
  typedef unsigned long uint64_t;
  typedef long int64_t;
#elif ULLONG_MAX == 18446744073709551615ULL
  typedef unsigned long long uint64_t;
  typedef long long int64_t;
#else
}
#include <stdint.h>
namespace zim
{
#endif

  // Index/count type used throughout the library (32 bit).
  typedef uint32_t size_type;

  // File offset type. Note that it is the signed __int64 on Windows and the
  // unsigned 64 bit type everywhere else.
#ifdef _WIN32
  typedef __int64 offset_type;
#else
  typedef uint64_t offset_type;
#endif

  // Compression identifiers; the enumerators take the values 0 to 4 in
  // declaration order.
  enum CompressionType
  {
    zimcompDefault,
    zimcompNone,
    zimcompZip,
    zimcompBzip2,
    zimcompLzma
  };

  // MIME type string for HTML templates. Being `static`, every translation
  // unit that includes this header gets its own copy.
  static const char MimeHtmlTemplate[] = "text/x-zim-htmltemplate";
}
#endif // ZIM_ZIM_H

@ -1,98 +0,0 @@
/*
* Copyright (C) 2007 Tommi Maekitalo
*
* This program is free software; you can redistribute it and/or
* modify it under the terms of the GNU General Public License as
* published by the Free Software Foundation; either version 2 of the
* License, or (at your option) any later version.
*
* This program is distributed in the hope that it will be useful, but
* is provided AS IS, WITHOUT ANY WARRANTY; without even the implied
* warranty of MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE, and
* NON-INFRINGEMENT. See the GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software
* Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
*
*/
#ifndef ZIM_ZINTSTREAM_H
#define ZIM_ZINTSTREAM_H
#include <string>
#include <iostream>
#include <zim/zim.h>
/*
ZInt implements an int compressor and decompressor. The algorithm compresses
small values into fewer bytes.
The idea is to add information about used bytes in the first byte. The number
of additional bytes used is specified by the number of set bits counted from
the most significant bit. So the numbers 0-127 are encoded as is, since they
fit into the 7 low order bits and the high order bit specifies, that no
additional bytes are used. The number starting from 128 up to 16383 need more
than 7 bits, so we need to set the highest order bit to 1 and the next bit to
0, leaving 6 bits of actual data, which is used as the low order bits of the
number.
Since the numbers 0-127 are already encoded in one byte, 128 is
subtracted from the actual number, so a 2 byte zero is actually a 128.
The same logic continues on the 3rd, 4th, ... byte. Up to 7 additional bytes
are used, so the first byte must contain at least one 0.
binary range
------------------------------- --------------------------------------------------
0xxx xxxx 0 - 127
10xx xxxx xxxx xxxx 128 - (2^14+128-1 = 16511)
110x xxxx xxxx xxxx xxxx xxxx 16512 - (2^21+16512-1 = 2113663)
1110 xxxx xxxx xxxx xxxx xxxx xxxx xxxx
2113664 - (2^28+2113664-1 = 270549119)
...
*/
namespace zim
{
/// Stream adapter for ZInt-compressed integers.  Depending on the constructor
/// used it is attached to an input stream, an output stream, or both.
class ZIntStream
{
    std::istream* _istream;   // input side; 0 when constructed for compression only
    std::ostream* _ostream;   // output side; 0 when constructed for decompression only

  public:
    /// prepare ZIntStream for compression or decompression
    explicit ZIntStream(std::iostream& stream)
      : _istream(&stream),
        _ostream(&stream)
    { }

    /// prepare ZIntStream for decompression
    explicit ZIntStream(std::istream& in)
      : _istream(&in),
        _ostream(0)
    { }

    /// prepare ZIntStream for compression
    explicit ZIntStream(std::ostream& out)
      : _istream(0),
        _ostream(&out)
    { }

    /// decompresses one value from input stream and returns it
    size_type get();

    /// decompresses one value into value and returns *this
    ZIntStream& get(size_type &value)
    {
      value = get();
      return *this;
    }

    /// compresses one value to output stream
    ZIntStream& put(size_type value);

    /// true unless one of the attached streams is in a failed state
    operator bool() const
    {
      if (_istream != 0 && !*_istream)
        return false;
      if (_ostream != 0 && !*_ostream)
        return false;
      return true;
    }
};
}
#endif // ZIM_ZINTSTREAM_H

@ -1,139 +0,0 @@
/*
* Copyright (C) 2006 Tommi Maekitalo
*
* This program is free software; you can redistribute it and/or
* modify it under the terms of the GNU General Public License as
* published by the Free Software Foundation; either version 2 of the
* License, or (at your option) any later version.
*
* This program is distributed in the hope that it will be useful, but
* is provided AS IS, WITHOUT ANY WARRANTY; without even the implied
* warranty of MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE, and
* NON-INFRINGEMENT. See the GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software
* Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
*
*/
#include <zim/article.h>
#include <zim/template.h>
#include <sstream>
#include <iostream>
#include <stdexcept>
#include "log.h"
log_define("zim.article")
namespace zim
{
/// Returns the size of the blob this article's dirent points to, as reported
/// by the cluster holding it.
size_type Article::getArticleSize() const
{
  Dirent entry = getDirent();
  return file.getCluster(entry.getClusterNumber()).getBlobSize(entry.getBlobNumber());
}
namespace
{
/// Template parser event sink which renders an article into an output stream.
class Ev : public TemplateParser::Event
{
    std::ostream& out;      // destination of the rendered page
    Article& article;       // article whose data replaces the template tokens
    unsigned maxRecurse;    // remaining depth for nested getPage calls

  public:
    Ev(std::ostream& sink, Article& subject, unsigned depth)
      : out(sink),
        article(subject),
        maxRecurse(depth)
    { }

    /// called with literal template text
    void onData(const std::string& data);
    /// called with the name of a token found in the template
    void onToken(const std::string& token);
    /// called with namespace and url of an article to embed
    void onLink(char ns, const std::string& url);
};
/// Literal template text is copied to the output unchanged.
void Ev::onData(const std::string& text)
{
  out << text;
}
/// Replaces a template token by the matching article property.
/// "content" renders the article itself (without layout) one recursion level
/// deeper and throws std::runtime_error when no recursion budget is left.
/// Unknown tokens are written back in their <%...%> form.
void Ev::onToken(const std::string& token)
{
  log_trace("onToken(\"" << token << "\")");

  if (token == "title")
  {
    out << article.getTitle();
    return;
  }

  if (token == "url")
  {
    out << article.getUrl();
    return;
  }

  if (token == "namespace")
  {
    out << article.getNamespace();
    return;
  }

  if (token == "content")
  {
    if (maxRecurse == 0)
      throw std::runtime_error("maximum recursive limit is reached");

    article.getPage(out, false, maxRecurse - 1);
    return;
  }

  log_warn("unknown token \"" << token << "\" found in template");
  out << "<%" << token << "%>";
}
void Ev::onLink(char ns, const std::string& url)
{
if (maxRecurse <= 0)
throw std::runtime_error("maximum recursive limit is reached");
article.getFile().getArticle(ns, url).getPage(out, false, maxRecurse - 1);
}
}
/// Renders the page into a string; see the std::ostream overload for the
/// meaning of layout and maxRecurse.
std::string Article::getPage(bool layout, unsigned maxRecurse)
{
  std::ostringstream rendered;
  getPage(rendered, layout, maxRecurse);
  return rendered.str();
}
void Article::getPage(std::ostream& out, bool layout, unsigned maxRecurse)
{
log_trace("Article::getPage(" << layout << ", " << maxRecurse << ')');
if (getMimeType().compare(0, 9, "text/html") == 0 || getMimeType() == MimeHtmlTemplate)
{
if (layout && file.getFileheader().hasLayoutPage())
{
Article layoutPage = file.getArticle(file.getFileheader().getLayoutPage());
Blob data = layoutPage.getData();
Ev ev(out, *this, maxRecurse);
log_debug("call template parser");
TemplateParser parser(&ev);
for (const char* p = data.data(); p != data.end(); ++p)
parser.parse(*p);
parser.flush();
return;
}
else if (getMimeType() == MimeHtmlTemplate)
{
Blob data = getData();
Ev ev(out, *this, maxRecurse);
TemplateParser parser(&ev);
for (const char* p = data.data(); p != data.end(); ++p)
parser.parse(*p);
parser.flush();
return;
}
}
// default case - template cases has return above
out << getData();
}
}

@ -1,52 +0,0 @@
/*
* Copyright (C) 2007 Tommi Maekitalo
*
* This program is free software; you can redistribute it and/or
* modify it under the terms of the GNU General Public License as
* published by the Free Software Foundation; either version 2 of the
* License, or (at your option) any later version.
*
* This program is distributed in the hope that it will be useful, but
* is provided AS IS, WITHOUT ANY WARRANTY; without even the implied
* warranty of MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE, and
* NON-INFRINGEMENT. See the GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software
* Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
*
*/
#include <zim/articlesearch.h>
namespace zim
{
/// Collects every article of the file whose title contains expr as a
/// substring (plain, case sensitive std::string::find match).
ArticleSearch::Results ArticleSearch::search(const std::string& expr)
{
  Results matches;

  // TODO: implement title-cache
#if 0
  if (titles.empty())
  {
    for (File::const_iterator it = articleFile.begin(); it != articleFile.end(); ++it)
    {
      if (article.isMainArticle()
        && article.getLibraryMimeType() == zim::Dirent::zimMimeTextHtml
        && article.getNamespace() == 'A')
      {
        titles.push_back(article.getTitle());
      }
    }
  }
#endif

  File::const_iterator cur = articleFile.begin();
  while (cur != articleFile.end())
  {
    if (cur->getTitle().find(expr) != std::string::npos)
      matches.push_back(*cur);
    ++cur;
  }

  return matches;
}
}

@ -1,317 +0,0 @@
/*
* Copyright (C) 2009 Tommi Maekitalo
*
* This program is free software; you can redistribute it and/or
* modify it under the terms of the GNU General Public License as
* published by the Free Software Foundation; either version 2 of the
* License, or (at your option) any later version.
*
* This program is distributed in the hope that it will be useful, but
* is provided AS IS, WITHOUT ANY WARRANTY; without even the implied
* warranty of MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE, and
* NON-INFRINGEMENT. See the GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software
* Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
*
*/
#include <zim/cluster.h>
#include <zim/blob.h>
#include <zim/endian.h>
#include <stdlib.h>
#include <sstream>
#include "log.h"
#include "config.h"
#ifdef ENABLE_ZLIB
#include <zim/deflatestream.h>
#include <zim/inflatestream.h>
#endif
#ifdef ENABLE_BZIP2
#include <zim/bzip2stream.h>
#include <zim/bunzip2stream.h>
#endif
#ifdef ENABLE_LZMA
#include <zim/lzmastream.h>
#include <zim/unlzmastream.h>
#endif
log_define("zim.cluster")
#define log_debug1(e)
namespace zim
{
/// Creates a cluster handle without an implementation object; getImpl()
/// allocates one on first use.
Cluster::Cluster() : impl(0) { }
/// Returns the implementation object, creating it first if there is none yet.
ClusterImpl* Cluster::getImpl()
{
  const bool missing = (impl.getPointer() == 0);
  if (missing)
  {
    impl = new ClusterImpl();
  }
  return impl;
}
/// Starts as an empty cluster with default compression.  The offset table
/// always holds a leading 0, so it has one entry more than there are blobs.
ClusterImpl::ClusterImpl() : compression(zimcompDefault)
{
  offsets.push_back(0);
}
/// Reads an uncompressed cluster body (offset table followed by blob data)
/// from in, replacing the current content.
///
/// The first offset doubles as the size of the offset table in bytes, so it
/// tells how many offsets follow.  Offsets are stored relative to the start
/// of the blob data, i.e. the first one becomes 0.
///
/// On a read error the function returns early and leaves the stream in a
/// failed state.  A first offset too small to describe even one table entry
/// is treated as corrupt input and sets failbit as well.
void ClusterImpl::read(std::istream& in)
{
  log_debug1("read");

  // read first offset, which specifies, how many offsets we need to read
  size_type offset;
  in.read(reinterpret_cast<char*>(&offset), sizeof(offset));
  if (in.fail())
    return;

  offset = fromLittleEndian(&offset);

  size_type n = offset / 4;
  size_type a = offset;

  log_debug1("first offset is " << offset << " n=" << n << " a=" << a);

  // start from an empty cluster
  offsets.clear();
  data.clear();
  offsets.push_back(0);

  if (n == 0)
  {
    // The table cannot have zero entries.  Without this check the pre-decrement
    // below would wrap the unsigned counter and loop until the stream runs dry.
    log_debug1("invalid first offset " << offset);
    in.setstate(std::ios::failbit);
    return;
  }

  // read offsets
  offsets.reserve(n);
  while (--n)
  {
    in.read(reinterpret_cast<char*>(&offset), sizeof(offset));
    if (in.fail())
    {
      log_debug1("fail at " << n);
      return;
    }
    offset = fromLittleEndian(&offset);
    log_debug1("offset=" << offset << '(' << offset-a << ')');
    offsets.push_back(offset - a);
  }

  // last offset points past the end of the cluster, so we know now, how many bytes to read
  if (offsets.size() > 1)
  {
    n = offsets.back() - offsets.front();
    data.resize(n);
    if (n > 0)
    {
      log_debug1("read " << n << " bytes of data");
      in.read(&(data[0]), n);
    }
  }
}
/// Writes the uncompressed cluster body to out: first the offset table, then
/// the blob data.
///
/// Stored offsets are relative to the start of the cluster body, hence the
/// size of the offset table is added to each in-memory offset.
void ClusterImpl::write(std::ostream& out) const
{
  size_type a = offsets.size() * sizeof(size_type);
  for (Offsets::const_iterator it = offsets.begin(); it != offsets.end(); ++it)
  {
    size_type o = *it;
    o += a;
    // NOTE(review): fromLittleEndian is used for the host -> file direction
    // here; presumably the conversion is its own inverse - confirm.
    o = fromLittleEndian(&o);
    out.write(reinterpret_cast<const char*>(&o), sizeof(size_type));
  }

  // data[0] must not be touched on an empty container (cluster without blobs
  // or with only empty blobs)
  if (!data.empty())
    out.write(&(data[0]), data.size());
}
/// Appends the bytes of blob to the cluster data and records the new end
/// position in the offset table.
void ClusterImpl::addBlob(const Blob& blob)
{
  log_debug1("addBlob(ptr, " << blob.size() << ')');

  data.insert(data.end(), blob.data(), blob.end());

  // the offset past the last byte terminates the blob just added
  offsets.push_back(data.size());
}
/// Returns blob number n.  A blob of size 0 yields a default constructed
/// Blob; otherwise the Blob refers to this cluster's data.
Blob ClusterImpl::getBlob(size_type n) const
{
  if (getSize(n) > 0)
    return Blob(const_cast<ClusterImpl*>(this), getData(n), getSize(n));

  return Blob();
}
/// Removes all blobs; afterwards the offset table holds just the leading 0
/// again, as after construction.
void ClusterImpl::clear()
{
  data.clear();

  offsets.clear();
  offsets.push_back(0);
}
void ClusterImpl::addBlob(const char* data, unsigned size)
{
addBlob(Blob(data, size));
}
/// Forwards to the implementation object.
/// NOTE(review): the default constructor sets impl to 0 - presumably callers
/// only use this on a cluster which has been read or filled; confirm.
Blob Cluster::getBlob(size_type n) const
{
  Blob result = impl->getBlob(n);
  return result;
}
/// Reads a cluster from in.
///
/// The first byte is the compression flag; it is stored in clusterImpl and
/// selects how the remaining data is decoded.  Compressed data is read through
/// the matching decompression stream, which is set up to throw on failbit and
/// badbit.  A compression method which was not compiled in raises
/// std::runtime_error; an unknown flag sets failbit on in.
///
/// If not even the flag byte can be read, the function returns right away with
/// the stream in its failed state and clusterImpl untouched.
std::istream& operator>> (std::istream& in, ClusterImpl& clusterImpl)
{
  log_trace("read cluster");

  char c;
  if (!in.get(c))
  {
    // c holds no value in this case and must not be used as a flag
    return in;
  }

  clusterImpl.setCompression(static_cast<CompressionType>(c));

  switch (static_cast<CompressionType>(c))
  {
    case zimcompDefault:
    case zimcompNone:
      clusterImpl.read(in);
      break;

    case zimcompZip:
      {
#ifdef ENABLE_ZLIB
        log_debug("uncompress data (zlib)");
        zim::InflateStream is(in);
        is.exceptions(std::ios::failbit | std::ios::badbit);
        clusterImpl.read(is);
#else
        throw std::runtime_error("zlib not enabled in this library");
#endif
        break;
      }

    case zimcompBzip2:
      {
#ifdef ENABLE_BZIP2
        log_debug("uncompress data (bzip2)");
        zim::Bunzip2Stream is(in);
        is.exceptions(std::ios::failbit | std::ios::badbit);
        clusterImpl.read(is);
#else
        throw std::runtime_error("bzip2 not enabled in this library");
#endif
        break;
      }

    case zimcompLzma:
      {
#ifdef ENABLE_LZMA
        log_debug("uncompress data (lzma)");
        zim::UnlzmaStream is(in);
        is.exceptions(std::ios::failbit | std::ios::badbit);
        clusterImpl.read(is);
#else
        throw std::runtime_error("lzma not enabled in this library");
#endif
        break;
      }

    default:
      log_error("invalid compression flag " << c);
      in.setstate(std::ios::failbit);
      break;
  }

  return in;
}
std::istream& operator>> (std::istream& in, Cluster& cluster)
{
return in >> *cluster.getImpl();
}
/// Writes a cluster to out.
///
/// The compression flag is written as a single byte, followed by the cluster
/// body, which is passed through the matching compression stream when a
/// compression method is set.  Compression streams are set up to throw on
/// failbit and badbit.  std::runtime_error is thrown when the compression
/// method was not compiled in or the flag is unknown.
std::ostream& operator<< (std::ostream& out, const ClusterImpl& clusterImpl)
{
  log_trace("write cluster");

  out.put(static_cast<char>(clusterImpl.getCompression()));

  switch(clusterImpl.getCompression())
  {
    case zimcompDefault:
    case zimcompNone:
      clusterImpl.write(out);
      break;

    case zimcompZip:
      {
#ifdef ENABLE_ZLIB
        log_debug("compress data (zlib)");
        zim::DeflateStream os(out);
        os.exceptions(std::ios::failbit | std::ios::badbit);
        clusterImpl.write(os);
        os.flush();
#else
        throw std::runtime_error("zlib not enabled in this library");
#endif
        break;
      }

    case zimcompBzip2:
      {
#ifdef ENABLE_BZIP2
        log_debug("compress data (bzip2)");
        zim::Bzip2Stream os(out);
        os.exceptions(std::ios::failbit | std::ios::badbit);
        clusterImpl.write(os);
        os.end();
#else
        throw std::runtime_error("bzip2 not enabled in this library");
#endif
        break;
      }

    case zimcompLzma:
      {
#ifdef ENABLE_LZMA
        uint32_t lzmaPreset = 3 | LZMA_PRESET_EXTREME;

        /**
         * read lzma preset from environment
         * ZIM_LZMA_LEVEL is a number followed optionally by a
         * suffix 'e'. The number gives the preset and the suffix tells,
         * if LZMA_PRESET_EXTREME should be set.
         * e.g.:
         *   ZIM_LZMA_LEVEL=9   => 9
         *   ZIM_LZMA_LEVEL=3e  => 3 + extreme
         * A value which does not start with a number is ignored and the
         * built-in default stays in effect.
         */
        const char* e = ::getenv("ZIM_LZMA_LEVEL");
        if (e)
        {
          uint32_t level = 0;
          char flag = '\0';
          std::istringstream s(e);
          // extract into a scratch variable: a failed extraction would
          // otherwise overwrite the default preset
          if (s >> level)
          {
            lzmaPreset = level;
            s >> flag;
            if (flag == 'e')
              lzmaPreset |= LZMA_PRESET_EXTREME;
          }
        }

        log_debug("compress data (lzma, " << std::hex << lzmaPreset << ")");
        zim::LzmaStream os(out, lzmaPreset);
        os.exceptions(std::ios::failbit | std::ios::badbit);
        clusterImpl.write(os);
        os.end();
#else
        throw std::runtime_error("lzma not enabled in this library");
#endif
        break;
      }

    default:
      {
        std::ostringstream msg;
        msg << "invalid compression flag " << clusterImpl.getCompression();
        log_error(msg.str());
        throw std::runtime_error(msg.str());
      }
  }

  return out;
}
std::ostream& operator<< (std::ostream& out, const Cluster& cluster)
{
return out << *cluster.impl;
}
}

@ -1,245 +0,0 @@
/* src/zimlib/src/config.h.in. Generated from configure.ac by autoheader. */
/* set zim cluster cache size to number of cached chunks */
#undef CLUSTER_CACHE_SIZE
/* set zim dirent cache size to number of cached chunks */
#undef DIRENT_CACHE_SIZE
/* defined if lzma compression is enabled */
#undef ENABLE_LZMA
/* Define to 1 if you have the <dlfcn.h> header file. */
#undef HAVE_DLFCN_H
/* Define to 1 if you have the <fcntl.h> header file. */
#undef HAVE_FCNTL_H
/* Define to 1 if you have the <float.h> header file. */
#undef HAVE_FLOAT_H
/* Define to 1 if you have the `fork' function. */
#undef HAVE_FORK
/* Define to 1 if you have the `getcwd' function. */
#undef HAVE_GETCWD
/* Define to 1 if you have the `gettimeofday' function. */
#undef HAVE_GETTIMEOFDAY
/* Define to 1 if you have the <inttypes.h> header file. */
#undef HAVE_INTTYPES_H
/* Define to 1 if you have the `clucene' library (-lclucene). */
#undef HAVE_LIBCLUCENE
/* Define to 1 if you have the <libintl.h> header file. */
#undef HAVE_LIBINTL_H
/* Define to 1 if you have the `lzma' library (-llzma). */
#undef HAVE_LIBLZMA
/* Define to 1 if you have the `microhttpd' library (-lmicrohttpd). */
#undef HAVE_LIBMICROHTTPD
/* Define to 1 if you have the `z' library (-lz). */
#undef HAVE_LIBZ
/* Define to 1 if you have the <limits.h> header file. */
#undef HAVE_LIMITS_H
/* Define to 1 if your system has a GNU libc compatible `malloc' function, and
to 0 otherwise. */
#undef HAVE_MALLOC
/* Define to 1 if you have the `memmove' function. */
#undef HAVE_MEMMOVE
/* Define to 1 if you have the <memory.h> header file. */
#undef HAVE_MEMORY_H
/* Define to 1 if you have the `memset' function. */
#undef HAVE_MEMSET
/* Define to 1 if you have the `pow' function. */
#undef HAVE_POW
/* Define to 1 if the system has the type `ptrdiff_t'. */
#undef HAVE_PTRDIFF_T
/* Define to 1 if you have the `regcomp' function. */
#undef HAVE_REGCOMP
/* Define to 1 if you have the `sqrt' function. */
#undef HAVE_SQRT
/* Define to 1 if you have the `stat64' function. */
#undef HAVE_STAT64
/* Define to 1 if stdbool.h conforms to C99. */
#undef HAVE_STDBOOL_H
/* Define to 1 if you have the <stddef.h> header file. */
#undef HAVE_STDDEF_H
/* Define to 1 if you have the <stdint.h> header file. */
#undef HAVE_STDINT_H
/* Define to 1 if you have the <stdlib.h> header file. */
#undef HAVE_STDLIB_H
/* Define to 1 if you have the `strcasecmp' function. */
#undef HAVE_STRCASECMP
/* Define to 1 if you have the `strchr' function. */
#undef HAVE_STRCHR
/* Define to 1 if you have the `strdup' function. */
#undef HAVE_STRDUP
/* Define to 1 if you have the `strerror' function. */
#undef HAVE_STRERROR
/* Define to 1 if you have the <strings.h> header file. */
#undef HAVE_STRINGS_H
/* Define to 1 if you have the <string.h> header file. */
#undef HAVE_STRING_H
/* Define to 1 if you have the `strtol' function. */
#undef HAVE_STRTOL
/* Define to 1 if you have the <sys/socket.h> header file. */
#undef HAVE_SYS_SOCKET_H
/* Define to 1 if you have the <sys/stat.h> header file. */
#undef HAVE_SYS_STAT_H
/* Define to 1 if you have the <sys/time.h> header file. */
#undef HAVE_SYS_TIME_H
/* Define to 1 if you have the <sys/types.h> header file. */
#undef HAVE_SYS_TYPES_H
/* Define to 1 if you have the <unistd.h> header file. */
#undef HAVE_UNISTD_H
/* Define to 1 if you have the `vfork' function. */
#undef HAVE_VFORK
/* Define to 1 if you have the <vfork.h> header file. */
#undef HAVE_VFORK_H
/* Define to 1 if you have the <wchar.h> header file. */
#undef HAVE_WCHAR_H
/* Define to 1 if `fork' works. */
#undef HAVE_WORKING_FORK
/* Define to 1 if `vfork' works. */
#undef HAVE_WORKING_VFORK
/* Define to 1 if the system has the type `_Bool'. */
#undef HAVE__BOOL
/* Define to the sub-directory in which libtool stores uninstalled libraries.
*/
#undef LT_OBJDIR
/* set lzma uncompress memory size to number of MB */
#undef LZMA_MEMORY_SIZE
/* Name of package */
#undef PACKAGE
/* Define to the address where bug reports for this package should be sent. */
#undef PACKAGE_BUGREPORT
/* Define to the full name of this package. */
#undef PACKAGE_NAME
/* Define to the full name and version of this package. */
#undef PACKAGE_STRING
/* Define to the one symbol short name of this package. */
#undef PACKAGE_TARNAME
/* Define to the home page for this package. */
#undef PACKAGE_URL
/* Define to the version of this package. */
#undef PACKAGE_VERSION
/* Define to 1 if you have the ANSI C header files. */
#undef STDC_HEADERS
/* Version number of package */
#undef VERSION
/* Define for Solaris 2.5.1 so the uint32_t typedef from <sys/synch.h>,
<pthread.h>, or <semaphore.h> is not used. If the typedef were allowed, the
#define below would cause a syntax error. */
#undef _UINT32_T
/* Define for Solaris 2.5.1 so the uint64_t typedef from <sys/synch.h>,
<pthread.h>, or <semaphore.h> is not used. If the typedef were allowed, the
#define below would cause a syntax error. */
#undef _UINT64_T
/* Define for Solaris 2.5.1 so the uint8_t typedef from <sys/synch.h>,
<pthread.h>, or <semaphore.h> is not used. If the typedef were allowed, the
#define below would cause a syntax error. */
#undef _UINT8_T
/* Define to `__inline__' or `__inline' if that's what the C compiler
calls it, or to nothing if 'inline' is not supported under any name. */
#ifndef __cplusplus
#undef inline
#endif
/* Define to the type of a signed integer type of width exactly 16 bits if
such a type exists and the standard includes do not define it. */
#undef int16_t
/* Define to the type of a signed integer type of width exactly 32 bits if
such a type exists and the standard includes do not define it. */
#undef int32_t
/* Define to the type of a signed integer type of width exactly 64 bits if
such a type exists and the standard includes do not define it. */
#undef int64_t
/* Define to the type of a signed integer type of width exactly 8 bits if such
a type exists and the standard includes do not define it. */
#undef int8_t
/* Define to rpl_malloc if the replacement function should be used. */
#undef malloc
/* Define to `long int' if <sys/types.h> does not define. */
#undef off_t
/* Define to `int' if <sys/types.h> does not define. */
#undef pid_t
/* Define to `unsigned int' if <sys/types.h> does not define. */
#undef size_t
/* Define to the type of an unsigned integer type of width exactly 16 bits if
such a type exists and the standard includes do not define it. */
#undef uint16_t
/* Define to the type of an unsigned integer type of width exactly 32 bits if
such a type exists and the standard includes do not define it. */
#undef uint32_t
/* Define to the type of an unsigned integer type of width exactly 64 bits if
such a type exists and the standard includes do not define it. */
#undef uint64_t
/* Define to the type of an unsigned integer type of width exactly 8 bits if
such a type exists and the standard includes do not define it. */
#undef uint8_t
/* Define as `fork' if `vfork' does not work. */
#undef vfork

@ -1,165 +0,0 @@
/*
* Copyright (C) 2006 Tommi Maekitalo
*
* This program is free software; you can redistribute it and/or
* modify it under the terms of the GNU General Public License as
* published by the Free Software Foundation; either version 2 of the
* License, or (at your option) any later version.
*
* This program is distributed in the hope that it will be useful, but
* is provided AS IS, WITHOUT ANY WARRANTY; without even the implied
* warranty of MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE, and
* NON-INFRINGEMENT. See the GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software
* Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
*
*/
#include <zim/dirent.h>
#include <zim/zim.h>
#include <zim/endian.h>
#include "log.h"
#include <algorithm>
log_define("zim.dirent")
namespace zim
{
//////////////////////////////////////////////////////////////////////
// Dirent
//
/// Serializes a directory entry.
///
/// Header layout (little endian): mime type (bytes 0-1), length of the
/// parameter (byte 2), namespace (byte 3), version (bytes 4-7), followed by
/// the redirect index (bytes 8-11) for redirects or by cluster number
/// (bytes 8-11) and blob number (bytes 12-15) for articles.
/// After the header come the zero terminated url, the zero terminated title
/// (left empty when it equals the url) and the parameter.
std::ostream& operator<< (std::ostream& out, const Dirent& dirent)
{
  union
  {
    char d[16];
    long a;
  } buf;

  // part common to redirects and articles
  toLittleEndian(dirent.getMimeType(), buf.d);
  buf.d[2] = static_cast<char>(dirent.getParameter().size());
  buf.d[3] = dirent.getNamespace();

  log_debug("title=" << dirent.getTitle() << " title.size()=" << dirent.getTitle().size());

  toLittleEndian(dirent.getVersion(), buf.d + 4);

  std::streamsize headerLen;
  if (dirent.isRedirect())
  {
    toLittleEndian(dirent.getRedirectIndex(), buf.d + 8);
    headerLen = 12;
  }
  else
  {
    toLittleEndian(dirent.getClusterNumber(), buf.d + 8);
    toLittleEndian(dirent.getBlobNumber(), buf.d + 12);
    headerLen = 16;
  }
  out.write(buf.d, headerLen);

  out << dirent.getUrl();
  out << '\0';

  // a title identical to the url is not stored a second time
  const std::string title = dirent.getTitle();
  if (title != dirent.getUrl())
    out << title;

  out << '\0';
  out << dirent.getParameter();

  return out;
}
// Reads one directory entry from in into dirent.
//
// The first 12 header bytes are common to all entries: mime type (bytes 0-1),
// parameter length (byte 2), namespace (byte 3), version (bytes 4-7) and a
// 4 byte value at offset 8.  A mime type equal to the maximum uint16_t value
// marks a redirect, in which case the value at offset 8 is the redirect index.
// Otherwise 4 more bytes are read; offset 8 then holds the cluster number and
// offset 12 the blob number.  The header is followed by the zero terminated
// url, the zero terminated title and the parameter bytes.
//
// If a header part cannot be read completely, a warning is logged, the stream
// is left in a failed state and the function returns early.
std::istream& operator>> (std::istream& in, Dirent& dirent)
{
// NOTE(review): the long member presumably only serves to align the buffer
// for the reinterpret_casts below - confirm.
union
{
long a;
char d[16];
} header;
in.read(header.d, 12);
if (in.fail())
{
log_warn("error reading dirent header");
return in;
}
if (in.gcount() != 12)
{
log_warn("error reading dirent header (2)");
in.setstate(std::ios::failbit);
return in;
}
uint16_t mimeType = fromLittleEndian(reinterpret_cast<const uint16_t*>(header.d));
// the largest uint16_t value in the mime type field flags a redirect
bool redirect = (mimeType == std::numeric_limits<uint16_t>::max());
char ns = header.d[3];
size_type version = fromLittleEndian(reinterpret_cast<const size_type*>(header.d + 4));
dirent.setVersion(version);
if (redirect)
{
size_type redirectIndex = fromLittleEndian(reinterpret_cast<const size_type*>(header.d + 8));
log_debug("redirectIndex=" << redirectIndex);
dirent.setRedirect(redirectIndex);
}
else
{
log_debug("read article entry");
// article entries carry 4 additional header bytes (the blob number)
in.read(header.d + 12, 4);
if (in.fail())
{
log_warn("error reading article dirent header");
return in;
}
if (in.gcount() != 4)
{
log_warn("error reading article dirent header (2)");
in.setstate(std::ios::failbit);
return in;
}
size_type clusterNumber = fromLittleEndian(reinterpret_cast<const size_type*>(header.d + 8));
size_type blobNumber = fromLittleEndian(reinterpret_cast<const size_type*>(header.d + 12));
log_debug("mimeType=" << mimeType << " clusterNumber=" << clusterNumber << " blobNumber=" << blobNumber);
dirent.setArticle(mimeType, clusterNumber, blobNumber);
}
char ch;
std::string url;
std::string title;
std::string parameter;
log_debug("read url, title and parameters");
// url and title are zero terminated; reading also stops at end of stream
while (in.get(ch) && ch != '\0')
url += ch;
while (in.get(ch) && ch != '\0')
title += ch;
// the parameter has no terminator, its length comes from header byte 2
uint8_t extraLen = static_cast<uint8_t>(header.d[2]);
while (extraLen-- > 0 && in.get(ch))
parameter += ch;
dirent.setUrl(ns, url);
dirent.setTitle(title);
dirent.setParameter(parameter);
return in;
}
/// Returns the url prefixed with the namespace character and a slash,
/// i.e. "<namespace>/<url>".
std::string Dirent::getLongUrl() const
{
  log_trace("Dirent::getLongUrl()");
  log_debug("namespace=" << getNamespace() << " title=" << getTitle());

  std::string longUrl(1, getNamespace());
  longUrl += '/';
  longUrl += getUrl();
  return longUrl;
}
}

@ -1,58 +0,0 @@
/*
* Copyright (C) 2009 Tommi Maekitalo
*
* This program is free software; you can redistribute it and/or
* modify it under the terms of the GNU General Public License as
* published by the Free Software Foundation; either version 2 of the
* License, or (at your option) any later version.
*
* This program is distributed in the hope that it will be useful, but
* is provided AS IS, WITHOUT ANY WARRANTY; without even the implied
* warranty of MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE, and
* NON-INFRINGEMENT. See the GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software
* Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
*
*/
#include <sstream>
#include <stdlib.h>
namespace zim
{
/// Reads an unsigned number from the environment.
///
/// @param env  name of the environment variable
/// @param def  value to return when the variable is not set or its content
///             does not start with a number
/// @return the parsed number or def
unsigned envValue(const char* env, unsigned def)
{
  const char* v = ::getenv(env);
  if (v)
  {
    // Extract into a scratch variable: a failed extraction stores 0 in its
    // target (since C++11), which would silently replace the default.
    unsigned parsed = 0;
    std::istringstream s(v);
    if (s >> parsed)
      def = parsed;
  }
  return def;
}
/// Reads a memory size from the environment.
///
/// The value is an unsigned number, optionally followed by a unit:
/// k/K (times 1024), m/M (times 1024^2) or g/G (times 1024^3).  Any other
/// character after the number is ignored.
///
/// @param env  name of the environment variable
/// @param def  value to return when the variable is not set or its content
///             does not start with a number
/// @return the size with the unit applied, or def
unsigned envMemSize(const char* env, unsigned def)
{
  const char* v = ::getenv(env);
  if (v)
  {
    // Extract into a scratch variable: a failed extraction stores 0 in its
    // target (since C++11), which would silently replace the default.
    unsigned parsed = 0;
    char unit = '\0';
    std::istringstream s(v);
    if (s >> parsed)
    {
      s >> unit;
      switch (unit)
      {
        case 'k':
        case 'K': parsed *= 1024; break;
        case 'm':
        case 'M': parsed *= 1024 * 1024; break;
        case 'g':
        case 'G': parsed *= 1024 * 1024 * 1024; break;
      }
      def = parsed;
    }
  }
  return def;
}
}

@ -1,29 +0,0 @@
/*
* Copyright (C) 2009 Tommi Maekitalo
*
* This program is free software; you can redistribute it and/or
* modify it under the terms of the GNU General Public License as
* published by the Free Software Foundation; either version 2 of the
* License, or (at your option) any later version.
*
* This program is distributed in the hope that it will be useful, but
* is provided AS IS, WITHOUT ANY WARRANTY; without even the implied
* warranty of MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE, and
* NON-INFRINGEMENT. See the GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software
* Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
*
*/
#ifndef ZIM_ENVVALUE_H
#define ZIM_ENVVALUE_H
namespace zim
{
/// Returns the content of the environment variable env read as an unsigned
/// number; def is returned when the variable is not set.
unsigned envValue(const char* env, unsigned def);
/// Like envValue, but the number may be followed by a unit k/K, m/M or g/G,
/// which multiplies it by 1024, 1024*1024 or 1024*1024*1024.
unsigned envMemSize(const char* env, unsigned def);
}
#endif // ZIM_ENVVALUE_H

@ -1,272 +0,0 @@
/*
* Copyright (C) 2006,2009 Tommi Maekitalo
*
* This program is free software; you can redistribute it and/or
* modify it under the terms of the GNU General Public License as
* published by the Free Software Foundation; either version 2 of the
* License, or (at your option) any later version.
*
* This program is distributed in the hope that it will be useful, but
* is provided AS IS, WITHOUT ANY WARRANTY; without even the implied
* warranty of MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE, and
* NON-INFRINGEMENT. See the GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software
* Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
*
*/
#include <zim/file.h>
#include <zim/article.h>
#include "log.h"
#include <zim/fileiterator.h>
log_define("zim.file")
namespace zim
{
namespace
{
/// Converts a hexadecimal digit (0-9, a-f, A-F) to its value 0..15.
/// Returns -1 for any other character.
int hexval(char ch)
{
  if ('a' <= ch && ch <= 'f')
    return 10 + (ch - 'a');

  if ('A' <= ch && ch <= 'F')
    return 10 + (ch - 'A');

  if ('0' <= ch && ch <= '9')
    return ch - '0';

  return -1;
}
}
/// Returns a handle to the article at index idx of this file.
Article File::getArticle(size_type idx) const
{
  Article result(*this, idx);
  return result;
}
/// Looks up an article by namespace and url.  Returns a default constructed
/// Article when there is no exact match.
Article File::getArticle(char ns, const std::string& url)
{
  log_trace("File::getArticle('" << ns << "', \"" << url << ')');

  std::pair<bool, const_iterator> hit = findx(ns, url);
  if (!hit.first)
    return Article();

  return *hit.second;
}
/// Looks up an article by its long url ("<namespace>/<url>").  Returns a
/// default constructed Article when there is no exact match.
Article File::getArticleByUrl(const std::string& url)
{
  log_trace("File::getArticle(\"" << url << ')');

  std::pair<bool, const_iterator> hit = findx(url);
  if (!hit.first)
    return Article();

  return *hit.second;
}
/// Returns the article at position idx of the title ordering; the position
/// is mapped to an article index by the implementation's getIndexByTitle.
Article File::getArticleByTitle(size_type idx)
{
  const size_type articleIdx = impl->getIndexByTitle(idx);
  return Article(*this, articleIdx);
}
/// Looks up an article by namespace and title.  Returns a default
/// constructed Article when there is no exact match.
Article File::getArticleByTitle(char ns, const std::string& title)
{
  log_trace("File::getArticleByTitle('" << ns << "', \"" << title << ')');

  std::pair<bool, const_iterator> hit = findxByTitle(ns, title);
  if (!hit.first)
    return Article();

  return *hit.second;
}
bool File::hasNamespace(char ch)
{
size_type off = getNamespaceBeginOffset(ch);
return off < getCountArticles() && getDirent(off).getNamespace() == ch;
}
/// Iterator to the first article (index 0).
File::const_iterator File::begin()
{
  const_iterator first(this, 0);
  return first;
}
/// Iterator to index 0, created in const_iterator::ArticleIterator mode.
File::const_iterator File::beginByTitle()
{
  const_iterator first(this, 0, const_iterator::ArticleIterator);
  return first;
}
/// Iterator one past the last article (index getCountArticles()).
File::const_iterator File::end()
{
  const_iterator last(this, getCountArticles());
  return last;
}
// Searches the article with the given namespace and url by bisection.
//
// Returns a pair: first tells whether an exact match was found; second is an
// iterator to the match.  When nothing matches, second is end() if the
// namespace offsets coincide, otherwise an iterator to index l or u next to
// the position where the search stopped.
std::pair<bool, File::const_iterator> File::findx(char ns, const std::string& url)
{
log_debug("find article by url " << ns << " \"" << url << "\", in file \"" << getFilename() << '"');
// NOTE(review): presumably [l, u) is the index range of namespace ns - confirm
size_type l = getNamespaceBeginOffset(ns);
size_type u = getNamespaceEndOffset(ns);
if (l == u)
{
log_debug("namespace " << ns << " not found");
return std::pair<bool, const_iterator>(false, end());
}
unsigned itcount = 0;
// halve the range until l is the only candidate left
while (u - l > 1)
{
++itcount;
size_type p = l + (u - l) / 2;
Dirent d = getDirent(p);
// order by namespace first, then by url
int c = ns < d.getNamespace() ? -1
: ns > d.getNamespace() ? 1
: url.compare(d.getUrl());
if (c < 0)
u = p;
else if (c > 0)
l = p;
else
{
log_debug("article found after " << itcount << " iterations in file \"" << getFilename() << "\" at index " << p);
return std::pair<bool, const_iterator>(true, const_iterator(this, p));
}
}
// the loop never compares against the final l, so check it here
Dirent d = getDirent(l);
int c = url.compare(d.getUrl());
if (c == 0)
{
log_debug("article found after " << itcount << " iterations in file \"" << getFilename() << "\" at index " << l);
return std::pair<bool, const_iterator>(true, const_iterator(this, l));
}
log_debug("article not found after " << itcount << " iterations (\"" << d.getUrl() << "\" does not match)");
// no match: l if the url sorts before the entry at l, u otherwise
return std::pair<bool, const_iterator>(false, const_iterator(this, c < 0 ? l : u));
}
std::pair<bool, File::const_iterator> File::findx(const std::string& url)
{
if (url.size() < 2 || url[1] != '/')
return std::pair<bool, const_iterator>(false, const_iterator());
return findx(url[0], url.substr(2));
}
// Searches the article with the given namespace and title by bisection over
// the entries as returned by getDirentByTitle.
//
// Returns a pair: first tells whether an exact match was found; second is an
// iterator (in const_iterator::ArticleIterator mode) to the match.  When
// nothing matches, second is end() if the namespace offsets coincide,
// otherwise an iterator to index l or u next to the position where the search
// stopped.
std::pair<bool, File::const_iterator> File::findxByTitle(char ns, const std::string& title)
{
log_debug("find article by title " << ns << " \"" << title << "\", in file \"" << getFilename() << '"');
// NOTE(review): the namespace offsets are used as positions in the title
// ordering here - presumably both orderings group by namespace; confirm
size_type l = getNamespaceBeginOffset(ns);
size_type u = getNamespaceEndOffset(ns);
if (l == u)
{
log_debug("namespace " << ns << " not found");
return std::pair<bool, const_iterator>(false, end());
}
unsigned itcount = 0;
// halve the range until l is the only candidate left
while (u - l > 1)
{
++itcount;
size_type p = l + (u - l) / 2;
Dirent d = getDirentByTitle(p);
// order by namespace first, then by title
int c = ns < d.getNamespace() ? -1
: ns > d.getNamespace() ? 1
: title.compare(d.getTitle());
if (c < 0)
u = p;
else if (c > 0)
l = p;
else
{
log_debug("article found after " << itcount << " iterations in file \"" << getFilename() << "\" at index " << p);
return std::pair<bool, const_iterator>(true, const_iterator(this, p, const_iterator::ArticleIterator));
}
}
// the loop never compares against the final l, so check it here
Dirent d = getDirentByTitle(l);
int c = title.compare(d.getTitle());
if (c == 0)
{
log_debug("article found after " << itcount << " iterations in file \"" << getFilename() << "\" at index " << l);
return std::pair<bool, const_iterator>(true, const_iterator(this, l, const_iterator::ArticleIterator));
}
log_debug("article not found after " << itcount << " iterations (\"" << d.getTitle() << "\" does not match)");
// no match: l if the title sorts before the entry at l, u otherwise
return std::pair<bool, const_iterator>(false, const_iterator(this, c < 0 ? l : u, const_iterator::ArticleIterator));
}
/// Like findx(ns, url), but returns only the iterator part of the result.
File::const_iterator File::find(char ns, const std::string& url)
{
  std::pair<bool, const_iterator> result = findx(ns, url);
  return result.second;
}
/// Like findx(url), but returns only the iterator part of the result.
File::const_iterator File::find(const std::string& url)
{
  std::pair<bool, const_iterator> result = findx(url);
  return result.second;
}
/// Like findxByTitle(ns, title), but returns only the iterator part of the
/// result.
File::const_iterator File::findByTitle(char ns, const std::string& title)
{
  std::pair<bool, const_iterator> result = findxByTitle(ns, title);
  return result.second;
}
/// Decodes a url-encoded string.
///
/// '+' becomes a space and "%XX" (two hexadecimal digits) becomes the byte
/// with that value.  A '%' which is not followed by two hexadecimal digits
/// is not an escape sequence; it is copied to the result unchanged together
/// with the characters consumed while looking at it.  This holds in the
/// middle of the string as well as at its end.
///
/// @param url  the encoded text
/// @return the decoded text
std::string urldecode(const std::string& url)
{
  std::string ret;
  ret.reserve(url.size());   // decoding never makes the text longer

  enum {
    state_0,    // plain text
    state_h1,   // '%' seen, expecting first hex digit
    state_h2    // first hex digit seen (kept in ch), expecting second one
  } state = state_0;

  char ch = '\0';

  for (std::string::const_iterator it = url.begin(); it != url.end(); ++it)
  {
    switch (state)
    {
      case state_0:
        if (*it == '+')
          ret += ' ';
        else if (*it == '%')
          state = state_h1;
        else
          ret += *it;
        break;

      case state_h1:
        if (hexval(*it) >= 0)
        {
          ch = *it;
          state = state_h2;
        }
        else
        {
          // not an escape sequence - keep the text as it is
          ret += '%';
          ret += *it;
          state = state_0;
        }
        break;

      case state_h2:
        if (hexval(*it) >= 0)
        {
          ret += static_cast<char>(hexval(ch) * 16 + hexval(*it));
        }
        else
        {
          // incomplete escape sequence - keep the text as it is, just like
          // the other malformed cases do
          ret += '%';
          ret += ch;
          ret += *it;
        }
        state = state_0;
        break;
    }
  }

  // flush an escape sequence cut off by the end of the input
  switch (state)
  {
    case state_0:
      break;

    case state_h1:
      ret += '%';
      break;

    case state_h2:
      ret += '%';
      ret += ch;
      break;
  }

  return ret;
}
}

@ -1,110 +0,0 @@
/*
* Copyright (C) 2008 Tommi Maekitalo
*
* This program is free software; you can redistribute it and/or
* modify it under the terms of the GNU General Public License as
* published by the Free Software Foundation; either version 2 of the
* License, or (at your option) any later version.
*
* This program is distributed in the hope that it will be useful, but
* is provided AS IS, WITHOUT ANY WARRANTY; without even the implied
* warranty of MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE, and
* NON-INFRINGEMENT. See the GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software
* Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
*
*/
#include <zim/fileheader.h>
#include <iostream>
#include <algorithm>
#include "log.h"
log_define("zim.file.header")
namespace zim
{
// Magic number written little-endian into the first four header bytes and
// required by operator>> when reading.
const size_type Fileheader::zimMagic = 0x044d495a; // ="ZIM^d"
// File format version written by operator<<; operator>> rejects any other.
const size_type Fileheader::zimVersion = 5;
// Number of bytes of the serialized header (the buffer size used by both
// stream operators; the last field is written at offset 72).
const size_type Fileheader::size = 80;
std::ostream& operator<< (std::ostream& out, const Fileheader& fh)
{
char header[Fileheader::size];
toLittleEndian(Fileheader::zimMagic, header);
toLittleEndian(Fileheader::zimVersion, header + 4);
std::copy(fh.getUuid().data, fh.getUuid().data + sizeof(Uuid), header + 8);
toLittleEndian(fh.getArticleCount(), header + 24);
toLittleEndian(fh.getClusterCount(), header + 28);
toLittleEndian(fh.getUrlPtrPos(), header + 32);
toLittleEndian(fh.getTitleIdxPos(), header + 40);
toLittleEndian(fh.getClusterPtrPos(), header + 48);
toLittleEndian(fh.getMimeListPos(), header + 56);
toLittleEndian(fh.getMainPage(), header + 64);
toLittleEndian(fh.getLayoutPage(), header + 68);
toLittleEndian(fh.getChecksumPos(), header + 72);
out.write(header, Fileheader::size);
return out;
}
std::istream& operator>> (std::istream& in, Fileheader& fh)
{
char header[Fileheader::size];
in.read(header, Fileheader::size);
if (in.fail())
return in;
if (static_cast<size_type>(in.gcount()) != Fileheader::size)
{
in.setstate(std::ios::failbit);
return in;
}
size_type magicNumber = fromLittleEndian(reinterpret_cast<const size_type*>(header));
if (magicNumber != Fileheader::zimMagic)
{
log_error("invalid magic number " << magicNumber << " found - "
<< Fileheader::zimMagic << " expected");
in.setstate(std::ios::failbit);
return in;
}
uint16_t version = fromLittleEndian(reinterpret_cast<const uint16_t*>(header + 4));
if (version != static_cast<size_type>(Fileheader::zimVersion))
{
log_error("invalid zimfile version " << version << " found - "
<< Fileheader::zimVersion << " expected");
in.setstate(std::ios::failbit);
return in;
}
Uuid uuid;
std::copy(header + 8, header + 24, uuid.data);
size_type articleCount = fromLittleEndian(reinterpret_cast<const size_type*>(header + 24));
size_type clusterCount = fromLittleEndian(reinterpret_cast<const size_type*>(header + 28));
offset_type urlPtrPos = fromLittleEndian(reinterpret_cast<const offset_type*>(header + 32));
offset_type titleIdxPos = fromLittleEndian(reinterpret_cast<const offset_type*>(header + 40));
offset_type clusterPtrPos = fromLittleEndian(reinterpret_cast<const offset_type*>(header + 48));
offset_type mimeListPos = fromLittleEndian(reinterpret_cast<const offset_type*>(header + 56));
size_type mainPage = fromLittleEndian(reinterpret_cast<const size_type*>(header + 64));
size_type layoutPage = fromLittleEndian(reinterpret_cast<const size_type*>(header + 68));
offset_type checksumPos = fromLittleEndian(reinterpret_cast<const offset_type*>(header + 72));
fh.setUuid(uuid);
fh.setArticleCount(articleCount);
fh.setClusterCount(clusterCount);
fh.setUrlPtrPos(urlPtrPos);
fh.setTitleIdxPos(titleIdxPos);
fh.setClusterPtrPos(clusterPtrPos);
fh.setMimeListPos(mimeListPos);
fh.setMainPage(mainPage);
fh.setLayoutPage(layoutPage);
fh.setChecksumPos(checksumPos);
return in;
}
}

@ -1,360 +0,0 @@
/*
* Copyright (C) 2006,2009 Tommi Maekitalo
*
* This program is free software; you can redistribute it and/or
* modify it under the terms of the GNU General Public License as
* published by the Free Software Foundation; either version 2 of the
* License, or (at your option) any later version.
*
* This program is distributed in the hope that it will be useful, but
* is provided AS IS, WITHOUT ANY WARRANTY; without even the implied
* warranty of MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE, and
* NON-INFRINGEMENT. See the GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software
* Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
*
*/
#include <zim/fileimpl.h>
#include <zim/error.h>
#include <zim/dirent.h>
#include <zim/endian.h>
#include <sys/types.h>
#include <sys/stat.h>
#include <sstream>
#include <errno.h>
#include <cstring>
#include "config.h"
#include "log.h"
#include "envvalue.h"
#ifdef WITH_CXXTOOLS
# include <cxxtools/systemerror.h>
# include <cxxtools/md5stream.h>
#else
# include "md5stream.h"
#endif
log_define("zim.file.impl")
namespace zim
{
//////////////////////////////////////////////////////////////////////
// FileImpl
//
// Opens the zim file, reads and validates the header, checks that the last
// cluster offset lies inside the file and loads the mime type list.
// Throws ZimFileFormatError when any of these steps fails. The cache sizes
// can be overridden with the environment variables ZIM_DIRENTCACHE and
// ZIM_CLUSTERCACHE.
FileImpl::FileImpl(const char* fname)
  : zimFile(fname),
    direntCache(envValue("ZIM_DIRENTCACHE", DIRENT_CACHE_SIZE)),
    clusterCache(envValue("ZIM_CLUSTERCACHE", CLUSTER_CACHE_SIZE))
{
  log_trace("read file \"" << fname << '"');

  if (!zimFile)
    throw ZimFileFormatError(std::string("can't open zim-file \"") + fname + '"');

  filename = fname;

  // read header
  zimFile >> header;
  if (zimFile.fail())
    throw ZimFileFormatError("error reading zim-file header");

  // sanity check: the last cluster must start inside the file
  if (getCountClusters() != 0)
  {
    offset_type lastOffset = getClusterOffset(getCountClusters() - 1);
    log_debug("last offset=" << lastOffset << " file size=" << zimFile.fsize());
    if (lastOffset > static_cast<offset_type>(zimFile.fsize()))
    {
      log_fatal("last offset (" << lastOffset << ") larger than file size (" << zimFile.fsize() << ')');
      throw ZimFileFormatError("last cluster offset larger than file size; file corrupt");
    }
  }
  else
  {
    log_warn("no clusters found");
  }

  // read mime types: zero-terminated strings, the list ends with an empty one
  zimFile.seekg(header.getMimeListPos());
  std::string mimeType;
  for (;;)
  {
    std::getline(zimFile, mimeType, '\0');

    if (zimFile.fail())
      throw ZimFileFormatError("error reading mime type list");

    if (mimeType.empty())
      break;

    mimeTypes.push_back(mimeType);
  }
}
// Returns the directory entry with the given index (in url order).
// Entries are served from direntCache when possible; otherwise the entry is
// located through the url pointer list, read from the file and cached.
// Throws ZimFileFormatError on an out-of-range index or any I/O problem.
Dirent FileImpl::getDirent(size_type idx)
{
  log_trace("FileImpl::getDirent(" << idx << ')');

  // dirents are small, so a small read buffer is selected
  zimFile.setBufsize(64);

  if (idx >= getCountArticles())
    throw ZimFileFormatError("article index out of range");

  if (!zimFile)
  {
    log_warn("file in error state");
    throw ZimFileFormatError("file in error state");
  }

  std::pair<bool, Dirent> cached = direntCache.getx(idx);
  if (cached.first)
  {
    log_debug("dirent " << idx << " found in cache; hits " << direntCache.getHits() << " misses " << direntCache.getMisses() << " ratio " << direntCache.hitRatio() * 100 << "% fillfactor " << direntCache.fillfactor());
    return cached.second;
  }

  log_debug("dirent " << idx << " not found in cache; hits " << direntCache.getHits() << " misses " << direntCache.getMisses() << " ratio " << direntCache.hitRatio() * 100 << "% fillfactor " << direntCache.fillfactor());

  // cache miss: look up the file position of the entry and read it
  offset_type direntPos = getOffset(header.getUrlPtrPos(), idx);

  zimFile.seekg(direntPos);
  if (!zimFile)
  {
    log_warn("failed to seek to directory entry");
    throw ZimFileFormatError("failed to seek to directory entry");
  }

  Dirent entry;
  zimFile >> entry;
  if (!zimFile)
  {
    log_warn("failed to read to directory entry");
    throw ZimFileFormatError("failed to read directory entry");
  }

  log_debug("dirent read from " << direntPos);

  direntCache.put(idx, entry);
  return entry;
}
// Returns the directory entry at position idx of the title-ordered index.
// Throws ZimFileFormatError when idx is not smaller than the article count.
Dirent FileImpl::getDirentByTitle(size_type idx)
{
  if (idx < getCountArticles())
  {
    // translate the title position into the url-ordered article index
    size_type articleIdx = getIndexByTitle(idx);
    return getDirent(articleIdx);
  }

  throw ZimFileFormatError("article index out of range");
}
// Reads entry idx of the title index, i.e. the article index stored at
// position idx of the table starting at header.getTitleIdxPos().
// Throws ZimFileFormatError on an out-of-range idx or a read failure.
size_type FileImpl::getIndexByTitle(size_type idx)
{
  if (idx >= getCountArticles())
    throw ZimFileFormatError("article index out of range");

  // the table consists of fixed-size entries of sizeof(size_type) bytes
  zimFile.seekg(header.getTitleIdxPos() + sizeof(size_type) * idx);

  size_type articleIdx;
  zimFile.read(reinterpret_cast<char*>(&articleIdx), sizeof(size_type));
  if (!zimFile)
    throw ZimFileFormatError("error reading title index");

  // values are stored little endian; convert on big endian hosts only
  if (isBigEndian())
    articleIdx = fromLittleEndian(&articleIdx);

  return articleIdx;
}
// Returns cluster idx, either from clusterCache or freshly read from the
// file. Only compressed clusters are put into the cache.
// Throws ZimFileFormatError on an out-of-range index or a read failure.
Cluster FileImpl::getCluster(size_type idx)
{
  log_trace("getCluster(" << idx << ')');

  if (idx >= getCountClusters())
    throw ZimFileFormatError("cluster index out of range");

  Cluster cluster = clusterCache.get(idx);
  if (cluster)
  {
    log_debug("cluster " << idx << " found in cache; hits " << clusterCache.getHits() << " misses " << clusterCache.getMisses() << " ratio " << clusterCache.hitRatio() * 100 << "% fillfactor " << clusterCache.fillfactor());
    return cluster;
  }

  // cache miss: clusters are large, so a bigger read buffer is selected
  zimFile.setBufsize(16384);

  offset_type clusterPos = getClusterOffset(idx);
  log_debug("read cluster " << idx << " from offset " << clusterPos);

  zimFile.seekg(clusterPos);
  zimFile >> cluster;
  if (zimFile.fail())
    throw ZimFileFormatError("error reading cluster data");

  if (!cluster.isCompressed())
  {
    log_debug("cluster " << idx << " is not compressed - do not cache");
  }
  else
  {
    log_debug("put cluster " << idx << " into cluster cache; hits " << clusterCache.getHits() << " misses " << clusterCache.getMisses() << " ratio " << clusterCache.hitRatio() * 100 << "% fillfactor " << clusterCache.fillfactor());
    clusterCache.put(idx, cluster);
  }

  return cluster;
}
// Reads entry idx from a table of offset_type values that starts at file
// position ptrOffset and returns it in host byte order.
// Throws ZimFileFormatError when the value can't be read.
offset_type FileImpl::getOffset(offset_type ptrOffset, size_type idx)
{
  zimFile.seekg(ptrOffset + sizeof(offset_type) * idx);

  offset_type value;
  zimFile.read(reinterpret_cast<char*>(&value), sizeof(offset_type));
  if (!zimFile)
    throw ZimFileFormatError("error reading offset");

  // stored little endian; convert on big endian hosts only
  if (isBigEndian())
    value = fromLittleEndian(&value);

  return value;
}
// Returns the index of the first directory entry whose namespace is not
// smaller than ch (the article count if there is none). The result is found
// by binary search over the namespace-sorted entries and memoized in
// namespaceBeginCache.
size_type FileImpl::getNamespaceBeginOffset(char ch)
{
  log_trace("getNamespaceBeginOffset(" << ch << ')');

  NamespaceCache::const_iterator cached = namespaceBeginCache.find(ch);
  if (cached != namespaceBeginCache.end())
    return cached->second;

  size_type lo = 0;
  size_type hi = getCountArticles();

  // entry 0 is never probed by the loop below, so it is fetched up front
  // and consulted once the search has converged
  Dirent firstDirent = getDirent(0);

  while (hi - lo > 1)
  {
    size_type mid = lo + (hi - lo) / 2;
    Dirent probe = getDirent(mid);
    if (probe.getNamespace() >= ch)
      hi = mid;
    else
      lo = mid;
  }

  // if entry 0 already belongs to ch (or a later namespace), lo is still 0
  size_type result;
  if (firstDirent.getNamespace() < ch)
    result = hi;
  else
    result = lo;

  namespaceBeginCache[ch] = result;
  return result;
}
// Returns the index one past the last directory entry whose namespace is not
// greater than ch, i.e. the index of the first entry of a later namespace
// (the article count if there is none). The result is found by binary search
// over the namespace-sorted entries and memoized in namespaceEndCache.
size_type FileImpl::getNamespaceEndOffset(char ch)
{
  log_trace("getNamespaceEndOffset(" << ch << ')');

  NamespaceCache::const_iterator it = namespaceEndCache.find(ch);
  if (it != namespaceEndCache.end())
    return it->second;

  size_type lower = 0;
  size_type upper = getCountArticles();
  log_debug("namespace " << ch << " lower=" << lower << " upper=" << upper);
  while (upper - lower > 1)
  {
    size_type m = lower + (upper - lower) / 2;
    Dirent d = getDirent(m);
    if (d.getNamespace() > ch)
      upper = m;
    else
      lower = m;
    log_debug("namespace " << d.getNamespace() << " m=" << m << " lower=" << lower << " upper=" << upper);
  }

  // The search never looks at entry 0. If the search converged on 1 and even
  // the first entry lies in a later namespace, no entry belongs to ch or an
  // earlier namespace and the end offset is 0, not 1. (upper == 0 means the
  // file has no articles, so entry 0 must not be fetched in that case.)
  if (upper == 1 && getDirent(0).getNamespace() > ch)
    upper = 0;

  namespaceEndCache[ch] = upper;
  return upper;
}
// Returns a string with one character per namespace present in the file, in
// directory order. The string is computed on first use and kept in the
// member `namespaces`.
std::string FileImpl::getNamespaces()
{
  if (!namespaces.empty())
    return namespaces;

  // start with the namespace of the very first entry ...
  Dirent d = getDirent(0);
  namespaces = d.getNamespace();

  // ... then hop from the end of one namespace to the first entry of the next
  for (size_type idx = getNamespaceEndOffset(d.getNamespace());
       idx < getCountArticles();
       idx = getNamespaceEndOffset(d.getNamespace()))
  {
    d = getDirent(idx);
    namespaces += d.getNamespace();
  }

  return namespaces;
}
// Returns the mime type string for the given mime type code, which is an
// index into the list read from the file by the constructor.
// Throws std::runtime_error when the code is not a valid index.
const std::string& FileImpl::getMimeType(uint16_t idx) const
{
  // valid indices are 0 .. size()-1, so idx == size() must be rejected too
  if (idx >= mimeTypes.size())
  {
    std::ostringstream msg;
    msg << "unknown mime type code " << idx;
    throw std::runtime_error(msg.str());
  }

  return mimeTypes[idx];
}
// Returns the 16 byte checksum stored in the file as a 32 character
// lower-case hex string. An empty string is returned when the header has no
// checksum or the checksum can't be read.
std::string FileImpl::getChecksum()
{
  if (!header.hasChecksum())
    return std::string();

  zimFile.seekg(header.getChecksumPos());

  unsigned char digest[16];
  zimFile.read(reinterpret_cast<char*>(digest), 16);
  if (!zimFile)
  {
    log_warn("error reading checksum");
    return std::string();
  }

  static const char hexDigits[] = "0123456789abcdef";

  // two hex characters per byte plus the terminating zero
  char text[33];
  for (int i = 0; i < 16; ++i)
  {
    text[2 * i]     = hexDigits[digest[i] >> 4];
    text[2 * i + 1] = hexDigits[digest[i] & 0xf];
  }
  text[32] = '\0';

  log_debug("chksum=" << text);
  return text;
}
bool FileImpl::verify()
{
if (!header.hasChecksum())
return false;
#ifdef WITH_CXXTOOLS
cxxtools::Md5stream md5;
#else
Md5stream md5;
#endif
zimFile.seekg(0);
char ch;
for (offset_type n = 0; n < header.getChecksumPos() && zimFile.get(ch); ++n)
md5 << ch;
unsigned char chksumFile[16];
unsigned char chksumCalc[16];
zimFile.read(reinterpret_cast<char*>(chksumFile), 16);
if (!zimFile)
throw ZimFileFormatError("failed to read checksum from zim file");
md5.getDigest(chksumCalc);
if (std::memcmp(chksumFile, chksumCalc, 16) != 0)
throw ZimFileFormatError("invalid checksum in zim file");
return true;
}
}

@ -1,323 +0,0 @@
/*
* Copyright (C) 2010 Tommi Maekitalo
*
* This program is free software; you can redistribute it and/or
* modify it under the terms of the GNU General Public License as
* published by the Free Software Foundation; either version 2 of the
* License, or (at your option) any later version.
*
* This program is distributed in the hope that it will be useful, but
* is provided AS IS, WITHOUT ANY WARRANTY; without even the implied
* warranty of MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE, and
* NON-INFRINGEMENT. See the GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software
* Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
*
*/
#include <zim/fstream.h>
#include "log.h"
#include "config.h"
#include <sstream>
#include <stdexcept>
#include <errno.h>
#include <string.h>
#include <fcntl.h>
#include <sys/types.h>
#include <sys/stat.h>
#ifdef WITH_CXXTOOLS
#include <cxxtools/systemerror.h>
#endif
#ifdef _WIN32
#include <io.h>
#else
#include <unistd.h>
#endif
#ifndef O_LARGEFILE
#define O_LARGEFILE 0
#endif
#ifndef O_BINARY
#define O_BINARY 0
#endif
log_define("zim.fstream")
namespace zim
{
// Exception used inside this file to signal that a file (or one part of a
// split file) could not be opened. Its message is always "file not found".
struct FileNotFound : public std::runtime_error
{
  FileNotFound()
    : std::runtime_error("file not found")
  {
  }
};
////////////////////////////////////////////////////////////
// OpenfileInfo
//
// Opens the named file read-only and keeps the descriptor in fd.
// open64() is used when HAVE_OPEN64 is defined, open() otherwise; the flags
// O_LARGEFILE and O_BINARY are defined as 0 above on platforms lacking them.
// Throws FileNotFound when the open call fails, whatever the reason.
streambuf::OpenfileInfo::OpenfileInfo(const std::string& fname_)
: fname(fname_),
#ifdef HAVE_OPEN64
fd(::open64(fname.c_str(), O_RDONLY | O_LARGEFILE | O_BINARY))
#else
fd(::open(fname.c_str(), O_RDONLY | O_LARGEFILE | O_BINARY))
#endif
{
if (fd < 0)
throw FileNotFound();
}
// Closes the descriptor opened by the constructor. The constructor throws
// when opening fails, so fd is always a valid descriptor here.
streambuf::OpenfileInfo::~OpenfileInfo()
{
::close(fd);
}
////////////////////////////////////////////////////////////
// FileInfo
//
// Records the name of a file and determines its size by seeking the given
// descriptor to the end of the file. Note that the descriptor's position is
// left at the end. Throws std::runtime_error when seeking fails.
streambuf::FileInfo::FileInfo(const std::string& fname_, int fd)
  : fname(fname_)
{
  // use the widest seek function the platform offers
#if defined(_WIN32)
  const __int64 endPos = ::_lseeki64(fd, 0, SEEK_END);
#elif defined(HAVE_LSEEK64)
  const off64_t endPos = ::lseek64(fd, 0, SEEK_END);
#else
  const off_t endPos = ::lseek(fd, 0, SEEK_END);
#endif

  if (endPos >= 0)
  {
    fsize = static_cast<zim::offset_type>(endPos);
    return;
  }

  std::ostringstream msg;
  msg << "error " << errno << " seeking to end in file " << fname << ": " << strerror(errno);
  throw std::runtime_error(msg.str());
}
// Output is not supported by this stream buffer: every attempt to write a
// character reports failure by returning eof. The argument is ignored.
std::streambuf::int_type streambuf::overflow(std::streambuf::int_type ch)
{
return traits_type::eof();
}
std::streambuf::int_type streambuf::underflow()
{
log_debug("underflow; bufsize=" << buffer.size());
int n;
do
{
n = ::read(currentFile->fd, &buffer[0], buffer.size());
if (n < 0)
{
std::ostringstream msg;
msg << "error " << errno << " reading from file: " << strerror(errno);
throw std::runtime_error(msg.str());
}
else if (n == 0)
{
FilesType::iterator it;
for (it = files.begin(); it != files.end(); ++it)
{
if ((*it)->fname == currentFile->fname)
{
++it;
break;
}
}
if (it == files.end())
return traits_type::eof();
setCurrentFile((*it)->fname, 0);
}
} while (n == 0);
char* p = &buffer[0];
setg(p, p, p + n);
return traits_type::to_int_type(*gptr());
}
// There is nothing to flush in this read-only buffer; the call always
// returns traits_type::eof(), which callers of sync() treat as failure.
int streambuf::sync()
{
return traits_type::eof();
}
namespace
{
  // Splits a ':'-separated list of file names and appends the names to out.
  //
  // A backslash escapes the following character, so "\\:" yields a literal
  // ':' and "\\\\" a literal backslash inside a name; a backslash at the very
  // end of the list is dropped. An empty list appends nothing; otherwise
  // empty names (e.g. from "a::b" or a trailing ':') are kept as empty
  // strings. The separator itself is never part of a name.
  void parseFilelist(const std::string& list, std::vector<std::string>& out)
  {
    bool started = false;   // has the first name of this list been opened?
    bool escaped = false;   // was the previous character an unescaped '\\'?

    for (std::string::const_iterator it = list.begin(); it != list.end(); ++it)
    {
      if (!started)
      {
        // open the first name lazily so that an empty list adds nothing
        out.push_back(std::string());
        started = true;
      }

      if (escaped)
      {
        out.back() += *it;
        escaped = false;
      }
      else if (*it == ':')
      {
        // separator: start the next (initially empty) name
        out.push_back(std::string());
      }
      else if (*it == '\\')
      {
        escaped = true;
      }
      else
      {
        out.back() += *it;
      }
    }
  }
}
// Creates a stream buffer for the file fname.
//
// If fname itself can't be opened, the file is assumed to be split into
// parts named fname + "aa", fname + "ab", ... up to "zz"; parts are collected
// until the first one that can't be opened. When not even the first part
// exists a std::runtime_error describing the original open failure is thrown.
// Reading starts at the beginning of the first file.
streambuf::streambuf(const std::string& fname, unsigned bufsize, unsigned noOpenFiles)
  : buffer(bufsize),
    openFilesCache(noOpenFiles),
    mtime(0)
{
  log_debug("streambuf for " << fname << " with " << bufsize << " bytes");

  try
  {
    // the common case: a single, unsplit file
    currentFile = new OpenfileInfo(fname);
    files.push_back(new FileInfo(fname, currentFile->fd));
    openFilesCache.put(fname, currentFile);
  }
  catch (const FileNotFound&)
  {
    // remember why the plain name failed; the probing below overwrites errno
    int errnoSave = errno;

    try
    {
      for (char first = 'a'; first <= 'z'; ++first)
      {
        for (char second = 'a'; second <= 'z'; ++second)
        {
          std::string partName(fname);
          partName += first;
          partName += second;

          currentFile = new OpenfileInfo(partName);
          files.push_back(new FileInfo(partName, currentFile->fd));
          openFilesCache.put(partName, currentFile);
        }
      }
    }
    catch (const FileNotFound&)
    {
      // the first missing part ends the list; no part at all is an error
      if (files.empty())
      {
        std::ostringstream msg;
        msg << "error " << errnoSave << " opening file \"" << fname << "\": " << strerror(errnoSave);
        throw std::runtime_error(msg.str());
      }
    }
  }

  setCurrentFile(files.front()->fname, 0);
}
// Makes fname the file subsequent reads come from and positions it at off.
//
// The open file is taken from openFilesCache when present, otherwise it is
// opened and added to the cache. A freshly opened file is already positioned
// at 0, so the seek is skipped for it when off is 0.
// Throws std::runtime_error when seeking fails (and FileNotFound from
// OpenfileInfo when the file can't be opened).
void streambuf::setCurrentFile(const std::string& fname, zim::offset_type off)
{
  std::pair<bool, OpenfileInfoPtr> f = openFilesCache.getx(fname);
  if (f.first)
  {
    currentFile = f.second;
  }
  else
  {
    // file not found in cache
    currentFile = new OpenfileInfo(fname);
    openFilesCache.put(fname, currentFile);
  }

  if (f.first || off != 0) // found in cache or seek requested
  {
    // The result must be held in a signed type: with offset_type (which may
    // be unsigned) the "ret < 0" error check below could never fire. This
    // matches the types used for the same calls in FileInfo's constructor.
#if defined(_WIN32)
    __int64 ret = ::_lseeki64(currentFile->fd, off, SEEK_SET);
#elif defined(HAVE_LSEEK64)
    off64_t ret = ::lseek64(currentFile->fd, off, SEEK_SET);
#else
    off_t ret = ::lseek(currentFile->fd, off, SEEK_SET);
#endif
    if (ret < 0)
    {
      std::ostringstream msg;
      msg << "error " << errno << " seeking to "<< off << " in file " << fname << ": " << strerror(errno);
      throw std::runtime_error(msg.str());
    }
  }
}
// Positions the stream at the absolute offset off, counted over all files of
// a split file as if they were concatenated. The get area is discarded.
// Throws std::runtime_error when off lies beyond the last file.
void streambuf::seekg(zim::offset_type off)
{
  setg(0, 0, 0);
  currentPos = off;

  // skip whole files until the remaining offset falls into one of them
  zim::offset_type remaining = off;
  FilesType::iterator part = files.begin();
  while (part != files.end() && (*part)->fsize < remaining)
  {
    remaining -= (*part)->fsize;
    ++part;
  }

  if (part == files.end())
  {
    std::ostringstream msg;
    msg << "error seeking to "<< off;
    throw std::runtime_error(msg.str());
  }

  setCurrentFile((*part)->fname, remaining);
}
// Returns the total size in bytes, i.e. the sum of the sizes of all files
// that make up this (possibly split) file.
zim::offset_type streambuf::fsize() const
{
  zim::offset_type total = 0;

  FilesType::const_iterator part = files.begin();
  while (part != files.end())
  {
    total += (*part)->fsize;
    ++part;
  }

  return total;
}
// Returns the modification time of the first file. The value is determined
// with stat() on first use and kept in `mtime`; a stored value of 0 counts
// as "not yet determined". Returns `mtime` unchanged when there are no files.
// Throws cxxtools::SystemError (with cxxtools) or std::runtime_error when
// stat fails.
time_t streambuf::getMTime() const
{
if (mtime || files.empty())
return mtime;
const char* fname = files.front()->fname.c_str();
#ifdef HAVE_STAT64
struct stat64 st;
int ret = ::stat64(fname, &st);
#else
struct stat st;
int ret = ::stat(fname, &st);
#endif
// the statement guarded by this `if` is selected by the preprocessor below
if (ret != 0)
#ifdef WITH_CXXTOOLS
throw cxxtools::SystemError("stat");
#else
{
std::ostringstream msg;
msg << "stat failed with errno " << errno << " : " << strerror(errno);
throw std::runtime_error(msg.str());
}
#endif
// assigning in a const member function: mtime has to be declared mutable
mtime = st.st_mtime;
return mtime;
}
}

@ -1,165 +0,0 @@
/*
* Copyright (C) 2007 Tommi Maekitalo
*
* This program is free software; you can redistribute it and/or
* modify it under the terms of the GNU General Public License as
* published by the Free Software Foundation; either version 2 of the
* License, or (at your option) any later version.
*
* This program is distributed in the hope that it will be useful, but
* is provided AS IS, WITHOUT ANY WARRANTY; without even the implied
* warranty of MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE, and
* NON-INFRINGEMENT. See the GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software
* Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
*
*/
#include <zim/indexarticle.h>
#include <zim/zintstream.h>
#include <sstream>
#include <stdexcept>
#include "log.h"
#include "ptrstream.h"
log_define("zim.indexarticle")
namespace zim
{
// Class-wide switch read by readEntriesZ(): while false (the default) each
// index value read there is added to a running offset, i.e. the values are
// treated as differences to the previous entry; when true they are used as is.
bool IndexArticle::noOffset = false;
void IndexArticle::readEntries()
{
if (!good() || categoriesRead)
return;
log_debug("read entries for article " << getUrl());
if (getParameter().empty())
readEntriesB();
else
readEntriesZ();
categoriesRead = true;
}
void IndexArticle::readEntriesZ()
{
std::istringstream s(getParameter());
zim::ZIntStream extra(s);
unsigned flagfield; // field with one bit (bits 0-3) for each cateogry
extra.get(flagfield);
log_debug("flags: h" << std::hex << flagfield);
unsigned offset = 0;
for (unsigned c = 0; c <= 3; ++c)
{
bool catNotEmpty = (flagfield & 1);
flagfield >>= 1;
if (catNotEmpty)
{
log_debug("read category " << c);
unsigned len;
Entry entry;
bool s = extra.get(len) && extra.get(entry.index);
if (s && getNamespace() == 'X')
s = extra.get(entry.pos);
else
entry.pos = 0;
unsigned pos = entry.pos;
if (!s)
throw std::runtime_error("invalid index entry");
log_debug("first index " << entry.index << " pos " << entry.pos);
entries[c].push_back(entry);
log_debug("read data from offset " << offset << " len " << len);
zim::Blob b = getData();
ptrstream data(const_cast<char*>(b.data() + offset), const_cast<char*>(b.data() + offset + len));
ZIntStream zdata(data);
unsigned index;
unsigned indexOffset = 0;
while (zdata.get(index))
{
entry.index = indexOffset + index;
if (!noOffset)
indexOffset += index;
if (getNamespace() == 'X')
{
unsigned p;
if (!zdata.get(p))
throw std::runtime_error("invalid index entry");
pos += p;
entry.pos = p;
}
else
entry.pos = 0;
log_debug("index " << entry.index << " pos " << entry.pos);
entries[c].push_back(entry);
}
offset += len;
}
}
}
namespace
{
  // Thrown by getSizeValue() when the input ends prematurely.
  class Eof { };

  // Reads one little endian zim::size_type value from the stream and returns
  // it in host byte order. Throws Eof when the value can't be read.
  zim::size_type getSizeValue(std::istream& in)
  {
    zim::size_type raw;

    in.read(reinterpret_cast<char*>(&raw), sizeof(zim::size_type));
    if (!in)
      throw Eof();

    return fromLittleEndian<zim::size_type>(&raw);
  }
}
// Reads index entries stored in the plain binary format: four entry counts
// (one per category) followed by the entries of all categories in order.
// Every entry consists of an index and - in namespace 'X' only - a position;
// in other namespaces the position is set to 0, as readEntriesZ() does.
// When the data ends prematurely an error is logged and the entries read so
// far are kept.
void IndexArticle::readEntriesB()
{
  try
  {
    zim::size_type categoryCount[4];
    zim::Blob b = getData();
    ptrstream data(const_cast<char*>(b.data()), const_cast<char*>(b.end()));

    for (unsigned c = 0; c < 4; ++c)
      categoryCount[c] = getSizeValue(data);

    // the namespace doesn't change while reading, so query it only once
    const bool hasPos = (getNamespace() == 'X');

    for (unsigned c = 0; c < 4; ++c)
    {
      log_debug("read " << categoryCount[c] << " entries for category " << c);
      for (unsigned n = 0; n < categoryCount[c]; ++n)
      {
        Entry entry;
        entry.index = getSizeValue(data);
        // always assign pos, so it is never left uninitialized
        entry.pos = hasPos ? getSizeValue(data) : 0;
        entries[c].push_back(entry);
      }
    }
  }
  catch (const Eof&)
  {
    log_error("end of file when reading index entries for article " << getTitle());
    return;
  }
}
}

@ -1,36 +0,0 @@
/*
* Copyright (C) 2009 Tommi Maekitalo
*
* This program is free software; you can redistribute it and/or
* modify it under the terms of the GNU General Public License as
* published by the Free Software Foundation; either version 2 of the
* License, or (at your option) any later version.
*
* This program is distributed in the hope that it will be useful, but
* is provided AS IS, WITHOUT ANY WARRANTY; without even the implied
* warranty of MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE, and
* NON-INFRINGEMENT. See the GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software
* Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
*
*/
#include "config.h"
// Logging facade: with cxxtools the real logging macros are used; without it
// every logging macro expands to nothing, so the log statements (including
// their argument expressions) vanish from the compiled code.
#ifdef WITH_CXXTOOLS
#include <cxxtools/log.h>
#else
// log_define is used at file scope without a trailing semicolon
#define log_define(e)
#define log_fatal(e)
#define log_error(e)
#define log_warn(e)
#define log_info(e)
#define log_debug(e)
#define log_trace(e)
#endif

@ -1,182 +0,0 @@
/*
* Copyright (C) 2009 Tommi Maekitalo
*
* This program is free software; you can redistribute it and/or
* modify it under the terms of the GNU General Public License as
* published by the Free Software Foundation; either version 2 of the
* License, or (at your option) any later version.
*
* This program is distributed in the hope that it will be useful, but
* is provided AS IS, WITHOUT ANY WARRANTY; without even the implied
* warranty of MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE, and
* NON-INFRINGEMENT. See the GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software
* Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
*
*/
#include <zim/lzmastream.h>
#include <zim/zim.h>
#include "log.h"
#include <cstring>
#include <sstream>
log_define("zim.lzma.compress")
namespace zim
{
namespace
{
  // Passes LZMA_OK and LZMA_STREAM_END through unchanged; every other return
  // code is logged and turned into an LzmaError whose message contains the
  // numeric code and, for the codes known here, its symbolic name.
  lzma_ret checkError(lzma_ret ret)
  {
    if (ret == LZMA_OK || ret == LZMA_STREAM_END)
      return ret;

    // symbolic name of the code; stays 0 for codes not listed
    const char* name = 0;
    switch (ret)
    {
      case LZMA_OK:                name = "LZMA_OK"; break;
      case LZMA_STREAM_END:        name = "LZMA_STREAM_END"; break;
      case LZMA_NO_CHECK:          name = "LZMA_NO_CHECK"; break;
      case LZMA_UNSUPPORTED_CHECK: name = "LZMA_UNSUPPORTED_CHECK"; break;
      case LZMA_GET_CHECK:         name = "LZMA_GET_CHECK"; break;
      case LZMA_MEM_ERROR:         name = "LZMA_MEM_ERROR"; break;
      case LZMA_MEMLIMIT_ERROR:    name = "LZMA_MEMLIMIT_ERROR"; break;
      case LZMA_FORMAT_ERROR:      name = "LZMA_FORMAT_ERROR"; break;
      case LZMA_OPTIONS_ERROR:     name = "LZMA_OPTIONS_ERROR"; break;
      case LZMA_DATA_ERROR:        name = "LZMA_DATA_ERROR"; break;
      case LZMA_BUF_ERROR:         name = "LZMA_BUF_ERROR"; break;
      case LZMA_PROG_ERROR:        name = "LZMA_PROG_ERROR"; break;
    }

    std::ostringstream msg;
    msg << "lzma-error " << ret;
    if (name != 0)
      msg << ": " << name;

    log_error(msg.str());
    throw LzmaError(ret, msg.str());
  }
}
// Sets up an lzma encoder with the given preset and integrity check. Data
// written to this buffer is collected in obuffer (bufsize_ bytes) and its
// compressed form is forwarded to sink_.
// Throws LzmaError when the encoder can't be initialized.
LzmaStreamBuf::LzmaStreamBuf(std::streambuf* sink_, uint32_t preset, lzma_check check, unsigned bufsize_)
  : obuffer(bufsize_),
    sink(sink_)
{
  // the lzma stream structure has to be zeroed before its first use
  std::memset(reinterpret_cast<void*>(&stream), 0, sizeof(stream));

  checkError(::lzma_easy_encoder(&stream, preset, check));

  // the whole output buffer is available as put area
  char* const begin = &obuffer[0];
  setp(begin, begin + obuffer.size());
}
// Releases the encoder state held in `stream`. Buffered data is not
// compressed or sent to the sink here; that is done by sync() and end().
LzmaStreamBuf::~LzmaStreamBuf()
{
::lzma_end(&stream);
}
// Called when the put area is full: runs the collected bytes through the
// encoder, forwards the compressed output to the sink and makes room in the
// put area again. Input the encoder did not consume is kept at the start of
// the buffer. Returns eof when the sink does not accept all data, 0 otherwise.
LzmaStreamBuf::int_type LzmaStreamBuf::overflow(int_type c)
{
  char* const begin = &obuffer[0];

  // initialize input-stream
  stream.next_in = reinterpret_cast<const uint8_t*>(begin);
  stream.avail_in = pptr() - begin;

  // initialize zbuffer for compressed data
  char zbuffer[8192];
  stream.next_out = reinterpret_cast<uint8_t*>(zbuffer);
  stream.avail_out = sizeof(zbuffer);

  // compress
  checkError(::lzma_code(&stream, LZMA_RUN));

  // copy zbuffer to sink / consume compressed data
  std::streamsize produced = sizeof(zbuffer) - stream.avail_out;
  if (produced > 0 && sink->sputn(zbuffer, produced) < produced)
    return traits_type::eof();

  // move remaining characters to start of obuffer
  if (stream.avail_in > 0)
    memmove(begin, stream.next_in, stream.avail_in);

  // reset outbuffer; it starts behind the characters kept above
  setp(begin + stream.avail_in, begin + obuffer.size());

  if (c != traits_type::eof())
    sputc(traits_type::to_char_type(c));

  return 0;
}
// This buffer is write-only: reading always yields eof.
LzmaStreamBuf::int_type LzmaStreamBuf::underflow()
{
return traits_type::eof();
}
// Feeds the bytes collected in the put area to the encoder and forwards the
// compressed output produced meanwhile to the sink. Returns -1 when the sink
// does not accept all data, 0 otherwise.
// NOTE(review): the encoder is driven with LZMA_FINISH here, and the loop
// stops as soon as all input is consumed, so output may still be pending in
// the encoder (end() drains it). Whether more data may be written after a
// sync() depends on liblzma's rules for LZMA_FINISH - confirm.
int LzmaStreamBuf::sync()
{
  char* const begin = &obuffer[0];

  // initialize input-stream with the buffered characters
  stream.next_in = reinterpret_cast<const uint8_t*>(begin);
  stream.avail_in = pptr() - begin;

  char zbuffer[8192];
  while (stream.avail_in > 0)
  {
    // initialize zbuffer
    stream.next_out = reinterpret_cast<uint8_t*>(zbuffer);
    stream.avail_out = sizeof(zbuffer);

    checkError(::lzma_code(&stream, LZMA_FINISH));

    // copy zbuffer to sink
    std::streamsize produced = sizeof(zbuffer) - stream.avail_out;
    if (produced > 0 && sink->sputn(zbuffer, produced) < produced)
      return -1;
  }

  // reset outbuffer
  setp(begin, begin + obuffer.size());
  return 0;
}
// Finishes the compressed stream: the buffered bytes are fed to the encoder
// with LZMA_FINISH and the encoder is run until it reports LZMA_STREAM_END,
// forwarding all output to the sink. Always returns 0; failures are reported
// by throwing LzmaError.
int LzmaStreamBuf::end()
{
  char zbuffer[8192];
  char* const begin = &obuffer[0];

  // initialize input-stream with the buffered characters
  stream.next_in = reinterpret_cast<const uint8_t*>(begin);
  stream.avail_in = pptr() - begin;

  for (;;)
  {
    // initialize zbuffer
    stream.next_out = reinterpret_cast<uint8_t*>(zbuffer);
    stream.avail_out = sizeof(zbuffer);

    lzma_ret ret = checkError(::lzma_code(&stream, LZMA_FINISH));

    // copy zbuffer to sink
    std::streamsize produced = sizeof(zbuffer) - stream.avail_out;
    if (produced > 0)
    {
      std::streamsize written = sink->sputn(zbuffer, produced);
      if (written < produced)
        throw LzmaError(static_cast<lzma_ret>(0), "failed to send compressed data to sink in lzmastream");
    }

    if (ret == LZMA_STREAM_END)
      break;
  }

  // reset outbuffer
  setp(begin, begin + obuffer.size());
  return 0;
}
// Finishes the compressed stream through the stream buffer's end() and sets
// failbit on this stream when that reports a non-zero result.
void LzmaStream::end()
{
if (streambuf.end() != 0)
setstate(failbit);
}
}

@ -1,340 +0,0 @@
/* MD5C.C - RSA Data Security, Inc., MD5 message-digest algorithm
*/
/* Copyright (C) 1991-2, RSA Data Security, Inc. Created 1991. All
rights reserved.
License to copy and use this software is granted provided that it
is identified as the "RSA Data Security, Inc. MD5 Message-Digest
Algorithm" in all material mentioning or referencing this software
or this function.
License is also granted to make and use derivative works provided
that such works are identified as "derived from the RSA Data
Security, Inc. MD5 Message-Digest Algorithm" in all material
mentioning or referencing the derived work.
RSA Data Security, Inc. makes no representations concerning either
the merchantability of this software or the suitability of this
software for any particular purpose. It is provided "as is"
without express or implied warranty of any kind.
These notices must be retained in any copies of any part of this
documentation and/or software.
*/
#include "md5.h"
#include <string.h>
#define MD5_CTX zim_MD5_CTX
/* Constants for MD5Transform routine.
These are the left-rotation amounts passed as the `s` argument of the
FF/GG/HH/II step macros; Sij is the j-th of the four amounts used in
round i.
*/
#define S11 7
#define S12 12
#define S13 17
#define S14 22
#define S21 5
#define S22 9
#define S23 14
#define S24 20
#define S31 4
#define S32 11
#define S33 16
#define S34 23
#define S41 6
#define S42 10
#define S43 15
#define S44 21
static void MD5Transform PROTO_LIST ((UINT4 [4], const unsigned char [64]));
static void Encode PROTO_LIST
((unsigned char *, UINT4 *, unsigned int));
static void Decode PROTO_LIST
((UINT4 *, const unsigned char *, unsigned int));
/*
static void MD5_memcpy PROTO_LIST ((POINTER, POINTER, unsigned int));
static void MD5_memset PROTO_LIST ((POINTER, int, unsigned int));
*/
#define MD5_memcpy memcpy
#define MD5_memset memset
/* Padding block: a single 0x80 byte followed by zero bytes. zim_MD5Final
feeds the first padLen bytes of it into the digest.
*/
static unsigned char PADDING[64] = {
0x80, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
};
/* F, G, H and I are basic MD5 functions.
Every use of a macro argument is parenthesized - including the operand of
`~` - so the macros also expand correctly for arguments that are
expressions rather than plain names.
*/
#define F(x, y, z) (((x) & (y)) | ((~(x)) & (z)))
#define G(x, y, z) (((x) & (z)) | ((y) & (~(z))))
#define H(x, y, z) ((x) ^ (y) ^ (z))
#define I(x, y, z) ((y) ^ ((x) | (~(z))))
/* ROTATE_LEFT rotates x left n bits.
x is expected to be a 32 bit unsigned value and n to lie in 1..31.
*/
#define ROTATE_LEFT(x, n) (((x) << (n)) | ((x) >> (32-(n))))
/* FF, GG, HH, and II transformations for rounds 1, 2, 3, and 4.
Rotation is separate from addition to prevent recomputation.
Each macro updates its first argument in place:
a = b + ROTATE_LEFT(a + fn(b, c, d) + x + ac, s)
where fn is F, G, H or I respectively. The argument `a` is evaluated
several times and must therefore be a plain lvalue.
*/
#define FF(a, b, c, d, x, s, ac) { \
(a) += F ((b), (c), (d)) + (x) + (UINT4)(ac); \
(a) = ROTATE_LEFT ((a), (s)); \
(a) += (b); \
}
#define GG(a, b, c, d, x, s, ac) { \
(a) += G ((b), (c), (d)) + (x) + (UINT4)(ac); \
(a) = ROTATE_LEFT ((a), (s)); \
(a) += (b); \
}
#define HH(a, b, c, d, x, s, ac) { \
(a) += H ((b), (c), (d)) + (x) + (UINT4)(ac); \
(a) = ROTATE_LEFT ((a), (s)); \
(a) += (b); \
}
#define II(a, b, c, d, x, s, ac) { \
(a) += I ((b), (c), (d)) + (x) + (UINT4)(ac); \
(a) = ROTATE_LEFT ((a), (s)); \
(a) += (b); \
}
/* MD5 initialization. Begins an MD5 operation, writing a new context:
   the bit counter is cleared and the four state words are set to their
   initial values.
 */
void zim_MD5Init (MD5_CTX* context)
{
  /* No input has been processed yet. */
  context->count[0] = 0;
  context->count[1] = 0;

  /* Load magic initialization constants. */
  context->state[0] = 0x67452301;
  context->state[1] = 0xefcdab89;
  context->state[2] = 0x98badcfe;
  context->state[3] = 0x10325476;
}
/* MD5 block update operation. Continues an MD5 message-digest
operation, processing another message block, and updating the
context.
Input is processed in 64 byte blocks; bytes that do not fill a block
are kept in context->buffer until a later call completes the block.
*/
void zim_MD5Update (
MD5_CTX *context,
const unsigned char *input, /* input block */
unsigned int inputLen) /* length of input block */
{
unsigned int i, index, partLen;
/* Compute number of bytes mod 64 */
index = (unsigned int)((context->count[0] >> 3) & 0x3F);
/* Update number of bits */
/* count[0] holds the low word of the bit count; a result smaller than the
value just added means it wrapped, so the carry goes into count[1] */
if ((context->count[0] += ((UINT4)inputLen << 3))
< ((UINT4)inputLen << 3))
context->count[1]++;
/* the top three bits of inputLen were shifted out above */
context->count[1] += ((UINT4)inputLen >> 29);
/* number of bytes needed to complete the partially filled buffer */
partLen = 64 - index;
/* Transform as many times as possible.
*/
if (inputLen >= partLen) {
/* complete and process the buffered block first ... */
MD5_memcpy
((POINTER)&context->buffer[index], (POINTER)input, partLen);
MD5Transform (context->state, context->buffer);
/* ... then process all further complete blocks directly from the input */
for (i = partLen; i + 63 < inputLen; i += 64)
MD5Transform (context->state, &input[i]);
index = 0;
}
else
i = 0;
/* Buffer remaining input */
MD5_memcpy
((POINTER)&context->buffer[index], (POINTER)&input[i],
inputLen-i);
}
/* MD5 finalization. Ends an MD5 message-digest operation: pads the
   message, appends its length in bits, writes the 16-byte message digest
   to digest and zeroizes the context.
 */
void zim_MD5Final (
  unsigned char digest[16],     /* message digest */
  MD5_CTX *context)             /* context */
{
  unsigned char lengthBytes[8];
  unsigned int used;
  unsigned int padding;

  /* Capture the bit count before the padding below changes it */
  Encode (lengthBytes, context->count, 8);

  /* Pad out to 56 mod 64, leaving room for the 8 length bytes */
  used = (unsigned int)((context->count[0] >> 3) & 0x3f);
  if (used < 56)
    padding = 56 - used;
  else
    padding = 120 - used;
  zim_MD5Update (context, PADDING, padding);

  /* Append length (before padding) */
  zim_MD5Update (context, lengthBytes, 8);

  /* Store state in digest */
  Encode (digest, context->state, 16);

  /* Zeroize sensitive information */
  MD5_memset ((POINTER)context, 0, sizeof (*context));
}
/* MD5 basic transformation. Transforms state based on block: the 64-byte
block is unpacked into sixteen words by Decode, mixed into working copies
of the four state words over four rounds of sixteen steps each, and the
working copies are finally added back onto state[0..3].
state - the four chaining words, updated in place
block - the 64 input bytes, read only
*/
static void MD5Transform (
UINT4 state[4],
const unsigned char block[64])
{
/* a, b, c, d are working copies of the state; x receives the decoded block */
UINT4 a = state[0], b = state[1], c = state[2], d = state[3], x[16];
Decode (x, block, 64);
/* Each step below names which working variable is updated (first argument),
which message word is mixed in, the rotation amount and an additive
constant. The S11..S44 rotation amounts are defined outside this excerpt.
The trailing number on each line is the step index. */
/* Round 1 */
FF (a, b, c, d, x[ 0], S11, 0xd76aa478); /* 1 */
FF (d, a, b, c, x[ 1], S12, 0xe8c7b756); /* 2 */
FF (c, d, a, b, x[ 2], S13, 0x242070db); /* 3 */
FF (b, c, d, a, x[ 3], S14, 0xc1bdceee); /* 4 */
FF (a, b, c, d, x[ 4], S11, 0xf57c0faf); /* 5 */
FF (d, a, b, c, x[ 5], S12, 0x4787c62a); /* 6 */
FF (c, d, a, b, x[ 6], S13, 0xa8304613); /* 7 */
FF (b, c, d, a, x[ 7], S14, 0xfd469501); /* 8 */
FF (a, b, c, d, x[ 8], S11, 0x698098d8); /* 9 */
FF (d, a, b, c, x[ 9], S12, 0x8b44f7af); /* 10 */
FF (c, d, a, b, x[10], S13, 0xffff5bb1); /* 11 */
FF (b, c, d, a, x[11], S14, 0x895cd7be); /* 12 */
FF (a, b, c, d, x[12], S11, 0x6b901122); /* 13 */
FF (d, a, b, c, x[13], S12, 0xfd987193); /* 14 */
FF (c, d, a, b, x[14], S13, 0xa679438e); /* 15 */
FF (b, c, d, a, x[15], S14, 0x49b40821); /* 16 */
/* Round 2 */
GG (a, b, c, d, x[ 1], S21, 0xf61e2562); /* 17 */
GG (d, a, b, c, x[ 6], S22, 0xc040b340); /* 18 */
GG (c, d, a, b, x[11], S23, 0x265e5a51); /* 19 */
GG (b, c, d, a, x[ 0], S24, 0xe9b6c7aa); /* 20 */
GG (a, b, c, d, x[ 5], S21, 0xd62f105d); /* 21 */
GG (d, a, b, c, x[10], S22, 0x2441453); /* 22 */
GG (c, d, a, b, x[15], S23, 0xd8a1e681); /* 23 */
GG (b, c, d, a, x[ 4], S24, 0xe7d3fbc8); /* 24 */
GG (a, b, c, d, x[ 9], S21, 0x21e1cde6); /* 25 */
GG (d, a, b, c, x[14], S22, 0xc33707d6); /* 26 */
GG (c, d, a, b, x[ 3], S23, 0xf4d50d87); /* 27 */
GG (b, c, d, a, x[ 8], S24, 0x455a14ed); /* 28 */
GG (a, b, c, d, x[13], S21, 0xa9e3e905); /* 29 */
GG (d, a, b, c, x[ 2], S22, 0xfcefa3f8); /* 30 */
GG (c, d, a, b, x[ 7], S23, 0x676f02d9); /* 31 */
GG (b, c, d, a, x[12], S24, 0x8d2a4c8a); /* 32 */
/* Round 3 */
HH (a, b, c, d, x[ 5], S31, 0xfffa3942); /* 33 */
HH (d, a, b, c, x[ 8], S32, 0x8771f681); /* 34 */
HH (c, d, a, b, x[11], S33, 0x6d9d6122); /* 35 */
HH (b, c, d, a, x[14], S34, 0xfde5380c); /* 36 */
HH (a, b, c, d, x[ 1], S31, 0xa4beea44); /* 37 */
HH (d, a, b, c, x[ 4], S32, 0x4bdecfa9); /* 38 */
HH (c, d, a, b, x[ 7], S33, 0xf6bb4b60); /* 39 */
HH (b, c, d, a, x[10], S34, 0xbebfbc70); /* 40 */
HH (a, b, c, d, x[13], S31, 0x289b7ec6); /* 41 */
HH (d, a, b, c, x[ 0], S32, 0xeaa127fa); /* 42 */
HH (c, d, a, b, x[ 3], S33, 0xd4ef3085); /* 43 */
HH (b, c, d, a, x[ 6], S34, 0x4881d05); /* 44 */
HH (a, b, c, d, x[ 9], S31, 0xd9d4d039); /* 45 */
HH (d, a, b, c, x[12], S32, 0xe6db99e5); /* 46 */
HH (c, d, a, b, x[15], S33, 0x1fa27cf8); /* 47 */
HH (b, c, d, a, x[ 2], S34, 0xc4ac5665); /* 48 */
/* Round 4 */
II (a, b, c, d, x[ 0], S41, 0xf4292244); /* 49 */
II (d, a, b, c, x[ 7], S42, 0x432aff97); /* 50 */
II (c, d, a, b, x[14], S43, 0xab9423a7); /* 51 */
II (b, c, d, a, x[ 5], S44, 0xfc93a039); /* 52 */
II (a, b, c, d, x[12], S41, 0x655b59c3); /* 53 */
II (d, a, b, c, x[ 3], S42, 0x8f0ccc92); /* 54 */
II (c, d, a, b, x[10], S43, 0xffeff47d); /* 55 */
II (b, c, d, a, x[ 1], S44, 0x85845dd1); /* 56 */
II (a, b, c, d, x[ 8], S41, 0x6fa87e4f); /* 57 */
II (d, a, b, c, x[15], S42, 0xfe2ce6e0); /* 58 */
II (c, d, a, b, x[ 6], S43, 0xa3014314); /* 59 */
II (b, c, d, a, x[13], S44, 0x4e0811a1); /* 60 */
II (a, b, c, d, x[ 4], S41, 0xf7537e82); /* 61 */
II (d, a, b, c, x[11], S42, 0xbd3af235); /* 62 */
II (c, d, a, b, x[ 2], S43, 0x2ad7d2bb); /* 63 */
II (b, c, d, a, x[ 9], S44, 0xeb86d391); /* 64 */
/* Fold the mixed working copies back into the chaining state */
state[0] += a;
state[1] += b;
state[2] += c;
state[3] += d;
/* Zeroize sensitive information: wipe the decoded copy of the input block.
*/
MD5_memset ((POINTER)x, 0, sizeof (x));
}
/* Encodes input (UINT4) into output (unsigned char), least significant
   byte of each word first. len is the number of output bytes and is
   assumed to be a multiple of 4.
 */
static void Encode (
  unsigned char *output,
  UINT4 *input,
  unsigned int len)
{
  unsigned int offset;  /* index of the first output byte of the current word */
  unsigned int shift;   /* bit position of the byte being extracted */

  for (offset = 0; offset < len; offset += 4)
    for (shift = 0; shift < 32; shift += 8)
      output[offset + (shift >> 3)] =
        (unsigned char)((input[offset >> 2] >> shift) & 0xff);
}
/* Decodes input (unsigned char) into output (UINT4): every group of four
   bytes becomes one word, first byte least significant. len is the number
   of input bytes and is assumed to be a multiple of 4.
 */
static void Decode (
  UINT4 *output,
  const unsigned char *input,
  unsigned int len)
{
  unsigned int offset;

  for (offset = 0; offset < len; offset += 4) {
    const unsigned char *bytes = input + offset;
    /* Assemble from the most significant byte downwards */
    UINT4 word = (UINT4)bytes[3];
    word = (word << 8) | (UINT4)bytes[2];
    word = (word << 8) | (UINT4)bytes[1];
    word = (word << 8) | (UINT4)bytes[0];
    output[offset >> 2] = word;
  }
}
#if 0
/* Disabled code: the hand-written byte loops of the reference
implementation. They are excluded from compilation by the surrounding
#if 0; this file maps MD5_memcpy and MD5_memset onto memcpy and memset
with #define instead.
*/
/* Note: Replace "for loop" with standard memcpy if possible.
Copies len bytes from input to output, one byte at a time.
*/
static void MD5_memcpy (
POINTER output,
POINTER input,
unsigned int len)
{
unsigned int i;
for (i = 0; i < len; i++)
output[i] = input[i];
}
/* Note: Replace "for loop" with standard memset if possible.
Sets len bytes starting at output to (char)value.
*/
static void MD5_memset (
POINTER output,
int value,
unsigned int len)
{
unsigned int i;
for (i = 0; i < len; i++)
((char *)output)[i] = (char)value;
}
#endif

@ -1,107 +0,0 @@
/*
* Copyright (C) 2003 Tommi Maekitalo
*
* This library is free software; you can redistribute it and/or
* modify it under the terms of the GNU Lesser General Public
* License as published by the Free Software Foundation; either
* version 2.1 of the License, or (at your option) any later version.
*
* As a special exception, you may use this file as part of a free
* software library without restriction. Specifically, if other files
* instantiate templates or use macros or inline functions from this
* file, or you compile this file and link it with other files to
* produce an executable, this file does not by itself cause the
* resulting executable to be covered by the GNU General Public
* License. This exception does not however invalidate any other
* reasons why the executable file might be covered by the GNU Library
* General Public License.
*
* This library is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public
* License along with this library; if not, write to the Free Software
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
*/
/* Copyright (C) 1991-2, RSA Data Security, Inc. Created 1991. All
rights reserved.
License to copy and use this software is granted provided that it
is identified as the "RSA Data Security, Inc. MD5 Message-Digest
Algorithm" in all material mentioning or referencing this software
or this function.
License is also granted to make and use derivative works provided
that such works are identified as "derived from the RSA Data
Security, Inc. MD5 Message-Digest Algorithm" in all material
mentioning or referencing the derived work.
RSA Data Security, Inc. makes no representations concerning either
the merchantability of this software or the suitability of this
software for any particular purpose. It is provided "as is"
without express or implied warranty of any kind.
These notices must be retained in any copies of any part of this
documentation and/or software.
*/
/* RSAREF types and constants, plus the public interface of the MD5
implementation (context type and the Init/Update/Final functions).
*/
/* PROTOTYPES should be set to one if and only if the compiler supports
function argument prototyping.
The following makes PROTOTYPES default to 1 if it has not already
been defined with C compiler flags.
*/
#ifndef ZIM_MD5_H
#define ZIM_MD5_H
#ifndef PROTOTYPES
#define PROTOTYPES 1
#endif
/* POINTER defines a generic pointer type */
typedef unsigned char *POINTER;
/* UINT2 defines a two byte word */
typedef unsigned short int UINT2;
/* UINT4 defines a four byte word.
NOTE(review): this relies on unsigned int being 32 bits wide on the target;
the typedef itself does not enforce it. */
typedef unsigned int UINT4;
/* PROTO_LIST is defined depending on how PROTOTYPES is defined above.
If using PROTOTYPES, then PROTO_LIST returns the list, otherwise it
returns an empty list.
*/
#if PROTOTYPES
#define PROTO_LIST(list) list
#else
#define PROTO_LIST(list) ()
#endif
/* MD5 context: the complete state of one digest computation in progress. */
typedef struct {
UINT4 state[4]; /* state (ABCD) */
UINT4 count[2]; /* number of bits, modulo 2^64 (lsb first) */
unsigned char buffer[64]; /* input buffer */
} zim_MD5_CTX;
/* The functions have C linkage so they can be called from C and C++ alike. */
#ifdef __cplusplus
extern "C" {
#endif
/* Starts a new digest computation in the given context. */
void zim_MD5Init PROTO_LIST ((zim_MD5_CTX *));
/* Feeds a buffer of the given length (in bytes) into the computation. */
void zim_MD5Update PROTO_LIST
((zim_MD5_CTX *, const unsigned char *, unsigned int));
/* Finishes the computation and writes the 16-byte digest. */
void zim_MD5Final PROTO_LIST ((unsigned char [16], zim_MD5_CTX *));
#ifdef __cplusplus
}
#endif
#endif /* ZIM_MD5_H */

@ -1,134 +0,0 @@
/*
* Copyright (C) 2003 Tommi Maekitalo
*
* This library is free software; you can redistribute it and/or
* modify it under the terms of the GNU Lesser General Public
* License as published by the Free Software Foundation; either
* version 2.1 of the License, or (at your option) any later version.
*
* As a special exception, you may use this file as part of a free
* software library without restriction. Specifically, if other files
* instantiate templates or use macros or inline functions from this
* file, or you compile this file and link it with other files to
* produce an executable, this file does not by itself cause the
* resulting executable to be covered by the GNU General Public
* License. This exception does not however invalidate any other
* reasons why the executable file might be covered by the GNU Library
* General Public License.
*
* This library is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public
* License along with this library; if not, write to the Free Software
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
*
*
* copied from cxxtools
*/
#include "md5stream.h"
#include <cstring>
namespace zim
{
////////////////////////////////////////////////////////////////////////
// Md5streambuf
//
Md5streambuf::Md5streambuf()
{
  // Start with an all-zero digest; getDigest() copies out this stored
  // value when no calculation is open.
  std::memset(digest, 0, sizeof(digest));
}
std::streambuf::int_type Md5streambuf::overflow(
  std::streambuf::int_type ch)
{
  if (pptr() != 0)
  {
    // a put area exists: consume the buffered characters into the hash
    zim_MD5Update(&context,
                  reinterpret_cast<const unsigned char*>(pbase()),
                  pptr() - pbase());
  }
  else
  {
    // no put area yet: this is the start of a new calculation
    zim_MD5Init(&context);
  }

  // install the internal buffer as an empty put area
  setp(buffer, buffer + bufsize);

  if (ch == traits_type::eof())
    return 0;

  // the character that triggered the overflow goes into the fresh buffer
  *pptr() = traits_type::to_char_type(ch);
  pbump(1);
  return 0;
}
std::streambuf::int_type Md5streambuf::underflow()
{
// output-only stream: there is never anything to read
return traits_type::eof();
}
int Md5streambuf::sync()
{
  // nothing buffered (or no put area at all): nothing to do
  if (pptr() == pbase())
    return 0;

  // consume the buffered characters into the hash
  zim_MD5Update(&context, reinterpret_cast<const unsigned char*>(pbase()), pptr() - pbase());

  // empty the put area again
  setp(buffer, buffer + bufsize);
  return 0;
}
void Md5streambuf::getDigest(unsigned char digest_[16])
{
  // A non-null put pointer means a calculation is open and must be
  // finished; otherwise the digest stored earlier is returned unchanged.
  const bool calculationOpen = (pptr() != 0);
  if (calculationOpen)
  {
    const bool hasBufferedData = (pptr() != pbase());
    if (hasBufferedData)
    {
      // consume the characters still sitting in the buffer
      zim_MD5Update(&context, reinterpret_cast<const unsigned char*>(pbase()), pptr() - pbase());
    }

    // drop the put area, so the next write starts a new calculation
    setp(0, 0);
    zim_MD5Final(digest, &context);
  }

  std::memcpy(digest_, digest, 16);
}
////////////////////////////////////////////////////////////////////////
// Md5stream
//
const char* Md5stream::getHexDigest()
{
static const char hex[] = "0123456789abcdef";
unsigned char md5[16];
getDigest(md5);
int i;
char* p = hexdigest;
for (i = 0; i < 16; ++i)
{
*p++ = hex[md5[i] >> 4];
*p++ = hex[md5[i] & 0xf];
}
*p = '\0';
return hexdigest;
}
}

@ -1,134 +0,0 @@
/*
* Copyright (C) 2003 Tommi Maekitalo
*
* This library is free software; you can redistribute it and/or
* modify it under the terms of the GNU Lesser General Public
* License as published by the Free Software Foundation; either
* version 2.1 of the License, or (at your option) any later version.
*
* As a special exception, you may use this file as part of a free
* software library without restriction. Specifically, if other files
* instantiate templates or use macros or inline functions from this
* file, or you compile this file and link it with other files to
* produce an executable, this file does not by itself cause the
* resulting executable to be covered by the GNU General Public
* License. This exception does not however invalidate any other
* reasons why the executable file might be covered by the GNU Library
* General Public License.
*
* This library is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public
* License along with this library; if not, write to the Free Software
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
*
*
* copied from cxxtools
*/
#ifndef ZIM_MD5STREAM_H
#define ZIM_MD5STREAM_H
#include "md5.h"
#include <iostream>
#include <iterator>
#include <algorithm>
namespace zim
{
/// Output-only stream buffer that feeds every character written to it
/// into an MD5 calculation.
class Md5streambuf : public std::streambuf
{
public:
/// Zero-fills the stored digest.
Md5streambuf();
/// Finishes a calculation that is in progress (if any) and copies the
/// 16-byte digest into the given array.
void getDigest(unsigned char digest[16]);
private:
// size of the internal put area in bytes
static const unsigned int bufsize = 64;
// put area: characters are collected here before being hashed
char buffer[bufsize];
// state of the MD5 calculation in progress
zim_MD5_CTX context;
// digest stored by the last finished calculation
unsigned char digest[16];
// hashes the buffered characters and resets the put area
std::streambuf::int_type overflow(std::streambuf::int_type ch);
// always reports end-of-file; the buffer cannot be read from
std::streambuf::int_type underflow();
// hashes any buffered characters
int sync();
};
/**
This is an easy and safe interface to MD5 calculation.
To get the MD5 sum of some data, instantiate a Md5stream, write your data
into it and read the digest.
After calling getDigest or getHexDigest, the object can be reused
for another MD5 calculation. The algorithm is automatically
reinitialized when the first character is received.
example:
\code
int main(int argc, char* argv[])
{
Md5stream s;
for (int i = 1; i < argc; ++i)
{
std::ifstream in(argv[i]);
if (in)
{
s << in.rdbuf();
std::cout << s.getHexDigest() << " " << argv[i] << std::endl;
}
}
}
\endcode
*/
class Md5stream : public std::ostream
{
public:
/// output iterator type that writes characters straight into the stream buffer
typedef std::ostreambuf_iterator<char> iterator;
private:
// buffer that performs the actual MD5 calculation
Md5streambuf streambuf;
// storage for the string returned by getHexDigest: 32 hex characters plus the terminator
char hexdigest[33];
public:
/// initializes the MD5 calculation; the stream is constructed without a
/// buffer and attached to the member buffer in the body
Md5stream()
: std::ostream(0)
{
init(&streambuf);
}
/// ends the MD5 calculation and returns the 16-byte digest
void getDigest(unsigned char digest[16])
{ streambuf.getDigest(digest); }
/// ends the MD5 calculation and returns the digest as 32 hex characters;
/// the returned pointer refers to a member of this object
const char* getHexDigest();
/// returns an output iterator to this Md5stream
iterator begin()
{ return iterator(&streambuf); }
};
template <typename iterator_type>
std::string md5(iterator_type from, iterator_type to)
{
Md5stream s;
std::copy(from, to, std::ostream_iterator<char>(s));
return s.getHexDigest();
}
template <typename data_type>
std::string md5(const data_type& data)
{
Md5stream s;
s << data;
return s.getHexDigest();
}
}
#endif // ZIM_MD5STREAM_H

@ -1,39 +0,0 @@
/*
* Copyright (C) 2009 Tommi Maekitalo
*
* This program is free software; you can redistribute it and/or
* modify it under the terms of the GNU General Public License as
* published by the Free Software Foundation; either version 2 of the
* License, or (at your option) any later version.
*
* This program is distributed in the hope that it will be useful, but
* is provided AS IS, WITHOUT ANY WARRANTY; without even the implied
* warranty of MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE, and
* NON-INFRINGEMENT. See the GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software
* Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
*
*/
#include "ptrstream.h"
namespace zim
{
// The put area cannot be extended, so every overflow is reported as a
// failure; the offending character is ignored.
std::streambuf::int_type ptrstreambuf::overflow(int_type /*c*/)
{
  return traits_type::eof();
}
// The get area cannot be refilled, so running out of characters always
// reports end-of-file.
std::streambuf::int_type ptrstreambuf::underflow()
{
return traits_type::eof();
}
// Nothing to synchronize here; always reports success.
int ptrstreambuf::sync()
{
return 0;
}
}

@ -1,56 +0,0 @@
/*
* Copyright (C) 2009 Tommi Maekitalo
*
* This program is free software; you can redistribute it and/or
* modify it under the terms of the GNU General Public License as
* published by the Free Software Foundation; either version 2 of the
* License, or (at your option) any later version.
*
* This program is distributed in the hope that it will be useful, but
* is provided AS IS, WITHOUT ANY WARRANTY; without even the implied
* warranty of MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE, and
* NON-INFRINGEMENT. See the GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software
* Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
*
*/
#ifndef ZIM_PTRSTREAM_H
#define ZIM_PTRSTREAM_H
#include <iostream>
namespace zim
{
/// Stream buffer that reads from and writes to a caller-supplied memory
/// range instead of a buffer of its own.
class ptrstreambuf : public std::streambuf
{
public:
/// Uses [start, end) as both the put area and the get area. The range is
/// not copied.
ptrstreambuf(char* start, char* end)
{
setp(start, end);
setg(start, start, end);
}
/// see std::streambuf
int_type overflow(int_type c);
/// see std::streambuf
int_type underflow();
/// see std::streambuf
int sync();
};
/// iostream working on a caller-supplied memory range through a ptrstreambuf.
class ptrstream : public std::iostream
{
// buffer wrapping the memory range handed to the constructor
ptrstreambuf streambuf;
public:
/// Creates a stream over [start, end). The base class is constructed
/// without a buffer and attached to the member buffer in the body.
ptrstream(char* start, char* end)
: std::iostream(0),
streambuf(start, end)
{ init(&streambuf); }
};
}
#endif

@ -1,260 +0,0 @@
/*
* Copyright (C) 2007 Tommi Maekitalo
*
* This program is free software; you can redistribute it and/or
* modify it under the terms of the GNU General Public License as
* published by the Free Software Foundation; either version 2 of the
* License, or (at your option) any later version.
*
* This program is distributed in the hope that it will be useful, but
* is provided AS IS, WITHOUT ANY WARRANTY; without even the implied
* warranty of MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE, and
* NON-INFRINGEMENT. See the GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software
* Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
*
*/
#include <zim/search.h>
#include <zim/fileiterator.h>
#include <zim/indexarticle.h>
#include <sstream>
#include "log.h"
#include <map>
#include <math.h>
#include <cctype>
#include <stdexcept>
log_define("zim.search")
namespace zim
{
namespace
{
  /// Ordering functor that ranks search results best-first: a higher
  /// priority sorts earlier; results of equal priority are ordered by
  /// descending title.
  class PriorityGt
  {
    public:
      // Adaptable-functor typedefs, spelled out by hand.
      // std::binary_function is not used as a base class: its parameter
      // order is <Arg1, Arg2, Result>, and it was deprecated in C++11 and
      // removed in C++17.
      typedef SearchResult first_argument_type;
      typedef SearchResult second_argument_type;
      typedef bool result_type;

      /// Returns true when s1 has to be placed before s2.
      bool operator() (const SearchResult& s1, const SearchResult& s2) const
      {
        const double p1 = s1.getPriority();
        const double p2 = s2.getPriority();
        if (p1 != p2)
          return p1 > p2;

        // equal priority: fall back to the title as tie breaker
        return s1.getArticle().getTitle() > s2.getArticle().getTitle();
      }
  };
}
/// Returns the ranking priority of this result, computing it on first use.
///
/// The value is derived from the words found (occurrence count and extra
/// weight per word), the number of distinct words, the distance between
/// neighbouring occurrences of different words and the positions of the
/// occurrences. A stored priority of 0.0 means "not computed yet"; a
/// result without any word keeps that value.
double SearchResult::getPriority() const
{
  if (!wordList.empty() && priority == 0.0)
  {
    log_debug("weightOcc=" << Search::getWeightOcc()
        << " weightPlus=" << Search::getWeightPlus()
        << " weightOccOff=" << Search::getWeightOccOff()
        << " weightDist=" << Search::getWeightDist()
        << " weightPos=" << Search::getWeightPos()
        << " weightDistinctWords=" << Search::getWeightDistinctWords());

    priority = 1.0;

    log_debug("getPriority, " << wordList.size() << " words; idx=" << article.getIndex());

    // weight occurrences of words in the article
    // (weighting by position in the title is currently disabled, so the
    // title is not fetched here)
    for (WordListType::const_iterator itw = wordList.begin(); itw != wordList.end(); ++itw)
    {
      priority *= 1.0 + log(itw->second.count * Search::getWeightOcc()
                          + Search::getWeightPlus() * itw->second.addweight)
                      + Search::getWeightOccOff()
                      + Search::getWeightPlus() * itw->second.addweight;
    }

    log_debug("priority1: " << priority);

    // weight distinct words
    priority += Search::getWeightDistinctWords() * wordList.size();

    log_debug("priority2: " << priority);

    PosListType::const_iterator itp;

    // weight distance between different words; skipped when there are no
    // positions, since begin() must not be dereferenced on an empty list
    if (!posList.empty())
    {
      itp = posList.begin();
      std::string word = itp->second;
      size_type pos = itp->first + word.size();
      for (++itp; itp != posList.end(); ++itp)
      {
        if (word != itp->second)
        {
          // distance between the end of the previous word and the start
          // of this one, never less than 1
          size_type dist = itp->first > pos ? (itp->first - pos)
                         : itp->first < pos ? (pos - itp->first)
                         : 1;
          priority += Search::getWeightDist() / dist;
        }
        word = itp->second;
        pos = itp->first + word.size();
      }
    }

    log_debug("priority3: " << priority);

    // weight position of words in the document
    if (Search::getWeightPos())
      for (itp = posList.begin(); itp != posList.end(); ++itp)
        priority += Search::getWeightPos() / pow(1.01, static_cast<double>(itp->first));

    if (Search::getWeightPosRel() && !posList.empty())
    {
      // the article size does not change inside the loop: fetch it once
      const double dataSize = static_cast<double>(article.getData().size());
      for (itp = posList.begin(); itp != posList.end(); ++itp)
        priority += Search::getWeightPosRel() * itp->first / dataSize;
    }

    log_debug("priority of article " << article.getIndex() << " \"" << article.getTitle() << "\", " << wordList.size() << " words: " << priority);
  }

  return priority;
}
void SearchResult::foundWord(const std::string& word, size_type pos, unsigned addweight)
{
++wordList[word].count;
wordList[word].addweight += addweight;
posList[pos] = word;
}
// Default values of the ranking parameters. SearchResult::getPriority()
// reads them through the Search::getWeight*() accessors (presumably plain
// getters for these statics - confirm in search.h).
// factor applied to the occurrence count of a word
double Search::weightOcc = 10.0;
// constant added to the per-word factor
double Search::weightOccOff = 1.0;
// factor applied to the extra weight of a word
double Search::weightPlus = 10.0;
// bonus for neighbouring different words, divided by their distance
double Search::weightDist = 10;
// bonus per occurrence, shrinking with the position in the document; 0 disables it
double Search::weightPos = 10;
// bonus per occurrence relative to the article size; 0 disables it
double Search::weightPosRel = 0;
// bonus per distinct word found
double Search::weightDistinctWords = 50;
// NOTE(review): not referenced in the visible part of this file; presumably
// the default result limit of Search::find - confirm in search.h
unsigned Search::searchLimit = 10000;
/// Searches for the whitespace-separated words of expr and appends the
/// matching articles to results, best match first.
///
/// Every leading '+' of a word raises that word's weight. Words are
/// lowercased and looked up in namespace 'X' of the index file; a word
/// without index entries is looked up as a title prefix in namespace 'A'
/// instead. Articles with more than one recorded position are preferred:
/// only if there is none are all found articles returned.
void Search::search(Results& results, const std::string& expr)
{
  log_trace("search articles with expression \"" << expr << '"');

  std::istringstream ssearch(expr);
  std::string token;

  // map from article-idx to article + relevance-informations
  typedef std::map<size_type, SearchResult> IndexType;
  IndexType index;

  while (ssearch >> token)
  {
    // count and strip the leading '+' characters
    unsigned addweight = 0;
    while (token.size() > 0 && token.at(0) == '+')
    {
      ++addweight;
      token.erase(0, 1);
    }

    if (token.empty())
    {
      log_warn("empty token");
      continue;
    }

    // std::tolower requires a value representable as unsigned char (or
    // EOF); passing a negative plain char is undefined behaviour
    for (std::string::iterator it = token.begin(); it != token.end(); ++it)
      *it = static_cast<char>(std::tolower(static_cast<unsigned char>(*it)));

    log_debug("search for token \"" << token << '"');

    IndexArticle indexarticle = indexfile.getArticleByTitle('X', token);

    if (indexarticle.getTotalCount() > 0)
    {
      // entries of lower-numbered categories get a higher weight
      for (unsigned cat = 0; cat < 4; ++cat)
      {
        const IndexArticle::EntriesType& ent = indexarticle.getCategory(cat);
        for (IndexArticle::EntriesType::const_iterator it = ent.begin(); it != ent.end(); ++it)
        {
          size_type articleIdx = it->index;
          size_type position = it->pos;

          // insert() leaves an existing entry untouched and returns it
          IndexType::iterator itIt = index.insert(
            IndexType::value_type(articleIdx,
              SearchResult(articlefile.getArticle(articleIdx)))).first;

          itIt->second.foundWord(token, position, addweight + 3 - cat);
        }
      }
    }
    else
    {
      log_debug("no entries found - try searching for titles");

      // separate list, so the caller's results are not touched here
      Results titleMatches;
      find(titleMatches, 'A', token);

      for (Results::const_iterator it = titleMatches.begin(); it != titleMatches.end(); ++it)
      {
        size_type articleIdx = it->getArticle().getIndex();

        IndexType::iterator itIt = index.insert(
          IndexType::value_type(articleIdx,
            SearchResult(it->getArticle()))).first;

        // NOTE(review): this is unsigned arithmetic; for titles longer than
        // addweight + 3 characters the subtraction wraps around to a very
        // large weight. Left unchanged because the ranking depends on it -
        // confirm the intended formula.
        itIt->second.foundWord(token, 0, addweight + 3 - it->getArticle().getTitle().size());
      }
    }
  }

  log_debug("copy/filter " << index.size() << " articles");

  results.setExpression(expr);

  // first choice: articles with more than one recorded position
  for (IndexType::const_iterator it = index.begin(); it != index.end(); ++it)
  {
    if (it->second.getCountPositions() > 1)
      results.push_back(it->second);
  }

  // fallback: take everything that was found
  if (results.empty())
  {
    for (IndexType::const_iterator it = index.begin(); it != index.end(); ++it)
      results.push_back(it->second);
  }

  log_debug("sort " << results.size() << " articles");
  std::sort(results.begin(), results.end(), PriorityGt());
}
/// Appends to results the articles of namespace ns found from the position
/// articlefile.findByTitle(ns, praefix) onwards. Stops when results holds
/// limit entries, when the namespace changes or when the leading
/// praefix.size() characters of a title compare greater than praefix.
void Search::find(Results& results, char ns, const std::string& praefix, unsigned limit)
{
  log_debug("find results in namespace " << ns << " for praefix \"" << praefix << '"');

  File::const_iterator pos = articlefile.findByTitle(ns, praefix);
  while (pos != articlefile.end() && results.size() < limit)
  {
    const bool sameNamespace = (ns == pos->getNamespace());
    // the title is only looked at for articles of the requested namespace
    const bool beyondPraefix = sameNamespace
        && pos->getTitle().compare(0, praefix.size(), praefix) > 0;

    if (!sameNamespace || beyondPraefix)
    {
      log_debug("article " << pos->getNamespace() << ", \"" << pos->getTitle() << "\" does not match " << ns << ", \"" << praefix << '"');
      break;
    }

    results.push_back(SearchResult(*pos));
    ++pos;
  }

  log_debug(results.size() << " articles in result");
}
/// Appends to results the articles of namespace ns found from the position
/// articlefile.findByTitle(ns, begin) onwards. Stops when results holds
/// limit entries, when the namespace changes or when a title compares
/// greater than end.
void Search::find(Results& results, char ns, const std::string& begin,
  const std::string& end, unsigned limit)
{
  log_debug("find results in namespace " << ns << " for praefix \"" << begin << '"');

  File::const_iterator pos = articlefile.findByTitle(ns, begin);
  while (pos != articlefile.end() && results.size() < limit)
  {
    log_debug("check " << pos->getNamespace() << '/' << pos->getTitle());

    const bool otherNamespace = (pos->getNamespace() != ns);
    // the title is only looked at for articles of the requested namespace
    const bool pastEnd = !otherNamespace && pos->getTitle().compare(end) > 0;

    if (otherNamespace || pastEnd)
    {
      log_debug("article " << pos->getNamespace() << ", \"" << pos->getTitle() << "\" does not match");
      break;
    }

    results.push_back(SearchResult(*pos));
    ++pos;
  }

  log_debug(results.size() << " articles in result");
}
}

@ -1,142 +0,0 @@
/*
* Copyright (C) 2009 Tommi Maekitalo
*
* This program is free software; you can redistribute it and/or
* modify it under the terms of the GNU General Public License as
* published by the Free Software Foundation; either version 2 of the
* License, or (at your option) any later version.
*
* This program is distributed in the hope that it will be useful, but
* is provided AS IS, WITHOUT ANY WARRANTY; without even the implied
* warranty of MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE, and
* NON-INFRINGEMENT. See the GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software
* Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
*
*/
#include <zim/template.h>
namespace zim
{
// Plain-data state: collects characters and watches for a '<', which may
// start a "<%" construct. save remembers the index of that '<' in data.
void TemplateParser::state_data(char ch)
{
  data += ch;
  if (ch != '<')
    return;

  save = data.size() - 1;
  state = &TemplateParser::state_lt;
}
// State after a '<': a following '%' opens a construct; anything else is
// ordinary data.
void TemplateParser::state_lt(char ch)
{
  data += ch;
  state = (ch == '%') ? &TemplateParser::state_token0
                      : &TemplateParser::state_data;
}
// State after "<%": a '/' starts a link; any other character is the first
// character of a token, whose index in data is remembered in token.
void TemplateParser::state_token0(char ch)
{
  data += ch;
  if (ch == '/')
  {
    state = &TemplateParser::state_link0;
    return;
  }

  token = data.size() - 1;
  state = &TemplateParser::state_token;
}
// State inside a token: collects characters until a '%' announces the
// possible end of the token.
void TemplateParser::state_token(char ch)
{
  data += ch;
  if (ch != '%')
    return;

  state = &TemplateParser::state_token_end;
}
// State after the '%' that may close a token.
// A '>' completes "<%token%>": the data collected before the '<' and the
// token text (without the trailing '%') are reported to the event handler,
// if there is one. Any other character is added to the data and parsing
// continues in the plain-data state.
void TemplateParser::state_token_end(char ch)
{
  if (ch == '>')
  {
    if (event)
    {
      event->onData(data.substr(0, save));
      event->onToken(data.substr(token, data.size() - token - 1));
    }

    // The construct is consumed whether or not a handler is present, as in
    // state_title_end; otherwise the token text would stay in data.
    data.clear();
    state = &TemplateParser::state_data;
  }
  else
  {
    data += ch;
    state = &TemplateParser::state_data;
  }
}
// State after "<%/": the character read here is stored in ns.
void TemplateParser::state_link0(char ch)
{
  ns = ch;
  data += ch;
  state = &TemplateParser::state_link;
}
// State after the namespace character: a '/' must follow, and the title
// starts right behind it (token is set to that index). Anything else sends
// the parser back to the plain-data state.
void TemplateParser::state_link(char ch)
{
  data += ch;
  if (ch != '/')
  {
    state = &TemplateParser::state_data;
    return;
  }

  token = data.size();
  state = &TemplateParser::state_title;
}
// State inside a link title: collects characters until a '%'; token_e is
// set to the index of that '%', which is one past the last title character.
void TemplateParser::state_title(char ch)
{
  data += ch;
  if (ch != '%')
    return;

  token_e = data.size() - 1;
  state = &TemplateParser::state_title_end;
}
// State after the '%' that ends a link title: waits for '>'. When it
// arrives, the data collected before the '<' and the link (namespace and
// title) are reported to the event handler, if there is one, and the
// collected text is dropped.
void TemplateParser::state_title_end(char ch)
{
  data += ch;
  if (ch != '>')
    return;

  if (event)
  {
    event->onData(data.substr(0, save));
    event->onLink(ns, data.substr(token, token_e - token));
  }

  data.clear();
  state = &TemplateParser::state_data;
}
// Reports everything collected so far as plain data to the event handler,
// if there is one, and resets the parser to the plain-data state with an
// empty buffer.
void TemplateParser::flush()
{
  if (event)
  {
    event->onData(data);
  }

  state = &TemplateParser::state_data;
  data.clear();
}
}

File diff suppressed because it is too large Load Diff

@ -1,164 +0,0 @@
/*
* Copyright (C) 2009 Tommi Maekitalo
*
* This library is free software; you can redistribute it and/or
* modify it under the terms of the GNU Lesser General Public
* License as published by the Free Software Foundation; either
* version 2.1 of the License, or (at your option) any later version.
*
* This library is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public
* License along with this library; if not, write to the Free Software
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
*
*/
#include <zim/unlzmastream.h>
#include <zim/zim.h>
#include "log.h"
#include "config.h"
#include <sstream>
#include <cstring>
#include "envvalue.h"
log_define("zim.lzma.uncompress")
namespace zim
{
namespace
{
  /// Passes ret through when it is LZMA_OK or LZMA_STREAM_END; for every
  /// other code it logs a message naming the code and throws UnlzmaError
  /// carrying the code and that message.
  lzma_ret checkError(lzma_ret ret)
  {
    if (ret != LZMA_OK && ret != LZMA_STREAM_END)
    {
      std::ostringstream msg;
      msg << "inflate-error " << ret;
      switch (ret)
      {
        // the two success codes cannot occur here; they are listed so the
        // switch names every code handled
        case LZMA_OK: msg << ": LZMA_OK"; break;
        case LZMA_STREAM_END: msg << ": LZMA_STREAM_END"; break;
        case LZMA_NO_CHECK: msg << ": LZMA_NO_CHECK"; break;
        case LZMA_UNSUPPORTED_CHECK: msg << ": LZMA_UNSUPPORTED_CHECK"; break;
        case LZMA_GET_CHECK: msg << ": LZMA_GET_CHECK"; break;
        case LZMA_MEM_ERROR: msg << ": LZMA_MEM_ERROR"; break;
        case LZMA_MEMLIMIT_ERROR: msg << ": LZMA_MEMLIMIT_ERROR"; break;
        case LZMA_FORMAT_ERROR: msg << ": LZMA_FORMAT_ERROR"; break;
        case LZMA_OPTIONS_ERROR: msg << ": LZMA_OPTIONS_ERROR"; break;
        case LZMA_DATA_ERROR: msg << ": LZMA_DATA_ERROR"; break;
        case LZMA_BUF_ERROR: msg << ": LZMA_BUF_ERROR"; break;
        case LZMA_PROG_ERROR: msg << ": LZMA_PROG_ERROR"; break;
      }

      // log the message text, not the stream object
      log_error(msg.str());
      throw UnlzmaError(ret, msg.str());
    }

    return ret;
  }
}
/// Creates a decompressing stream buffer on top of sinksource_ with an
/// internal buffer of bufsize_ characters and initializes the lzma stream
/// decoder. The decoder's memory limit is read from the environment
/// variable ZIM_LZMA_MEMORY_SIZE, falling back to LZMA_MEMORY_SIZE MiB.
/// Throws UnlzmaError when the decoder cannot be initialized.
UnlzmaStreamBuf::UnlzmaStreamBuf(std::streambuf* sinksource_, unsigned bufsize_)
  : iobuffer(new char_type[bufsize_]),
    bufsize(bufsize_),
    sinksource(sinksource_)
{
  std::memset(reinterpret_cast<void*>(&stream), 0, sizeof(stream));

  try
  {
    unsigned memsize = envMemSize("ZIM_LZMA_MEMORY_SIZE", LZMA_MEMORY_SIZE * 1024 * 1024);
    checkError(
      ::lzma_stream_decoder(&stream, memsize, 0));
  }
  catch (...)
  {
    // The destructor does not run for an object whose constructor throws,
    // so the buffer has to be released here.
    delete[] iobuffer;
    throw;
  }
}
// Releases the lzma decoder state and the buffer allocated by the constructor.
UnlzmaStreamBuf::~UnlzmaStreamBuf()
{
::lzma_end(&stream);
delete[] iobuffer;
}
/// Write side: decompresses the characters collected in the put area and
/// writes the result to sinksource, then resets the put area and stores c
/// in it (unless c is eof). Returns eof when sinksource accepts fewer
/// characters than were produced, 0 otherwise.
UnlzmaStreamBuf::int_type UnlzmaStreamBuf::overflow(int_type c)
{
  if (pptr())
  {
    // the put area is the decoder's input
    stream.next_in = reinterpret_cast<const uint8_t*>(obuffer());
    stream.avail_in = pptr() - pbase();

    for (;;)
    {
      // the decoder writes into ibuffer
      stream.next_out = reinterpret_cast<uint8_t*>(ibuffer());
      stream.avail_out = ibuffer_size();

      const lzma_ret status = checkError(::lzma_code(&stream, LZMA_RUN));

      // hand the decompressed characters over to sinksource
      const std::streamsize produced = ibuffer_size() - stream.avail_out;
      const std::streamsize written = sinksource->sputn(reinterpret_cast<char*>(ibuffer()), produced);
      if (written < produced)
        return traits_type::eof();

      if (status == LZMA_STREAM_END || stream.avail_in == 0)
        break;
    }
  }

  // reset outbuffer
  setp(obuffer(), obuffer() + obuffer_size());
  if (c != traits_type::eof())
    sputc(traits_type::to_char_type(c));

  return 0;
}
/// Read side: reads compressed characters from sinksource and decompresses
/// them into obuffer until at least one character is available. Returns
/// that character without consuming it, or eof when sinksource delivers no
/// more data.
UnlzmaStreamBuf::int_type UnlzmaStreamBuf::underflow()
{
  // the decoder writes into obuffer
  stream.next_out = reinterpret_cast<uint8_t*>(obuffer());
  stream.avail_out = obuffer_size();

  while (true)
  {
    // refill ibuffer when the decoder has used up its input
    if (stream.avail_in == 0)
    {
      const bool dataReady = sinksource->in_avail() > 0;
      if (dataReady)
      {
        // take what is already available, at most one buffer full
        stream.avail_in = sinksource->sgetn(ibuffer(), std::min(sinksource->in_avail(), ibuffer_size()));
      }
      else
      {
        // nothing available yet: ask for a full buffer
        stream.avail_in = sinksource->sgetn(ibuffer(), ibuffer_size());
        if (stream.avail_in == 0)
          return traits_type::eof();
      }

      stream.next_in = reinterpret_cast<const uint8_t*>(ibuffer());
    }

    // decompress into obuffer and publish what was produced as get area
    checkError(::lzma_code(&stream, LZMA_RUN));
    setg(obuffer(), obuffer(), obuffer() + obuffer_size() - stream.avail_out);

    if (gptr() != egptr())
      break;
  }

  return sgetc();
}
/// Flushes a pending put area through overflow(); returns -1 when that
/// reports eof, 0 otherwise (including when there is no put area).
int UnlzmaStreamBuf::sync()
{
  const bool flushFailed =
    pptr() && overflow(traits_type::eof()) == traits_type::eof();
  return flushFailed ? -1 : 0;
}
}

@ -1,122 +0,0 @@
/*
* Copyright (C) 2009 Tommi Maekitalo
*
* This program is free software; you can redistribute it and/or
* modify it under the terms of the GNU General Public License as
* published by the Free Software Foundation; either version 2 of the
* License, or (at your option) any later version.
*
* This program is distributed in the hope that it will be useful, but
* is provided AS IS, WITHOUT ANY WARRANTY; without even the implied
* warranty of MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE, and
* NON-INFRINGEMENT. See the GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software
* Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
*
*/
#include <zim/uuid.h>
#include <iostream>
#include <time.h>
#include <zim/zim.h> // necessary to have the new types
#include "log.h"
#ifdef WITH_CXXTOOLS
#include <cxxtools/md5stream.h>
#endif
#ifdef _WIN32
# include <time.h>
#define NOMINMAX
# include <windows.h>
#undef NOMINMAX
// Minimal Windows stand-in for POSIX gettimeofday().
// Fills *tp from the millisecond counter returned by timeGetTime(), so the
// value is only as precise as that counter (per the Windows documentation it
// counts from system start, not from the epoch). tzp is ignored.
// Always returns 0.
int gettimeofday(struct timeval* tp, void* tzp) {
  (void)tzp;  // time zone information is not supported
  DWORD t;
  t = timeGetTime();
  tp->tv_sec = t / 1000;
  // tv_usec is in microseconds: scale the millisecond remainder
  tp->tv_usec = (t % 1000) * 1000;
  return 0;
}
#define getpid GetCurrentProcessId
#else
# include <sys/time.h>
# include <unistd.h>
#endif
log_define("zim.uuid")
namespace zim
{
namespace
{
  // maps a 4-bit value to its lower-case hexadecimal digit
  const char hex[] = "0123456789abcdef";

  // hexadecimal digit for the upper four bits of v
  inline char hi(char v)
  {
    const unsigned nibble = (v >> 4) & 0xf;
    return hex[nibble];
  }

  // hexadecimal digit for the lower four bits of v
  inline char lo(char v)
  {
    const unsigned nibble = v & 0xf;
    return hex[nibble];
  }
}
/**
 * Creates a new identifier from the current time and process-local values.
 *
 * With WITH_CXXTOOLS the 16 data bytes are the MD5 digest of clock() and the
 * seconds/microseconds reported by gettimeofday(). Otherwise the data is
 * filled with four 32-bit values: an int32 view of the address of the local
 * result object, the seconds, the microseconds and the process id.
 *
 * NOTE(review): the fallback path reads the inactive union member and writes
 * through reinterpret_cast'ed int32_t pointers; this relies on the compiler
 * tolerating that kind of type punning and on data being suitably aligned.
 */
Uuid Uuid::generate()
{
  Uuid ret;
  struct timeval tv;
  gettimeofday(&tv, 0);
#ifdef WITH_CXXTOOLS
  cxxtools::Md5stream m;
  clock_t c = clock();
  m << c << tv.tv_sec << tv.tv_usec;
  m.getDigest(reinterpret_cast<unsigned char*>(&ret.data[0]));
#else
  union {
    void* p;
    int32_t n;
  } u;
  u.p = &ret;
  *reinterpret_cast<int32_t*>(ret.data) = u.n;
  *reinterpret_cast<int32_t*>(ret.data + 4) = static_cast<int32_t>(tv.tv_sec);
  *reinterpret_cast<int32_t*>(ret.data + 8) = static_cast<int32_t>(tv.tv_usec);
  *reinterpret_cast<int32_t*>(ret.data + 12) = static_cast<int32_t>(getpid());
#endif
  // Log through the Uuid output operator: ret.data holds raw bytes without a
  // terminating NUL, so streaming it as a C string could read past the array.
  log_debug("generated uuid: " << ret);
  return ret;
}
/**
 * Writes uuid in the canonical textual form: 32 lower-case hexadecimal
 * digits in five dash-separated groups of 8-4-4-4-12 digits, i.e. the 16
 * data bytes split as 4-2-2-2-6.
 *
 * Returns out.
 */
std::ostream& operator<< (std::ostream& out, const Uuid& uuid)
{
  // exclusive end index of each byte group
  static const unsigned groupEnd[] = { 4, 6, 8, 10, 16 };
  const unsigned groupCount = sizeof(groupEnd) / sizeof(groupEnd[0]);

  unsigned n = 0;
  for (unsigned g = 0; g < groupCount; ++g)
  {
    if (g > 0)
      out << '-';

    // n carries over, so every byte is printed exactly once
    for (; n < groupEnd[g]; ++n)
      out << hi(uuid.data[n]) << lo(uuid.data[n]);
  }

  return out;
}
}

@ -1,103 +0,0 @@
/*
* Copyright (C) 2007 Tommi Maekitalo
*
* This program is free software; you can redistribute it and/or
* modify it under the terms of the GNU General Public License as
* published by the Free Software Foundation; either version 2 of the
* License, or (at your option) any later version.
*
* This program is distributed in the hope that it will be useful, but
* is provided AS IS, WITHOUT ANY WARRANTY; without even the implied
* warranty of MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE, and
* NON-INFRINGEMENT. See the GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software
* Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
*
*/
#include <zim/zintstream.h>
#include "log.h"
log_define("zim.zintstream")
namespace zim
{
/**
 * Reads one variable-length encoded integer from the input stream.
 *
 * The number N of leading 1-bits in the first byte gives the count of bytes
 * that follow; the remaining 7-N low bits of the first byte are the lowest
 * bits of the payload and the following bytes supply further bits in
 * little-endian order. The offset of all shorter encodings is added to the
 * payload to obtain the value.
 *
 * On failure the stream is left in a failed state and the return value must
 * be ignored: 0 is returned when no byte could be read or the first byte is
 * 0xff (invalid), and the partially decoded payload when the input ends
 * before all N bytes have been read.
 */
size_type ZIntStream::get()
{
  char ch;
  if (!_istream->get(ch))
    return 0;

  if (ch == '\xff')
  {
    log_error("invalid bytestream in int decompressor");
    _istream->setstate(std::ios::failbit);
    // do not try to decode: eight leading 1-bits leave no room for payload
    return 0;
  }

  size_type uuvalue = static_cast<size_type>(static_cast<unsigned char>(ch));
  uint64_t ubound = 0x80;
  size_type add = 0;
  unsigned short s = 7;
  unsigned short N = 0;
  size_type mask = 0x7F;

  // count the leading 1-bits on an unsigned copy, so no negative value is
  // ever shifted; 0xff was rejected above, hence N <= 7 and s never wraps
  unsigned char lead = static_cast<unsigned char>(ch);
  while (lead & 0x80)
  {
    ++N;
    lead <<= 1;
    --s;
    add += ubound;
    ubound <<= 7;
    mask >>= 1;
  }

  uuvalue &= mask;

  while (N-- && _istream->get(ch))
  {
    // bits that would land beyond the width of size_type cannot be stored;
    // skipping them also avoids an out-of-range shift on malformed input
    if (s < sizeof(size_type) * 8)
      uuvalue |= static_cast<size_type>(static_cast<unsigned char>(ch)) << s;
    s += 8;
  }

  // test the stream state, not the pointer
  if (*_istream)
  {
    uuvalue += add;
  }
  else
  {
    log_error("incomplete bytestream in int decompressor");
    _istream->setstate(std::ios::failbit);
  }

  return uuvalue;
}
/**
 * Writes value to the output stream in the variable-length encoding.
 *
 * The first byte carries one leading 1-bit per following byte plus the
 * lowest payload bits; the remaining payload follows in little-endian byte
 * order. The range covered by all shorter encodings is subtracted from
 * value before it is stored.
 *
 * Returns *this.
 */
ZIntStream& ZIntStream::put(size_type value)
{
  uint64_t limit = 0x80;          // first value that needs one more byte
  size_type prefix = 0;           // leading 1-bits of the first byte
  size_type payloadMask = 0x7F;   // payload bits left in the first byte
  unsigned short extra = 0;       // number of bytes after the first one

  for (; value >= limit; ++extra)
  {
    value -= limit;
    limit <<= 7;
    prefix = (prefix >> 1) | 0x80;
    payloadMask >>= 1;
  }

  _ostream->put(static_cast<char>(prefix | (value & payloadMask)));

  // drop the bits already stored in the first byte
  value >>= 7 - extra;

  for (unsigned short i = 0; i < extra; ++i)
  {
    _ostream->put(static_cast<char>(value & 0xFF));
    value >>= 8;
  }

  return *this;
}
}
Loading…
Cancel
Save