mirror of https://github.com/kiwix/kiwix-tools.git
parent
e027ec4e8d
commit
0b0d2fc4d0
@ -1,97 +0,0 @@
|
||||
/*
|
||||
* Copyright (C) 2006 Tommi Maekitalo
|
||||
*
|
||||
* This program is free software; you can redistribute it and/or
|
||||
* modify it under the terms of the GNU General Public License as
|
||||
* published by the Free Software Foundation; either version 2 of the
|
||||
* License, or (at your option) any later version.
|
||||
*
|
||||
* This program is distributed in the hope that it will be useful, but
|
||||
* is provided AS IS, WITHOUT ANY WARRANTY; without even the implied
|
||||
* warranty of MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE, and
|
||||
* NON-INFRINGEMENT. See the GNU General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU General Public License
|
||||
* along with this program; if not, write to the Free Software
|
||||
* Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
|
||||
*
|
||||
*/
|
||||
|
||||
#ifndef ZIM_ARTICLE_H
|
||||
#define ZIM_ARTICLE_H
|
||||
|
||||
#include <string>
|
||||
#include <zim/zim.h>
|
||||
#include <zim/dirent.h>
|
||||
#include <zim/file.h>
|
||||
#include <limits>
|
||||
#include <iosfwd>
|
||||
|
||||
namespace zim
|
||||
{
|
||||
class Article
|
||||
{
|
||||
private:
|
||||
File file;
|
||||
size_type idx;
|
||||
|
||||
public:
|
||||
Article()
|
||||
: idx(std::numeric_limits<size_type>::max())
|
||||
{ }
|
||||
|
||||
Article(const File& file_, size_type idx_)
|
||||
: file(file_),
|
||||
idx(idx_)
|
||||
{ }
|
||||
|
||||
Dirent getDirent() const { return const_cast<File&>(file).getDirent(idx); }
|
||||
|
||||
std::string getParameter() const { return getDirent().getParameter(); }
|
||||
|
||||
std::string getTitle() const { return getDirent().getTitle(); }
|
||||
std::string getUrl() const { return getDirent().getUrl(); }
|
||||
std::string getLongUrl() const { return getDirent().getLongUrl(); }
|
||||
|
||||
uint16_t getLibraryMimeType() const { return getDirent().getMimeType(); }
|
||||
const std::string&
|
||||
getMimeType() const { return file.getMimeType(getLibraryMimeType()); }
|
||||
|
||||
bool isRedirect() const { return getDirent().isRedirect(); }
|
||||
|
||||
char getNamespace() const { return getDirent().getNamespace(); }
|
||||
|
||||
size_type getRedirectIndex() const { return getDirent().getRedirectIndex(); }
|
||||
Article getRedirectArticle() const { return Article(file, getRedirectIndex()); }
|
||||
|
||||
size_type getArticleSize() const;
|
||||
|
||||
bool operator< (const Article& a) const
|
||||
{ return getNamespace() < a.getNamespace()
|
||||
|| (getNamespace() == a.getNamespace()
|
||||
&& getTitle() < a.getTitle()); }
|
||||
|
||||
Cluster getCluster() const
|
||||
{ return file.getCluster(getDirent().getClusterNumber()); }
|
||||
|
||||
Blob getData() const
|
||||
{
|
||||
Dirent dirent = getDirent();
|
||||
return dirent.isRedirect() ? Blob()
|
||||
: const_cast<File&>(file).getBlob(dirent.getClusterNumber(), dirent.getBlobNumber());
|
||||
}
|
||||
|
||||
std::string getPage(bool layout = true, unsigned maxRecurse = 10);
|
||||
void getPage(std::ostream&, bool layout = true, unsigned maxRecurse = 10);
|
||||
|
||||
const File& getFile() const { return file; }
|
||||
File& getFile() { return file; }
|
||||
size_type getIndex() const { return idx; }
|
||||
|
||||
bool good() const { return idx != std::numeric_limits<size_type>::max(); }
|
||||
};
|
||||
|
||||
}
|
||||
|
||||
#endif // ZIM_ARTICLE_H
|
||||
|
||||
@ -1,48 +0,0 @@
|
||||
/*
|
||||
* Copyright (C) 2007 Tommi Maekitalo
|
||||
*
|
||||
* This program is free software; you can redistribute it and/or
|
||||
* modify it under the terms of the GNU General Public License as
|
||||
* published by the Free Software Foundation; either version 2 of the
|
||||
* License, or (at your option) any later version.
|
||||
*
|
||||
* This program is distributed in the hope that it will be useful, but
|
||||
* is provided AS IS, WITHOUT ANY WARRANTY; without even the implied
|
||||
* warranty of MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE, and
|
||||
* NON-INFRINGEMENT. See the GNU General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU General Public License
|
||||
* along with this program; if not, write to the Free Software
|
||||
* Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
|
||||
*
|
||||
*/
|
||||
|
||||
#ifndef ZIM_ARTICLESEARCH_H
|
||||
#define ZIM_ARTICLESEARCH_H
|
||||
|
||||
#include <vector>
|
||||
#include <zim/file.h>
|
||||
#include <zim/fileiterator.h>
|
||||
#include <zim/article.h>
|
||||
|
||||
namespace zim
|
||||
{
|
||||
class ArticleSearch
|
||||
{
|
||||
public:
|
||||
typedef std::vector<Article> Results;
|
||||
|
||||
private:
|
||||
File articleFile;
|
||||
std::string titles;
|
||||
|
||||
public:
|
||||
explicit ArticleSearch(const File& articleFile_)
|
||||
: articleFile(articleFile_)
|
||||
{ }
|
||||
|
||||
Results search(const std::string& expr);
|
||||
};
|
||||
}
|
||||
|
||||
#endif // ZIM_ARTICLESEARCH_H
|
||||
@ -1,64 +0,0 @@
|
||||
/*
|
||||
* Copyright (C) 2009 Tommi Maekitalo
|
||||
*
|
||||
* This program is free software; you can redistribute it and/or
|
||||
* modify it under the terms of the GNU General Public License as
|
||||
* published by the Free Software Foundation; either version 2 of the
|
||||
* License, or (at your option) any later version.
|
||||
*
|
||||
* This program is distributed in the hope that it will be useful, but
|
||||
* is provided AS IS, WITHOUT ANY WARRANTY; without even the implied
|
||||
* warranty of MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE, and
|
||||
* NON-INFRINGEMENT. See the GNU General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU General Public License
|
||||
* along with this program; if not, write to the Free Software
|
||||
* Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
|
||||
*
|
||||
*/
|
||||
|
||||
#ifndef ZIM_BLOB_H
|
||||
#define ZIM_BLOB_H
|
||||
|
||||
#include <iostream>
|
||||
#include <zim/cluster.h>
|
||||
|
||||
namespace zim
|
||||
{
|
||||
class Blob
|
||||
{
|
||||
const char* _data;
|
||||
unsigned _size;
|
||||
SmartPtr<ClusterImpl> _cluster;
|
||||
|
||||
public:
|
||||
Blob()
|
||||
: _data(0), _size(0)
|
||||
{ }
|
||||
|
||||
Blob(const char* data, unsigned size)
|
||||
: _data(data),
|
||||
_size(size)
|
||||
{ }
|
||||
|
||||
Blob(ClusterImpl* cluster, const char* data, unsigned size)
|
||||
: _data(data),
|
||||
_size(size),
|
||||
_cluster(cluster)
|
||||
{ }
|
||||
|
||||
const char* data() const { return _data; }
|
||||
const char* end() const { return _data + _size; }
|
||||
unsigned size() const { return _size; }
|
||||
};
|
||||
|
||||
inline std::ostream& operator<< (std::ostream& out, const Blob& blob)
|
||||
{
|
||||
if (blob.data())
|
||||
out.write(blob.data(), blob.size());
|
||||
return out;
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
#endif // ZIM_BLOB_H
|
||||
@ -1,353 +0,0 @@
|
||||
/*
|
||||
* Copyright (C) 2008 Tommi Maekitalo
|
||||
*
|
||||
* This program is free software; you can redistribute it and/or
|
||||
* modify it under the terms of the GNU General Public License as
|
||||
* published by the Free Software Foundation; either version 2 of the
|
||||
* License, or (at your option) any later version.
|
||||
*
|
||||
* This program is distributed in the hope that it will be useful, but
|
||||
* is provided AS IS, WITHOUT ANY WARRANTY; without even the implied
|
||||
* warranty of MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE, and
|
||||
* NON-INFRINGEMENT. See the GNU General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU General Public License
|
||||
* along with this program; if not, write to the Free Software
|
||||
* Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
|
||||
*
|
||||
*/
|
||||
|
||||
#ifndef ZIM_CACHE_H
|
||||
#define ZIM_CACHE_H
|
||||
|
||||
#include <map>
|
||||
#include <limits>
|
||||
#include <iostream>
|
||||
|
||||
#ifdef _WIN32
|
||||
#define NOMINMAX
|
||||
# include <windows.h>
|
||||
#undef NOMINMAX
|
||||
#undef max
|
||||
#endif
|
||||
|
||||
namespace zim
|
||||
{
|
||||
/**
   Implements a container for caching elements.

   The cache holds key-value-pairs. There are 2 main operations for
   accessing the cache: put and get. Put takes a key and a value and stores
   the element. Get takes a key and optionally a default value. If a value
   for the key is found, it is returned, the passed default otherwise. By
   default the default value is constructed with the empty ctor of the
   value-type.

   The cache has a maximum size (rounded up to an even number), after which
   key-value-pairs are dropped, when a new item is put into the cache.

   Every element is flagged as "winner" or "loser" (spelled "looser" in the
   identifiers) and carries a serial number which is renewed whenever the
   element is touched. The algorithm for this cache is as follows:
     - while less than half of the capacity is used, new elements are winners
     - afterwards new elements are losers
     - when the cache is full, the loser with the lowest serial is dropped
       to make room for a new element
     - when a loser is found by a lookup, it becomes a winner and the winner
       with the lowest serial becomes a loser in exchange

   The caching algorithm therefore keeps elements, which are fetched more
   than once, among the winners. The losers are either new or were pushed
   out of the winners by other elements, which were found in the cache.

   Elements are kept in a std::map, so the key type needs an ordering usable
   by std::map (operator< by default). Looking up a key uses the map;
   selecting the oldest or newest element scans all elements.

   You should be aware, that the key type should be simple. Comparing keys
   must be cheap. Copying elements (both key and value) must be possible and
   should be cheap.

 */
template <typename Key, typename Value>
class Cache
{
    struct Data
    {
      bool winner;      // true: element belongs to the winners
      unsigned serial;  // higher value: touched more recently
      Value value;
      Data()
        : winner(false),
          serial(0),
          value()
        { }
      Data(bool winner_, unsigned serial_, const Value& value_)
        : winner(winner_),
          serial(serial_),
          value(value_)
        { }
    };

    typedef std::map<Key, Data> DataType;
    DataType data;

    typename DataType::size_type maxElements;
    unsigned serial;
    unsigned hits;
    unsigned misses;

    // Returns the next serial number. When the counter is exhausted, the
    // serials of all stored elements are reset to 0 and counting restarts.
    unsigned _nextSerial()
    {
      if (serial == std::numeric_limits<unsigned>::max())
      {
        for (typename DataType::iterator it = data.begin(); it != data.end(); ++it)
          it->second.serial = 0;
        serial = 1;
      }

      return serial++;
    }

    // Returns the element with the lowest serial among the winners
    // (winner == true) or losers (winner == false). If no element of the
    // requested kind exists, the first element of the map is returned.
    // Returns data.end() when the cache is empty.
    typename DataType::iterator _getOldest(bool winner)
    {
      typename DataType::iterator foundElement = data.begin();
      if (foundElement == data.end())
        return foundElement;

      typename DataType::iterator it = foundElement;
      for (++it; it != data.end(); ++it)
        if (it->second.winner == winner
          && (foundElement->second.winner != winner || it->second.serial < foundElement->second.serial))
            foundElement = it;

      return foundElement;
    }

    // Same as _getOldest, but selects the highest serial.
    typename DataType::iterator _getNewest(bool winner)
    {
      typename DataType::iterator foundElement = data.begin();
      if (foundElement == data.end())
        return foundElement;

      typename DataType::iterator it = foundElement;
      for (++it; it != data.end(); ++it)
        if (it->second.winner == winner
          && (foundElement->second.winner != winner || it->second.serial > foundElement->second.serial))
            foundElement = it;

      return foundElement;
    }

    // Number of elements currently flagged as winner.
    typename DataType::size_type _countWinners() const
    {
      typename DataType::size_type n = 0;
      for (typename DataType::const_iterator it = data.begin(); it != data.end(); ++it)
        if (it->second.winner)
          ++n;
      return n;
    }

    // drop one element; does nothing on an empty cache
    void _dropLooser()
    {
      if (data.empty())
        return;

      // look for the oldest element in the list of losers to drop it
      typename DataType::iterator victim = _getOldest(false);

      // no loser left: drop the oldest winner instead of an arbitrary one
      if (victim->second.winner)
        victim = _getOldest(true);

      data.erase(victim);
    }

    // demote one winner; does nothing on an empty cache
    void _makeLooser()
    {
      if (data.empty())
        return;

      // look for the oldest element in the list of winners to make it a loser
      typename DataType::iterator it = _getOldest(true);
      it->second.winner = false;
      it->second.serial = _nextSerial();
    }

    // Marks an element, which was found in the cache, as just used. A loser
    // is promoted and the oldest winner is demoted in exchange.
    void _touch(typename DataType::iterator it)
    {
      it->second.serial = _nextSerial();
      if (!it->second.winner)
      {
        // move element to the winner part
        it->second.winner = true;
        _makeLooser();
      }
    }

  public:
    typedef typename DataType::size_type size_type;
    typedef Value value_type;

    /// creates an empty cache; an odd maxElements_ is rounded up to the
    /// next even number.
    explicit Cache(size_type maxElements_)
      : maxElements(maxElements_ + (maxElements_ & 1)),
        serial(0),
        hits(0),
        misses(0)
        { }

    /// returns the number of elements currently in the cache
    size_type size() const        { return data.size(); }

    /// returns the maximum number of elements in the cache
    size_type getMaxElements() const      { return maxElements; }

    /// changes the maximum number of elements (rounded up to an even
    /// number). When shrinking, only as many elements are dropped as
    /// needed to fit the new size; losers are dropped first, oldest first.
    /// Afterwards the winners are rebalanced to at most half of the new
    /// size.
    void setMaxElements(size_type maxElements_)
    {
      maxElements_ += (maxElements_ & 1);
      maxElements = maxElements_;

      // evict the surplus
      while (data.size() > maxElements)
        _dropLooser();

      const size_type half = maxElements / 2;
      size_type numWinners = _countWinners();

      // too many winners for the new size: demote the oldest ones
      while (numWinners > half)
      {
        _makeLooser();
        --numWinners;
      }

      // room for more winners: promote the newest losers
      while (numWinners < half && numWinners < data.size())
      {
        _getNewest(false)->second.winner = true;
        ++numWinners;
      }
    }

    /// removes an element from the cache and returns true, if found
    bool erase(const Key& key)
    {
      typename DataType::iterator it = data.find(key);
      if (it == data.end())
        return false;

      // a winner leaves: the newest loser takes its place. If there is no
      // loser, _getNewest returns a winner and the assignment changes nothing.
      if (it->second.winner)
        _getNewest(false)->second.winner = true;

      data.erase(it);
      return true;
    }

    /// clears the cache. The hit and miss counters are reset only when
    /// stats is true.
    void clear(bool stats = false)
    {
      data.clear();
      if (stats)
        hits = misses = 0;
    }

    /// puts a new element in the cache. If the key is already found in
    /// the cache, the element is treated as just used (see getptr), but the
    /// stored value is left unchanged and the hit counter is not touched.
    void put(const Key& key, const Value& value)
    {
      typename DataType::iterator it = data.find(key);
      if (it != data.end())
      {
        // element found
        _touch(it);
        return;
      }

      // element not found
      bool asWinner = false;
      if (data.size() < maxElements)
        asWinner = data.size() < maxElements / 2;
      else
        _dropLooser();

      data.insert(typename DataType::value_type(key,
                    Data(asWinner, _nextSerial(), value)));
    }

    /// puts a new element on the top of the cache, i.e. directly among the
    /// winners. This method overrides the need, that an element needs a hit
    /// to become a winner. If the key is already found in the cache, it
    /// behaves like put.
    void put_top(const Key& key, const Value& value)
    {
      typename DataType::iterator it = data.find(key);
      if (it != data.end())
      {
        // element found
        _touch(it);
        return;
      }

      // element not found
      if (data.size() >= maxElements)
        _dropLooser();

      // keep the winners at half of the capacity
      if (data.size() >= maxElements / 2)
        _makeLooser();

      data.insert(typename DataType::value_type(key,
                    Data(true, _nextSerial(), value)));
    }

    /// returns a pointer to the cached value or 0, if the key is not found.
    /// A found element counts as a cache hit and becomes a winner; a
    /// failed lookup counts as a miss.
    Value* getptr(const Key& key)
    {
      typename DataType::iterator it = data.find(key);
      if (it == data.end())
      {
        ++misses;
        return 0;
      }

      _touch(it);
      ++hits;

      return &it->second.value;
    }

    /// returns a pair of values - a flag, if the value was found and the
    /// value if found or the passed default otherwise. If the value is
    /// found it is a cache hit and the element becomes a winner.
    std::pair<bool, Value> getx(const Key& key, Value def = Value())
    {
      Value* v = getptr(key);
      return v ? std::pair<bool, Value>(true, *v)
               : std::pair<bool, Value>(false, def);
    }

    /// returns the value to a key or the passed default value if not found.
    /// If the value is found it is a cache hit and the element becomes a
    /// winner.
    Value get(const Key& key, Value def = Value())
    {
      return getx(key, def).second;
    }

    /// returns the number of hits.
    unsigned getHits() const    { return hits; }
    /// returns the number of misses.
    unsigned getMisses() const  { return misses; }
    /// returns the cache hit ratio between 0 and 1.
    double hitRatio() const     { return hits+misses > 0 ? static_cast<double>(hits)/static_cast<double>(hits+misses) : 0; }
    /// returns the ratio, between held elements and maximum elements
    /// (0 for a cache with a maximum of 0 elements).
    double fillfactor() const   { return maxElements > 0 ? static_cast<double>(data.size()) / static_cast<double>(maxElements) : 0; }

    /*
    void dump(std::ostream& out) const
    {
      out << "cache max size=" << maxElements << " current size=" << size() << '\n';
      for (typename DataType::const_iterator it = data.begin(); it != data.end(); ++it)
      {
        out << "\tkey=\"" << it->first << "\" value=\"" << it->second.value << "\" serial=" << it->second.serial << " winner=" << it->second.winner << '\n';
      }
      out << "--------\n";
    }
    */

};
|
||||
|
||||
}
|
||||
|
||||
#endif // ZIM_CACHE_H
|
||||
@ -1,107 +0,0 @@
|
||||
/*
|
||||
* Copyright (C) 2009 Tommi Maekitalo
|
||||
*
|
||||
* This program is free software; you can redistribute it and/or
|
||||
* modify it under the terms of the GNU General Public License as
|
||||
* published by the Free Software Foundation; either version 2 of the
|
||||
* License, or (at your option) any later version.
|
||||
*
|
||||
* This program is distributed in the hope that it will be useful, but
|
||||
* is provided AS IS, WITHOUT ANY WARRANTY; without even the implied
|
||||
* warranty of MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE, and
|
||||
* NON-INFRINGEMENT. See the GNU General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU General Public License
|
||||
* along with this program; if not, write to the Free Software
|
||||
* Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
|
||||
*
|
||||
*/
|
||||
|
||||
#ifndef ZIM_CLUSTER_H
|
||||
#define ZIM_CLUSTER_H
|
||||
|
||||
#include <zim/zim.h>
|
||||
#include <zim/refcounted.h>
|
||||
#include <zim/smartptr.h>
|
||||
#include <iosfwd>
|
||||
#include <vector>
|
||||
|
||||
namespace zim
|
||||
{
|
||||
class Blob;
|
||||
class Cluster;
|
||||
|
||||
class ClusterImpl : public RefCounted
|
||||
{
|
||||
friend std::istream& operator>> (std::istream& in, ClusterImpl& blobImpl);
|
||||
friend std::ostream& operator<< (std::ostream& out, const ClusterImpl& blobImpl);
|
||||
|
||||
typedef std::vector<size_type> Offsets;
|
||||
typedef std::vector<char> Data;
|
||||
|
||||
CompressionType compression;
|
||||
Offsets offsets;
|
||||
Data data;
|
||||
|
||||
void read(std::istream& in);
|
||||
void write(std::ostream& out) const;
|
||||
|
||||
public:
|
||||
ClusterImpl();
|
||||
|
||||
void setCompression(CompressionType c) { compression = c; }
|
||||
CompressionType getCompression() const { return compression; }
|
||||
bool isCompressed() const { return compression == zimcompZip || compression == zimcompBzip2 || compression == zimcompLzma; }
|
||||
|
||||
size_type getCount() const { return offsets.size() - 1; }
|
||||
const char* getData(unsigned n) const { return &data[ offsets[n] ]; }
|
||||
size_type getSize(unsigned n) const { return offsets[n+1] - offsets[n]; }
|
||||
size_type getSize() const { return offsets.size() * sizeof(size_type) + data.size(); }
|
||||
Blob getBlob(size_type n) const;
|
||||
void clear();
|
||||
|
||||
void addBlob(const Blob& blob);
|
||||
void addBlob(const char* data, unsigned size);
|
||||
};
|
||||
|
||||
class Cluster
|
||||
{
|
||||
friend std::istream& operator>> (std::istream& in, Cluster& blob);
|
||||
friend std::ostream& operator<< (std::ostream& out, const Cluster& blob);
|
||||
|
||||
SmartPtr<ClusterImpl> impl;
|
||||
|
||||
ClusterImpl* getImpl();
|
||||
|
||||
public:
|
||||
Cluster();
|
||||
|
||||
void setCompression(CompressionType c) { getImpl()->setCompression(c); }
|
||||
CompressionType getCompression() const { return impl ? impl->getCompression() : zimcompNone; }
|
||||
bool isCompressed() const
|
||||
{ return impl && (impl->getCompression() == zimcompZip
|
||||
|| impl->getCompression() == zimcompBzip2
|
||||
|| impl->getCompression() == zimcompLzma); }
|
||||
|
||||
const char* getBlobPtr(size_type n) const { return impl->getData(n); }
|
||||
size_type getBlobSize(size_type n) const { return impl->getSize(n); }
|
||||
Blob getBlob(size_type n) const;
|
||||
|
||||
size_type count() const { return impl ? impl->getCount() : 0; }
|
||||
size_type size() const { return impl ? impl->getSize() : 0; }
|
||||
void clear() { impl = 0; }
|
||||
|
||||
void addBlob(const char* data, unsigned size) { getImpl()->addBlob(data, size); }
|
||||
void addBlob(const Blob& blob) { getImpl()->addBlob(blob); }
|
||||
|
||||
operator bool() const { return impl; }
|
||||
};
|
||||
|
||||
std::istream& operator>> (std::istream& in, ClusterImpl& blobImpl);
|
||||
std::istream& operator>> (std::istream& in, Cluster& blob);
|
||||
std::ostream& operator<< (std::ostream& out, const ClusterImpl& blobImpl);
|
||||
std::ostream& operator<< (std::ostream& out, const Cluster& blob);
|
||||
|
||||
}
|
||||
|
||||
#endif // ZIM_CLUSTER_H
|
||||
@ -1,125 +0,0 @@
|
||||
/*
|
||||
* Copyright (C) 2006 Tommi Maekitalo
|
||||
*
|
||||
* This program is free software; you can redistribute it and/or
|
||||
* modify it under the terms of the GNU General Public License as
|
||||
* published by the Free Software Foundation; either version 2 of the
|
||||
* License, or (at your option) any later version.
|
||||
*
|
||||
* This program is distributed in the hope that it will be useful, but
|
||||
* is provided AS IS, WITHOUT ANY WARRANTY; without even the implied
|
||||
* warranty of MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE, and
|
||||
* NON-INFRINGEMENT. See the GNU General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU General Public License
|
||||
* along with this program; if not, write to the Free Software
|
||||
* Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
|
||||
*
|
||||
*/
|
||||
|
||||
#ifndef ZIM_DIRENT_H
|
||||
#define ZIM_DIRENT_H
|
||||
|
||||
#include <string>
|
||||
#include <zim/zim.h>
|
||||
#include <limits>
|
||||
|
||||
namespace zim
|
||||
{
|
||||
class Dirent
|
||||
{
|
||||
bool redirect;
|
||||
uint16_t mimeType;
|
||||
|
||||
size_type version;
|
||||
|
||||
size_type clusterNumber; // only used when redirect is false
|
||||
size_type blobNumber; // only used when redirect is false
|
||||
|
||||
size_type redirectIndex; // only used when redirect is true
|
||||
|
||||
char ns;
|
||||
std::string title;
|
||||
std::string url;
|
||||
std::string parameter;
|
||||
|
||||
public:
|
||||
Dirent()
|
||||
: redirect(false),
|
||||
mimeType(0),
|
||||
version(0),
|
||||
clusterNumber(0),
|
||||
blobNumber(0),
|
||||
redirectIndex(0),
|
||||
ns('\0')
|
||||
{}
|
||||
|
||||
bool isRedirect() const { return redirect; }
|
||||
uint16_t getMimeType() const { return mimeType; }
|
||||
|
||||
size_type getVersion() const { return version; }
|
||||
void setVersion(size_type v) { version = v; }
|
||||
|
||||
size_type getClusterNumber() const { return isRedirect() ? 0 : clusterNumber; }
|
||||
size_type getBlobNumber() const { return isRedirect() ? 0 : blobNumber; }
|
||||
void setCluster(size_type clusterNumber_, size_type blobNumber_)
|
||||
{ clusterNumber = clusterNumber_; blobNumber = blobNumber_; }
|
||||
|
||||
size_type getRedirectIndex() const { return isRedirect() ? redirectIndex : 0; }
|
||||
|
||||
char getNamespace() const { return ns; }
|
||||
const std::string& getTitle() const { return title.empty() ? url : title; }
|
||||
const std::string& getUrl() const { return url; }
|
||||
std::string getLongUrl() const;
|
||||
const std::string& getParameter() const { return parameter; }
|
||||
|
||||
unsigned getDirentSize() const
|
||||
{
|
||||
unsigned ret = (isRedirect() ? 12 : 16) + url.size() + parameter.size() + 2;
|
||||
if (title != url)
|
||||
ret += title.size();
|
||||
return ret;
|
||||
}
|
||||
|
||||
void setTitle(const std::string& title_)
|
||||
{
|
||||
title = title_;
|
||||
}
|
||||
|
||||
void setUrl(char ns_, const std::string& url_)
|
||||
{
|
||||
ns = ns_;
|
||||
url = url_;
|
||||
}
|
||||
|
||||
void setParameter(const std::string& parameter_)
|
||||
{
|
||||
parameter = parameter_;
|
||||
}
|
||||
|
||||
void setRedirect(size_type idx)
|
||||
{
|
||||
redirect = true;
|
||||
redirectIndex = idx;
|
||||
mimeType = std::numeric_limits<uint16_t>::max();
|
||||
clusterNumber = 0;
|
||||
blobNumber = 0;
|
||||
}
|
||||
|
||||
void setArticle(uint16_t mimeType_, size_type clusterNumber_, size_type blobNumber_)
|
||||
{
|
||||
redirect = false;
|
||||
mimeType = mimeType_;
|
||||
clusterNumber = clusterNumber_;
|
||||
blobNumber = blobNumber_;
|
||||
}
|
||||
|
||||
};
|
||||
|
||||
std::ostream& operator<< (std::ostream& out, const Dirent& fh);
|
||||
std::istream& operator>> (std::istream& in, Dirent& fh);
|
||||
|
||||
}
|
||||
|
||||
#endif // ZIM_DIRENT_H
|
||||
|
||||
@ -1,123 +0,0 @@
|
||||
/*
|
||||
* Copyright (C) 2006 Tommi Maekitalo
|
||||
*
|
||||
* This program is free software; you can redistribute it and/or
|
||||
* modify it under the terms of the GNU General Public License as
|
||||
* published by the Free Software Foundation; either version 2 of the
|
||||
* License, or (at your option) any later version.
|
||||
*
|
||||
* This program is distributed in the hope that it will be useful, but
|
||||
* is provided AS IS, WITHOUT ANY WARRANTY; without even the implied
|
||||
* warranty of MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE, and
|
||||
* NON-INFRINGEMENT. See the GNU General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU General Public License
|
||||
* along with this program; if not, write to the Free Software
|
||||
* Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
|
||||
*
|
||||
*/
|
||||
|
||||
#ifndef ENDIAN_H
|
||||
#define ENDIAN_H
|
||||
|
||||
#include <algorithm>
|
||||
#include <iostream>
|
||||
#include <zim/zim.h>
|
||||
|
||||
namespace zim
|
||||
{
|
||||
|
||||
/// Returns true, if machine is big-endian (high byte first).
/// e.g. PowerPC
inline bool isBigEndian()
{
  // The first byte in memory of the integer 1 is 0 on big-endian machines.
  const int probe = 1;
  return *reinterpret_cast<const int8_t*>(&probe) == 0;
}

/// Returns true, if machine is little-endian (low byte first).
/// e.g. x86
inline bool isLittleEndian()
{
  // The first byte in memory of the integer 1 is 1 on little-endian machines.
  const int probe = 1;
  return *reinterpret_cast<const int8_t*>(&probe) == 1;
}

////////////////////////////////////////////////////////////////////////
/// Writes the sizeof(T) bytes of d to dst in little-endian order.
/// dst must have room for sizeof(T) bytes. bigEndian tells the byte order
/// of d and defaults to the byte order of the machine.
template <typename T>
void toLittleEndian(const T& d, char* dst, bool bigEndian = isBigEndian())
{
  const char* const first = reinterpret_cast<const char*>(&d);
  const char* const last = first + sizeof(T);

  if (bigEndian)
    std::reverse_copy(first, last, dst);
  else
    std::copy(first, last, dst);
}

/// Reads a value of type T stored in little-endian order at ptr.
/// The bytes are always copied one by one, so ptr does not need to be
/// aligned for T (it often points into a byte buffer read from a file).
/// bigEndian tells the byte order of the result and defaults to the byte
/// order of the machine.
template <typename T>
T fromLittleEndian(const T* ptr, bool bigEndian = isBigEndian())
{
  const char* const first = reinterpret_cast<const char*>(ptr);
  const char* const last = first + sizeof(T);

  T ret;
  char* const out = reinterpret_cast<char*>(&ret);

  if (bigEndian)
    std::reverse_copy(first, last, out);
  else
    std::copy(first, last, out);

  return ret;
}

////////////////////////////////////////////////////////////////////////
/// Writes the sizeof(T) bytes of d to dst in big-endian order.
/// dst must have room for sizeof(T) bytes. bigEndian tells the byte order
/// of d and defaults to the byte order of the machine.
template <typename T>
void toBigEndian(const T& d, char* dst, bool bigEndian = isBigEndian())
{
  const char* const first = reinterpret_cast<const char*>(&d);
  const char* const last = first + sizeof(T);

  if (bigEndian)
    std::copy(first, last, dst);
  else
    std::reverse_copy(first, last, dst);
}

/// Reads a value of type T stored in big-endian order at ptr.
/// The bytes are always copied one by one, so ptr does not need to be
/// aligned for T. bigEndian tells the byte order of the result and
/// defaults to the byte order of the machine.
template <typename T>
T fromBigEndian(const T* ptr, bool bigEndian = isBigEndian())
{
  const char* const first = reinterpret_cast<const char*>(ptr);
  const char* const last = first + sizeof(T);

  T ret;
  char* const out = reinterpret_cast<char*>(&ret);

  if (bigEndian)
    std::copy(first, last, out);
  else
    std::reverse_copy(first, last, out);

  return ret;
}
|
||||
|
||||
}
|
||||
|
||||
#endif // ENDIAN_H
|
||||
|
||||
@ -1,38 +0,0 @@
|
||||
/*
|
||||
* Copyright (C) 2006 Tommi Maekitalo
|
||||
*
|
||||
* This program is free software; you can redistribute it and/or
|
||||
* modify it under the terms of the GNU General Public License as
|
||||
* published by the Free Software Foundation; either version 2 of the
|
||||
* License, or (at your option) any later version.
|
||||
*
|
||||
* This program is distributed in the hope that it will be useful, but
|
||||
* is provided AS IS, WITHOUT ANY WARRANTY; without even the implied
|
||||
* warranty of MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE, and
|
||||
* NON-INFRINGEMENT. See the GNU General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU General Public License
|
||||
* along with this program; if not, write to the Free Software
|
||||
* Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
|
||||
*
|
||||
*/
|
||||
|
||||
#ifndef ZIM_ERROR_H
|
||||
#define ZIM_ERROR_H
|
||||
|
||||
#include <stdexcept>
|
||||
|
||||
namespace zim
|
||||
{
|
||||
/// Exception type derived from std::runtime_error; it adds nothing but its
/// own type, so that handlers can tell it apart.
/// NOTE(review): presumably thrown when a ZIM file is malformed - the throw
/// sites are not part of this header.
class ZimFileFormatError : public std::runtime_error
{
  public:
    /// @param msg text reported by what()
    explicit ZimFileFormatError(const std::string& msg)
      : std::runtime_error(msg)
    {
    }
};
|
||||
|
||||
}
|
||||
|
||||
#endif // ZIM_ERROR_H
|
||||
|
||||
@ -1,103 +0,0 @@
|
||||
/*
|
||||
* Copyright (C) 2006,2009 Tommi Maekitalo
|
||||
*
|
||||
* This program is free software; you can redistribute it and/or
|
||||
* modify it under the terms of the GNU General Public License as
|
||||
* published by the Free Software Foundation; either version 2 of the
|
||||
* License, or (at your option) any later version.
|
||||
*
|
||||
* This program is distributed in the hope that it will be useful, but
|
||||
* is provided AS IS, WITHOUT ANY WARRANTY; without even the implied
|
||||
* warranty of MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE, and
|
||||
* NON-INFRINGEMENT. See the GNU General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU General Public License
|
||||
* along with this program; if not, write to the Free Software
|
||||
* Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
|
||||
*
|
||||
*/
|
||||
|
||||
#ifndef ZIM_FILE_H
|
||||
#define ZIM_FILE_H
|
||||
|
||||
#include <string>
|
||||
#include <iterator>
|
||||
#include <zim/zim.h>
|
||||
#include <zim/fileimpl.h>
|
||||
#include <zim/blob.h>
|
||||
#include <zim/smartptr.h>
|
||||
|
||||
namespace zim
|
||||
{
|
||||
class Article;
|
||||
|
||||
class File
|
||||
{
|
||||
SmartPtr<FileImpl> impl;
|
||||
|
||||
public:
|
||||
File()
|
||||
{ }
|
||||
explicit File(const std::string& fname)
|
||||
: impl(new FileImpl(fname.c_str()))
|
||||
{ }
|
||||
|
||||
const std::string& getFilename() const { return impl->getFilename(); }
|
||||
const Fileheader& getFileheader() const { return impl->getFileheader(); }
|
||||
offset_type getFilesize() const { return impl->getFilesize(); }
|
||||
|
||||
Dirent getDirent(size_type idx) { return impl->getDirent(idx); }
|
||||
Dirent getDirentByTitle(size_type idx) { return impl->getDirentByTitle(idx); }
|
||||
size_type getCountArticles() const { return impl->getCountArticles(); }
|
||||
|
||||
Article getArticle(size_type idx) const;
|
||||
Article getArticle(char ns, const std::string& url);
|
||||
Article getArticleByUrl(const std::string& url);
|
||||
Article getArticleByTitle(size_type idx);
|
||||
Article getArticleByTitle(char ns, const std::string& title);
|
||||
|
||||
Cluster getCluster(size_type idx) const { return impl->getCluster(idx); }
|
||||
size_type getCountClusters() const { return impl->getCountClusters(); }
|
||||
offset_type getClusterOffset(size_type idx) const { return impl->getClusterOffset(idx); }
|
||||
|
||||
Blob getBlob(size_type clusterIdx, size_type blobIdx)
|
||||
{ return getCluster(clusterIdx).getBlob(blobIdx); }
|
||||
|
||||
size_type getNamespaceBeginOffset(char ch)
|
||||
{ return impl->getNamespaceBeginOffset(ch); }
|
||||
size_type getNamespaceEndOffset(char ch)
|
||||
{ return impl->getNamespaceEndOffset(ch); }
|
||||
size_type getNamespaceCount(char ns)
|
||||
{ return getNamespaceEndOffset(ns) - getNamespaceBeginOffset(ns); }
|
||||
|
||||
std::string getNamespaces()
|
||||
{ return impl->getNamespaces(); }
|
||||
bool hasNamespace(char ch);
|
||||
|
||||
class const_iterator;
|
||||
|
||||
const_iterator begin();
|
||||
const_iterator beginByTitle();
|
||||
const_iterator end();
|
||||
std::pair<bool, const_iterator> findx(char ns, const std::string& url);
|
||||
std::pair<bool, const_iterator> findx(const std::string& url);
|
||||
std::pair<bool, const_iterator> findxByTitle(char ns, const std::string& title);
|
||||
const_iterator findByTitle(char ns, const std::string& title);
|
||||
const_iterator find(char ns, const std::string& url);
|
||||
const_iterator find(const std::string& url);
|
||||
|
||||
bool good() const { return impl.getPointer() != 0; }
|
||||
time_t getMTime() const { return impl->getMTime(); }
|
||||
|
||||
const std::string& getMimeType(uint16_t idx) const { return impl->getMimeType(idx); }
|
||||
|
||||
std::string getChecksum() { return impl->getChecksum(); }
|
||||
bool verify() { return impl->verify(); }
|
||||
};
|
||||
|
||||
/// Declaration only; the definition is not part of this header.
/// NOTE(review): presumably returns url with URL escapes decoded - confirm against the implementation.
std::string urldecode(const std::string& url);
|
||||
|
||||
}
|
||||
|
||||
#endif // ZIM_FILE_H
|
||||
|
||||
@ -1,108 +0,0 @@
|
||||
/*
|
||||
* Copyright (C) 2008 Tommi Maekitalo
|
||||
*
|
||||
* This program is free software; you can redistribute it and/or
|
||||
* modify it under the terms of the GNU General Public License as
|
||||
* published by the Free Software Foundation; either version 2 of the
|
||||
* License, or (at your option) any later version.
|
||||
*
|
||||
* This program is distributed in the hope that it will be useful, but
|
||||
* is provided AS IS, WITHOUT ANY WARRANTY; without even the implied
|
||||
* warranty of MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE, and
|
||||
* NON-INFRINGEMENT. See the GNU General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU General Public License
|
||||
* along with this program; if not, write to the Free Software
|
||||
* Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
|
||||
*
|
||||
*/
|
||||
|
||||
#ifndef ZIM_FILEHEADER_H
|
||||
#define ZIM_FILEHEADER_H
|
||||
|
||||
#include <zim/zim.h>
|
||||
#include <zim/endian.h>
|
||||
#include <zim/uuid.h>
|
||||
#include <iosfwd>
|
||||
#include <limits>
|
||||
|
||||
#ifdef _WIN32
|
||||
#define NOMINMAX
|
||||
# include <windows.h>
|
||||
#undef NOMINMAX
|
||||
#undef max
|
||||
#endif
|
||||
|
||||
namespace zim
|
||||
{
|
||||
class Fileheader
|
||||
{
|
||||
public:
|
||||
static const size_type zimMagic;
|
||||
static const size_type zimVersion;
|
||||
static const size_type size;
|
||||
|
||||
private:
|
||||
Uuid uuid;
|
||||
size_type articleCount;
|
||||
offset_type titleIdxPos;
|
||||
offset_type urlPtrPos;
|
||||
offset_type mimeListPos;
|
||||
size_type blobCount;
|
||||
offset_type blobPtrPos;
|
||||
size_type mainPage;
|
||||
size_type layoutPage;
|
||||
offset_type checksumPos;
|
||||
|
||||
public:
|
||||
Fileheader()
|
||||
: articleCount(0),
|
||||
titleIdxPos(0),
|
||||
urlPtrPos(0),
|
||||
blobCount(0),
|
||||
blobPtrPos(0),
|
||||
mainPage(std::numeric_limits<size_type>::max()),
|
||||
layoutPage(std::numeric_limits<size_type>::max()),
|
||||
checksumPos(std::numeric_limits<offset_type>::max())
|
||||
{}
|
||||
|
||||
const Uuid& getUuid() const { return uuid; }
|
||||
void setUuid(const Uuid& uuid_) { uuid = uuid_; }
|
||||
|
||||
size_type getArticleCount() const { return articleCount; }
|
||||
void setArticleCount(size_type s) { articleCount = s; }
|
||||
|
||||
offset_type getTitleIdxPos() const { return titleIdxPos; }
|
||||
void setTitleIdxPos(offset_type p) { titleIdxPos = p; }
|
||||
|
||||
offset_type getUrlPtrPos() const { return urlPtrPos; }
|
||||
void setUrlPtrPos(offset_type p) { urlPtrPos = p; }
|
||||
|
||||
offset_type getMimeListPos() const { return mimeListPos; }
|
||||
void setMimeListPos(offset_type p) { mimeListPos = p; }
|
||||
|
||||
size_type getClusterCount() const { return blobCount; }
|
||||
void setClusterCount(size_type s) { blobCount = s; }
|
||||
|
||||
offset_type getClusterPtrPos() const { return blobPtrPos; }
|
||||
void setClusterPtrPos(offset_type p) { blobPtrPos = p; }
|
||||
|
||||
bool hasMainPage() const { return mainPage != std::numeric_limits<size_type>::max(); }
|
||||
size_type getMainPage() const { return mainPage; }
|
||||
void setMainPage(size_type s) { mainPage = s; }
|
||||
|
||||
bool hasLayoutPage() const { return layoutPage != std::numeric_limits<size_type>::max(); }
|
||||
size_type getLayoutPage() const { return layoutPage; }
|
||||
void setLayoutPage(size_type s) { layoutPage = s; }
|
||||
|
||||
bool hasChecksum() const { return getMimeListPos() >= 80; }
|
||||
offset_type getChecksumPos() const { return hasChecksum() ? checksumPos : 0; }
|
||||
void setChecksumPos(offset_type p) { checksumPos = p; }
|
||||
};
|
||||
|
||||
/// Stream output of a Fileheader; declared here, defined elsewhere.
/// NOTE(review): presumably writes the on-disk header representation - confirm against the implementation.
std::ostream& operator<< (std::ostream& out, const Fileheader& fh);
|
||||
/// Stream input into a Fileheader; declared here, defined elsewhere.
/// NOTE(review): presumably parses the on-disk header representation into fh - confirm against the implementation.
std::istream& operator>> (std::istream& in, Fileheader& fh);
|
||||
|
||||
}
|
||||
|
||||
#endif // ZIM_FILEHEADER_H
|
||||
@ -1,90 +0,0 @@
|
||||
/*
|
||||
* Copyright (C) 2006 Tommi Maekitalo
|
||||
*
|
||||
* This program is free software; you can redistribute it and/or
|
||||
* modify it under the terms of the GNU General Public License as
|
||||
* published by the Free Software Foundation; either version 2 of the
|
||||
* License, or (at your option) any later version.
|
||||
*
|
||||
* This program is distributed in the hope that it will be useful, but
|
||||
* is provided AS IS, WITHOUT ANY WARRANTY; without even the implied
|
||||
* warranty of MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE, and
|
||||
* NON-INFRINGEMENT. See the GNU General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU General Public License
|
||||
* along with this program; if not, write to the Free Software
|
||||
* Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
|
||||
*
|
||||
*/
|
||||
|
||||
#ifndef ZIM_FILEIMPL_H
|
||||
#define ZIM_FILEIMPL_H
|
||||
|
||||
#include <string>
|
||||
#include <vector>
|
||||
#include <map>
|
||||
#include <zim/fstream.h>
|
||||
#include <zim/refcounted.h>
|
||||
#include <zim/zim.h>
|
||||
#include <zim/fileheader.h>
|
||||
#include <zim/cache.h>
|
||||
#include <zim/dirent.h>
|
||||
#include <zim/cluster.h>
|
||||
|
||||
namespace zim
|
||||
{
|
||||
class FileImpl : public RefCounted
|
||||
{
|
||||
ifstream zimFile;
|
||||
Fileheader header;
|
||||
std::string filename;
|
||||
|
||||
Cache<size_type, Dirent> direntCache;
|
||||
Cache<offset_type, Cluster> clusterCache;
|
||||
typedef std::map<char, size_type> NamespaceCache;
|
||||
NamespaceCache namespaceBeginCache;
|
||||
NamespaceCache namespaceEndCache;
|
||||
|
||||
std::string namespaces;
|
||||
|
||||
typedef std::vector<std::string> MimeTypes;
|
||||
MimeTypes mimeTypes;
|
||||
|
||||
offset_type getOffset(offset_type ptrOffset, size_type idx);
|
||||
|
||||
public:
|
||||
explicit FileImpl(const char* fname);
|
||||
|
||||
time_t getMTime() const { return zimFile.getMTime(); }
|
||||
|
||||
const std::string& getFilename() const { return filename; }
|
||||
const Fileheader& getFileheader() const { return header; }
|
||||
offset_type getFilesize() const { return zimFile.fsize(); }
|
||||
|
||||
Dirent getDirent(size_type idx);
|
||||
Dirent getDirentByTitle(size_type idx);
|
||||
size_type getIndexByTitle(size_type idx);
|
||||
size_type getCountArticles() const { return header.getArticleCount(); }
|
||||
|
||||
Cluster getCluster(size_type idx);
|
||||
size_type getCountClusters() const { return header.getClusterCount(); }
|
||||
offset_type getClusterOffset(size_type idx) { return getOffset(header.getClusterPtrPos(), idx); }
|
||||
|
||||
size_type getNamespaceBeginOffset(char ch);
|
||||
size_type getNamespaceEndOffset(char ch);
|
||||
size_type getNamespaceCount(char ns)
|
||||
{ return getNamespaceEndOffset(ns) - getNamespaceBeginOffset(ns); }
|
||||
|
||||
std::string getNamespaces();
|
||||
bool hasNamespace(char ch);
|
||||
|
||||
const std::string& getMimeType(uint16_t idx) const;
|
||||
|
||||
std::string getChecksum();
|
||||
bool verify();
|
||||
};
|
||||
|
||||
}
|
||||
|
||||
#endif // ZIM_FILEIMPL_H
|
||||
|
||||
@ -1,107 +0,0 @@
|
||||
/*
|
||||
* Copyright (C) 2006 Tommi Maekitalo
|
||||
*
|
||||
* This program is free software; you can redistribute it and/or
|
||||
* modify it under the terms of the GNU General Public License as
|
||||
* published by the Free Software Foundation; either version 2 of the
|
||||
* License, or (at your option) any later version.
|
||||
*
|
||||
* This program is distributed in the hope that it will be useful, but
|
||||
* is provided AS IS, WITHOUT ANY WARRANTY; without even the implied
|
||||
* warranty of MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE, and
|
||||
* NON-INFRINGEMENT. See the GNU General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU General Public License
|
||||
* along with this program; if not, write to the Free Software
|
||||
* Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
|
||||
*
|
||||
*/
|
||||
|
||||
#ifndef ZIM_FILEITERATOR_H
|
||||
#define ZIM_FILEITERATOR_H
|
||||
|
||||
#include <iterator>
|
||||
#include <zim/article.h>
|
||||
|
||||
namespace zim
|
||||
{
|
||||
class File::const_iterator : public std::iterator<std::bidirectional_iterator_tag, Article>
|
||||
{
|
||||
public:
|
||||
enum Mode {
|
||||
UrlIterator,
|
||||
ArticleIterator
|
||||
};
|
||||
|
||||
private:
|
||||
File* file;
|
||||
size_type idx;
|
||||
mutable Article article;
|
||||
Mode mode;
|
||||
|
||||
bool is_end() const { return file == 0 || idx >= file->getCountArticles(); }
|
||||
|
||||
public:
|
||||
explicit const_iterator(File* file_ = 0, size_type idx_ = 0, Mode mode_ = UrlIterator)
|
||||
: file(file_),
|
||||
idx(idx_),
|
||||
mode(mode_)
|
||||
{ }
|
||||
|
||||
size_type getIndex() const { return idx; }
|
||||
const File& getFile() const { return *file; }
|
||||
|
||||
bool operator== (const const_iterator& it) const
|
||||
{ return (is_end() && it.is_end())
|
||||
|| (file == it.file && idx == it.idx); }
|
||||
bool operator!= (const const_iterator& it) const
|
||||
{ return !operator==(it); }
|
||||
|
||||
const_iterator& operator++()
|
||||
{
|
||||
++idx;
|
||||
article = Article();
|
||||
return *this;
|
||||
}
|
||||
|
||||
const_iterator operator++(int)
|
||||
{
|
||||
const_iterator it = *this;
|
||||
operator++();
|
||||
return it;
|
||||
}
|
||||
|
||||
const_iterator& operator--()
|
||||
{
|
||||
--idx;
|
||||
article = Article();
|
||||
return *this;
|
||||
}
|
||||
|
||||
const_iterator operator--(int)
|
||||
{
|
||||
const_iterator it = *this;
|
||||
operator--();
|
||||
return it;
|
||||
}
|
||||
|
||||
const Article& operator*() const
|
||||
{
|
||||
if (!article.good())
|
||||
article = mode == UrlIterator ? file->getArticle(idx)
|
||||
: file->getArticleByTitle(idx);
|
||||
return article;
|
||||
}
|
||||
|
||||
pointer operator->() const
|
||||
{
|
||||
operator*();
|
||||
return &article;
|
||||
}
|
||||
|
||||
};
|
||||
|
||||
}
|
||||
|
||||
#endif // ZIM_FILEITERATOR_H
|
||||
|
||||
@ -1,103 +0,0 @@
|
||||
/*
|
||||
* Copyright (C) 2010 Tommi Maekitalo
|
||||
*
|
||||
* This program is free software; you can redistribute it and/or
|
||||
* modify it under the terms of the GNU General Public License as
|
||||
* published by the Free Software Foundation; either version 2 of the
|
||||
* License, or (at your option) any later version.
|
||||
*
|
||||
* This program is distributed in the hope that it will be useful, but
|
||||
* is provided AS IS, WITHOUT ANY WARRANTY; without even the implied
|
||||
* warranty of MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE, and
|
||||
* NON-INFRINGEMENT. See the GNU General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU General Public License
|
||||
* along with this program; if not, write to the Free Software
|
||||
* Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
|
||||
*
|
||||
*/
|
||||
|
||||
#ifndef ZIM_FSTREAM_H
|
||||
#define ZIM_FSTREAM_H
|
||||
|
||||
#include <iostream>
|
||||
#include <vector>
|
||||
#include <zim/zim.h>
|
||||
#include <zim/smartptr.h>
|
||||
#include <zim/cache.h>
|
||||
#include <zim/refcounted.h>
|
||||
|
||||
namespace zim
|
||||
{
|
||||
class streambuf : public std::streambuf
|
||||
{
|
||||
struct FileInfo : public RefCounted
|
||||
{
|
||||
std::string fname;
|
||||
zim::offset_type fsize;
|
||||
|
||||
FileInfo() { }
|
||||
FileInfo(const std::string& fname_, int fd);
|
||||
};
|
||||
|
||||
struct OpenfileInfo : public RefCounted
|
||||
{
|
||||
std::string fname;
|
||||
int fd;
|
||||
|
||||
explicit OpenfileInfo(const std::string& fname);
|
||||
~OpenfileInfo();
|
||||
};
|
||||
|
||||
typedef SmartPtr<FileInfo> FileInfoPtr;
|
||||
typedef std::vector<FileInfoPtr> FilesType;
|
||||
|
||||
typedef SmartPtr<OpenfileInfo> OpenfileInfoPtr;
|
||||
typedef Cache<std::string, OpenfileInfoPtr> OpenFilesCacheType;
|
||||
|
||||
std::vector<char> buffer;
|
||||
|
||||
FilesType files;
|
||||
OpenFilesCacheType openFilesCache;
|
||||
OpenfileInfoPtr currentFile;
|
||||
zim::offset_type currentPos;
|
||||
|
||||
std::streambuf::int_type overflow(std::streambuf::int_type ch);
|
||||
std::streambuf::int_type underflow();
|
||||
int sync();
|
||||
|
||||
void setCurrentFile(const std::string& fname, zim::offset_type off);
|
||||
|
||||
mutable time_t mtime;
|
||||
|
||||
public:
|
||||
streambuf(const std::string& fname, unsigned bufsize, unsigned openFilesCache);
|
||||
|
||||
void seekg(zim::offset_type off);
|
||||
void setBufsize(unsigned s)
|
||||
{ buffer.resize(s); }
|
||||
zim::offset_type fsize() const;
|
||||
time_t getMTime() const;
|
||||
};
|
||||
|
||||
class ifstream : public std::istream
|
||||
{
|
||||
streambuf myStreambuf;
|
||||
|
||||
public:
|
||||
explicit ifstream(const std::string& fname, unsigned bufsize = 8192, unsigned openFilesCache = 5)
|
||||
: std::istream(0),
|
||||
myStreambuf(fname, bufsize, openFilesCache)
|
||||
{
|
||||
init(&myStreambuf);
|
||||
}
|
||||
|
||||
void seekg(zim::offset_type off) { myStreambuf.seekg(off); }
|
||||
void setBufsize(unsigned s) { myStreambuf.setBufsize(s); }
|
||||
zim::offset_type fsize() const { return myStreambuf.fsize(); }
|
||||
time_t getMTime() const { return myStreambuf.getMTime(); }
|
||||
};
|
||||
|
||||
}
|
||||
|
||||
#endif // ZIM_FSTREAM_H
|
||||
@ -1,72 +0,0 @@
|
||||
/*
|
||||
* Copyright (C) 2007 Tommi Maekitalo
|
||||
*
|
||||
* This program is free software; you can redistribute it and/or
|
||||
* modify it under the terms of the GNU General Public License as
|
||||
* published by the Free Software Foundation; either version 2 of the
|
||||
* License, or (at your option) any later version.
|
||||
*
|
||||
* This program is distributed in the hope that it will be useful, but
|
||||
* is provided AS IS, WITHOUT ANY WARRANTY; without even the implied
|
||||
* warranty of MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE, and
|
||||
* NON-INFRINGEMENT. See the GNU General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU General Public License
|
||||
* along with this program; if not, write to the Free Software
|
||||
* Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
|
||||
*
|
||||
*/
|
||||
|
||||
#ifndef ZIM_INDEXARTICLE_H
|
||||
#define ZIM_INDEXARTICLE_H
|
||||
|
||||
#include <zim/article.h>
|
||||
#include <vector>
|
||||
|
||||
namespace zim
|
||||
{
|
||||
class IndexArticle : public Article
|
||||
{
|
||||
public:
|
||||
struct Entry
|
||||
{
|
||||
unsigned index;
|
||||
unsigned pos;
|
||||
};
|
||||
|
||||
typedef std::vector<Entry> EntriesType;
|
||||
|
||||
private:
|
||||
EntriesType entries[4];
|
||||
bool categoriesRead;
|
||||
void readEntries();
|
||||
void readEntriesZ(); // directmedia style zint-compression
|
||||
void readEntriesB(); // article compressed style
|
||||
|
||||
static bool noOffset;
|
||||
|
||||
public:
|
||||
IndexArticle(const Article& article)
|
||||
: Article(article),
|
||||
categoriesRead(false)
|
||||
{ }
|
||||
|
||||
unsigned getCategoryCount(unsigned cat)
|
||||
{ readEntries(); return entries[cat].size(); }
|
||||
const EntriesType& getCategory(unsigned cat)
|
||||
{ readEntries(); return entries[cat]; }
|
||||
unsigned getTotalCount()
|
||||
{
|
||||
readEntries();
|
||||
unsigned c = 0;
|
||||
for (unsigned cat = 0; cat < 4; ++cat)
|
||||
c += entries[cat].size();
|
||||
return c;
|
||||
}
|
||||
|
||||
static void setNoOffset(bool sw = true) { noOffset = sw; }
|
||||
static bool getNoOffset() { return noOffset; }
|
||||
};
|
||||
}
|
||||
|
||||
#endif // ZIM_INDEXARTICLE_H
|
||||
@ -1,94 +0,0 @@
|
||||
/*
|
||||
* Copyright (C) 2009 Tommi Maekitalo
|
||||
*
|
||||
* This program is free software; you can redistribute it and/or
|
||||
* modify it under the terms of the GNU General Public License as
|
||||
* published by the Free Software Foundation; either version 2 of the
|
||||
* License, or (at your option) any later version.
|
||||
*
|
||||
* This program is distributed in the hope that it will be useful, but
|
||||
* is provided AS IS, WITHOUT ANY WARRANTY; without even the implied
|
||||
* warranty of MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE, and
|
||||
* NON-INFRINGEMENT. See the GNU General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU General Public License
|
||||
* along with this program; if not, write to the Free Software
|
||||
* Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
|
||||
*
|
||||
*/
|
||||
|
||||
#ifndef ZIM_LZMASTREAM_H
|
||||
#define ZIM_LZMASTREAM_H
|
||||
|
||||
#include <iostream>
|
||||
#include <stdexcept>
|
||||
#include <lzma.h>
|
||||
#include <vector>
|
||||
|
||||
namespace zim
|
||||
{
|
||||
class LzmaError : public std::runtime_error
|
||||
{
|
||||
lzma_ret ret;
|
||||
|
||||
public:
|
||||
LzmaError(lzma_ret ret_, const std::string& msg)
|
||||
: std::runtime_error(msg),
|
||||
ret(ret_)
|
||||
{ }
|
||||
|
||||
lzma_ret getRetcode() const { return ret; }
|
||||
};
|
||||
|
||||
class LzmaStreamBuf : public std::streambuf
|
||||
{
|
||||
lzma_stream stream;
|
||||
std::vector<char_type> obuffer;
|
||||
std::streambuf* sink;
|
||||
|
||||
public:
|
||||
LzmaStreamBuf(std::streambuf* sink_,
|
||||
uint32_t preset = 3 | LZMA_PRESET_EXTREME,
|
||||
lzma_check check = LZMA_CHECK_CRC32 /* LZMA_CHECK_NONE */,
|
||||
unsigned bufsize = 8192);
|
||||
~LzmaStreamBuf();
|
||||
|
||||
/// see std::streambuf
|
||||
int_type overflow(int_type c);
|
||||
/// see std::streambuf
|
||||
int_type underflow();
|
||||
/// see std::streambuf
|
||||
int sync();
|
||||
/// end stream
|
||||
int end();
|
||||
|
||||
void setSink(std::streambuf* sink_) { sink = sink_; }
|
||||
};
|
||||
|
||||
class LzmaStream : public std::ostream
|
||||
{
|
||||
LzmaStreamBuf streambuf;
|
||||
|
||||
public:
|
||||
explicit LzmaStream(std::streambuf* sink,
|
||||
uint32_t preset = 3 | LZMA_PRESET_EXTREME,
|
||||
lzma_check check = LZMA_CHECK_CRC32 /* LZMA_CHECK_NONE */,
|
||||
unsigned bufsize = 8192)
|
||||
: std::ostream(0),
|
||||
streambuf(sink, preset, check, bufsize)
|
||||
{ init(&streambuf); }
|
||||
explicit LzmaStream(std::ostream& sink,
|
||||
uint32_t preset = 3 | LZMA_PRESET_EXTREME,
|
||||
lzma_check check = LZMA_CHECK_CRC32 /* LZMA_CHECK_NONE */,
|
||||
unsigned bufsize = 8192)
|
||||
: std::ostream(0),
|
||||
streambuf(sink.rdbuf(), preset, check, bufsize)
|
||||
{ init(&streambuf); }
|
||||
|
||||
void end();
|
||||
void setSink(std::streambuf* sink) { streambuf.setSink(sink); }
|
||||
void setSink(std::ostream& sink) { streambuf.setSink(sink.rdbuf()); }
|
||||
};
|
||||
}
|
||||
|
||||
#endif // ZIM_LZMASTREAM_H
|
||||
@ -1,45 +0,0 @@
|
||||
/*
|
||||
* Copyright (C) 2006 Tommi Maekitalo
|
||||
*
|
||||
* This library is free software; you can redistribute it and/or
|
||||
* modify it under the terms of the GNU Lesser General Public
|
||||
* License as published by the Free Software Foundation; either
|
||||
* version 2.1 of the License, or (at your option) any later version.
|
||||
*
|
||||
* As a special exception, you may use this file as part of a free
|
||||
* software library without restriction. Specifically, if other files
|
||||
* instantiate templates or use macros or inline functions from this
|
||||
* file, or you compile this file and link it with other files to
|
||||
* produce an executable, this file does not by itself cause the
|
||||
* resulting executable to be covered by the GNU General Public
|
||||
* License. This exception does not however invalidate any other
|
||||
* reasons why the executable file might be covered by the GNU Library
|
||||
* General Public License.
|
||||
*
|
||||
* This library is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
||||
* Lesser General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU Lesser General Public
|
||||
* License along with this library; if not, write to the Free Software
|
||||
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
|
||||
*/
|
||||
|
||||
#ifndef ZIM_NONCOPYABLE_H
|
||||
#define ZIM_NONCOPYABLE_H
|
||||
|
||||
namespace zim
|
||||
{
|
||||
/**
 * Base class that disables copying.
 *
 * Copy constructor and copy assignment are declared private and are never
 * defined, so a copy attempt from outside the class is rejected at compile
 * time.
 */
class NonCopyable
{
  public:
    NonCopyable()
    {
    }

  private:
    // declared only - no implementation
    NonCopyable(const NonCopyable&);
    // declared only - no implementation
    NonCopyable& operator=(const NonCopyable&);
};
|
||||
}
|
||||
|
||||
#endif // ZIM_NONCOPYABLE_H
|
||||
|
||||
@ -1,59 +0,0 @@
|
||||
/*
|
||||
* Copyright (C) 2005 Tommi Maekitalo
|
||||
*
|
||||
* This library is free software; you can redistribute it and/or
|
||||
* modify it under the terms of the GNU Lesser General Public
|
||||
* License as published by the Free Software Foundation; either
|
||||
* version 2.1 of the License, or (at your option) any later version.
|
||||
*
|
||||
* As a special exception, you may use this file as part of a free
|
||||
* software library without restriction. Specifically, if other files
|
||||
* instantiate templates or use macros or inline functions from this
|
||||
* file, or you compile this file and link it with other files to
|
||||
* produce an executable, this file does not by itself cause the
|
||||
* resulting executable to be covered by the GNU General Public
|
||||
* License. This exception does not however invalidate any other
|
||||
* reasons why the executable file might be covered by the GNU Library
|
||||
* General Public License.
|
||||
*
|
||||
* This library is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
||||
* Lesser General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU Lesser General Public
|
||||
* License along with this library; if not, write to the Free Software
|
||||
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
|
||||
*/
|
||||
|
||||
#ifndef ZIM_REFCOUNTED_H
|
||||
#define ZIM_REFCOUNTED_H
|
||||
|
||||
#include <zim/noncopyable.h>
|
||||
|
||||
namespace zim
|
||||
{
|
||||
class RefCounted : private NonCopyable
|
||||
{
|
||||
unsigned rc;
|
||||
|
||||
public:
|
||||
RefCounted()
|
||||
: rc(0)
|
||||
{ }
|
||||
|
||||
explicit RefCounted(unsigned refs_)
|
||||
: rc(refs_)
|
||||
{ }
|
||||
|
||||
virtual ~RefCounted() { }
|
||||
|
||||
virtual unsigned addRef() { return ++rc; }
|
||||
virtual void release() { if (--rc == 0) delete this; }
|
||||
unsigned refs() const { return rc; }
|
||||
};
|
||||
|
||||
}
|
||||
|
||||
#endif // ZIM_REFCOUNTED_H
|
||||
|
||||
@ -1,122 +0,0 @@
|
||||
/*
|
||||
* Copyright (C) 2007 Tommi Maekitalo
|
||||
*
|
||||
* This program is free software; you can redistribute it and/or
|
||||
* modify it under the terms of the GNU General Public License as
|
||||
* published by the Free Software Foundation; either version 2 of the
|
||||
* License, or (at your option) any later version.
|
||||
*
|
||||
* This program is distributed in the hope that it will be useful, but
|
||||
* is provided AS IS, WITHOUT ANY WARRANTY; without even the implied
|
||||
* warranty of MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE, and
|
||||
* NON-INFRINGEMENT. See the GNU General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU General Public License
|
||||
* along with this program; if not, write to the Free Software
|
||||
* Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
|
||||
*
|
||||
*/
|
||||
|
||||
#ifndef ZIM_SEARCH_H
|
||||
#define ZIM_SEARCH_H
|
||||
|
||||
#include <zim/article.h>
|
||||
#include <vector>
|
||||
#include <map>
|
||||
|
||||
namespace zim
|
||||
{
|
||||
class SearchResult
|
||||
{
|
||||
Article article;
|
||||
mutable double priority;
|
||||
struct WordAttr
|
||||
{
|
||||
unsigned count;
|
||||
unsigned addweight;
|
||||
WordAttr() : count(0), addweight(1) { }
|
||||
};
|
||||
|
||||
typedef std::map<std::string, WordAttr> WordListType; // map word => count and addweight
|
||||
typedef std::map<size_type, std::string> PosListType; // map position => word
|
||||
WordListType wordList;
|
||||
PosListType posList;
|
||||
|
||||
public:
|
||||
SearchResult() : priority(0) { }
|
||||
explicit SearchResult(const Article& article_, unsigned priority_ = 0)
|
||||
: article(article_),
|
||||
priority(priority_)
|
||||
{ }
|
||||
const Article& getArticle() const { return article; }
|
||||
double getPriority() const;
|
||||
void foundWord(const std::string& word, size_type pos, unsigned addweight);
|
||||
unsigned getCountWords() const { return wordList.size(); }
|
||||
unsigned getCountPositions() const { return posList.size(); }
|
||||
};
|
||||
|
||||
class Search
|
||||
{
|
||||
public:
|
||||
class Results : public std::vector<SearchResult>
|
||||
{
|
||||
std::string expr;
|
||||
|
||||
public:
|
||||
void setExpression(const std::string& e)
|
||||
{ expr = e; }
|
||||
const std::string& getExpression() const
|
||||
{ return expr; }
|
||||
};
|
||||
|
||||
private:
|
||||
static double weightOcc;
|
||||
static double weightOccOff;
|
||||
static double weightPlus;
|
||||
static double weightDist;
|
||||
static double weightPos;
|
||||
static double weightPosRel;
|
||||
static double weightDistinctWords;
|
||||
static unsigned searchLimit;
|
||||
|
||||
File indexfile;
|
||||
File articlefile;
|
||||
|
||||
public:
|
||||
Search()
|
||||
{ }
|
||||
|
||||
explicit Search(const File& zimfile)
|
||||
: indexfile(zimfile),
|
||||
articlefile(zimfile)
|
||||
{ }
|
||||
Search(const File& articlefile_, const File& indexfile_)
|
||||
: indexfile(indexfile_),
|
||||
articlefile(articlefile_)
|
||||
{ }
|
||||
|
||||
void search(Results& results, const std::string& expr);
|
||||
void find(Results& results, char ns, const std::string& praefix, unsigned limit = searchLimit);
|
||||
void find(Results& results, char ns, const std::string& begin, const std::string& end, unsigned limit = searchLimit);
|
||||
|
||||
static double getWeightOcc() { return weightOcc; }
|
||||
static double getWeightOccOff() { return weightOccOff; }
|
||||
static double getWeightPlus() { return weightPlus; }
|
||||
static double getWeightDist() { return weightDist; }
|
||||
static double getWeightPos() { return weightPos; }
|
||||
static double getWeightPosRel() { return weightPosRel; }
|
||||
static double getWeightDistinctWords() { return weightDistinctWords; }
|
||||
static unsigned getSearchLimit() { return searchLimit; }
|
||||
|
||||
static void setWeightOcc(double v) { weightOcc = v; }
|
||||
static void setWeightOccOff(double v) { weightOccOff = v; }
|
||||
static void setWeightPlus(double v) { weightPlus = v; }
|
||||
static void setWeightDist(double v) { weightDist = v; }
|
||||
static void setWeightPos(double v) { weightPos = v; }
|
||||
static void setWeightPosRel(double v) { weightPosRel = v; }
|
||||
static void setWeightDistinctWords(double v) { weightDistinctWords = v; }
|
||||
static void setSearchLimit(unsigned v) { searchLimit = v; }
|
||||
};
|
||||
}
|
||||
|
||||
#endif // ZIM_SEARCH_H
|
||||
@ -1,87 +0,0 @@
|
||||
/*
|
||||
* Copyright (C) 2005 Tommi Maekitalo
|
||||
*
|
||||
* This library is free software; you can redistribute it and/or
|
||||
* modify it under the terms of the GNU Lesser General Public
|
||||
* License as published by the Free Software Foundation; either
|
||||
* version 2.1 of the License, or (at your option) any later version.
|
||||
*
|
||||
* As a special exception, you may use this file as part of a free
|
||||
* software library without restriction. Specifically, if other files
|
||||
* instantiate templates or use macros or inline functions from this
|
||||
* file, or you compile this file and link it with other files to
|
||||
* produce an executable, this file does not by itself cause the
|
||||
* resulting executable to be covered by the GNU General Public
|
||||
* License. This exception does not however invalidate any other
|
||||
* reasons why the executable file might be covered by the GNU Library
|
||||
* General Public License.
|
||||
*
|
||||
* This library is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
||||
* Lesser General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU Lesser General Public
|
||||
* License along with this library; if not, write to the Free Software
|
||||
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
|
||||
*/
|
||||
|
||||
#ifndef ZIM_SMARTPTR_H
|
||||
#define ZIM_SMARTPTR_H
|
||||
|
||||
namespace zim
|
||||
{
|
||||
/**
 * Intrusive reference-counting smart pointer.
 *
 * objectType must provide addRef() and release(); SmartPtr calls addRef()
 * whenever it starts pointing at a non-null object and release() whenever it
 * stops doing so. What release() does when the last reference goes away is up
 * to objectType (typically it destroys the object).
 */
template <typename objectType>
class SmartPtr
{
    objectType* object;

  public:
    /// Creates an empty (null) pointer.
    SmartPtr()
      : object(0)
      {}

    /// Takes a reference on ptr (if non-null).
    SmartPtr(objectType* ptr)
      : object(ptr)
      { if (object) object->addRef(); }

    /// Shares the object held by ptr and takes an additional reference on it.
    SmartPtr(const SmartPtr& ptr)
      : object(ptr.object)
      { if (object) object->addRef(); }

    /// Drops the reference held by this pointer (if any).
    ~SmartPtr()
      { if (object) object->release(); }

    /// Rebinds this pointer to the object held by ptr.
    SmartPtr& operator= (const SmartPtr& ptr)
    {
      // Read the source pointer up front: release() below may run arbitrary
      // code, and if `ptr` lives inside the object we are about to release
      // (e.g. list = list->next) it must not be touched afterwards.
      objectType* const incoming = ptr.object;

      if (object != incoming)
      {
        // Acquire the new reference before dropping the old one so the
        // incoming object cannot be destroyed as a side effect of the release.
        if (incoming)
          incoming->addRef();

        objectType* const outgoing = object;
        object = incoming;

        if (outgoing)
          outgoing->release();
      }
      return *this;
    }

    /// The object can be dereferenced like the held object
    objectType* operator->() const              { return object; }
    /// The object can be dereferenced like the held object
    objectType& operator*() const               { return *object; }

    // Raw-pointer comparisons; no reference counts are touched.
    bool operator== (const objectType* p) const { return object == p; }
    bool operator!= (const objectType* p) const { return object != p; }
    bool operator< (const objectType* p) const  { return object < p; }

    /// True when no object is held.
    bool operator! () const                     { return object == 0; }
    /// True when an object is held.
    operator bool () const                      { return object != 0; }

    /// Access to the raw pointer; the caller does not gain a reference.
    objectType* getPointer()                    { return object; }
    const objectType* getPointer() const        { return object; }
    operator objectType* ()                     { return object; }
    operator const objectType* () const         { return object; }
};
|
||||
|
||||
}
|
||||
|
||||
#endif // ZIM_SMARTPTR_H
|
||||
|
||||
@ -1,83 +0,0 @@
|
||||
/*
|
||||
* Copyright (C) 2009 Tommi Maekitalo
|
||||
*
|
||||
* This program is free software; you can redistribute it and/or
|
||||
* modify it under the terms of the GNU General Public License as
|
||||
* published by the Free Software Foundation; either version 2 of the
|
||||
* License, or (at your option) any later version.
|
||||
*
|
||||
* This program is distributed in the hope that it will be useful, but
|
||||
* is provided AS IS, WITHOUT ANY WARRANTY; without even the implied
|
||||
* warranty of MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE, and
|
||||
* NON-INFRINGEMENT. See the GNU General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU General Public License
|
||||
* along with this program; if not, write to the Free Software
|
||||
* Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
|
||||
*
|
||||
*/
|
||||
|
||||
#ifndef ZIM_TEMPLATE_H
|
||||
#define ZIM_TEMPLATE_H
|
||||
|
||||
#include <string>
|
||||
|
||||
namespace zim
|
||||
{
|
||||
/**
 * Character-driven state machine for templates.
 *
 * Input is fed one character at a time (or as a whole string); each character
 * is dispatched to the member function currently stored in `state`. The state
 * handlers themselves and flush() are defined outside this header. Results
 * are reported through the Event callback interface.
 */
class TemplateParser
{
  public:
    /// Callback interface implemented by users of the parser.
    class Event
    {
      public:
        // Polymorphic base: a virtual destructor makes deleting an
        // implementation through an Event* well defined.
        virtual ~Event() { }

        virtual void onData(const std::string& data) = 0;
        virtual void onToken(const std::string& token) = 0;
        virtual void onLink(char ns, const std::string& url) = 0;
    };

  private:
    Event* event;

    std::string data;
    std::string::size_type save;
    std::string::size_type token;
    std::string::size_type token_e;
    char ns;
    typedef void (TemplateParser::*state_type)(char);

    state_type state;

    void state_data(char ch);
    void state_lt(char ch);
    void state_token0(char ch);
    void state_token(char ch);
    void state_token_end(char ch);
    void state_link0(char ch);
    void state_link(char ch);
    void state_title(char ch);
    void state_title_end(char ch);

  public:
    /// Creates a parser reporting to ev; parsing starts in state_data.
    /// The parser stores the pointer only, so ev must outlive the parser.
    explicit TemplateParser(Event* ev)
      : event(ev),
        // Give the bookkeeping members defined values instead of leaving
        // them indeterminate until a state handler first writes them.
        save(0),
        token(0),
        token_e(0),
        ns('\0'),
        state(&TemplateParser::state_data)
      { }

    /// Feeds a single character to the current state handler.
    void parse(char ch)
    {
      (this->*state)(ch);
    }

    /// Feeds every character of s, in order.
    void parse(const std::string& s)
    {
      for (std::string::const_iterator ch = s.begin(); ch != s.end(); ++ch)
        parse(*ch);
    }

    /// Defined elsewhere; callers in this code base invoke it after the last
    /// character has been parsed.
    void flush();

};
|
||||
|
||||
}
|
||||
|
||||
#endif // ZIM_TEMPLATE_H
|
||||
@ -1,95 +0,0 @@
|
||||
/*
|
||||
* Copyright (C) 2008 Tommi Maekitalo
|
||||
*
|
||||
* This program is free software; you can redistribute it and/or
|
||||
* modify it under the terms of the GNU General Public License as
|
||||
* published by the Free Software Foundation; either version 2 of the
|
||||
* License, or (at your option) any later version.
|
||||
*
|
||||
* This program is distributed in the hope that it will be useful, but
|
||||
* is provided AS IS, WITHOUT ANY WARRANTY; without even the implied
|
||||
* warranty of MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE, and
|
||||
* NON-INFRINGEMENT. See the GNU General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU General Public License
|
||||
* along with this program; if not, write to the Free Software
|
||||
* Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
|
||||
*
|
||||
*/
|
||||
|
||||
// Include guard: this header defines inline functions, so including it twice
// in one translation unit would otherwise be a redefinition error. The guard
// name follows the ZIM_<NAME>_H convention used by the sibling headers.
#ifndef ZIM_UNICODE_H
#define ZIM_UNICODE_H

#include <locale>
#include <zim/zim.h>

namespace zim
{
  /// Case conversion for a 32-bit character value (definition not in this header).
  uint32_t tolower(uint32_t ucs);

  /// Case conversion for a 32-bit character value (definition not in this header).
  uint32_t toupper(uint32_t ucs);

  /// Returns the std::ctype_base classification mask for ch (definition not
  /// in this header). All predicates below are thin tests on this mask.
  std::ctype_base::mask ctypeMask(uint32_t ch);

  /// True when the mask of ch has the `alpha` bit set.
  inline bool isalpha(uint32_t ch)
  {
    return ctypeMask(ch) & std::ctype_base::alpha;
  }

  /// True when the mask of ch has any `alnum` bit set.
  inline bool isalnum(uint32_t ch)
  {
    return ctypeMask(ch) & std::ctype_base::alnum;
  }

  /// True when the mask of ch has the `punct` bit set.
  inline bool ispunct(uint32_t ch)
  {
    return ctypeMask(ch) & std::ctype_base::punct;
  }

  /// True when the mask of ch has the `cntrl` bit set.
  inline bool iscntrl(uint32_t ch)
  {
    return ctypeMask(ch) & std::ctype_base::cntrl;
  }

  /// True when the mask of ch has the `digit` bit set.
  inline bool isdigit(uint32_t ch)
  {
    return ctypeMask(ch) & std::ctype_base::digit;
  }

  /// True when the mask of ch has the `xdigit` bit set.
  inline bool isxdigit(uint32_t ch)
  {
    return ctypeMask(ch) & std::ctype_base::xdigit;
  }

  /// True when the mask of ch has any `graph` bit set.
  inline bool isgraph(uint32_t ch)
  {
    return ctypeMask(ch) & std::ctype_base::graph;
  }

  /// True when the mask of ch has the `lower` bit set.
  inline bool islower(uint32_t ch)
  {
    return ctypeMask(ch) & std::ctype_base::lower;
  }

  /// True when the mask of ch has the `upper` bit set.
  inline bool isupper(uint32_t ch)
  {
    return ctypeMask(ch) & std::ctype_base::upper;
  }

  /// True when the mask of ch has the `print` bit set.
  inline bool isprint(uint32_t ch)
  {
    return ctypeMask(ch) & std::ctype_base::print;
  }

  /// True when the mask of ch has the `space` bit set.
  inline bool isspace(uint32_t ch)
  {
    return ctypeMask(ch) & std::ctype_base::space;
  }

}

#endif // ZIM_UNICODE_H
|
||||
@ -1,91 +0,0 @@
|
||||
/*
|
||||
* Copyright (C) 2009 Tommi Maekitalo
|
||||
*
|
||||
* This library is free software; you can redistribute it and/or
|
||||
* modify it under the terms of the GNU Lesser General Public
|
||||
* License as published by the Free Software Foundation; either
|
||||
* version 2.1 of the License, or (at your option) any later version.
|
||||
*
|
||||
* This library is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
||||
* Lesser General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU Lesser General Public
|
||||
* License along with this library; if not, write to the Free Software
|
||||
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
|
||||
*
|
||||
*/
|
||||
|
||||
|
||||
#ifndef ZIM_UNLZMASTREAM_H
|
||||
#define ZIM_UNLZMASTREAM_H
|
||||
|
||||
#include <iostream>
|
||||
#include <stdexcept>
|
||||
#include <lzma.h>
|
||||
|
||||
namespace zim
|
||||
{
|
||||
class UnlzmaError : public std::runtime_error
|
||||
{
|
||||
lzma_ret ret;
|
||||
|
||||
public:
|
||||
UnlzmaError(lzma_ret ret_, const std::string& msg)
|
||||
: std::runtime_error(msg),
|
||||
ret(ret_)
|
||||
{ }
|
||||
|
||||
lzma_ret getRetcode() const { return ret; }
|
||||
};
|
||||
|
||||
class UnlzmaStreamBuf : public std::streambuf
|
||||
{
|
||||
lzma_stream stream;
|
||||
char_type* iobuffer;
|
||||
unsigned bufsize;
|
||||
std::streambuf* sinksource;
|
||||
|
||||
char_type* ibuffer() { return iobuffer; }
|
||||
std::streamsize ibuffer_size() { return bufsize >> 1; }
|
||||
char_type* obuffer() { return iobuffer + ibuffer_size(); }
|
||||
std::streamsize obuffer_size() { return bufsize >> 1; }
|
||||
|
||||
public:
|
||||
explicit UnlzmaStreamBuf(std::streambuf* sinksource_, unsigned bufsize = 8192);
|
||||
~UnlzmaStreamBuf();
|
||||
|
||||
/// see std::streambuf
|
||||
int_type overflow(int_type c);
|
||||
/// see std::streambuf
|
||||
int_type underflow();
|
||||
/// see std::streambuf
|
||||
int sync();
|
||||
|
||||
void setSinksource(std::streambuf* sinksource_) { sinksource = sinksource_; }
|
||||
};
|
||||
|
||||
class UnlzmaStream : public std::iostream
|
||||
{
|
||||
UnlzmaStreamBuf streambuf;
|
||||
|
||||
public:
|
||||
explicit UnlzmaStream(std::streambuf* sinksource, unsigned bufsize = 8192)
|
||||
: std::iostream(0),
|
||||
streambuf(sinksource, bufsize)
|
||||
{ init(&streambuf); }
|
||||
explicit UnlzmaStream(std::ios& sinksource, unsigned bufsize = 8192)
|
||||
: std::iostream(0),
|
||||
streambuf(sinksource.rdbuf(), bufsize)
|
||||
{ init(&streambuf); }
|
||||
|
||||
void setSinksource(std::streambuf* sinksource) { streambuf.setSinksource(sinksource); }
|
||||
void setSinksource(std::ios& sinksource) { streambuf.setSinksource(sinksource.rdbuf()); }
|
||||
void setSink(std::ostream& sink) { streambuf.setSinksource(sink.rdbuf()); }
|
||||
void setSource(std::istream& source) { streambuf.setSinksource(source.rdbuf()); }
|
||||
};
|
||||
}
|
||||
|
||||
#endif // ZIM_UNLZMASTREAM_H
|
||||
|
||||
@ -1,54 +0,0 @@
|
||||
/*
|
||||
* Copyright (C) 2009 Tommi Maekitalo
|
||||
*
|
||||
* This program is free software; you can redistribute it and/or
|
||||
* modify it under the terms of the GNU General Public License as
|
||||
* published by the Free Software Foundation; either version 2 of the
|
||||
* License, or (at your option) any later version.
|
||||
*
|
||||
* This program is distributed in the hope that it will be useful, but
|
||||
* is provided AS IS, WITHOUT ANY WARRANTY; without even the implied
|
||||
* warranty of MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE, and
|
||||
* NON-INFRINGEMENT. See the GNU General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU General Public License
|
||||
* along with this program; if not, write to the Free Software
|
||||
* Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
|
||||
*
|
||||
*/
|
||||
|
||||
#ifndef ZIM_UUID_H
|
||||
#define ZIM_UUID_H
|
||||
|
||||
#include <iosfwd>
|
||||
#include <algorithm>
|
||||
#include <cstring>
|
||||
|
||||
namespace zim
|
||||
{
|
||||
/// 16-byte identifier stored as raw bytes.
struct Uuid
{
    /// Creates an all-zero uuid.
    Uuid()
    {
      std::fill(data, data + 16, '\0');
    }

    /// Copies the 16 bytes starting at uuid.
    Uuid(const char uuid[16])
    {
      std::memcpy(data, uuid, 16);
    }

    /// Defined elsewhere.
    static Uuid generate();

    /// Byte-wise equality over all 16 bytes.
    bool operator== (const Uuid& other) const
    {
      return std::memcmp(data, other.data, 16) == 0;
    }

    /// Number of bytes in a uuid (always 16).
    unsigned size() const
    {
      return 16;
    }

    char data[16];
};
|
||||
|
||||
/// Stream-output operator for Uuid. Only declared here; the definition (and
/// therefore the exact text format) lives outside this header.
std::ostream& operator<< (std::ostream& out, const Uuid& uuid);
|
||||
|
||||
}
|
||||
|
||||
#endif // ZIM_UUID_H
|
||||
@ -1,130 +0,0 @@
|
||||
/*
|
||||
* Copyright (C) 2006 Tommi Maekitalo
|
||||
*
|
||||
* This program is free software; you can redistribute it and/or
|
||||
* modify it under the terms of the GNU General Public License as
|
||||
* published by the Free Software Foundation; either version 2 of the
|
||||
* License, or (at your option) any later version.
|
||||
*
|
||||
* This program is distributed in the hope that it will be useful, but
|
||||
* is provided AS IS, WITHOUT ANY WARRANTY; without even the implied
|
||||
* warranty of MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE, and
|
||||
* NON-INFRINGEMENT. See the GNU General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU General Public License
|
||||
* along with this program; if not, write to the Free Software
|
||||
* Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
|
||||
*
|
||||
*/
|
||||
|
||||
#ifndef ZIM_ZIM_H
|
||||
#define ZIM_ZIM_H
|
||||
|
||||
#include <limits.h>
|
||||
|
||||
namespace zim
{
  // define 8 bit integer types
  //
  // int8_t is spelled `signed char`: the signedness of plain `char` is
  // implementation-defined, so `char` would make int8_t unsigned on some ABIs.
  typedef unsigned char uint8_t;
  typedef signed char int8_t;

  // define 16 bit integer types
  //
  // Pick the first built-in type whose unsigned maximum is exactly 16 bits
  // wide; if none matches, fall back to <stdint.h>. The header has to be
  // included at global scope, hence the namespace is closed and reopened.
#if USHRT_MAX == 0xffff

  typedef unsigned short uint16_t;
  typedef short int16_t;

#elif UINT_MAX == 0xffff

  typedef unsigned int uint16_t;
  typedef int int16_t;

#elif ULONG_MAX == 0xffff

  typedef unsigned long uint16_t;
  typedef long int16_t;

#else

}
#include <stdint.h>
namespace zim
{
  // Make the names members of zim on this branch too, so that qualified
  // uses (zim::uint16_t) work regardless of which branch was taken.
  using ::uint16_t;
  using ::int16_t;

#endif

  // define 32 bit integer types
  //
#if USHRT_MAX == 0xffffffffUL

  typedef unsigned short uint32_t;
  typedef short int32_t;

#elif UINT_MAX == 0xffffffffUL

  typedef unsigned int uint32_t;
  typedef int int32_t;

#elif ULONG_MAX == 0xffffffffUL

  typedef unsigned long uint32_t;
  typedef long int32_t;

#else

}
#include <stdint.h>
namespace zim
{
  using ::uint32_t;
  using ::int32_t;

#endif

  // define 64 bit integer types
  //
#if UINT_MAX == 18446744073709551615ULL

  typedef unsigned int uint64_t;
  typedef int int64_t;

#elif ULONG_MAX == 18446744073709551615ULL

  typedef unsigned long uint64_t;
  typedef long int64_t;

#elif ULLONG_MAX == 18446744073709551615ULL

  typedef unsigned long long uint64_t;
  typedef long long int64_t;

#else

}
#include <stdint.h>
namespace zim
{
  using ::uint64_t;
  using ::int64_t;

#endif

  /// Type used for counts and indexes (32 bit).
  typedef uint32_t size_type;

  /// Type used for file offsets (64 bit).
  // NOTE(review): the _WIN32 branch is signed while the other one is
  // unsigned; kept as is because callers may depend on it.
#ifdef _WIN32
  typedef __int64 offset_type;
#else
  typedef uint64_t offset_type;
#endif

  /// Compression flag of a cluster; the numeric value is what gets cast
  /// to/from the single flag byte when a cluster is streamed.
  enum CompressionType
  {
    zimcompDefault,
    zimcompNone,
    zimcompZip,
    zimcompBzip2,
    zimcompLzma
  };

  /// Mime type string identifying html templates.
  static const char MimeHtmlTemplate[] = "text/x-zim-htmltemplate";
}
|
||||
|
||||
#endif // ZIM_ZIM_H
|
||||
|
||||
@ -1,98 +0,0 @@
|
||||
/*
|
||||
* Copyright (C) 2007 Tommi Maekitalo
|
||||
*
|
||||
* This program is free software; you can redistribute it and/or
|
||||
* modify it under the terms of the GNU General Public License as
|
||||
* published by the Free Software Foundation; either version 2 of the
|
||||
* License, or (at your option) any later version.
|
||||
*
|
||||
* This program is distributed in the hope that it will be useful, but
|
||||
* is provided AS IS, WITHOUT ANY WARRANTY; without even the implied
|
||||
* warranty of MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE, and
|
||||
* NON-INFRINGEMENT. See the GNU General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU General Public License
|
||||
* along with this program; if not, write to the Free Software
|
||||
* Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
|
||||
*
|
||||
*/
|
||||
|
||||
#ifndef ZIM_ZINTSTREAM_H
|
||||
#define ZIM_ZINTSTREAM_H
|
||||
|
||||
#include <string>
|
||||
#include <iostream>
|
||||
#include <zim/zim.h>
|
||||
|
||||
/*
|
||||
ZInt implements a int compressor and decompressor. The algorithm compresses
|
||||
small values into fewer bytes.
|
||||
|
||||
The idea is to add information about used bytes in the first byte. The number
|
||||
of additional bytes used is specified by the number of set bits counted from
|
||||
the most significant bit. So the numbers 0-127 are encoded as is, since they
|
||||
fit into the 7 low order bits and the high order bit specifies, that no
|
||||
additional bytes are used. The numbers from 128 up to 16511 need more
|
||||
than 7 bits, so we need to set the highest order bit to 1 and the next bit to
|
||||
0, leaving 6 bits of actual data, which is used as the low order bits of the
|
||||
number.
|
||||
|
||||
Since the numbers 0-127 are already encoded in one byte, 128 is
|
||||
subtracted from the actual number, so a 2 byte zero is actually a 128.
|
||||
|
||||
The same logic continues on the 3rd, 4th, ... byte. Up to 7 additional bytes
|
||||
are used, so the first byte must contain at least one 0.
|
||||
|
||||
binary range
|
||||
------------------------------- --------------------------------------------------
|
||||
0xxx xxxx 0 - 127
|
||||
10xx xxxx xxxx xxxx 128 - (2^14+128-1 = 16511)
|
||||
110x xxxx xxxx xxxx xxxx xxxx 16512 - (2^21+16512-1 = 2113663)
|
||||
1110 xxxx xxxx xxxx xxxx xxxx xxxx xxxx
|
||||
2113664 - (2^28+2113664-1 = 270549119)
|
||||
...
|
||||
|
||||
*/
|
||||
|
||||
namespace zim
|
||||
{
|
||||
class ZIntStream
|
||||
{
|
||||
std::istream* _istream;
|
||||
std::ostream* _ostream;
|
||||
|
||||
public:
|
||||
/// prepare ZIntStream for compression or decompression
|
||||
explicit ZIntStream(std::iostream& iostream)
|
||||
: _istream(&iostream),
|
||||
_ostream(&iostream)
|
||||
{ }
|
||||
|
||||
/// prepare ZIntStream for decompression
|
||||
explicit ZIntStream(std::istream& istream)
|
||||
: _istream(&istream),
|
||||
_ostream(0)
|
||||
{ }
|
||||
|
||||
/// prepare ZIntStream for compression
|
||||
explicit ZIntStream(std::ostream& ostream)
|
||||
: _istream(0),
|
||||
_ostream(&ostream)
|
||||
{ }
|
||||
|
||||
/// decompresses one value from input stream and returns it
|
||||
size_type get();
|
||||
|
||||
ZIntStream& get(size_type &value)
|
||||
{ value = get(); return *this; }
|
||||
|
||||
/// compresses one value to output stream
|
||||
ZIntStream& put(size_type value);
|
||||
|
||||
operator bool() const
|
||||
{ return (_istream == 0 || *_istream)
|
||||
&& (_ostream == 0 || *_ostream); }
|
||||
};
|
||||
|
||||
}
|
||||
#endif // ZIM_ZINTSTREAM_H
|
||||
@ -1,139 +0,0 @@
|
||||
/*
|
||||
* Copyright (C) 2006 Tommi Maekitalo
|
||||
*
|
||||
* This program is free software; you can redistribute it and/or
|
||||
* modify it under the terms of the GNU General Public License as
|
||||
* published by the Free Software Foundation; either version 2 of the
|
||||
* License, or (at your option) any later version.
|
||||
*
|
||||
* This program is distributed in the hope that it will be useful, but
|
||||
* is provided AS IS, WITHOUT ANY WARRANTY; without even the implied
|
||||
* warranty of MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE, and
|
||||
* NON-INFRINGEMENT. See the GNU General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU General Public License
|
||||
* along with this program; if not, write to the Free Software
|
||||
* Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
|
||||
*
|
||||
*/
|
||||
|
||||
#include <zim/article.h>
|
||||
#include <zim/template.h>
|
||||
#include <sstream>
|
||||
#include <iostream>
|
||||
#include <stdexcept>
|
||||
#include "log.h"
|
||||
|
||||
log_define("zim.article")
|
||||
|
||||
namespace zim
|
||||
{
|
||||
/// Size of this article's blob, looked up via its directory entry: the entry
/// names the cluster and the blob inside that cluster.
size_type Article::getArticleSize() const
{
  Dirent entry(getDirent());
  return file.getCluster(entry.getClusterNumber()).getBlobSize(entry.getBlobNumber());
}
|
||||
|
||||
namespace
|
||||
{
|
||||
class Ev : public TemplateParser::Event
|
||||
{
|
||||
std::ostream& out;
|
||||
Article& article;
|
||||
unsigned maxRecurse;
|
||||
|
||||
public:
|
||||
Ev(std::ostream& out_, Article& article_, unsigned maxRecurse_)
|
||||
: out(out_),
|
||||
article(article_),
|
||||
maxRecurse(maxRecurse_)
|
||||
{ }
|
||||
void onData(const std::string& data);
|
||||
void onToken(const std::string& token);
|
||||
void onLink(char ns, const std::string& title);
|
||||
};
|
||||
|
||||
void Ev::onData(const std::string& data)
|
||||
{
|
||||
out << data;
|
||||
}
|
||||
|
||||
void Ev::onToken(const std::string& token)
|
||||
{
|
||||
log_trace("onToken(\"" << token << "\")");
|
||||
|
||||
if (token == "title")
|
||||
out << article.getTitle();
|
||||
else if (token == "url")
|
||||
out << article.getUrl();
|
||||
else if (token == "namespace")
|
||||
out << article.getNamespace();
|
||||
else if (token == "content")
|
||||
{
|
||||
if (maxRecurse <= 0)
|
||||
throw std::runtime_error("maximum recursive limit is reached");
|
||||
article.getPage(out, false, maxRecurse - 1);
|
||||
}
|
||||
else
|
||||
{
|
||||
log_warn("unknown token \"" << token << "\" found in template");
|
||||
out << "<%" << token << "%>";
|
||||
}
|
||||
}
|
||||
|
||||
void Ev::onLink(char ns, const std::string& url)
|
||||
{
|
||||
if (maxRecurse <= 0)
|
||||
throw std::runtime_error("maximum recursive limit is reached");
|
||||
article.getFile().getArticle(ns, url).getPage(out, false, maxRecurse - 1);
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
std::string Article::getPage(bool layout, unsigned maxRecurse)
|
||||
{
|
||||
std::ostringstream s;
|
||||
getPage(s, layout, maxRecurse);
|
||||
return s.str();
|
||||
}
|
||||
|
||||
void Article::getPage(std::ostream& out, bool layout, unsigned maxRecurse)
|
||||
{
|
||||
log_trace("Article::getPage(" << layout << ", " << maxRecurse << ')');
|
||||
|
||||
if (getMimeType().compare(0, 9, "text/html") == 0 || getMimeType() == MimeHtmlTemplate)
|
||||
{
|
||||
if (layout && file.getFileheader().hasLayoutPage())
|
||||
{
|
||||
Article layoutPage = file.getArticle(file.getFileheader().getLayoutPage());
|
||||
Blob data = layoutPage.getData();
|
||||
|
||||
Ev ev(out, *this, maxRecurse);
|
||||
log_debug("call template parser");
|
||||
TemplateParser parser(&ev);
|
||||
for (const char* p = data.data(); p != data.end(); ++p)
|
||||
parser.parse(*p);
|
||||
parser.flush();
|
||||
|
||||
return;
|
||||
}
|
||||
else if (getMimeType() == MimeHtmlTemplate)
|
||||
{
|
||||
Blob data = getData();
|
||||
|
||||
Ev ev(out, *this, maxRecurse);
|
||||
TemplateParser parser(&ev);
|
||||
for (const char* p = data.data(); p != data.end(); ++p)
|
||||
parser.parse(*p);
|
||||
parser.flush();
|
||||
|
||||
return;
|
||||
}
|
||||
}
|
||||
|
||||
// default case - template cases has return above
|
||||
out << getData();
|
||||
}
|
||||
|
||||
}
|
||||
@ -1,52 +0,0 @@
|
||||
/*
|
||||
* Copyright (C) 2007 Tommi Maekitalo
|
||||
*
|
||||
* This program is free software; you can redistribute it and/or
|
||||
* modify it under the terms of the GNU General Public License as
|
||||
* published by the Free Software Foundation; either version 2 of the
|
||||
* License, or (at your option) any later version.
|
||||
*
|
||||
* This program is distributed in the hope that it will be useful, but
|
||||
* is provided AS IS, WITHOUT ANY WARRANTY; without even the implied
|
||||
* warranty of MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE, and
|
||||
* NON-INFRINGEMENT. See the GNU General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU General Public License
|
||||
* along with this program; if not, write to the Free Software
|
||||
* Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
|
||||
*
|
||||
*/
|
||||
|
||||
#include <zim/articlesearch.h>
|
||||
|
||||
namespace zim
|
||||
{
|
||||
/// Returns every article of articleFile whose title contains `expr` as a
/// (case-sensitive) substring, in iteration order. Walks the whole file on
/// each call; the title cache below is not implemented yet.
ArticleSearch::Results ArticleSearch::search(const std::string& expr)
{
  Results matches;

  // TODO: implement title-cache
#if 0
  if (titles.empty())
  {
    for (File::const_iterator it = articleFile.begin(); it != articleFile.end(); ++it)
    {
      if (article.isMainArticle()
        && article.getLibraryMimeType() == zim::Dirent::zimMimeTextHtml
        && article.getNamespace() == 'A')
      {
        titles.push_back(article.getTitle());
      }
    }
  }
#endif

  File::const_iterator it = articleFile.begin();
  while (it != articleFile.end())
  {
    if (it->getTitle().find(expr) != std::string::npos)
      matches.push_back(*it);
    ++it;
  }

  return matches;
}
|
||||
}
|
||||
@ -1,317 +0,0 @@
|
||||
/*
|
||||
* Copyright (C) 2009 Tommi Maekitalo
|
||||
*
|
||||
* This program is free software; you can redistribute it and/or
|
||||
* modify it under the terms of the GNU General Public License as
|
||||
* published by the Free Software Foundation; either version 2 of the
|
||||
* License, or (at your option) any later version.
|
||||
*
|
||||
* This program is distributed in the hope that it will be useful, but
|
||||
* is provided AS IS, WITHOUT ANY WARRANTY; without even the implied
|
||||
* warranty of MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE, and
|
||||
* NON-INFRINGEMENT. See the GNU General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU General Public License
|
||||
* along with this program; if not, write to the Free Software
|
||||
* Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
|
||||
*
|
||||
*/
|
||||
|
||||
#include <zim/cluster.h>
|
||||
#include <zim/blob.h>
|
||||
#include <zim/endian.h>
|
||||
#include <stdlib.h>
|
||||
#include <sstream>
|
||||
|
||||
#include "log.h"
|
||||
|
||||
#include "config.h"
|
||||
|
||||
#ifdef ENABLE_ZLIB
|
||||
#include <zim/deflatestream.h>
|
||||
#include <zim/inflatestream.h>
|
||||
#endif
|
||||
|
||||
#ifdef ENABLE_BZIP2
|
||||
#include <zim/bzip2stream.h>
|
||||
#include <zim/bunzip2stream.h>
|
||||
#endif
|
||||
|
||||
#ifdef ENABLE_LZMA
|
||||
#include <zim/lzmastream.h>
|
||||
#include <zim/unlzmastream.h>
|
||||
#endif
|
||||
|
||||
log_define("zim.cluster")
|
||||
|
||||
#define log_debug1(e)
|
||||
|
||||
namespace zim
|
||||
{
|
||||
/// A default-constructed cluster holds no implementation object yet.
Cluster::Cluster() : impl(0)
{
}
|
||||
|
||||
/// Returns the implementation object, creating it on first use.
ClusterImpl* Cluster::getImpl()
{
  if (impl.getPointer() != 0)
    return impl;

  impl = new ClusterImpl();
  return impl;
}
|
||||
|
||||
/// Creates an empty cluster using the default compression setting.
ClusterImpl::ClusterImpl() : compression(zimcompDefault)
{
  // The offset table always starts with 0 (start of the first blob);
  // addBlob() appends the end offset of every blob after it.
  offsets.push_back(0);
}
|
||||
|
||||
void ClusterImpl::read(std::istream& in)
|
||||
{
|
||||
log_debug1("read");
|
||||
|
||||
// read first offset, which specifies, how many offsets we need to read
|
||||
size_type offset;
|
||||
in.read(reinterpret_cast<char*>(&offset), sizeof(offset));
|
||||
if (in.fail())
|
||||
return;
|
||||
|
||||
offset = fromLittleEndian(&offset);
|
||||
|
||||
size_type n = offset / 4;
|
||||
size_type a = offset;
|
||||
|
||||
log_debug1("first offset is " << offset << " n=" << n << " a=" << a);
|
||||
|
||||
// read offsets
|
||||
offsets.clear();
|
||||
data.clear();
|
||||
offsets.reserve(n);
|
||||
offsets.push_back(0);
|
||||
while (--n)
|
||||
{
|
||||
in.read(reinterpret_cast<char*>(&offset), sizeof(offset));
|
||||
if (in.fail())
|
||||
{
|
||||
log_debug1("fail at " << n);
|
||||
return;
|
||||
}
|
||||
offset = fromLittleEndian(&offset);
|
||||
log_debug1("offset=" << offset << '(' << offset-a << ')');
|
||||
offsets.push_back(offset - a);
|
||||
}
|
||||
|
||||
// last offset points past the end of the cluster, so we know now, how may bytes to read
|
||||
if (offsets.size() > 1)
|
||||
{
|
||||
n = offsets.back() - offsets.front();
|
||||
data.resize(n);
|
||||
if (n > 0)
|
||||
{
|
||||
log_debug1("read " << n << " bytes of data");
|
||||
in.read(&(data[0]), n);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
void ClusterImpl::write(std::ostream& out) const
|
||||
{
|
||||
size_type a = offsets.size() * sizeof(size_type);
|
||||
for (Offsets::const_iterator it = offsets.begin(); it != offsets.end(); ++it)
|
||||
{
|
||||
size_type o = *it;
|
||||
o += a;
|
||||
o = fromLittleEndian(&o);
|
||||
out.write(reinterpret_cast<const char*>(&o), sizeof(size_type));
|
||||
}
|
||||
|
||||
out.write(&(data[0]), data.size());
|
||||
}
|
||||
|
||||
void ClusterImpl::addBlob(const Blob& blob)
|
||||
{
|
||||
log_debug1("addBlob(ptr, " << blob.size() << ')');
|
||||
data.insert(data.end(), blob.data(), blob.end());
|
||||
offsets.push_back(data.size());
|
||||
}
|
||||
|
||||
/// Returns blob number n. A blob of size zero is returned as a
/// default-constructed Blob; otherwise the Blob is built from this cluster,
/// the blob's data pointer and its size.
Blob ClusterImpl::getBlob(size_type n) const
{
  if (getSize(n) > 0)
    return Blob(const_cast<ClusterImpl*>(this), getData(n), getSize(n));

  return Blob();
}
|
||||
|
||||
void ClusterImpl::clear()
|
||||
{
|
||||
offsets.clear();
|
||||
data.clear();
|
||||
offsets.push_back(0);
|
||||
}
|
||||
|
||||
void ClusterImpl::addBlob(const char* data, unsigned size)
|
||||
{
|
||||
addBlob(Blob(data, size));
|
||||
}
|
||||
|
||||
/// Returns blob number n of this cluster.
///
/// A default-constructed Cluster has no implementation object yet (it is
/// only created by getImpl()); such a cluster holds no blobs, so an empty
/// Blob is returned instead of dereferencing a null pointer.
Blob Cluster::getBlob(size_type n) const
{
  if (impl.getPointer() == 0)
    return Blob();

  return impl->getBlob(n);
}
|
||||
|
||||
std::istream& operator>> (std::istream& in, ClusterImpl& clusterImpl)
|
||||
{
|
||||
log_trace("read cluster");
|
||||
|
||||
char c;
|
||||
in.get(c);
|
||||
clusterImpl.setCompression(static_cast<CompressionType>(c));
|
||||
|
||||
switch (static_cast<CompressionType>(c))
|
||||
{
|
||||
case zimcompDefault:
|
||||
case zimcompNone:
|
||||
clusterImpl.read(in);
|
||||
break;
|
||||
|
||||
case zimcompZip:
|
||||
{
|
||||
#ifdef ENABLE_ZLIB
|
||||
log_debug("uncompress data (zlib)");
|
||||
zim::InflateStream is(in);
|
||||
is.exceptions(std::ios::failbit | std::ios::badbit);
|
||||
clusterImpl.read(is);
|
||||
#else
|
||||
throw std::runtime_error("zlib not enabled in this library");
|
||||
#endif
|
||||
break;
|
||||
}
|
||||
|
||||
case zimcompBzip2:
|
||||
{
|
||||
#ifdef ENABLE_BZIP2
|
||||
log_debug("uncompress data (bzip2)");
|
||||
zim::Bunzip2Stream is(in);
|
||||
is.exceptions(std::ios::failbit | std::ios::badbit);
|
||||
clusterImpl.read(is);
|
||||
#else
|
||||
throw std::runtime_error("bzip2 not enabled in this library");
|
||||
#endif
|
||||
break;
|
||||
}
|
||||
|
||||
case zimcompLzma:
|
||||
{
|
||||
#ifdef ENABLE_LZMA
|
||||
log_debug("uncompress data (lzma)");
|
||||
zim::UnlzmaStream is(in);
|
||||
is.exceptions(std::ios::failbit | std::ios::badbit);
|
||||
clusterImpl.read(is);
|
||||
#else
|
||||
throw std::runtime_error("lzma not enabled in this library");
|
||||
#endif
|
||||
break;
|
||||
}
|
||||
|
||||
default:
|
||||
log_error("invalid compression flag " << c);
|
||||
in.setstate(std::ios::failbit);
|
||||
break;
|
||||
}
|
||||
|
||||
return in;
|
||||
}
|
||||
|
||||
std::istream& operator>> (std::istream& in, Cluster& cluster)
|
||||
{
|
||||
return in >> *cluster.getImpl();
|
||||
}
|
||||
|
||||
std::ostream& operator<< (std::ostream& out, const ClusterImpl& clusterImpl)
|
||||
{
|
||||
log_trace("write cluster");
|
||||
|
||||
out.put(static_cast<char>(clusterImpl.getCompression()));
|
||||
|
||||
switch(clusterImpl.getCompression())
|
||||
{
|
||||
case zimcompDefault:
|
||||
case zimcompNone:
|
||||
clusterImpl.write(out);
|
||||
break;
|
||||
|
||||
case zimcompZip:
|
||||
{
|
||||
#ifdef ENABLE_ZLIB
|
||||
log_debug("compress data (zlib)");
|
||||
zim::DeflateStream os(out);
|
||||
os.exceptions(std::ios::failbit | std::ios::badbit);
|
||||
clusterImpl.write(os);
|
||||
os.flush();
|
||||
#else
|
||||
throw std::runtime_error("zlib not enabled in this library");
|
||||
#endif
|
||||
break;
|
||||
}
|
||||
|
||||
case zimcompBzip2:
|
||||
{
|
||||
#ifdef ENABLE_BZIP2
|
||||
log_debug("compress data (bzip2)");
|
||||
zim::Bzip2Stream os(out);
|
||||
os.exceptions(std::ios::failbit | std::ios::badbit);
|
||||
clusterImpl.write(os);
|
||||
os.end();
|
||||
#else
|
||||
throw std::runtime_error("bzip2 not enabled in this library");
|
||||
#endif
|
||||
break;
|
||||
}
|
||||
|
||||
case zimcompLzma:
|
||||
{
|
||||
#ifdef ENABLE_LZMA
|
||||
uint32_t lzmaPreset = 3 | LZMA_PRESET_EXTREME;
|
||||
/**
|
||||
* read lzma preset from environment
|
||||
* ZIM_LZMA_PRESET is a number followed optionally by a
|
||||
* suffix 'e'. The number gives the preset and the suffix tells,
|
||||
* if LZMA_PRESET_EXTREME should be set.
|
||||
* e.g.:
|
||||
* ZIM_LZMA_LEVEL=9 => 9
|
||||
* ZIM_LZMA_LEVEL=3e => 3 + extreme
|
||||
*/
|
||||
const char* e = ::getenv("ZIM_LZMA_LEVEL");
|
||||
if (e)
|
||||
{
|
||||
char flag = '\0';
|
||||
std::istringstream s(e);
|
||||
s >> lzmaPreset >> flag;
|
||||
if (flag == 'e')
|
||||
lzmaPreset |= LZMA_PRESET_EXTREME;
|
||||
}
|
||||
|
||||
log_debug("compress data (lzma, " << std::hex << lzmaPreset << ")");
|
||||
zim::LzmaStream os(out, lzmaPreset);
|
||||
os.exceptions(std::ios::failbit | std::ios::badbit);
|
||||
clusterImpl.write(os);
|
||||
os.end();
|
||||
#else
|
||||
throw std::runtime_error("lzma not enabled in this library");
|
||||
#endif
|
||||
break;
|
||||
}
|
||||
|
||||
default:
|
||||
std::ostringstream msg;
|
||||
msg << "invalid compression flag " << clusterImpl.getCompression();
|
||||
log_error(msg.str());
|
||||
throw std::runtime_error(msg.str());
|
||||
}
|
||||
|
||||
return out;
|
||||
}
|
||||
|
||||
std::ostream& operator<< (std::ostream& out, const Cluster& cluster)
|
||||
{
|
||||
return out << *cluster.impl;
|
||||
}
|
||||
}
|
||||
@ -1,245 +0,0 @@
|
||||
/* src/zimlib/src/config.h.in. Generated from configure.ac by autoheader. */
|
||||
|
||||
/* set zim cluster cache size to number of cached chunks */
|
||||
#undef CLUSTER_CACHE_SIZE
|
||||
|
||||
/* set zim dirent cache size to number of cached chunks */
|
||||
#undef DIRENT_CACHE_SIZE
|
||||
|
||||
/* defined if lzma compression is enabled */
|
||||
#undef ENABLE_LZMA
|
||||
|
||||
/* Define to 1 if you have the <dlfcn.h> header file. */
|
||||
#undef HAVE_DLFCN_H
|
||||
|
||||
/* Define to 1 if you have the <fcntl.h> header file. */
|
||||
#undef HAVE_FCNTL_H
|
||||
|
||||
/* Define to 1 if you have the <float.h> header file. */
|
||||
#undef HAVE_FLOAT_H
|
||||
|
||||
/* Define to 1 if you have the `fork' function. */
|
||||
#undef HAVE_FORK
|
||||
|
||||
/* Define to 1 if you have the `getcwd' function. */
|
||||
#undef HAVE_GETCWD
|
||||
|
||||
/* Define to 1 if you have the `gettimeofday' function. */
|
||||
#undef HAVE_GETTIMEOFDAY
|
||||
|
||||
/* Define to 1 if you have the <inttypes.h> header file. */
|
||||
#undef HAVE_INTTYPES_H
|
||||
|
||||
/* Define to 1 if you have the `clucene' library (-lclucene). */
|
||||
#undef HAVE_LIBCLUCENE
|
||||
|
||||
/* Define to 1 if you have the <libintl.h> header file. */
|
||||
#undef HAVE_LIBINTL_H
|
||||
|
||||
/* Define to 1 if you have the `lzma' library (-llzma). */
|
||||
#undef HAVE_LIBLZMA
|
||||
|
||||
/* Define to 1 if you have the `microhttpd' library (-lmicrohttpd). */
|
||||
#undef HAVE_LIBMICROHTTPD
|
||||
|
||||
/* Define to 1 if you have the `z' library (-lz). */
|
||||
#undef HAVE_LIBZ
|
||||
|
||||
/* Define to 1 if you have the <limits.h> header file. */
|
||||
#undef HAVE_LIMITS_H
|
||||
|
||||
/* Define to 1 if your system has a GNU libc compatible `malloc' function, and
|
||||
to 0 otherwise. */
|
||||
#undef HAVE_MALLOC
|
||||
|
||||
/* Define to 1 if you have the `memmove' function. */
|
||||
#undef HAVE_MEMMOVE
|
||||
|
||||
/* Define to 1 if you have the <memory.h> header file. */
|
||||
#undef HAVE_MEMORY_H
|
||||
|
||||
/* Define to 1 if you have the `memset' function. */
|
||||
#undef HAVE_MEMSET
|
||||
|
||||
/* Define to 1 if you have the `pow' function. */
|
||||
#undef HAVE_POW
|
||||
|
||||
/* Define to 1 if the system has the type `ptrdiff_t'. */
|
||||
#undef HAVE_PTRDIFF_T
|
||||
|
||||
/* Define to 1 if you have the `regcomp' function. */
|
||||
#undef HAVE_REGCOMP
|
||||
|
||||
/* Define to 1 if you have the `sqrt' function. */
|
||||
#undef HAVE_SQRT
|
||||
|
||||
/* Define to 1 if you have the `stat64' function. */
|
||||
#undef HAVE_STAT64
|
||||
|
||||
/* Define to 1 if stdbool.h conforms to C99. */
|
||||
#undef HAVE_STDBOOL_H
|
||||
|
||||
/* Define to 1 if you have the <stddef.h> header file. */
|
||||
#undef HAVE_STDDEF_H
|
||||
|
||||
/* Define to 1 if you have the <stdint.h> header file. */
|
||||
#undef HAVE_STDINT_H
|
||||
|
||||
/* Define to 1 if you have the <stdlib.h> header file. */
|
||||
#undef HAVE_STDLIB_H
|
||||
|
||||
/* Define to 1 if you have the `strcasecmp' function. */
|
||||
#undef HAVE_STRCASECMP
|
||||
|
||||
/* Define to 1 if you have the `strchr' function. */
|
||||
#undef HAVE_STRCHR
|
||||
|
||||
/* Define to 1 if you have the `strdup' function. */
|
||||
#undef HAVE_STRDUP
|
||||
|
||||
/* Define to 1 if you have the `strerror' function. */
|
||||
#undef HAVE_STRERROR
|
||||
|
||||
/* Define to 1 if you have the <strings.h> header file. */
|
||||
#undef HAVE_STRINGS_H
|
||||
|
||||
/* Define to 1 if you have the <string.h> header file. */
|
||||
#undef HAVE_STRING_H
|
||||
|
||||
/* Define to 1 if you have the `strtol' function. */
|
||||
#undef HAVE_STRTOL
|
||||
|
||||
/* Define to 1 if you have the <sys/socket.h> header file. */
|
||||
#undef HAVE_SYS_SOCKET_H
|
||||
|
||||
/* Define to 1 if you have the <sys/stat.h> header file. */
|
||||
#undef HAVE_SYS_STAT_H
|
||||
|
||||
/* Define to 1 if you have the <sys/time.h> header file. */
|
||||
#undef HAVE_SYS_TIME_H
|
||||
|
||||
/* Define to 1 if you have the <sys/types.h> header file. */
|
||||
#undef HAVE_SYS_TYPES_H
|
||||
|
||||
/* Define to 1 if you have the <unistd.h> header file. */
|
||||
#undef HAVE_UNISTD_H
|
||||
|
||||
/* Define to 1 if you have the `vfork' function. */
|
||||
#undef HAVE_VFORK
|
||||
|
||||
/* Define to 1 if you have the <vfork.h> header file. */
|
||||
#undef HAVE_VFORK_H
|
||||
|
||||
/* Define to 1 if you have the <wchar.h> header file. */
|
||||
#undef HAVE_WCHAR_H
|
||||
|
||||
/* Define to 1 if `fork' works. */
|
||||
#undef HAVE_WORKING_FORK
|
||||
|
||||
/* Define to 1 if `vfork' works. */
|
||||
#undef HAVE_WORKING_VFORK
|
||||
|
||||
/* Define to 1 if the system has the type `_Bool'. */
|
||||
#undef HAVE__BOOL
|
||||
|
||||
/* Define to the sub-directory in which libtool stores uninstalled libraries.
|
||||
*/
|
||||
#undef LT_OBJDIR
|
||||
|
||||
/* set lzma uncompress memory size to number of MB */
|
||||
#undef LZMA_MEMORY_SIZE
|
||||
|
||||
/* Name of package */
|
||||
#undef PACKAGE
|
||||
|
||||
/* Define to the address where bug reports for this package should be sent. */
|
||||
#undef PACKAGE_BUGREPORT
|
||||
|
||||
/* Define to the full name of this package. */
|
||||
#undef PACKAGE_NAME
|
||||
|
||||
/* Define to the full name and version of this package. */
|
||||
#undef PACKAGE_STRING
|
||||
|
||||
/* Define to the one symbol short name of this package. */
|
||||
#undef PACKAGE_TARNAME
|
||||
|
||||
/* Define to the home page for this package. */
|
||||
#undef PACKAGE_URL
|
||||
|
||||
/* Define to the version of this package. */
|
||||
#undef PACKAGE_VERSION
|
||||
|
||||
/* Define to 1 if you have the ANSI C header files. */
|
||||
#undef STDC_HEADERS
|
||||
|
||||
/* Version number of package */
|
||||
#undef VERSION
|
||||
|
||||
/* Define for Solaris 2.5.1 so the uint32_t typedef from <sys/synch.h>,
|
||||
<pthread.h>, or <semaphore.h> is not used. If the typedef were allowed, the
|
||||
#define below would cause a syntax error. */
|
||||
#undef _UINT32_T
|
||||
|
||||
/* Define for Solaris 2.5.1 so the uint64_t typedef from <sys/synch.h>,
|
||||
<pthread.h>, or <semaphore.h> is not used. If the typedef were allowed, the
|
||||
#define below would cause a syntax error. */
|
||||
#undef _UINT64_T
|
||||
|
||||
/* Define for Solaris 2.5.1 so the uint8_t typedef from <sys/synch.h>,
|
||||
<pthread.h>, or <semaphore.h> is not used. If the typedef were allowed, the
|
||||
#define below would cause a syntax error. */
|
||||
#undef _UINT8_T
|
||||
|
||||
/* Define to `__inline__' or `__inline' if that's what the C compiler
|
||||
calls it, or to nothing if 'inline' is not supported under any name. */
|
||||
#ifndef __cplusplus
|
||||
#undef inline
|
||||
#endif
|
||||
|
||||
/* Define to the type of a signed integer type of width exactly 16 bits if
|
||||
such a type exists and the standard includes do not define it. */
|
||||
#undef int16_t
|
||||
|
||||
/* Define to the type of a signed integer type of width exactly 32 bits if
|
||||
such a type exists and the standard includes do not define it. */
|
||||
#undef int32_t
|
||||
|
||||
/* Define to the type of a signed integer type of width exactly 64 bits if
|
||||
such a type exists and the standard includes do not define it. */
|
||||
#undef int64_t
|
||||
|
||||
/* Define to the type of a signed integer type of width exactly 8 bits if such
|
||||
a type exists and the standard includes do not define it. */
|
||||
#undef int8_t
|
||||
|
||||
/* Define to rpl_malloc if the replacement function should be used. */
|
||||
#undef malloc
|
||||
|
||||
/* Define to `long int' if <sys/types.h> does not define. */
|
||||
#undef off_t
|
||||
|
||||
/* Define to `int' if <sys/types.h> does not define. */
|
||||
#undef pid_t
|
||||
|
||||
/* Define to `unsigned int' if <sys/types.h> does not define. */
|
||||
#undef size_t
|
||||
|
||||
/* Define to the type of an unsigned integer type of width exactly 16 bits if
|
||||
such a type exists and the standard includes do not define it. */
|
||||
#undef uint16_t
|
||||
|
||||
/* Define to the type of an unsigned integer type of width exactly 32 bits if
|
||||
such a type exists and the standard includes do not define it. */
|
||||
#undef uint32_t
|
||||
|
||||
/* Define to the type of an unsigned integer type of width exactly 64 bits if
|
||||
such a type exists and the standard includes do not define it. */
|
||||
#undef uint64_t
|
||||
|
||||
/* Define to the type of an unsigned integer type of width exactly 8 bits if
|
||||
such a type exists and the standard includes do not define it. */
|
||||
#undef uint8_t
|
||||
|
||||
/* Define as `fork' if `vfork' does not work. */
|
||||
#undef vfork
|
||||
@ -1,165 +0,0 @@
|
||||
/*
|
||||
* Copyright (C) 2006 Tommi Maekitalo
|
||||
*
|
||||
* This program is free software; you can redistribute it and/or
|
||||
* modify it under the terms of the GNU General Public License as
|
||||
* published by the Free Software Foundation; either version 2 of the
|
||||
* License, or (at your option) any later version.
|
||||
*
|
||||
* This program is distributed in the hope that it will be useful, but
|
||||
* is provided AS IS, WITHOUT ANY WARRANTY; without even the implied
|
||||
* warranty of MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE, and
|
||||
* NON-INFRINGEMENT. See the GNU General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU General Public License
|
||||
* along with this program; if not, write to the Free Software
|
||||
* Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
|
||||
*
|
||||
*/
|
||||
|
||||
#include <zim/dirent.h>
|
||||
#include <zim/zim.h>
|
||||
#include <zim/endian.h>
|
||||
#include "log.h"
|
||||
#include <algorithm>
|
||||
|
||||
log_define("zim.dirent")
|
||||
|
||||
namespace zim
|
||||
{
|
||||
//////////////////////////////////////////////////////////////////////
|
||||
// Dirent
|
||||
//
|
||||
|
||||
std::ostream& operator<< (std::ostream& out, const Dirent& dirent)
|
||||
{
|
||||
union
|
||||
{
|
||||
char d[16];
|
||||
long a;
|
||||
} header;
|
||||
toLittleEndian(dirent.getMimeType(), header.d);
|
||||
header.d[2] = static_cast<char>(dirent.getParameter().size());
|
||||
header.d[3] = dirent.getNamespace();
|
||||
|
||||
log_debug("title=" << dirent.getTitle() << " title.size()=" << dirent.getTitle().size());
|
||||
|
||||
toLittleEndian(dirent.getVersion(), header.d + 4);
|
||||
|
||||
if (dirent.isRedirect())
|
||||
{
|
||||
toLittleEndian(dirent.getRedirectIndex(), header.d + 8);
|
||||
out.write(header.d, 12);
|
||||
}
|
||||
else
|
||||
{
|
||||
toLittleEndian(dirent.getClusterNumber(), header.d + 8);
|
||||
toLittleEndian(dirent.getBlobNumber(), header.d + 12);
|
||||
out.write(header.d, 16);
|
||||
}
|
||||
|
||||
out << dirent.getUrl() << '\0';
|
||||
|
||||
std::string t = dirent.getTitle();
|
||||
if (t != dirent.getUrl())
|
||||
out << t;
|
||||
out << '\0' << dirent.getParameter();
|
||||
|
||||
return out;
|
||||
}
|
||||
|
||||
std::istream& operator>> (std::istream& in, Dirent& dirent)
|
||||
{
|
||||
union
|
||||
{
|
||||
long a;
|
||||
char d[16];
|
||||
} header;
|
||||
|
||||
in.read(header.d, 12);
|
||||
if (in.fail())
|
||||
{
|
||||
log_warn("error reading dirent header");
|
||||
return in;
|
||||
}
|
||||
|
||||
if (in.gcount() != 12)
|
||||
{
|
||||
log_warn("error reading dirent header (2)");
|
||||
in.setstate(std::ios::failbit);
|
||||
return in;
|
||||
}
|
||||
|
||||
uint16_t mimeType = fromLittleEndian(reinterpret_cast<const uint16_t*>(header.d));
|
||||
bool redirect = (mimeType == std::numeric_limits<uint16_t>::max());
|
||||
char ns = header.d[3];
|
||||
size_type version = fromLittleEndian(reinterpret_cast<const size_type*>(header.d + 4));
|
||||
dirent.setVersion(version);
|
||||
|
||||
if (redirect)
|
||||
{
|
||||
size_type redirectIndex = fromLittleEndian(reinterpret_cast<const size_type*>(header.d + 8));
|
||||
|
||||
log_debug("redirectIndex=" << redirectIndex);
|
||||
|
||||
dirent.setRedirect(redirectIndex);
|
||||
}
|
||||
else
|
||||
{
|
||||
log_debug("read article entry");
|
||||
|
||||
in.read(header.d + 12, 4);
|
||||
if (in.fail())
|
||||
{
|
||||
log_warn("error reading article dirent header");
|
||||
return in;
|
||||
}
|
||||
|
||||
if (in.gcount() != 4)
|
||||
{
|
||||
log_warn("error reading article dirent header (2)");
|
||||
in.setstate(std::ios::failbit);
|
||||
return in;
|
||||
}
|
||||
|
||||
size_type clusterNumber = fromLittleEndian(reinterpret_cast<const size_type*>(header.d + 8));
|
||||
size_type blobNumber = fromLittleEndian(reinterpret_cast<const size_type*>(header.d + 12));
|
||||
|
||||
log_debug("mimeType=" << mimeType << " clusterNumber=" << clusterNumber << " blobNumber=" << blobNumber);
|
||||
|
||||
dirent.setArticle(mimeType, clusterNumber, blobNumber);
|
||||
}
|
||||
|
||||
char ch;
|
||||
std::string url;
|
||||
std::string title;
|
||||
std::string parameter;
|
||||
|
||||
log_debug("read url, title and parameters");
|
||||
|
||||
while (in.get(ch) && ch != '\0')
|
||||
url += ch;
|
||||
|
||||
while (in.get(ch) && ch != '\0')
|
||||
title += ch;
|
||||
|
||||
uint8_t extraLen = static_cast<uint8_t>(header.d[2]);
|
||||
while (extraLen-- > 0 && in.get(ch))
|
||||
parameter += ch;
|
||||
|
||||
dirent.setUrl(ns, url);
|
||||
dirent.setTitle(title);
|
||||
dirent.setParameter(parameter);
|
||||
|
||||
return in;
|
||||
}
|
||||
|
||||
std::string Dirent::getLongUrl() const
|
||||
{
|
||||
log_trace("Dirent::getLongUrl()");
|
||||
log_debug("namespace=" << getNamespace() << " title=" << getTitle());
|
||||
|
||||
return std::string(1, getNamespace()) + '/' + getUrl();
|
||||
}
|
||||
|
||||
}
|
||||
@ -1,58 +0,0 @@
|
||||
/*
|
||||
* Copyright (C) 2009 Tommi Maekitalo
|
||||
*
|
||||
* This program is free software; you can redistribute it and/or
|
||||
* modify it under the terms of the GNU General Public License as
|
||||
* published by the Free Software Foundation; either version 2 of the
|
||||
* License, or (at your option) any later version.
|
||||
*
|
||||
* This program is distributed in the hope that it will be useful, but
|
||||
* is provided AS IS, WITHOUT ANY WARRANTY; without even the implied
|
||||
* warranty of MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE, and
|
||||
* NON-INFRINGEMENT. See the GNU General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU General Public License
|
||||
* along with this program; if not, write to the Free Software
|
||||
* Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
|
||||
*
|
||||
*/
|
||||
|
||||
#include <sstream>
|
||||
#include <stdlib.h>
|
||||
|
||||
namespace zim
|
||||
{
|
||||
// Returns the value of the environment variable `env` parsed as an
// unsigned number.
//
// `def` is returned when the variable is not set or when its value does
// not start with a number.
unsigned envValue(const char* env, unsigned def)
{
  const char* v = ::getenv(env);
  if (v)
  {
    // Extract into a scratch variable: a failed extraction stores 0 in its
    // target (since C++11), which must not replace the caller's default.
    unsigned parsed = 0;
    std::istringstream s(v);
    if (s >> parsed)
      def = parsed;
  }
  return def;
}
|
||||
|
||||
// Returns a memory size read from the environment variable `env`.
//
// The value is an unsigned number optionally followed by a unit suffix:
// k/K, m/M or g/G multiply the number by 1024, 1024*1024 or
// 1024*1024*1024.  Any other suffix is ignored.
//
// `def` is returned when the variable is not set or when its value does
// not start with a number.  A result which does not fit into `unsigned`
// is clamped to the largest representable value instead of wrapping
// around.
unsigned envMemSize(const char* env, unsigned def)
{
  const char* v = ::getenv(env);
  if (!v)
    return def;

  unsigned number = 0;
  std::istringstream s(v);
  if (!(s >> number))
    return def;  // not a number: keep the caller's default

  char unit = '\0';
  s >> unit;

  unsigned factor = 1;
  switch (unit)
  {
    case 'k':
    case 'K': factor = 1024; break;
    case 'm':
    case 'M': factor = 1024 * 1024; break;
    case 'g':
    case 'G': factor = 1024 * 1024 * 1024; break;
  }

  // saturate instead of silently wrapping (e.g. "4G" with 32 bit unsigned)
  const unsigned maxValue = ~0u;
  if (number > maxValue / factor)
    return maxValue;

  return number * factor;
}
|
||||
}
|
||||
|
||||
@ -1,29 +0,0 @@
|
||||
/*
|
||||
* Copyright (C) 2009 Tommi Maekitalo
|
||||
*
|
||||
* This program is free software; you can redistribute it and/or
|
||||
* modify it under the terms of the GNU General Public License as
|
||||
* published by the Free Software Foundation; either version 2 of the
|
||||
* License, or (at your option) any later version.
|
||||
*
|
||||
* This program is distributed in the hope that it will be useful, but
|
||||
* is provided AS IS, WITHOUT ANY WARRANTY; without even the implied
|
||||
* warranty of MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE, and
|
||||
* NON-INFRINGEMENT. See the GNU General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU General Public License
|
||||
* along with this program; if not, write to the Free Software
|
||||
* Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
|
||||
*
|
||||
*/
|
||||
|
||||
#ifndef ZIM_ENVVALUE_H
|
||||
#define ZIM_ENVVALUE_H
|
||||
|
||||
namespace zim
|
||||
{
|
||||
// Returns the value of the environment variable `env` parsed as an unsigned
// number; `def` is returned when the variable is not set.
unsigned envValue(const char* env, unsigned def);
|
||||
// Like envValue, but the number may be followed by a unit suffix: k/K, m/M
// or g/G multiply it by 1024, 1024*1024 or 1024*1024*1024.
unsigned envMemSize(const char* env, unsigned def);
|
||||
}
|
||||
|
||||
#endif // ZIM_ENVVALUE_H
|
||||
@ -1,272 +0,0 @@
|
||||
/*
|
||||
* Copyright (C) 2006,2009 Tommi Maekitalo
|
||||
*
|
||||
* This program is free software; you can redistribute it and/or
|
||||
* modify it under the terms of the GNU General Public License as
|
||||
* published by the Free Software Foundation; either version 2 of the
|
||||
* License, or (at your option) any later version.
|
||||
*
|
||||
* This program is distributed in the hope that it will be useful, but
|
||||
* is provided AS IS, WITHOUT ANY WARRANTY; without even the implied
|
||||
* warranty of MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE, and
|
||||
* NON-INFRINGEMENT. See the GNU General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU General Public License
|
||||
* along with this program; if not, write to the Free Software
|
||||
* Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
|
||||
*
|
||||
*/
|
||||
|
||||
#include <zim/file.h>
|
||||
#include <zim/article.h>
|
||||
#include "log.h"
|
||||
#include <zim/fileiterator.h>
|
||||
|
||||
log_define("zim.file")
|
||||
|
||||
namespace zim
|
||||
{
|
||||
namespace
{
  // Converts a single hexadecimal digit ('0'-'9', 'a'-'f', 'A'-'F') to its
  // numeric value 0..15.  Returns -1 for any other character.
  int hexval(char ch)
  {
    int value = -1;

    if ('0' <= ch && ch <= '9')
      value = ch - '0';
    else if ('a' <= ch && ch <= 'f')
      value = 10 + (ch - 'a');
    else if ('A' <= ch && ch <= 'F')
      value = 10 + (ch - 'A');

    return value;
  }
}
|
||||
|
||||
// Creates an Article object referring to index `idx` of this file.
Article File::getArticle(size_type idx) const
{
  Article article(*this, idx);
  return article;
}
|
||||
|
||||
// Looks up an article by namespace and url using findx().
// Returns the article found, or a default constructed Article when there
// is no exact match.
Article File::getArticle(char ns, const std::string& url)
{
  // the closing quote after the url was missing in the trace output
  log_trace("File::getArticle('" << ns << "', \"" << url << "\")");
  std::pair<bool, const_iterator> r = findx(ns, url);
  return r.first ? *r.second : Article();
}
|
||||
|
||||
// Looks up an article by its long url ("<namespace>/<url>") using findx().
// Returns the article found, or a default constructed Article when there
// is no exact match.
Article File::getArticleByUrl(const std::string& url)
{
  // the trace used to name File::getArticle and lacked the closing quote
  log_trace("File::getArticleByUrl(\"" << url << "\")");
  std::pair<bool, const_iterator> r = findx(url);
  return r.first ? *r.second : Article();
}
|
||||
|
||||
// Creates an Article for position `idx` of the title index: the position
// is translated with impl->getIndexByTitle() and the result is used as the
// article index.
Article File::getArticleByTitle(size_type idx)
{
  const size_type articleIdx = impl->getIndexByTitle(idx);
  return Article(*this, articleIdx);
}
|
||||
|
||||
// Looks up an article by namespace and title using findxByTitle().
// Returns the article found, or a default constructed Article when there
// is no exact match.
Article File::getArticleByTitle(char ns, const std::string& title)
{
  // the closing quote after the title was missing in the trace output
  log_trace("File::getArticleByTitle('" << ns << "', \"" << title << "\")");
  std::pair<bool, const_iterator> r = findxByTitle(ns, title);
  return r.first ? *r.second : Article();
}
|
||||
|
||||
bool File::hasNamespace(char ch)
|
||||
{
|
||||
size_type off = getNamespaceBeginOffset(ch);
|
||||
return off < getCountArticles() && getDirent(off).getNamespace() == ch;
|
||||
}
|
||||
|
||||
// Returns an iterator constructed at index 0 of this file.
File::const_iterator File::begin()
{
  const_iterator first(this, 0);
  return first;
}
|
||||
|
||||
// Returns an iterator constructed at index 0 in ArticleIterator mode.
// NOTE(review): presumably this mode walks the entries in title order -
// confirm against the const_iterator implementation.
File::const_iterator File::beginByTitle()
{
  const_iterator first(this, 0, const_iterator::ArticleIterator);
  return first;
}
|
||||
|
||||
// Returns the past-the-end iterator, positioned at getCountArticles().
File::const_iterator File::end()
{
  const_iterator last(this, getCountArticles());
  return last;
}
|
||||
|
||||
std::pair<bool, File::const_iterator> File::findx(char ns, const std::string& url)
|
||||
{
|
||||
log_debug("find article by url " << ns << " \"" << url << "\", in file \"" << getFilename() << '"');
|
||||
|
||||
size_type l = getNamespaceBeginOffset(ns);
|
||||
size_type u = getNamespaceEndOffset(ns);
|
||||
|
||||
if (l == u)
|
||||
{
|
||||
log_debug("namespace " << ns << " not found");
|
||||
return std::pair<bool, const_iterator>(false, end());
|
||||
}
|
||||
|
||||
unsigned itcount = 0;
|
||||
while (u - l > 1)
|
||||
{
|
||||
++itcount;
|
||||
size_type p = l + (u - l) / 2;
|
||||
Dirent d = getDirent(p);
|
||||
|
||||
int c = ns < d.getNamespace() ? -1
|
||||
: ns > d.getNamespace() ? 1
|
||||
: url.compare(d.getUrl());
|
||||
|
||||
if (c < 0)
|
||||
u = p;
|
||||
else if (c > 0)
|
||||
l = p;
|
||||
else
|
||||
{
|
||||
log_debug("article found after " << itcount << " iterations in file \"" << getFilename() << "\" at index " << p);
|
||||
return std::pair<bool, const_iterator>(true, const_iterator(this, p));
|
||||
}
|
||||
}
|
||||
|
||||
Dirent d = getDirent(l);
|
||||
int c = url.compare(d.getUrl());
|
||||
|
||||
if (c == 0)
|
||||
{
|
||||
log_debug("article found after " << itcount << " iterations in file \"" << getFilename() << "\" at index " << l);
|
||||
return std::pair<bool, const_iterator>(true, const_iterator(this, l));
|
||||
}
|
||||
|
||||
log_debug("article not found after " << itcount << " iterations (\"" << d.getUrl() << "\" does not match)");
|
||||
return std::pair<bool, const_iterator>(false, const_iterator(this, c < 0 ? l : u));
|
||||
}
|
||||
|
||||
std::pair<bool, File::const_iterator> File::findx(const std::string& url)
|
||||
{
|
||||
if (url.size() < 2 || url[1] != '/')
|
||||
return std::pair<bool, const_iterator>(false, const_iterator());
|
||||
return findx(url[0], url.substr(2));
|
||||
}
|
||||
|
||||
std::pair<bool, File::const_iterator> File::findxByTitle(char ns, const std::string& title)
|
||||
{
|
||||
log_debug("find article by title " << ns << " \"" << title << "\", in file \"" << getFilename() << '"');
|
||||
|
||||
size_type l = getNamespaceBeginOffset(ns);
|
||||
size_type u = getNamespaceEndOffset(ns);
|
||||
|
||||
if (l == u)
|
||||
{
|
||||
log_debug("namespace " << ns << " not found");
|
||||
return std::pair<bool, const_iterator>(false, end());
|
||||
}
|
||||
|
||||
unsigned itcount = 0;
|
||||
while (u - l > 1)
|
||||
{
|
||||
++itcount;
|
||||
size_type p = l + (u - l) / 2;
|
||||
Dirent d = getDirentByTitle(p);
|
||||
|
||||
int c = ns < d.getNamespace() ? -1
|
||||
: ns > d.getNamespace() ? 1
|
||||
: title.compare(d.getTitle());
|
||||
|
||||
if (c < 0)
|
||||
u = p;
|
||||
else if (c > 0)
|
||||
l = p;
|
||||
else
|
||||
{
|
||||
log_debug("article found after " << itcount << " iterations in file \"" << getFilename() << "\" at index " << p);
|
||||
return std::pair<bool, const_iterator>(true, const_iterator(this, p, const_iterator::ArticleIterator));
|
||||
}
|
||||
}
|
||||
|
||||
Dirent d = getDirentByTitle(l);
|
||||
int c = title.compare(d.getTitle());
|
||||
|
||||
if (c == 0)
|
||||
{
|
||||
log_debug("article found after " << itcount << " iterations in file \"" << getFilename() << "\" at index " << l);
|
||||
return std::pair<bool, const_iterator>(true, const_iterator(this, l, const_iterator::ArticleIterator));
|
||||
}
|
||||
|
||||
log_debug("article not found after " << itcount << " iterations (\"" << d.getTitle() << "\" does not match)");
|
||||
return std::pair<bool, const_iterator>(false, const_iterator(this, c < 0 ? l : u, const_iterator::ArticleIterator));
|
||||
}
|
||||
|
||||
// Returns the iterator part of findx(ns, url), dropping the found flag.
File::const_iterator File::find(char ns, const std::string& url)
{
  std::pair<bool, const_iterator> r = findx(ns, url);
  return r.second;
}
|
||||
|
||||
// Returns the iterator part of findx(url), dropping the found flag.
File::const_iterator File::find(const std::string& url)
{
  std::pair<bool, const_iterator> r = findx(url);
  return r.second;
}
|
||||
|
||||
// Returns the iterator part of findxByTitle(ns, title), dropping the found
// flag.
File::const_iterator File::findByTitle(char ns, const std::string& title)
{
  std::pair<bool, const_iterator> r = findxByTitle(ns, title);
  return r.second;
}
|
||||
|
||||
std::string urldecode(const std::string& url)
|
||||
{
|
||||
std::string ret;
|
||||
enum {
|
||||
state_0,
|
||||
state_h1,
|
||||
state_h2,
|
||||
} state = state_0;
|
||||
|
||||
char ch;
|
||||
for (std::string::const_iterator it = url.begin(); it != url.end(); ++it)
|
||||
{
|
||||
switch (state)
|
||||
{
|
||||
case state_0:
|
||||
if (*it == '+')
|
||||
ret += ' ';
|
||||
else if (*it == '%')
|
||||
state = state_h1;
|
||||
else
|
||||
ret += *it;
|
||||
break;
|
||||
|
||||
case state_h1:
|
||||
if (*it >= '0' && *it <= '9'
|
||||
||*it >= 'A' && *it <= 'F'
|
||||
||*it >= 'a' && *it <= 'f')
|
||||
{
|
||||
ch = *it;
|
||||
state = state_h2;
|
||||
}
|
||||
else
|
||||
{
|
||||
ret += '%';
|
||||
ret += *it;
|
||||
state = state_0;
|
||||
}
|
||||
break;
|
||||
|
||||
case state_h2:
|
||||
if (*it >= '0' && *it <= '9'
|
||||
||*it >= 'A' && *it <= 'F'
|
||||
||*it >= 'a' && *it <= 'f')
|
||||
{
|
||||
ret += static_cast<char>(hexval(ch) * 16 + hexval(*it));
|
||||
}
|
||||
else
|
||||
{
|
||||
ret += static_cast<char>(hexval(ch));
|
||||
ret += *it;
|
||||
}
|
||||
state = state_0;
|
||||
break;
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
switch (state)
|
||||
{
|
||||
case state_h1:
|
||||
ret += '%';
|
||||
break;
|
||||
|
||||
case state_h2:
|
||||
ret += '%';
|
||||
ret += ch;
|
||||
break;
|
||||
}
|
||||
|
||||
return ret;
|
||||
}
|
||||
}
|
||||
@ -1,110 +0,0 @@
|
||||
/*
|
||||
* Copyright (C) 2008 Tommi Maekitalo
|
||||
*
|
||||
* This program is free software; you can redistribute it and/or
|
||||
* modify it under the terms of the GNU General Public License as
|
||||
* published by the Free Software Foundation; either version 2 of the
|
||||
* License, or (at your option) any later version.
|
||||
*
|
||||
* This program is distributed in the hope that it will be useful, but
|
||||
* is provided AS IS, WITHOUT ANY WARRANTY; without even the implied
|
||||
* warranty of MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE, and
|
||||
* NON-INFRINGEMENT. See the GNU General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU General Public License
|
||||
* along with this program; if not, write to the Free Software
|
||||
* Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
|
||||
*
|
||||
*/
|
||||
|
||||
#include <zim/fileheader.h>
|
||||
#include <iostream>
|
||||
#include <algorithm>
|
||||
#include "log.h"
|
||||
|
||||
log_define("zim.file.header")
|
||||
|
||||
namespace zim
|
||||
{
|
||||
// Magic number at the start of the header; written little-endian it yields
// the bytes 'Z', 'I', 'M', 0x04.
const size_type Fileheader::zimMagic = 0x044d495a; // ="ZIM^d"
|
||||
// Format version written into the header and required when reading one.
const size_type Fileheader::zimVersion = 5;
|
||||
// Number of bytes of the serialized header.
const size_type Fileheader::size = 80;
|
||||
|
||||
std::ostream& operator<< (std::ostream& out, const Fileheader& fh)
|
||||
{
|
||||
char header[Fileheader::size];
|
||||
toLittleEndian(Fileheader::zimMagic, header);
|
||||
toLittleEndian(Fileheader::zimVersion, header + 4);
|
||||
std::copy(fh.getUuid().data, fh.getUuid().data + sizeof(Uuid), header + 8);
|
||||
toLittleEndian(fh.getArticleCount(), header + 24);
|
||||
toLittleEndian(fh.getClusterCount(), header + 28);
|
||||
toLittleEndian(fh.getUrlPtrPos(), header + 32);
|
||||
toLittleEndian(fh.getTitleIdxPos(), header + 40);
|
||||
toLittleEndian(fh.getClusterPtrPos(), header + 48);
|
||||
toLittleEndian(fh.getMimeListPos(), header + 56);
|
||||
toLittleEndian(fh.getMainPage(), header + 64);
|
||||
toLittleEndian(fh.getLayoutPage(), header + 68);
|
||||
toLittleEndian(fh.getChecksumPos(), header + 72);
|
||||
|
||||
out.write(header, Fileheader::size);
|
||||
|
||||
return out;
|
||||
}
|
||||
|
||||
std::istream& operator>> (std::istream& in, Fileheader& fh)
|
||||
{
|
||||
char header[Fileheader::size];
|
||||
in.read(header, Fileheader::size);
|
||||
if (in.fail())
|
||||
return in;
|
||||
if (static_cast<size_type>(in.gcount()) != Fileheader::size)
|
||||
{
|
||||
in.setstate(std::ios::failbit);
|
||||
return in;
|
||||
}
|
||||
|
||||
size_type magicNumber = fromLittleEndian(reinterpret_cast<const size_type*>(header));
|
||||
if (magicNumber != Fileheader::zimMagic)
|
||||
{
|
||||
log_error("invalid magic number " << magicNumber << " found - "
|
||||
<< Fileheader::zimMagic << " expected");
|
||||
in.setstate(std::ios::failbit);
|
||||
return in;
|
||||
}
|
||||
|
||||
uint16_t version = fromLittleEndian(reinterpret_cast<const uint16_t*>(header + 4));
|
||||
if (version != static_cast<size_type>(Fileheader::zimVersion))
|
||||
{
|
||||
log_error("invalid zimfile version " << version << " found - "
|
||||
<< Fileheader::zimVersion << " expected");
|
||||
in.setstate(std::ios::failbit);
|
||||
return in;
|
||||
}
|
||||
|
||||
Uuid uuid;
|
||||
std::copy(header + 8, header + 24, uuid.data);
|
||||
size_type articleCount = fromLittleEndian(reinterpret_cast<const size_type*>(header + 24));
|
||||
size_type clusterCount = fromLittleEndian(reinterpret_cast<const size_type*>(header + 28));
|
||||
offset_type urlPtrPos = fromLittleEndian(reinterpret_cast<const offset_type*>(header + 32));
|
||||
offset_type titleIdxPos = fromLittleEndian(reinterpret_cast<const offset_type*>(header + 40));
|
||||
offset_type clusterPtrPos = fromLittleEndian(reinterpret_cast<const offset_type*>(header + 48));
|
||||
offset_type mimeListPos = fromLittleEndian(reinterpret_cast<const offset_type*>(header + 56));
|
||||
size_type mainPage = fromLittleEndian(reinterpret_cast<const size_type*>(header + 64));
|
||||
size_type layoutPage = fromLittleEndian(reinterpret_cast<const size_type*>(header + 68));
|
||||
offset_type checksumPos = fromLittleEndian(reinterpret_cast<const offset_type*>(header + 72));
|
||||
|
||||
fh.setUuid(uuid);
|
||||
fh.setArticleCount(articleCount);
|
||||
fh.setClusterCount(clusterCount);
|
||||
fh.setUrlPtrPos(urlPtrPos);
|
||||
fh.setTitleIdxPos(titleIdxPos);
|
||||
fh.setClusterPtrPos(clusterPtrPos);
|
||||
fh.setMimeListPos(mimeListPos);
|
||||
fh.setMainPage(mainPage);
|
||||
fh.setLayoutPage(layoutPage);
|
||||
fh.setChecksumPos(checksumPos);
|
||||
|
||||
return in;
|
||||
}
|
||||
|
||||
}
|
||||
@ -1,360 +0,0 @@
|
||||
/*
|
||||
* Copyright (C) 2006,2009 Tommi Maekitalo
|
||||
*
|
||||
* This program is free software; you can redistribute it and/or
|
||||
* modify it under the terms of the GNU General Public License as
|
||||
* published by the Free Software Foundation; either version 2 of the
|
||||
* License, or (at your option) any later version.
|
||||
*
|
||||
* This program is distributed in the hope that it will be useful, but
|
||||
* is provided AS IS, WITHOUT ANY WARRANTY; without even the implied
|
||||
* warranty of MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE, and
|
||||
* NON-INFRINGEMENT. See the GNU General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU General Public License
|
||||
* along with this program; if not, write to the Free Software
|
||||
* Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
|
||||
*
|
||||
*/
|
||||
|
||||
#include <zim/fileimpl.h>
|
||||
#include <zim/error.h>
|
||||
#include <zim/dirent.h>
|
||||
#include <zim/endian.h>
|
||||
#include <sys/types.h>
|
||||
#include <sys/stat.h>
|
||||
#include <sstream>
|
||||
#include <errno.h>
|
||||
#include <cstring>
|
||||
#include "config.h"
|
||||
#include "log.h"
|
||||
#include "envvalue.h"
|
||||
|
||||
#ifdef WITH_CXXTOOLS
|
||||
# include <cxxtools/systemerror.h>
|
||||
# include <cxxtools/md5stream.h>
|
||||
#else
|
||||
# include "md5stream.h"
|
||||
#endif
|
||||
|
||||
log_define("zim.file.impl")
|
||||
|
||||
namespace zim
|
||||
{
|
||||
//////////////////////////////////////////////////////////////////////
|
||||
// FileImpl
|
||||
//
|
||||
// Opens the zim file named by fname and prepares it for reading: the file
// header is parsed, the offset of the last cluster is checked against the
// file size and the list of '\0'-terminated mime type strings is loaded
// (the list ends at the first empty string).
//
// Throws ZimFileFormatError when the file cannot be opened, the header or
// the mime type list cannot be read, or the last cluster offset lies beyond
// the end of the file.
FileImpl::FileImpl(const char* fname)
  : zimFile(fname),
    direntCache(envValue("ZIM_DIRENTCACHE", DIRENT_CACHE_SIZE)),
    clusterCache(envValue("ZIM_CLUSTERCACHE", CLUSTER_CACHE_SIZE))
{
  log_trace("read file \"" << fname << '"');

  if (!zimFile)
    throw ZimFileFormatError(std::string("can't open zim-file \"") + fname + '"');

  filename = fname;

  // read header
  zimFile >> header;
  if (zimFile.fail())
    throw ZimFileFormatError("error reading zim-file header");

  if (getCountClusters() != 0)
  {
    // a cluster pointer past the end of the file means the file is damaged
    offset_type lastOffset = getClusterOffset(getCountClusters() - 1);
    log_debug("last offset=" << lastOffset << " file size=" << zimFile.fsize());
    if (lastOffset > static_cast<offset_type>(zimFile.fsize()))
    {
      log_fatal("last offset (" << lastOffset << ") larger than file size (" << zimFile.fsize() << ')');
      throw ZimFileFormatError("last cluster offset larger than file size; file corrupt");
    }
  }
  else
  {
    log_warn("no clusters found");
  }

  // read mime types
  zimFile.seekg(header.getMimeListPos());
  std::string mimeType;
  for (;;)
  {
    std::getline(zimFile, mimeType, '\0');

    if (zimFile.fail())
      throw ZimFileFormatError("error reading mime type list");

    // an empty string terminates the list
    if (mimeType.empty())
      break;

    mimeTypes.push_back(mimeType);
  }
}
|
||||
|
||||
// Returns the directory entry with index idx.
//
// The entry is taken from the dirent cache when present; otherwise its file
// position is looked up in the url pointer list, the entry is read from the
// file and stored in the cache.
//
// Throws ZimFileFormatError when idx is out of range, the file stream is in
// an error state, or seeking to / reading the entry fails.
Dirent FileImpl::getDirent(size_type idx)
{
  log_trace("FileImpl::getDirent(" << idx << ')');

  // directory entries are small, so a small read buffer is requested
  zimFile.setBufsize(64);

  if (idx >= getCountArticles())
    throw ZimFileFormatError("article index out of range");

  if (!zimFile)
  {
    log_warn("file in error state");
    throw ZimFileFormatError("file in error state");
  }

  std::pair<bool, Dirent> cached = direntCache.getx(idx);
  if (cached.first)
  {
    log_debug("dirent " << idx << " found in cache; hits " << direntCache.getHits() << " misses " << direntCache.getMisses() << " ratio " << direntCache.hitRatio() * 100 << "% fillfactor " << direntCache.fillfactor());
    return cached.second;
  }

  log_debug("dirent " << idx << " not found in cache; hits " << direntCache.getHits() << " misses " << direntCache.getMisses() << " ratio " << direntCache.hitRatio() * 100 << "% fillfactor " << direntCache.fillfactor());

  // cache miss: locate the entry through the url pointer list
  offset_type direntPos = getOffset(header.getUrlPtrPos(), idx);

  zimFile.seekg(direntPos);
  if (!zimFile)
  {
    log_warn("failed to seek to directory entry");
    throw ZimFileFormatError("failed to seek to directory entry");
  }

  Dirent result;
  zimFile >> result;
  if (!zimFile)
  {
    log_warn("failed to read to directory entry");
    throw ZimFileFormatError("failed to read directory entry");
  }

  log_debug("dirent read from " << direntPos);
  direntCache.put(idx, result);

  return result;
}
|
||||
|
||||
// Returns the directory entry found at position idx of the title index.
// Throws ZimFileFormatError when idx is not smaller than the article count.
Dirent FileImpl::getDirentByTitle(size_type idx)
{
  if (idx < getCountArticles())
    return getDirent(getIndexByTitle(idx));

  throw ZimFileFormatError("article index out of range");
}
|
||||
|
||||
// Reads entry number idx of the title index (an array of size_type values
// starting at header.getTitleIdxPos()) and returns it in host byte order.
// Throws ZimFileFormatError when idx is out of range or the read fails.
size_type FileImpl::getIndexByTitle(size_type idx)
{
  if (idx >= getCountArticles())
    throw ZimFileFormatError("article index out of range");

  size_type articleIdx;
  zimFile.seekg(header.getTitleIdxPos() + sizeof(size_type) * idx);
  zimFile.read(reinterpret_cast<char*>(&articleIdx), sizeof(size_type));

  if (!zimFile)
    throw ZimFileFormatError("error reading title index");

  // the value is stored little endian; convert only on big endian hosts
  return isBigEndian() ? fromLittleEndian(&articleIdx) : articleIdx;
}
|
||||
|
||||
// Returns cluster number idx, either from the cluster cache or freshly read
// from the file. Only compressed clusters are put into the cache.
// Throws ZimFileFormatError when idx is out of range or reading fails.
Cluster FileImpl::getCluster(size_type idx)
{
  log_trace("getCluster(" << idx << ')');

  if (idx >= getCountClusters())
    throw ZimFileFormatError("cluster index out of range");

  Cluster cluster = clusterCache.get(idx);
  if (cluster)
  {
    log_debug("cluster " << idx << " found in cache; hits " << clusterCache.getHits() << " misses " << clusterCache.getMisses() << " ratio " << clusterCache.hitRatio() * 100 << "% fillfactor " << clusterCache.fillfactor());
    return cluster;
  }

  // not cached: read it from the file using a larger read buffer
  zimFile.setBufsize(16384);

  offset_type clusterOffset = getClusterOffset(idx);
  log_debug("read cluster " << idx << " from offset " << clusterOffset);
  zimFile.seekg(clusterOffset);
  zimFile >> cluster;

  if (zimFile.fail())
    throw ZimFileFormatError("error reading cluster data");

  if (!cluster.isCompressed())
  {
    log_debug("cluster " << idx << " is not compressed - do not cache");
  }
  else
  {
    log_debug("put cluster " << idx << " into cluster cache; hits " << clusterCache.getHits() << " misses " << clusterCache.getMisses() << " ratio " << clusterCache.hitRatio() * 100 << "% fillfactor " << clusterCache.fillfactor());
    clusterCache.put(idx, cluster);
  }

  return cluster;
}
|
||||
|
||||
// Reads element idx of the offset_type array that starts at file position
// ptrOffset and returns it in host byte order.
// Throws ZimFileFormatError when the value cannot be read.
offset_type FileImpl::getOffset(offset_type ptrOffset, size_type idx)
{
  offset_type value;
  zimFile.seekg(ptrOffset + sizeof(offset_type) * idx);
  zimFile.read(reinterpret_cast<char*>(&value), sizeof(offset_type));

  if (!zimFile)
    throw ZimFileFormatError("error reading offset");

  // stored little endian; convert only on big endian hosts
  return isBigEndian() ? fromLittleEndian(&value) : value;
}
|
||||
|
||||
// Returns the index of the first directory entry whose namespace is not
// smaller than ch, found by bisection over the directory entries; the
// result is remembered in namespaceBeginCache.
// NOTE(review): the bisection presumes the directory entries are ordered by
// namespace - confirm against the file format.
size_type FileImpl::getNamespaceBeginOffset(char ch)
{
  log_trace("getNamespaceBeginOffset(" << ch << ')');

  NamespaceCache::const_iterator cached = namespaceBeginCache.find(ch);
  if (cached != namespaceBeginCache.end())
    return cached->second;

  size_type lower = 0;
  size_type upper = getCountArticles();
  Dirent firstDirent = getDirent(0);
  while (upper - lower > 1)
  {
    size_type mid = lower + (upper - lower) / 2;
    if (getDirent(mid).getNamespace() >= ch)
      upper = mid;
    else
      lower = mid;
  }

  // index 0 is never probed by the loop, so the very first entry decides
  // whether the namespace starts at lower (0) or at upper
  size_type beginOffset = firstDirent.getNamespace() < ch ? upper : lower;
  namespaceBeginCache[ch] = beginOffset;

  return beginOffset;
}
|
||||
|
||||
// Returns the index just past the last directory entry whose namespace is
// not greater than ch, found by bisection over the directory entries; the
// result is remembered in namespaceEndCache.
// NOTE(review): the bisection presumes the directory entries are ordered by
// namespace - confirm against the file format.
size_type FileImpl::getNamespaceEndOffset(char ch)
{
  log_trace("getNamespaceEndOffset(" << ch << ')');

  NamespaceCache::const_iterator cached = namespaceEndCache.find(ch);
  if (cached != namespaceEndCache.end())
    return cached->second;

  size_type lower = 0;
  size_type upper = getCountArticles();
  log_debug("namespace " << ch << " lower=" << lower << " upper=" << upper);
  while (upper - lower > 1)
  {
    size_type m = lower + (upper - lower) / 2;
    Dirent d = getDirent(m);
    if (d.getNamespace() <= ch)
      lower = m;
    else
      upper = m;
    log_debug("namespace " << d.getNamespace() << " m=" << m << " lower=" << lower << " upper=" << upper);
  }

  namespaceEndCache[ch] = upper;

  return upper;
}
|
||||
|
||||
std::string FileImpl::getNamespaces()
|
||||
{
|
||||
if (namespaces.empty())
|
||||
{
|
||||
Dirent d = getDirent(0);
|
||||
namespaces = d.getNamespace();
|
||||
|
||||
size_type idx;
|
||||
while ((idx = getNamespaceEndOffset(d.getNamespace())) < getCountArticles())
|
||||
{
|
||||
d = getDirent(idx);
|
||||
namespaces += d.getNamespace();
|
||||
}
|
||||
|
||||
}
|
||||
return namespaces;
|
||||
}
|
||||
|
||||
const std::string& FileImpl::getMimeType(uint16_t idx) const
|
||||
{
|
||||
if (idx > mimeTypes.size())
|
||||
{
|
||||
std::ostringstream msg;
|
||||
msg << "unknown mime type code " << idx;
|
||||
throw std::runtime_error(msg.str());
|
||||
}
|
||||
|
||||
return mimeTypes[idx];
|
||||
}
|
||||
|
||||
std::string FileImpl::getChecksum()
|
||||
{
|
||||
if (!header.hasChecksum())
|
||||
return std::string();
|
||||
|
||||
zimFile.seekg(header.getChecksumPos());
|
||||
unsigned char chksum[16];
|
||||
zimFile.read(reinterpret_cast<char*>(chksum), 16);
|
||||
if (!zimFile)
|
||||
{
|
||||
log_warn("error reading checksum");
|
||||
return std::string();
|
||||
}
|
||||
|
||||
char hexdigest[33];
|
||||
hexdigest[32] = '\0';
|
||||
static const char hex[] = "0123456789abcdef";
|
||||
char* p = hexdigest;
|
||||
for (int i = 0; i < 16; ++i)
|
||||
{
|
||||
*p++ = hex[chksum[i] >> 4];
|
||||
*p++ = hex[chksum[i] & 0xf];
|
||||
}
|
||||
log_debug("chksum=" << hexdigest);
|
||||
return hexdigest;
|
||||
}
|
||||
|
||||
bool FileImpl::verify()
|
||||
{
|
||||
if (!header.hasChecksum())
|
||||
return false;
|
||||
|
||||
#ifdef WITH_CXXTOOLS
|
||||
cxxtools::Md5stream md5;
|
||||
#else
|
||||
Md5stream md5;
|
||||
#endif
|
||||
|
||||
zimFile.seekg(0);
|
||||
char ch;
|
||||
for (offset_type n = 0; n < header.getChecksumPos() && zimFile.get(ch); ++n)
|
||||
md5 << ch;
|
||||
|
||||
unsigned char chksumFile[16];
|
||||
unsigned char chksumCalc[16];
|
||||
|
||||
zimFile.read(reinterpret_cast<char*>(chksumFile), 16);
|
||||
|
||||
if (!zimFile)
|
||||
throw ZimFileFormatError("failed to read checksum from zim file");
|
||||
|
||||
md5.getDigest(chksumCalc);
|
||||
if (std::memcmp(chksumFile, chksumCalc, 16) != 0)
|
||||
throw ZimFileFormatError("invalid checksum in zim file");
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
}
|
||||
@ -1,323 +0,0 @@
|
||||
/*
|
||||
* Copyright (C) 2010 Tommi Maekitalo
|
||||
*
|
||||
* This program is free software; you can redistribute it and/or
|
||||
* modify it under the terms of the GNU General Public License as
|
||||
* published by the Free Software Foundation; either version 2 of the
|
||||
* License, or (at your option) any later version.
|
||||
*
|
||||
* This program is distributed in the hope that it will be useful, but
|
||||
* is provided AS IS, WITHOUT ANY WARRANTY; without even the implied
|
||||
* warranty of MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE, and
|
||||
* NON-INFRINGEMENT. See the GNU General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU General Public License
|
||||
* along with this program; if not, write to the Free Software
|
||||
* Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
|
||||
*
|
||||
*/
|
||||
|
||||
#include <zim/fstream.h>
|
||||
#include "log.h"
|
||||
#include "config.h"
|
||||
#include <sstream>
|
||||
#include <stdexcept>
|
||||
#include <errno.h>
|
||||
#include <string.h>
|
||||
#include <fcntl.h>
|
||||
#include <sys/types.h>
|
||||
#include <sys/stat.h>
|
||||
#ifdef WITH_CXXTOOLS
|
||||
#include <cxxtools/systemerror.h>
|
||||
#endif
|
||||
|
||||
#ifdef _WIN32
|
||||
#include <io.h>
|
||||
#else
|
||||
#include <unistd.h>
|
||||
#endif
|
||||
|
||||
#ifndef O_LARGEFILE
|
||||
#define O_LARGEFILE 0
|
||||
#endif
|
||||
|
||||
#ifndef O_BINARY
|
||||
#define O_BINARY 0
|
||||
#endif
|
||||
|
||||
log_define("zim.fstream")
|
||||
|
||||
namespace zim
|
||||
{
|
||||
// Exception used inside this file to signal that a file could not be
// opened; what() yields "file not found".
struct FileNotFound : public std::runtime_error
{
  FileNotFound()
    : std::runtime_error("file not found")
  { }
};
|
||||
|
||||
////////////////////////////////////////////////////////////
|
||||
// OpenfileInfo
|
||||
//
|
||||
// Opens the file fname_ read-only and keeps the resulting descriptor in fd
// (open64 is used when HAVE_OPEN64 is defined).
// Throws FileNotFound when the file cannot be opened.
streambuf::OpenfileInfo::OpenfileInfo(const std::string& fname_)
  : fname(fname_),
#ifdef HAVE_OPEN64
    fd(::open64(fname.c_str(), O_BINARY | O_LARGEFILE | O_RDONLY))
#else
    fd(::open(fname.c_str(), O_BINARY | O_LARGEFILE | O_RDONLY))
#endif
{
  if (fd < 0)
  {
    throw FileNotFound();
  }
}
|
||||
|
||||
// Closes the file descriptor opened by the constructor; the result of
// close() is not checked.
streambuf::OpenfileInfo::~OpenfileInfo()
{
  ::close(fd);
}
|
||||
|
||||
////////////////////////////////////////////////////////////
|
||||
// FileInfo
|
||||
//
|
||||
// Records the name and the size of one file. The size is determined by
// seeking the already opened descriptor fd to the end of the file, which
// leaves the descriptor positioned there.
// Throws std::runtime_error when the seek fails.
streambuf::FileInfo::FileInfo(const std::string& fname_, int fd)
  : fname(fname_)
{
#if defined(_WIN32)
  __int64 endPos = ::_lseeki64(fd, 0, SEEK_END);
#elif defined(HAVE_LSEEK64)
  off64_t endPos = ::lseek64(fd, 0, SEEK_END);
#else
  off_t endPos = ::lseek(fd, 0, SEEK_END);
#endif

  if (endPos < 0)
  {
    std::ostringstream msg;
    msg << "error " << errno << " seeking to end in file " << fname << ": " << strerror(errno);
    throw std::runtime_error(msg.str());
  }

  fsize = static_cast<zim::offset_type>(endPos);
}
|
||||
|
||||
std::streambuf::int_type streambuf::overflow(std::streambuf::int_type ch)
|
||||
{
|
||||
return traits_type::eof();
|
||||
}
|
||||
|
||||
std::streambuf::int_type streambuf::underflow()
|
||||
{
|
||||
log_debug("underflow; bufsize=" << buffer.size());
|
||||
|
||||
int n;
|
||||
do
|
||||
{
|
||||
n = ::read(currentFile->fd, &buffer[0], buffer.size());
|
||||
if (n < 0)
|
||||
{
|
||||
std::ostringstream msg;
|
||||
msg << "error " << errno << " reading from file: " << strerror(errno);
|
||||
throw std::runtime_error(msg.str());
|
||||
}
|
||||
else if (n == 0)
|
||||
{
|
||||
FilesType::iterator it;
|
||||
for (it = files.begin(); it != files.end(); ++it)
|
||||
{
|
||||
if ((*it)->fname == currentFile->fname)
|
||||
{
|
||||
++it;
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
if (it == files.end())
|
||||
return traits_type::eof();
|
||||
|
||||
setCurrentFile((*it)->fname, 0);
|
||||
}
|
||||
} while (n == 0);
|
||||
|
||||
char* p = &buffer[0];
|
||||
setg(p, p, p + n);
|
||||
return traits_type::to_int_type(*gptr());
|
||||
}
|
||||
|
||||
int streambuf::sync()
|
||||
{
|
||||
return traits_type::eof();
|
||||
}
|
||||
|
||||
namespace
{
  // Splits `list`, a ':' separated list of file names, and appends the
  // names to `out`.
  //
  // A backslash takes the following character literally, so "\:" yields a
  // ':' inside a name and "\\" a single backslash; a trailing lone
  // backslash is dropped. Empty names between two separators are kept. An
  // empty list appends nothing.
  //
  // The separator itself is never part of a name (previously it was stored
  // as the first character of the following name).
  void parseFilelist(const std::string& list, std::vector<std::string>& out)
  {
    if (list.empty())
      return;

    bool escaped = false;
    out.push_back(std::string());

    for (std::string::const_iterator it = list.begin(); it != list.end(); ++it)
    {
      const char ch = *it;
      if (escaped)
      {
        out.back() += ch;
        escaped = false;
      }
      else if (ch == '\\')
      {
        escaped = true;
      }
      else if (ch == ':')
      {
        // separator: start the next (initially empty) name
        out.push_back(std::string());
      }
      else
      {
        out.back() += ch;
      }
    }
  }
}
|
||||
|
||||
// Creates a read buffer of bufsize bytes for the file fname.
//
// When fname itself cannot be opened, the files fname + "aa", fname + "ab",
// ... up to fname + "zz" are tried in that order and collected until the
// first one that cannot be opened; together they are treated as one file.
// noOpenFiles is passed to the cache of open file descriptors.
//
// Throws std::runtime_error when neither fname nor fname + "aa" can be
// opened.
streambuf::streambuf(const std::string& fname, unsigned bufsize, unsigned noOpenFiles)
  : buffer(bufsize),
    openFilesCache(noOpenFiles),
    mtime(0)
{
  log_debug("streambuf for " << fname << " with " << bufsize << " bytes");

  try
  {
    currentFile = new OpenfileInfo(fname);
    files.push_back(new FileInfo(fname, currentFile->fd));
    openFilesCache.put(fname, currentFile);
  }
  catch (const FileNotFound&)
  {
    // keep the errno of the failed open for the error message below
    int errnoSave = errno;
    try
    {
      for (char first = 'a'; first <= 'z'; ++first)
      {
        for (char second = 'a'; second <= 'z'; ++second)
        {
          std::string partName = fname + first + second;

          currentFile = new OpenfileInfo(partName);
          files.push_back(new FileInfo(partName, currentFile->fd));

          openFilesCache.put(partName, currentFile);
        }
      }
    }
    catch (const FileNotFound&)
    {
      // the first missing part ends the search; it is an error only when
      // not a single part was found
      if (files.empty())
      {
        std::ostringstream msg;
        msg << "error " << errnoSave << " opening file \"" << fname << "\": " << strerror(errnoSave);
        throw std::runtime_error(msg.str());
      }
    }
  }

  setCurrentFile(files.front()->fname, 0);
}
|
||||
|
||||
void streambuf::setCurrentFile(const std::string& fname, zim::offset_type off)
|
||||
{
|
||||
std::pair<bool, OpenfileInfoPtr> f = openFilesCache.getx(fname);
|
||||
if (f.first)
|
||||
{
|
||||
currentFile = f.second;
|
||||
}
|
||||
else
|
||||
{
|
||||
// file not found in cache
|
||||
currentFile = new OpenfileInfo(fname);
|
||||
openFilesCache.put(fname, currentFile);
|
||||
}
|
||||
|
||||
if (f.first || off != 0) // found in cache or seek requested
|
||||
{
|
||||
#if defined(_WIN32)
|
||||
offset_type ret = ::_lseeki64(currentFile->fd, off, SEEK_SET);
|
||||
#elif defined(HAVE_LSEEK64)
|
||||
off64_t ret = ::lseek64(currentFile->fd, off, SEEK_SET);
|
||||
#else
|
||||
off_t ret = ::lseek(currentFile->fd, off, SEEK_SET);
|
||||
#endif
|
||||
if (ret < 0)
|
||||
{
|
||||
std::ostringstream msg;
|
||||
msg << "error " << errno << " seeking to "<< off << " in file " << fname << ": " << strerror(errno);
|
||||
throw std::runtime_error(msg.str());
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Positions the buffer at absolute offset off, counted over all files in
// `files` as if they were concatenated. The get area is discarded.
// Throws std::runtime_error when off lies beyond the last file.
void streambuf::seekg(zim::offset_type off)
{
  setg(0, 0, 0);
  currentPos = off;

  // skip the files that lie completely in front of the requested position
  zim::offset_type remaining = off;
  FilesType::iterator part = files.begin();
  while (part != files.end() && (*part)->fsize < remaining)
  {
    remaining -= (*part)->fsize;
    ++part;
  }

  if (part == files.end())
  {
    std::ostringstream msg;
    msg << "error seeking to "<< off;
    throw std::runtime_error(msg.str());
  }

  setCurrentFile((*part)->fname, remaining);
}
|
||||
|
||||
// Returns the sum of the sizes of all files in `files`.
zim::offset_type streambuf::fsize() const
{
  zim::offset_type total = 0;
  FilesType::const_iterator part = files.begin();
  while (part != files.end())
  {
    total += (*part)->fsize;
    ++part;
  }
  return total;
}
|
||||
|
||||
time_t streambuf::getMTime() const
|
||||
{
|
||||
if (mtime || files.empty())
|
||||
return mtime;
|
||||
|
||||
const char* fname = files.front()->fname.c_str();
|
||||
|
||||
#ifdef HAVE_STAT64
|
||||
struct stat64 st;
|
||||
int ret = ::stat64(fname, &st);
|
||||
#else
|
||||
struct stat st;
|
||||
int ret = ::stat(fname, &st);
|
||||
#endif
|
||||
if (ret != 0)
|
||||
#ifdef WITH_CXXTOOLS
|
||||
throw cxxtools::SystemError("stat");
|
||||
#else
|
||||
{
|
||||
std::ostringstream msg;
|
||||
msg << "stat failed with errno " << errno << " : " << strerror(errno);
|
||||
throw std::runtime_error(msg.str());
|
||||
}
|
||||
#endif
|
||||
mtime = st.st_mtime;
|
||||
|
||||
return mtime;
|
||||
}
|
||||
|
||||
}
|
||||
@ -1,165 +0,0 @@
|
||||
/*
|
||||
* Copyright (C) 2007 Tommi Maekitalo
|
||||
*
|
||||
* This program is free software; you can redistribute it and/or
|
||||
* modify it under the terms of the GNU General Public License as
|
||||
* published by the Free Software Foundation; either version 2 of the
|
||||
* License, or (at your option) any later version.
|
||||
*
|
||||
* This program is distributed in the hope that it will be useful, but
|
||||
* is provided AS IS, WITHOUT ANY WARRANTY; without even the implied
|
||||
* warranty of MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE, and
|
||||
* NON-INFRINGEMENT. See the GNU General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU General Public License
|
||||
* along with this program; if not, write to the Free Software
|
||||
* Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
|
||||
*
|
||||
*/
|
||||
|
||||
#include <zim/indexarticle.h>
|
||||
#include <zim/zintstream.h>
|
||||
#include <sstream>
|
||||
#include <stdexcept>
|
||||
#include "log.h"
|
||||
#include "ptrstream.h"
|
||||
|
||||
log_define("zim.indexarticle")
|
||||
|
||||
namespace zim
|
||||
{
|
||||
bool IndexArticle::noOffset = false;
|
||||
|
||||
void IndexArticle::readEntries()
|
||||
{
|
||||
if (!good() || categoriesRead)
|
||||
return;
|
||||
|
||||
log_debug("read entries for article " << getUrl());
|
||||
|
||||
if (getParameter().empty())
|
||||
readEntriesB();
|
||||
else
|
||||
readEntriesZ();
|
||||
|
||||
categoriesRead = true;
|
||||
}
|
||||
|
||||
void IndexArticle::readEntriesZ()
|
||||
{
|
||||
std::istringstream s(getParameter());
|
||||
zim::ZIntStream extra(s);
|
||||
|
||||
unsigned flagfield; // field with one bit (bits 0-3) for each cateogry
|
||||
extra.get(flagfield);
|
||||
|
||||
log_debug("flags: h" << std::hex << flagfield);
|
||||
|
||||
unsigned offset = 0;
|
||||
for (unsigned c = 0; c <= 3; ++c)
|
||||
{
|
||||
bool catNotEmpty = (flagfield & 1);
|
||||
flagfield >>= 1;
|
||||
|
||||
if (catNotEmpty)
|
||||
{
|
||||
log_debug("read category " << c);
|
||||
|
||||
unsigned len;
|
||||
Entry entry;
|
||||
bool s = extra.get(len) && extra.get(entry.index);
|
||||
if (s && getNamespace() == 'X')
|
||||
s = extra.get(entry.pos);
|
||||
else
|
||||
entry.pos = 0;
|
||||
|
||||
unsigned pos = entry.pos;
|
||||
|
||||
if (!s)
|
||||
throw std::runtime_error("invalid index entry");
|
||||
|
||||
log_debug("first index " << entry.index << " pos " << entry.pos);
|
||||
entries[c].push_back(entry);
|
||||
|
||||
log_debug("read data from offset " << offset << " len " << len);
|
||||
zim::Blob b = getData();
|
||||
ptrstream data(const_cast<char*>(b.data() + offset), const_cast<char*>(b.data() + offset + len));
|
||||
ZIntStream zdata(data);
|
||||
|
||||
unsigned index;
|
||||
unsigned indexOffset = 0;
|
||||
while (zdata.get(index))
|
||||
{
|
||||
entry.index = indexOffset + index;
|
||||
|
||||
if (!noOffset)
|
||||
indexOffset += index;
|
||||
|
||||
if (getNamespace() == 'X')
|
||||
{
|
||||
unsigned p;
|
||||
if (!zdata.get(p))
|
||||
throw std::runtime_error("invalid index entry");
|
||||
pos += p;
|
||||
entry.pos = p;
|
||||
}
|
||||
else
|
||||
entry.pos = 0;
|
||||
|
||||
log_debug("index " << entry.index << " pos " << entry.pos);
|
||||
|
||||
entries[c].push_back(entry);
|
||||
}
|
||||
|
||||
offset += len;
|
||||
}
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
namespace
|
||||
{
|
||||
class Eof { };
|
||||
|
||||
zim::size_type getSizeValue(std::istream& in)
|
||||
{
|
||||
zim::size_type ret;
|
||||
in.read(reinterpret_cast<char*>(&ret), sizeof(zim::size_type));
|
||||
if (!in)
|
||||
throw Eof();
|
||||
ret = fromLittleEndian<zim::size_type>(&ret);
|
||||
return ret;
|
||||
}
|
||||
}
|
||||
|
||||
void IndexArticle::readEntriesB()
|
||||
{
|
||||
try
|
||||
{
|
||||
zim::size_type categoryCount[4];
|
||||
zim::Blob b = getData();
|
||||
ptrstream data(const_cast<char*>(b.data()), const_cast<char*>(b.end()));
|
||||
for (unsigned c = 0; c < 4; ++c)
|
||||
categoryCount[c] = getSizeValue(data);
|
||||
|
||||
for (unsigned c = 0; c < 4; ++c)
|
||||
{
|
||||
log_debug("read " << categoryCount[c] << " entries for category " << c);
|
||||
for (unsigned n = 0; n < categoryCount[c]; ++n)
|
||||
{
|
||||
Entry entry;
|
||||
entry.index = getSizeValue(data);
|
||||
if (getNamespace() == 'X')
|
||||
entry.pos = getNamespace() ? getSizeValue(data) : 0;
|
||||
entries[c].push_back(entry);
|
||||
}
|
||||
}
|
||||
}
|
||||
catch (const Eof&)
|
||||
{
|
||||
log_error("end of file when reading index entries for article " << getTitle());
|
||||
return;
|
||||
}
|
||||
}
|
||||
|
||||
}
|
||||
@ -1,36 +0,0 @@
|
||||
/*
|
||||
* Copyright (C) 2009 Tommi Maekitalo
|
||||
*
|
||||
* This program is free software; you can redistribute it and/or
|
||||
* modify it under the terms of the GNU General Public License as
|
||||
* published by the Free Software Foundation; either version 2 of the
|
||||
* License, or (at your option) any later version.
|
||||
*
|
||||
* This program is distributed in the hope that it will be useful, but
|
||||
* is provided AS IS, WITHOUT ANY WARRANTY; without even the implied
|
||||
* warranty of MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE, and
|
||||
* NON-INFRINGEMENT. See the GNU General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU General Public License
|
||||
* along with this program; if not, write to the Free Software
|
||||
* Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
|
||||
*
|
||||
*/
|
||||
|
||||
#include "config.h"
|
||||
|
||||
#ifdef WITH_CXXTOOLS
|
||||
|
||||
#include <cxxtools/log.h>
|
||||
|
||||
#else
|
||||
|
||||
#define log_define(e)
|
||||
#define log_fatal(e)
|
||||
#define log_error(e)
|
||||
#define log_warn(e)
|
||||
#define log_info(e)
|
||||
#define log_debug(e)
|
||||
#define log_trace(e)
|
||||
|
||||
#endif
|
||||
@ -1,182 +0,0 @@
|
||||
/*
|
||||
* Copyright (C) 2009 Tommi Maekitalo
|
||||
*
|
||||
* This program is free software; you can redistribute it and/or
|
||||
* modify it under the terms of the GNU General Public License as
|
||||
* published by the Free Software Foundation; either version 2 of the
|
||||
* License, or (at your option) any later version.
|
||||
*
|
||||
* This program is distributed in the hope that it will be useful, but
|
||||
* is provided AS IS, WITHOUT ANY WARRANTY; without even the implied
|
||||
* warranty of MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE, and
|
||||
* NON-INFRINGEMENT. See the GNU General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU General Public License
|
||||
* along with this program; if not, write to the Free Software
|
||||
* Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
|
||||
*
|
||||
*/
|
||||
|
||||
#include <zim/lzmastream.h>
|
||||
#include <zim/zim.h>
|
||||
#include "log.h"
|
||||
#include <cstring>
|
||||
#include <sstream>
|
||||
|
||||
log_define("zim.lzma.compress")
|
||||
|
||||
namespace zim
|
||||
{
|
||||
namespace
|
||||
{
|
||||
lzma_ret checkError(lzma_ret ret)
|
||||
{
|
||||
if (ret != LZMA_OK && ret != LZMA_STREAM_END)
|
||||
{
|
||||
std::ostringstream msg;
|
||||
msg << "lzma-error " << ret;
|
||||
switch (ret)
|
||||
{
|
||||
case LZMA_OK: msg << ": LZMA_OK"; break;
|
||||
case LZMA_STREAM_END: msg << ": LZMA_STREAM_END"; break;
|
||||
case LZMA_NO_CHECK: msg << ": LZMA_NO_CHECK"; break;
|
||||
case LZMA_UNSUPPORTED_CHECK: msg << ": LZMA_UNSUPPORTED_CHECK"; break;
|
||||
case LZMA_GET_CHECK: msg << ": LZMA_GET_CHECK"; break;
|
||||
case LZMA_MEM_ERROR: msg << ": LZMA_MEM_ERROR"; break;
|
||||
case LZMA_MEMLIMIT_ERROR: msg << ": LZMA_MEMLIMIT_ERROR"; break;
|
||||
case LZMA_FORMAT_ERROR: msg << ": LZMA_FORMAT_ERROR"; break;
|
||||
case LZMA_OPTIONS_ERROR: msg << ": LZMA_OPTIONS_ERROR"; break;
|
||||
case LZMA_DATA_ERROR: msg << ": LZMA_DATA_ERROR"; break;
|
||||
case LZMA_BUF_ERROR: msg << ": LZMA_BUF_ERROR"; break;
|
||||
case LZMA_PROG_ERROR: msg << ": LZMA_PROG_ERROR"; break;
|
||||
}
|
||||
log_error(msg.str());
|
||||
throw LzmaError(ret, msg.str());
|
||||
}
|
||||
return ret;
|
||||
}
|
||||
}
|
||||
|
||||
// Sets up an lzma encoder with the given preset and integrity check that
// writes its compressed output to sink_. bufsize_ is the size of the buffer
// collecting the uncompressed input.
// Throws LzmaError when the encoder cannot be initialized.
LzmaStreamBuf::LzmaStreamBuf(std::streambuf* sink_, uint32_t preset, lzma_check check, unsigned bufsize_)
  : obuffer(bufsize_),
    sink(sink_)
{
  // the encoder expects a zeroed stream structure
  std::memset(&stream, 0, sizeof(stream));

  checkError(::lzma_easy_encoder(&stream, preset, check));

  char* begin = &obuffer[0];
  setp(begin, begin + obuffer.size());
}
|
||||
|
||||
// Hands the encoder state back to liblzma with lzma_end(). Buffered data
// is not flushed here.
LzmaStreamBuf::~LzmaStreamBuf()
{
  ::lzma_end(&stream);
}
|
||||
|
||||
// Called when the put area is full: runs the buffered input through the
// encoder (LZMA_RUN), forwards the compressed bytes produced to the sink
// and makes room in the put area again. c, unless eof, is stored as the
// next character.
// Returns eof when the sink does not accept all compressed bytes, 0
// otherwise. Throws LzmaError on encoder errors.
LzmaStreamBuf::int_type LzmaStreamBuf::overflow(int_type c)
{
  char* const pending = &obuffer[0];

  // initialize input-stream
  stream.next_in = reinterpret_cast<const uint8_t*>(pending);
  stream.avail_in = pptr() - pending;

  // initialize zbuffer for compressed data
  char zbuffer[8192];
  stream.next_out = reinterpret_cast<uint8_t*>(zbuffer);
  stream.avail_out = sizeof(zbuffer);

  // compress
  checkError(::lzma_code(&stream, LZMA_RUN));

  // copy zbuffer to sink / consume deflated data
  const std::streamsize produced = sizeof(zbuffer) - stream.avail_out;
  if (produced > 0 && sink->sputn(zbuffer, produced) < produced)
    return traits_type::eof();

  // move remaining characters to start of obuffer
  if (stream.avail_in > 0)
    std::memmove(pending, stream.next_in, stream.avail_in);

  // reset outbuffer
  setp(pending + stream.avail_in, pending + obuffer.size());
  if (c != traits_type::eof())
    sputc(traits_type::to_char_type(c));

  return 0;
}
|
||||
|
||||
// This buffer is write-only: a read attempt always yields eof.
LzmaStreamBuf::int_type LzmaStreamBuf::underflow()
{
  return traits_type::eof();
}
|
||||
|
||||
int LzmaStreamBuf::sync()
|
||||
{
|
||||
// initialize input-stream for
|
||||
stream.next_in = reinterpret_cast<const uint8_t*>(&obuffer[0]);
|
||||
stream.avail_in = pptr() - &obuffer[0];
|
||||
char zbuffer[8192];
|
||||
while (stream.avail_in > 0)
|
||||
{
|
||||
// initialize zbuffer
|
||||
stream.next_out = (uint8_t*)zbuffer;
|
||||
stream.avail_out = sizeof(zbuffer);
|
||||
|
||||
checkError(::lzma_code(&stream, LZMA_FINISH));
|
||||
|
||||
// copy zbuffer to sink
|
||||
std::streamsize count = sizeof(zbuffer) - stream.avail_out;
|
||||
if (count > 0)
|
||||
{
|
||||
std::streamsize n = sink->sputn(zbuffer, count);
|
||||
if (n < count)
|
||||
return -1;
|
||||
}
|
||||
};
|
||||
|
||||
// reset outbuffer
|
||||
setp(&obuffer[0], &obuffer[0] + obuffer.size());
|
||||
return 0;
|
||||
}
|
||||
|
||||
int LzmaStreamBuf::end()
|
||||
{
|
||||
char zbuffer[8192];
|
||||
// initialize input-stream for
|
||||
stream.next_in = reinterpret_cast<const uint8_t*>(&obuffer[0]);
|
||||
stream.avail_in = pptr() - &obuffer[0];
|
||||
lzma_ret ret;
|
||||
do
|
||||
{
|
||||
// initialize zbuffer
|
||||
stream.next_out = (uint8_t*)zbuffer;
|
||||
stream.avail_out = sizeof(zbuffer);
|
||||
|
||||
ret = checkError(::lzma_code(&stream, LZMA_FINISH));
|
||||
|
||||
// copy zbuffer to sink
|
||||
std::streamsize count = sizeof(zbuffer) - stream.avail_out;
|
||||
if (count > 0)
|
||||
{
|
||||
std::streamsize n = sink->sputn(zbuffer, count);
|
||||
if (n < count)
|
||||
throw LzmaError(static_cast<lzma_ret>(0), "failed to send compressed data to sink in lzmastream");
|
||||
}
|
||||
} while (ret != LZMA_STREAM_END);
|
||||
|
||||
// reset outbuffer
|
||||
setp(&obuffer[0], &obuffer[0] + obuffer.size());
|
||||
return 0;
|
||||
}
|
||||
|
||||
void LzmaStream::end()
|
||||
{
|
||||
if (streambuf.end() != 0)
|
||||
setstate(failbit);
|
||||
}
|
||||
|
||||
}
|
||||
@ -1,340 +0,0 @@
|
||||
/* MD5C.C - RSA Data Security, Inc., MD5 message-digest algorithm
|
||||
*/
|
||||
|
||||
/* Copyright (C) 1991-2, RSA Data Security, Inc. Created 1991. All
|
||||
rights reserved.
|
||||
|
||||
License to copy and use this software is granted provided that it
|
||||
is identified as the "RSA Data Security, Inc. MD5 Message-Digest
|
||||
Algorithm" in all material mentioning or referencing this software
|
||||
or this function.
|
||||
|
||||
License is also granted to make and use derivative works provided
|
||||
that such works are identified as "derived from the RSA Data
|
||||
Security, Inc. MD5 Message-Digest Algorithm" in all material
|
||||
mentioning or referencing the derived work.
|
||||
|
||||
RSA Data Security, Inc. makes no representations concerning either
|
||||
the merchantability of this software or the suitability of this
|
||||
software for any particular purpose. It is provided "as is"
|
||||
without express or implied warranty of any kind.
|
||||
|
||||
These notices must be retained in any copies of any part of this
|
||||
documentation and/or software.
|
||||
*/
|
||||
|
||||
#include "md5.h"
|
||||
#include <string.h>
|
||||
|
||||
#define MD5_CTX zim_MD5_CTX
|
||||
|
||||
/* Constants for MD5Transform routine.
|
||||
*/
|
||||
#define S11 7
|
||||
#define S12 12
|
||||
#define S13 17
|
||||
#define S14 22
|
||||
#define S21 5
|
||||
#define S22 9
|
||||
#define S23 14
|
||||
#define S24 20
|
||||
#define S31 4
|
||||
#define S32 11
|
||||
#define S33 16
|
||||
#define S34 23
|
||||
#define S41 6
|
||||
#define S42 10
|
||||
#define S43 15
|
||||
#define S44 21
|
||||
|
||||
static void MD5Transform PROTO_LIST ((UINT4 [4], const unsigned char [64]));
|
||||
static void Encode PROTO_LIST
|
||||
((unsigned char *, UINT4 *, unsigned int));
|
||||
static void Decode PROTO_LIST
|
||||
((UINT4 *, const unsigned char *, unsigned int));
|
||||
/*
|
||||
static void MD5_memcpy PROTO_LIST ((POINTER, POINTER, unsigned int));
|
||||
static void MD5_memset PROTO_LIST ((POINTER, int, unsigned int));
|
||||
*/
|
||||
#define MD5_memcpy memcpy
|
||||
#define MD5_memset memset
|
||||
|
||||
static unsigned char PADDING[64] = {
|
||||
0x80, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
||||
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
||||
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
|
||||
};
|
||||
|
||||
/* F, G, H and I are the basic MD5 auxiliary functions.
   Every macro argument is parenthesized at each use -- including the ones
   that sit under '~' -- so that compound expressions such as "a | b" can be
   passed without changing the meaning of the expansion.
 */
#define F(x, y, z) (((x) & (y)) | ((~(x)) & (z)))
#define G(x, y, z) (((x) & (z)) | ((y) & (~(z))))
#define H(x, y, z) ((x) ^ (y) ^ (z))
#define I(x, y, z) ((y) ^ ((x) | (~(z))))

/* ROTATE_LEFT rotates x left n bits.
   The expansion shifts right by (32 - n), so x is expected to be a 32-bit
   unsigned value and n must lie in 1..31.
 */
#define ROTATE_LEFT(x, n) (((x) << (n)) | ((x) >> (32-(n))))
|
||||
|
||||
/* FF, GG, HH, and II transformations for rounds 1, 2, 3, and 4.
|
||||
Rotation is separate from addition to prevent recomputation.
|
||||
*/
|
||||
#define FF(a, b, c, d, x, s, ac) { \
|
||||
(a) += F ((b), (c), (d)) + (x) + (UINT4)(ac); \
|
||||
(a) = ROTATE_LEFT ((a), (s)); \
|
||||
(a) += (b); \
|
||||
}
|
||||
#define GG(a, b, c, d, x, s, ac) { \
|
||||
(a) += G ((b), (c), (d)) + (x) + (UINT4)(ac); \
|
||||
(a) = ROTATE_LEFT ((a), (s)); \
|
||||
(a) += (b); \
|
||||
}
|
||||
#define HH(a, b, c, d, x, s, ac) { \
|
||||
(a) += H ((b), (c), (d)) + (x) + (UINT4)(ac); \
|
||||
(a) = ROTATE_LEFT ((a), (s)); \
|
||||
(a) += (b); \
|
||||
}
|
||||
#define II(a, b, c, d, x, s, ac) { \
|
||||
(a) += I ((b), (c), (d)) + (x) + (UINT4)(ac); \
|
||||
(a) = ROTATE_LEFT ((a), (s)); \
|
||||
(a) += (b); \
|
||||
}
|
||||
|
||||
/* MD5 initialization. Begins an MD5 operation, writing a new context.
|
||||
*/
|
||||
void zim_MD5Init (MD5_CTX* context)
|
||||
{
|
||||
context->count[0] = context->count[1] = 0;
|
||||
/* Load magic initialization constants.
|
||||
*/
|
||||
context->state[0] = 0x67452301;
|
||||
context->state[1] = 0xefcdab89;
|
||||
context->state[2] = 0x98badcfe;
|
||||
context->state[3] = 0x10325476;
|
||||
}
|
||||
|
||||
/* MD5 block update operation. Continues an MD5 message-digest
|
||||
operation, processing another message block, and updating the
|
||||
context.
|
||||
*/
|
||||
void zim_MD5Update (
|
||||
MD5_CTX *context,
|
||||
const unsigned char *input, /* input block */
|
||||
unsigned int inputLen) /* length of input block */
|
||||
{
|
||||
unsigned int i, index, partLen;
|
||||
|
||||
/* Compute number of bytes mod 64 */
|
||||
index = (unsigned int)((context->count[0] >> 3) & 0x3F);
|
||||
|
||||
/* Update number of bits */
|
||||
if ((context->count[0] += ((UINT4)inputLen << 3))
|
||||
< ((UINT4)inputLen << 3))
|
||||
context->count[1]++;
|
||||
context->count[1] += ((UINT4)inputLen >> 29);
|
||||
|
||||
partLen = 64 - index;
|
||||
|
||||
/* Transform as many times as possible.
|
||||
*/
|
||||
if (inputLen >= partLen) {
|
||||
MD5_memcpy
|
||||
((POINTER)&context->buffer[index], (POINTER)input, partLen);
|
||||
MD5Transform (context->state, context->buffer);
|
||||
|
||||
for (i = partLen; i + 63 < inputLen; i += 64)
|
||||
MD5Transform (context->state, &input[i]);
|
||||
|
||||
index = 0;
|
||||
}
|
||||
else
|
||||
i = 0;
|
||||
|
||||
/* Buffer remaining input */
|
||||
MD5_memcpy
|
||||
((POINTER)&context->buffer[index], (POINTER)&input[i],
|
||||
inputLen-i);
|
||||
}
|
||||
|
||||
/* MD5 finalization. Ends an MD5 message-digest operation, writing the
|
||||
the message digest and zeroizing the context.
|
||||
*/
|
||||
void zim_MD5Final (
|
||||
unsigned char digest[16], /* message digest */
|
||||
MD5_CTX *context) /* context */
|
||||
{
|
||||
unsigned char bits[8];
|
||||
unsigned int index, padLen;
|
||||
|
||||
/* Save number of bits */
|
||||
Encode (bits, context->count, 8);
|
||||
|
||||
/* Pad out to 56 mod 64.
|
||||
*/
|
||||
index = (unsigned int)((context->count[0] >> 3) & 0x3f);
|
||||
padLen = (index < 56) ? (56 - index) : (120 - index);
|
||||
zim_MD5Update (context, PADDING, padLen);
|
||||
|
||||
/* Append length (before padding) */
|
||||
zim_MD5Update (context, bits, 8);
|
||||
/* Store state in digest */
|
||||
Encode (digest, context->state, 16);
|
||||
|
||||
/* Zeroize sensitive information.
|
||||
*/
|
||||
MD5_memset ((POINTER)context, 0, sizeof (*context));
|
||||
}
|
||||
|
||||
/* MD5 basic transformation. Transforms state based on block.
|
||||
*/
|
||||
static void MD5Transform (
|
||||
UINT4 state[4],
|
||||
const unsigned char block[64])
|
||||
{
|
||||
UINT4 a = state[0], b = state[1], c = state[2], d = state[3], x[16];
|
||||
|
||||
Decode (x, block, 64);
|
||||
|
||||
/* Round 1 */
|
||||
FF (a, b, c, d, x[ 0], S11, 0xd76aa478); /* 1 */
|
||||
FF (d, a, b, c, x[ 1], S12, 0xe8c7b756); /* 2 */
|
||||
FF (c, d, a, b, x[ 2], S13, 0x242070db); /* 3 */
|
||||
FF (b, c, d, a, x[ 3], S14, 0xc1bdceee); /* 4 */
|
||||
FF (a, b, c, d, x[ 4], S11, 0xf57c0faf); /* 5 */
|
||||
FF (d, a, b, c, x[ 5], S12, 0x4787c62a); /* 6 */
|
||||
FF (c, d, a, b, x[ 6], S13, 0xa8304613); /* 7 */
|
||||
FF (b, c, d, a, x[ 7], S14, 0xfd469501); /* 8 */
|
||||
FF (a, b, c, d, x[ 8], S11, 0x698098d8); /* 9 */
|
||||
FF (d, a, b, c, x[ 9], S12, 0x8b44f7af); /* 10 */
|
||||
FF (c, d, a, b, x[10], S13, 0xffff5bb1); /* 11 */
|
||||
FF (b, c, d, a, x[11], S14, 0x895cd7be); /* 12 */
|
||||
FF (a, b, c, d, x[12], S11, 0x6b901122); /* 13 */
|
||||
FF (d, a, b, c, x[13], S12, 0xfd987193); /* 14 */
|
||||
FF (c, d, a, b, x[14], S13, 0xa679438e); /* 15 */
|
||||
FF (b, c, d, a, x[15], S14, 0x49b40821); /* 16 */
|
||||
|
||||
/* Round 2 */
|
||||
GG (a, b, c, d, x[ 1], S21, 0xf61e2562); /* 17 */
|
||||
GG (d, a, b, c, x[ 6], S22, 0xc040b340); /* 18 */
|
||||
GG (c, d, a, b, x[11], S23, 0x265e5a51); /* 19 */
|
||||
GG (b, c, d, a, x[ 0], S24, 0xe9b6c7aa); /* 20 */
|
||||
GG (a, b, c, d, x[ 5], S21, 0xd62f105d); /* 21 */
|
||||
GG (d, a, b, c, x[10], S22, 0x2441453); /* 22 */
|
||||
GG (c, d, a, b, x[15], S23, 0xd8a1e681); /* 23 */
|
||||
GG (b, c, d, a, x[ 4], S24, 0xe7d3fbc8); /* 24 */
|
||||
GG (a, b, c, d, x[ 9], S21, 0x21e1cde6); /* 25 */
|
||||
GG (d, a, b, c, x[14], S22, 0xc33707d6); /* 26 */
|
||||
GG (c, d, a, b, x[ 3], S23, 0xf4d50d87); /* 27 */
|
||||
GG (b, c, d, a, x[ 8], S24, 0x455a14ed); /* 28 */
|
||||
GG (a, b, c, d, x[13], S21, 0xa9e3e905); /* 29 */
|
||||
GG (d, a, b, c, x[ 2], S22, 0xfcefa3f8); /* 30 */
|
||||
GG (c, d, a, b, x[ 7], S23, 0x676f02d9); /* 31 */
|
||||
GG (b, c, d, a, x[12], S24, 0x8d2a4c8a); /* 32 */
|
||||
|
||||
/* Round 3 */
|
||||
HH (a, b, c, d, x[ 5], S31, 0xfffa3942); /* 33 */
|
||||
HH (d, a, b, c, x[ 8], S32, 0x8771f681); /* 34 */
|
||||
HH (c, d, a, b, x[11], S33, 0x6d9d6122); /* 35 */
|
||||
HH (b, c, d, a, x[14], S34, 0xfde5380c); /* 36 */
|
||||
HH (a, b, c, d, x[ 1], S31, 0xa4beea44); /* 37 */
|
||||
HH (d, a, b, c, x[ 4], S32, 0x4bdecfa9); /* 38 */
|
||||
HH (c, d, a, b, x[ 7], S33, 0xf6bb4b60); /* 39 */
|
||||
HH (b, c, d, a, x[10], S34, 0xbebfbc70); /* 40 */
|
||||
HH (a, b, c, d, x[13], S31, 0x289b7ec6); /* 41 */
|
||||
HH (d, a, b, c, x[ 0], S32, 0xeaa127fa); /* 42 */
|
||||
HH (c, d, a, b, x[ 3], S33, 0xd4ef3085); /* 43 */
|
||||
HH (b, c, d, a, x[ 6], S34, 0x4881d05); /* 44 */
|
||||
HH (a, b, c, d, x[ 9], S31, 0xd9d4d039); /* 45 */
|
||||
HH (d, a, b, c, x[12], S32, 0xe6db99e5); /* 46 */
|
||||
HH (c, d, a, b, x[15], S33, 0x1fa27cf8); /* 47 */
|
||||
HH (b, c, d, a, x[ 2], S34, 0xc4ac5665); /* 48 */
|
||||
|
||||
/* Round 4 */
|
||||
II (a, b, c, d, x[ 0], S41, 0xf4292244); /* 49 */
|
||||
II (d, a, b, c, x[ 7], S42, 0x432aff97); /* 50 */
|
||||
II (c, d, a, b, x[14], S43, 0xab9423a7); /* 51 */
|
||||
II (b, c, d, a, x[ 5], S44, 0xfc93a039); /* 52 */
|
||||
II (a, b, c, d, x[12], S41, 0x655b59c3); /* 53 */
|
||||
II (d, a, b, c, x[ 3], S42, 0x8f0ccc92); /* 54 */
|
||||
II (c, d, a, b, x[10], S43, 0xffeff47d); /* 55 */
|
||||
II (b, c, d, a, x[ 1], S44, 0x85845dd1); /* 56 */
|
||||
II (a, b, c, d, x[ 8], S41, 0x6fa87e4f); /* 57 */
|
||||
II (d, a, b, c, x[15], S42, 0xfe2ce6e0); /* 58 */
|
||||
II (c, d, a, b, x[ 6], S43, 0xa3014314); /* 59 */
|
||||
II (b, c, d, a, x[13], S44, 0x4e0811a1); /* 60 */
|
||||
II (a, b, c, d, x[ 4], S41, 0xf7537e82); /* 61 */
|
||||
II (d, a, b, c, x[11], S42, 0xbd3af235); /* 62 */
|
||||
II (c, d, a, b, x[ 2], S43, 0x2ad7d2bb); /* 63 */
|
||||
II (b, c, d, a, x[ 9], S44, 0xeb86d391); /* 64 */
|
||||
|
||||
state[0] += a;
|
||||
state[1] += b;
|
||||
state[2] += c;
|
||||
state[3] += d;
|
||||
|
||||
/* Zeroize sensitive information.
|
||||
*/
|
||||
MD5_memset ((POINTER)x, 0, sizeof (x));
|
||||
}
|
||||
|
||||
/* Encodes input (UINT4) into output (unsigned char). Assumes len is
|
||||
a multiple of 4.
|
||||
*/
|
||||
static void Encode (
|
||||
unsigned char *output,
|
||||
UINT4 *input,
|
||||
unsigned int len)
|
||||
{
|
||||
unsigned int i, j;
|
||||
|
||||
for (i = 0, j = 0; j < len; i++, j += 4) {
|
||||
output[j] = (unsigned char)(input[i] & 0xff);
|
||||
output[j+1] = (unsigned char)((input[i] >> 8) & 0xff);
|
||||
output[j+2] = (unsigned char)((input[i] >> 16) & 0xff);
|
||||
output[j+3] = (unsigned char)((input[i] >> 24) & 0xff);
|
||||
}
|
||||
}
|
||||
|
||||
/* Decodes input (unsigned char) into output (UINT4). Assumes len is
|
||||
a multiple of 4.
|
||||
*/
|
||||
static void Decode (
|
||||
UINT4 *output,
|
||||
const unsigned char *input,
|
||||
unsigned int len)
|
||||
{
|
||||
unsigned int i, j;
|
||||
|
||||
for (i = 0, j = 0; j < len; i++, j += 4)
|
||||
output[i] = ((UINT4)input[j]) | (((UINT4)input[j+1]) << 8) |
|
||||
(((UINT4)input[j+2]) << 16) | (((UINT4)input[j+3]) << 24);
|
||||
}
|
||||
|
||||
#if 0
|
||||
/* Note: Replace "for loop" with standard memcpy if possible.
|
||||
*/
|
||||
|
||||
static void MD5_memcpy (
|
||||
POINTER output,
|
||||
POINTER input,
|
||||
unsigned int len)
|
||||
{
|
||||
unsigned int i;
|
||||
|
||||
for (i = 0; i < len; i++)
|
||||
output[i] = input[i];
|
||||
}
|
||||
|
||||
/* Note: Replace "for loop" with standard memset if possible.
|
||||
*/
|
||||
static void MD5_memset (
|
||||
POINTER output,
|
||||
int value,
|
||||
unsigned int len)
|
||||
{
|
||||
unsigned int i;
|
||||
|
||||
for (i = 0; i < len; i++)
|
||||
((char *)output)[i] = (char)value;
|
||||
}
|
||||
#endif
|
||||
@ -1,107 +0,0 @@
|
||||
/*
|
||||
* Copyright (C) 2003 Tommi Maekitalo
|
||||
*
|
||||
* This library is free software; you can redistribute it and/or
|
||||
* modify it under the terms of the GNU Lesser General Public
|
||||
* License as published by the Free Software Foundation; either
|
||||
* version 2.1 of the License, or (at your option) any later version.
|
||||
*
|
||||
* As a special exception, you may use this file as part of a free
|
||||
* software library without restriction. Specifically, if other files
|
||||
* instantiate templates or use macros or inline functions from this
|
||||
* file, or you compile this file and link it with other files to
|
||||
* produce an executable, this file does not by itself cause the
|
||||
* resulting executable to be covered by the GNU General Public
|
||||
* License. This exception does not however invalidate any other
|
||||
* reasons why the executable file might be covered by the GNU Library
|
||||
* General Public License.
|
||||
*
|
||||
* This library is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
||||
* Lesser General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU Lesser General Public
|
||||
* License along with this library; if not, write to the Free Software
|
||||
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
|
||||
*/
|
||||
|
||||
/* Copyright (C) 1991-2, RSA Data Security, Inc. Created 1991. All
|
||||
rights reserved.
|
||||
|
||||
License to copy and use this software is granted provided that it
|
||||
is identified as the "RSA Data Security, Inc. MD5 Message-Digest
|
||||
Algorithm" in all material mentioning or referencing this software
|
||||
or this function.
|
||||
|
||||
License is also granted to make and use derivative works provided
|
||||
that such works are identified as "derived from the RSA Data
|
||||
Security, Inc. MD5 Message-Digest Algorithm" in all material
|
||||
mentioning or referencing the derived work.
|
||||
|
||||
RSA Data Security, Inc. makes no representations concerning either
|
||||
the merchantability of this software or the suitability of this
|
||||
software for any particular purpose. It is provided "as is"
|
||||
without express or implied warranty of any kind.
|
||||
|
||||
These notices must be retained in any copies of any part of this
|
||||
documentation and/or software.
|
||||
*/
|
||||
|
||||
/* RSAREF types and constants
|
||||
*/
|
||||
|
||||
/* PROTOTYPES should be set to one if and only if the compiler supports
|
||||
function argument prototyping.
|
||||
The following makes PROTOTYPES default to 1 if it has not already
|
||||
been defined with C compiler flags.
|
||||
*/
|
||||
|
||||
#ifndef ZIM_MD5_H
|
||||
#define ZIM_MD5_H
|
||||
|
||||
#ifndef PROTOTYPES
|
||||
#define PROTOTYPES 1
|
||||
#endif
|
||||
|
||||
/* POINTER defines a generic pointer type */
|
||||
typedef unsigned char *POINTER;
|
||||
|
||||
/* UINT2 defines a two byte word */
|
||||
typedef unsigned short int UINT2;
|
||||
|
||||
/* UINT4 defines a four byte word */
|
||||
typedef unsigned int UINT4;
|
||||
|
||||
/* PROTO_LIST is defined depending on how PROTOTYPES is defined above.
|
||||
If using PROTOTYPES, then PROTO_LIST returns the list, otherwise it
|
||||
returns an empty list.
|
||||
*/
|
||||
|
||||
#if PROTOTYPES
|
||||
#define PROTO_LIST(list) list
|
||||
#else
|
||||
#define PROTO_LIST(list) ()
|
||||
#endif
|
||||
|
||||
/* MD5 context. */
|
||||
typedef struct {
|
||||
UINT4 state[4]; /* state (ABCD) */
|
||||
UINT4 count[2]; /* number of bits, modulo 2^64 (lsb first) */
|
||||
unsigned char buffer[64]; /* input buffer */
|
||||
} zim_MD5_CTX;
|
||||
|
||||
#ifdef __cplusplus
|
||||
extern "C" {
|
||||
#endif
|
||||
|
||||
void zim_MD5Init PROTO_LIST ((zim_MD5_CTX *));
|
||||
void zim_MD5Update PROTO_LIST
|
||||
((zim_MD5_CTX *, const unsigned char *, unsigned int));
|
||||
void zim_MD5Final PROTO_LIST ((unsigned char [16], zim_MD5_CTX *));
|
||||
|
||||
#ifdef __cplusplus
|
||||
}
|
||||
#endif
|
||||
|
||||
#endif /* ZIM_MD5_H */
|
||||
@ -1,134 +0,0 @@
|
||||
/*
|
||||
* Copyright (C) 2003 Tommi Maekitalo
|
||||
*
|
||||
* This library is free software; you can redistribute it and/or
|
||||
* modify it under the terms of the GNU Lesser General Public
|
||||
* License as published by the Free Software Foundation; either
|
||||
* version 2.1 of the License, or (at your option) any later version.
|
||||
*
|
||||
* As a special exception, you may use this file as part of a free
|
||||
* software library without restriction. Specifically, if other files
|
||||
* instantiate templates or use macros or inline functions from this
|
||||
* file, or you compile this file and link it with other files to
|
||||
* produce an executable, this file does not by itself cause the
|
||||
* resulting executable to be covered by the GNU General Public
|
||||
* License. This exception does not however invalidate any other
|
||||
* reasons why the executable file might be covered by the GNU Library
|
||||
* General Public License.
|
||||
*
|
||||
* This library is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
||||
* Lesser General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU Lesser General Public
|
||||
* License along with this library; if not, write to the Free Software
|
||||
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
|
||||
*
|
||||
*
|
||||
* copied from cxxtools
|
||||
*/
|
||||
|
||||
#include "md5stream.h"
|
||||
#include <cstring>
|
||||
|
||||
namespace zim
|
||||
{
|
||||
|
||||
////////////////////////////////////////////////////////////////////////
|
||||
// Md5streambuf
|
||||
//
|
||||
Md5streambuf::Md5streambuf()
{
  // Start with an all-zero digest; it is what getDigest() hands out until a
  // calculation has been finalized.
  std::memset(digest, 0, sizeof(digest));
}
|
||||
|
||||
std::streambuf::int_type Md5streambuf::overflow(
|
||||
std::streambuf::int_type ch)
|
||||
{
|
||||
if (pptr() == 0)
|
||||
{
|
||||
// Ausgabepuffer ist leer - initialisieren
|
||||
zim_MD5Init(&context);
|
||||
}
|
||||
else
|
||||
{
|
||||
// konsumiere Zeichen aus dem Puffer
|
||||
zim_MD5Update(&context,
|
||||
(const unsigned char*)pbase(),
|
||||
pptr() - pbase());
|
||||
}
|
||||
|
||||
// setze Ausgabepuffer
|
||||
setp(buffer, buffer + bufsize);
|
||||
|
||||
if (ch != traits_type::eof())
|
||||
{
|
||||
// das Zeichen, welches den overflow ausgelöst hat, stecken
|
||||
// wir in den Puffer.
|
||||
*pptr() = traits_type::to_char_type(ch);
|
||||
pbump(1);
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
std::streambuf::int_type Md5streambuf::underflow()
|
||||
{
|
||||
// nur Ausgabestrom
|
||||
return traits_type::eof();
|
||||
}
|
||||
|
||||
int Md5streambuf::sync()
|
||||
{
|
||||
if (pptr() != pbase())
|
||||
{
|
||||
// konsumiere Zeichen aus dem Puffer
|
||||
zim_MD5Update(&context, (const unsigned char*)pbase(), pptr() - pbase());
|
||||
|
||||
// leere Ausgabepuffer
|
||||
setp(buffer, buffer + bufsize);
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
void Md5streambuf::getDigest(unsigned char digest_[16])
|
||||
{
|
||||
if (pptr())
|
||||
{
|
||||
if (pptr() != pbase())
|
||||
{
|
||||
// konsumiere Zeichen aus dem Puffer
|
||||
zim_MD5Update(&context, (const unsigned char*)pbase(), pptr() - pbase());
|
||||
}
|
||||
|
||||
// deinitialisiere Ausgabepuffer
|
||||
setp(0, 0);
|
||||
|
||||
zim_MD5Final(digest, &context);
|
||||
}
|
||||
|
||||
std::memcpy(digest_, digest, 16);
|
||||
}
|
||||
|
||||
////////////////////////////////////////////////////////////////////////
|
||||
// Md5stream
|
||||
//
|
||||
const char* Md5stream::getHexDigest()
|
||||
{
|
||||
static const char hex[] = "0123456789abcdef";
|
||||
unsigned char md5[16];
|
||||
getDigest(md5);
|
||||
int i;
|
||||
char* p = hexdigest;
|
||||
for (i = 0; i < 16; ++i)
|
||||
{
|
||||
*p++ = hex[md5[i] >> 4];
|
||||
*p++ = hex[md5[i] & 0xf];
|
||||
}
|
||||
*p = '\0';
|
||||
return hexdigest;
|
||||
}
|
||||
|
||||
}
|
||||
@ -1,134 +0,0 @@
|
||||
/*
|
||||
* Copyright (C) 2003 Tommi Maekitalo
|
||||
*
|
||||
* This library is free software; you can redistribute it and/or
|
||||
* modify it under the terms of the GNU Lesser General Public
|
||||
* License as published by the Free Software Foundation; either
|
||||
* version 2.1 of the License, or (at your option) any later version.
|
||||
*
|
||||
* As a special exception, you may use this file as part of a free
|
||||
* software library without restriction. Specifically, if other files
|
||||
* instantiate templates or use macros or inline functions from this
|
||||
* file, or you compile this file and link it with other files to
|
||||
* produce an executable, this file does not by itself cause the
|
||||
* resulting executable to be covered by the GNU General Public
|
||||
* License. This exception does not however invalidate any other
|
||||
* reasons why the executable file might be covered by the GNU Library
|
||||
* General Public License.
|
||||
*
|
||||
* This library is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
||||
* Lesser General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU Lesser General Public
|
||||
* License along with this library; if not, write to the Free Software
|
||||
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
|
||||
*
|
||||
*
|
||||
* copied from cxxtools
|
||||
*/
|
||||
|
||||
#ifndef ZIM_MD5STREAM_H
|
||||
#define ZIM_MD5STREAM_H
|
||||
|
||||
#include "md5.h"
|
||||
#include <iostream>
|
||||
#include <iterator>
|
||||
#include <algorithm>
|
||||
|
||||
namespace zim
|
||||
{
|
||||
|
||||
class Md5streambuf : public std::streambuf
|
||||
{
|
||||
public:
|
||||
Md5streambuf();
|
||||
|
||||
void getDigest(unsigned char digest[16]);
|
||||
|
||||
private:
|
||||
static const unsigned int bufsize = 64;
|
||||
char buffer[bufsize];
|
||||
zim_MD5_CTX context;
|
||||
unsigned char digest[16];
|
||||
|
||||
std::streambuf::int_type overflow(std::streambuf::int_type ch);
|
||||
std::streambuf::int_type underflow();
|
||||
int sync();
|
||||
};
|
||||
|
||||
/**
|
||||
This is a easy and safe interface to MD5-calculation.
|
||||
|
||||
To get a MD5-sum of data, instantiate a md5stream, copy your data
|
||||
into it and read the digest.
|
||||
|
||||
After calling getDigest or getHexDigest, the class can be reused
|
||||
for another md5-calculation. The algorithm is automatically
|
||||
reinitialized when the first character is received.
|
||||
|
||||
example:
|
||||
\code
|
||||
int main(int argc, char* argv[])
|
||||
{
|
||||
Md5stream s;
|
||||
for (int i = 1; i < argc; ++i)
|
||||
{
|
||||
std::ifstream in(argv[i]);
|
||||
if (in)
|
||||
{
|
||||
s << in.rdbuf();
|
||||
std::cout << s.getHexDigest() << " " << argv[i] << std::endl;
|
||||
}
|
||||
}
|
||||
}
|
||||
\endcode
|
||||
*/
|
||||
class Md5stream : public std::ostream
|
||||
{
|
||||
public:
|
||||
typedef std::ostreambuf_iterator<char> iterator;
|
||||
|
||||
private:
|
||||
Md5streambuf streambuf;
|
||||
char hexdigest[33];
|
||||
|
||||
public:
|
||||
/// initializes md5-calculation
|
||||
Md5stream()
|
||||
: std::ostream(0)
|
||||
{
|
||||
init(&streambuf);
|
||||
}
|
||||
|
||||
/// ends md5-calculation and returns 16 bytes digest
|
||||
void getDigest(unsigned char digest[16])
|
||||
{ streambuf.getDigest(digest); }
|
||||
/// ends md5-calculation and digest as 32 bytes hex
|
||||
const char* getHexDigest();
|
||||
|
||||
/// returns output-iterator to Md5stream
|
||||
iterator begin()
|
||||
{ return iterator(&streambuf); }
|
||||
};
|
||||
|
||||
template <typename iterator_type>
|
||||
std::string md5(iterator_type from, iterator_type to)
|
||||
{
|
||||
Md5stream s;
|
||||
std::copy(from, to, std::ostream_iterator<char>(s));
|
||||
return s.getHexDigest();
|
||||
}
|
||||
|
||||
template <typename data_type>
|
||||
std::string md5(const data_type& data)
|
||||
{
|
||||
Md5stream s;
|
||||
s << data;
|
||||
return s.getHexDigest();
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
#endif // ZIM_MD5STREAM_H
|
||||
@ -1,39 +0,0 @@
|
||||
/*
|
||||
* Copyright (C) 2009 Tommi Maekitalo
|
||||
*
|
||||
* This program is free software; you can redistribute it and/or
|
||||
* modify it under the terms of the GNU General Public License as
|
||||
* published by the Free Software Foundation; either version 2 of the
|
||||
* License, or (at your option) any later version.
|
||||
*
|
||||
* This program is distributed in the hope that it will be useful, but
|
||||
* is provided AS IS, WITHOUT ANY WARRANTY; without even the implied
|
||||
* warranty of MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE, and
|
||||
* NON-INFRINGEMENT. See the GNU General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU General Public License
|
||||
* along with this program; if not, write to the Free Software
|
||||
* Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
|
||||
*
|
||||
*/
|
||||
|
||||
#include "ptrstream.h"
|
||||
|
||||
namespace zim
|
||||
{
|
||||
std::streambuf::int_type ptrstreambuf::overflow(int_type /*c*/)
{
  // the put area is never replaced or extended: report failure
  const int_type failure = traits_type::eof();
  return failure;
}
|
||||
|
||||
std::streambuf::int_type ptrstreambuf::underflow()
|
||||
{
|
||||
return traits_type::eof();
|
||||
}
|
||||
|
||||
int ptrstreambuf::sync()
{
  // nothing to flush; always report success
  const int success = 0;
  return success;
}
|
||||
|
||||
}
|
||||
@ -1,56 +0,0 @@
|
||||
/*
|
||||
* Copyright (C) 2009 Tommi Maekitalo
|
||||
*
|
||||
* This program is free software; you can redistribute it and/or
|
||||
* modify it under the terms of the GNU General Public License as
|
||||
* published by the Free Software Foundation; either version 2 of the
|
||||
* License, or (at your option) any later version.
|
||||
*
|
||||
* This program is distributed in the hope that it will be useful, but
|
||||
* is provided AS IS, WITHOUT ANY WARRANTY; without even the implied
|
||||
* warranty of MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE, and
|
||||
* NON-INFRINGEMENT. See the GNU General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU General Public License
|
||||
* along with this program; if not, write to the Free Software
|
||||
* Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
|
||||
*
|
||||
*/
|
||||
|
||||
#ifndef ZIM_PTRSTREAM_H
|
||||
#define ZIM_PTRSTREAM_H
|
||||
|
||||
#include <iostream>
|
||||
|
||||
namespace zim
|
||||
{
|
||||
/// Stream buffer whose get and put areas are both the caller supplied
/// character range [start, end).
class ptrstreambuf : public std::streambuf
{
  public:
    ptrstreambuf(char* start, char* end)
    {
      // reading and writing both begin at start and are bounded by end
      setg(start, start, end);
      setp(start, end);
    }

    /// see std::streambuf
    int_type overflow(int_type c);

    /// see std::streambuf
    int_type underflow();

    /// see std::streambuf
    int sync();
};
|
||||
|
||||
class ptrstream : public std::iostream
|
||||
{
|
||||
ptrstreambuf streambuf;
|
||||
|
||||
public:
|
||||
ptrstream(char* start, char* end)
|
||||
: std::iostream(0),
|
||||
streambuf(start, end)
|
||||
{ init(&streambuf); }
|
||||
};
|
||||
}
|
||||
|
||||
#endif
|
||||
@ -1,260 +0,0 @@
|
||||
/*
|
||||
* Copyright (C) 2007 Tommi Maekitalo
|
||||
*
|
||||
* This program is free software; you can redistribute it and/or
|
||||
* modify it under the terms of the GNU General Public License as
|
||||
* published by the Free Software Foundation; either version 2 of the
|
||||
* License, or (at your option) any later version.
|
||||
*
|
||||
* This program is distributed in the hope that it will be useful, but
|
||||
* is provided AS IS, WITHOUT ANY WARRANTY; without even the implied
|
||||
* warranty of MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE, and
|
||||
* NON-INFRINGEMENT. See the GNU General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU General Public License
|
||||
* along with this program; if not, write to the Free Software
|
||||
* Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
|
||||
*
|
||||
*/
|
||||
|
||||
#include <zim/search.h>
|
||||
#include <zim/fileiterator.h>
|
||||
#include <zim/indexarticle.h>
|
||||
#include <sstream>
|
||||
#include "log.h"
|
||||
#include <map>
|
||||
#include <math.h>
|
||||
#include <cctype>
|
||||
#include <stdexcept>
|
||||
|
||||
log_define("zim.search")
|
||||
|
||||
namespace zim
|
||||
{
|
||||
namespace
|
||||
{
|
||||
class PriorityGt : public std::binary_function<bool, SearchResult, SearchResult>
|
||||
{
|
||||
public:
|
||||
bool operator() (const SearchResult& s1, const SearchResult& s2) const
|
||||
{
|
||||
return s1.getPriority() > s2.getPriority()
|
||||
|| (s1.getPriority() == s2.getPriority()
|
||||
&& s1.getArticle().getTitle() > s2.getArticle().getTitle());
|
||||
}
|
||||
};
|
||||
}
|
||||
|
||||
double SearchResult::getPriority() const
|
||||
{
|
||||
if (!wordList.empty() && priority == 0.0)
|
||||
{
|
||||
log_debug("weightOcc=" << Search::getWeightOcc()
|
||||
<< " weightPlus=" << Search::getWeightPlus()
|
||||
<< " weightOccOff=" << Search::getWeightOccOff()
|
||||
<< " weightDist=" << Search::getWeightDist()
|
||||
<< " weightPos=" << Search::getWeightPos()
|
||||
<< " weightDistinctWords=" << Search::getWeightDistinctWords());
|
||||
|
||||
priority = 1.0;
|
||||
|
||||
log_debug("getPriority, " << wordList.size() << " words; idx=" << article.getIndex());
|
||||
|
||||
// weight occurencies of words in article and title
|
||||
for (WordListType::const_iterator itw = wordList.begin(); itw != wordList.end(); ++itw)
|
||||
{
|
||||
priority *= 1.0 + log(itw->second.count * Search::getWeightOcc()
|
||||
+ Search::getWeightPlus() * itw->second.addweight)
|
||||
+ Search::getWeightOccOff()
|
||||
+ Search::getWeightPlus() * itw->second.addweight;
|
||||
|
||||
std::string title = article.getTitle();
|
||||
for (std::string::iterator it = title.begin(); it != title.end(); ++it)
|
||||
*it = std::tolower(*it);
|
||||
|
||||
//std::string::size_type p = title.find(itw->first);
|
||||
//if (p != std::string::npos)
|
||||
//priority *= Search::getWeightTitle() / (p + 1) / title.size();
|
||||
}
|
||||
|
||||
log_debug("priority1: " << priority);
|
||||
|
||||
// weight distinct words
|
||||
priority += Search::getWeightDistinctWords() * wordList.size();
|
||||
|
||||
log_debug("priority2: " << priority);
|
||||
|
||||
// weight distance between different words
|
||||
PosListType::const_iterator itp = posList.begin();
|
||||
std::string word = itp->second;
|
||||
size_type pos = itp->first + word.size();
|
||||
for (++itp; itp != posList.end(); ++itp)
|
||||
{
|
||||
if (word != itp->second)
|
||||
{
|
||||
size_type dist = itp->first > pos ? (itp->first - pos)
|
||||
: itp->first < pos ? (pos - itp->first)
|
||||
: 1;
|
||||
priority += Search::getWeightDist() / dist;
|
||||
}
|
||||
word = itp->second;
|
||||
pos = itp->first + word.size();
|
||||
}
|
||||
|
||||
log_debug("priority3: " << priority);
|
||||
|
||||
// weight position of words in the document
|
||||
if (Search::getWeightPos())
|
||||
for (itp = posList.begin(); itp != posList.end(); ++itp)
|
||||
priority += Search::getWeightPos() / pow(1.01, static_cast<double>(itp->first));
|
||||
|
||||
if (Search::getWeightPosRel())
|
||||
for (itp = posList.begin(); itp != posList.end(); ++itp)
|
||||
priority += Search::getWeightPosRel() * itp->first / article.getData().size();
|
||||
|
||||
log_debug("priority of article " << article.getIndex() << " \"" << article.getTitle() << "\", " << wordList.size() << " words: " << priority);
|
||||
}
|
||||
|
||||
return priority;
|
||||
}
|
||||
|
||||
void SearchResult::foundWord(const std::string& word, size_type pos, unsigned addweight)
|
||||
{
|
||||
++wordList[word].count;
|
||||
wordList[word].addweight += addweight;
|
||||
posList[pos] = word;
|
||||
}
|
||||
|
||||
double Search::weightOcc = 10.0;
|
||||
double Search::weightOccOff = 1.0;
|
||||
double Search::weightPlus = 10.0;
|
||||
double Search::weightDist = 10;
|
||||
double Search::weightPos = 10;
|
||||
double Search::weightPosRel = 0;
|
||||
double Search::weightDistinctWords = 50;
|
||||
unsigned Search::searchLimit = 10000;
|
||||
|
||||
// Looks up every whitespace-separated token of `expr` and appends the
// matching articles to `results`, then sorts `results` with PriorityGt.
//
// A token may start with one or more '+' characters; each '+' adds one to the
// extra weight handed to SearchResult::foundWord.  Tokens are lower-cased
// before the lookup.  If the index file has no entries for a token, the
// articles found by find() in namespace 'A' for that token are used instead.
//
// Only articles with more than one recorded position are copied; if that
// leaves `results` empty, all collected articles are copied.
void Search::search(Results& results, const std::string& expr)
{
  log_trace("search articles with expression \"" << expr << '"');

  std::istringstream ssearch(expr);
  std::string token;

  // map from article index to article + relevance information
  typedef std::map<size_type, SearchResult> IndexType;
  IndexType index;

  while (ssearch >> token)
  {
    // strip leading '+' characters and count them as additional weight
    unsigned addweight = 0;
    while (token.size() > 0 && token.at(0) == '+')
    {
      ++addweight;
      token.erase(0, 1);
    }

    if (token.empty())
    {
      log_warn("empty token");
      continue;
    }

    // std::tolower must not be called with a negative value, which a plain
    // char can hold for non-ASCII bytes - go through unsigned char
    for (std::string::iterator it = token.begin(); it != token.end(); ++it)
      *it = static_cast<char>(std::tolower(static_cast<unsigned char>(*it)));

    log_debug("search for token \"" << token << '"');

    IndexArticle indexarticle = indexfile.getArticleByTitle('X', token);

    if (indexarticle.getTotalCount() > 0)
    {
      for (unsigned cat = 0; cat < 4; ++cat)
      {
        const IndexArticle::EntriesType ent = indexarticle.getCategory(cat);
        for (IndexArticle::EntriesType::const_iterator it = ent.begin(); it != ent.end(); ++it)
        {
          size_type articleIdx = it->index;
          size_type position = it->pos;

          // insert() keeps an already existing entry for this article
          IndexType::iterator itIt = index.insert(
            IndexType::value_type(articleIdx,
              SearchResult(articlefile.getArticle(articleIdx)))).first;

          // hits in a lower category number get a higher weight
          itIt->second.foundWord(token, position, addweight + 3 - cat);
        }
      }
    }
    else
    {
      log_debug("no entries found - try searching for titles");

      // separate list for the title lookup; the caller's `results` is only
      // filled after all tokens have been processed
      Results titleResults;
      find(titleResults, 'A', token);

      for (Results::const_iterator it = titleResults.begin(); it != titleResults.end(); ++it)
      {
        size_type articleIdx = it->getArticle().getIndex();

        IndexType::iterator itIt = index.insert(
          IndexType::value_type(articleIdx,
            SearchResult(it->getArticle()))).first;

        // NOTE(review): this is unsigned arithmetic, so the weight wraps
        // around to a very large value when the title is longer than
        // addweight + 3 characters - confirm that this is intended.
        itIt->second.foundWord(token, 0, addweight + 3 - it->getArticle().getTitle().size());
      }
    }
  }

  log_debug("copy/filter " << index.size() << " articles");
  results.setExpression(expr);

  // prefer articles with more than one recorded position
  for (IndexType::const_iterator it = index.begin(); it != index.end(); ++it)
  {
    if (it->second.getCountPositions() > 1)
      results.push_back(it->second);
  }

  // nothing passed the filter - fall back to everything that was found
  if (results.empty())
  {
    for (IndexType::const_iterator it = index.begin(); it != index.end(); ++it)
      results.push_back(it->second);
  }

  log_debug("sort " << results.size() << " articles");
  std::sort(results.begin(), results.end(), PriorityGt());
}
|
||||
|
||||
// Appends to `results` the articles of namespace `ns`, starting at the
// position returned by findByTitle(ns, praefix).  Iteration stops at the end
// of the file, when `results` holds `limit` entries, or at the first article
// that is in another namespace or whose leading praefix.size() title
// characters compare greater than `praefix`.
void Search::find(Results& results, char ns, const std::string& praefix, unsigned limit)
{
  log_debug("find results in namespace " << ns << " for praefix \"" << praefix << '"');

  File::const_iterator pos = articlefile.findByTitle(ns, praefix);
  while (pos != articlefile.end() && results.size() < limit)
  {
    const bool sameNamespace = (ns == pos->getNamespace());
    if (!sameNamespace || pos->getTitle().compare(0, praefix.size(), praefix) > 0)
    {
      log_debug("article " << pos->getNamespace() << ", \"" << pos->getTitle() << "\" does not match " << ns << ", \"" << praefix << '"');
      break;
    }

    results.push_back(SearchResult(*pos));
    ++pos;
  }

  log_debug(results.size() << " articles in result");
}
|
||||
|
||||
// Appends to `results` the articles of namespace `ns`, starting at the
// position returned by findByTitle(ns, begin).  Iteration stops at the end of
// the file, when `results` holds `limit` entries, or at the first article that
// is in another namespace or whose title compares greater than `end`.
void Search::find(Results& results, char ns, const std::string& begin,
                  const std::string& end, unsigned limit)
{
  log_debug("find results in namespace " << ns << " for praefix \"" << begin << '"');

  File::const_iterator pos = articlefile.findByTitle(ns, begin);
  while (pos != articlefile.end() && results.size() < limit)
  {
    log_debug("check " << pos->getNamespace() << '/' << pos->getTitle());

    const bool sameNamespace = (pos->getNamespace() == ns);
    if (!sameNamespace || pos->getTitle().compare(end) > 0)
    {
      log_debug("article " << pos->getNamespace() << ", \"" << pos->getTitle() << "\" does not match");
      break;
    }

    results.push_back(SearchResult(*pos));
    ++pos;
  }

  log_debug(results.size() << " articles in result");
}
|
||||
|
||||
}
|
||||
@ -1,142 +0,0 @@
|
||||
/*
|
||||
* Copyright (C) 2009 Tommi Maekitalo
|
||||
*
|
||||
* This program is free software; you can redistribute it and/or
|
||||
* modify it under the terms of the GNU General Public License as
|
||||
* published by the Free Software Foundation; either version 2 of the
|
||||
* License, or (at your option) any later version.
|
||||
*
|
||||
* This program is distributed in the hope that it will be useful, but
|
||||
* is provided AS IS, WITHOUT ANY WARRANTY; without even the implied
|
||||
* warranty of MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE, and
|
||||
* NON-INFRINGEMENT. See the GNU General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU General Public License
|
||||
* along with this program; if not, write to the Free Software
|
||||
* Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
|
||||
*
|
||||
*/
|
||||
|
||||
#include <zim/template.h>
|
||||
|
||||
namespace zim
|
||||
{
|
||||
void TemplateParser::state_data(char ch)
|
||||
{
|
||||
data += ch;
|
||||
|
||||
if (ch == '<')
|
||||
{
|
||||
state = &TemplateParser::state_lt;
|
||||
save = data.size() - 1;
|
||||
}
|
||||
}
|
||||
|
||||
void TemplateParser::state_lt(char ch)
|
||||
{
|
||||
data += ch;
|
||||
|
||||
if (ch == '%')
|
||||
state = &TemplateParser::state_token0;
|
||||
else
|
||||
state = &TemplateParser::state_data;
|
||||
}
|
||||
|
||||
void TemplateParser::state_token0(char ch)
|
||||
{
|
||||
data += ch;
|
||||
|
||||
if (ch == '/')
|
||||
state = &TemplateParser::state_link0;
|
||||
else
|
||||
{
|
||||
token = data.size() - 1;
|
||||
state = &TemplateParser::state_token;
|
||||
}
|
||||
}
|
||||
|
||||
void TemplateParser::state_token(char ch)
|
||||
{
|
||||
data += ch;
|
||||
|
||||
if (ch == '%')
|
||||
state = &TemplateParser::state_token_end;
|
||||
}
|
||||
|
||||
void TemplateParser::state_token_end(char ch)
|
||||
{
|
||||
if (ch == '>')
|
||||
{
|
||||
if (event)
|
||||
{
|
||||
event->onData(data.substr(0, save));
|
||||
event->onToken(data.substr(token, data.size() - token - 1));
|
||||
data.clear();
|
||||
}
|
||||
|
||||
state = &TemplateParser::state_data;
|
||||
}
|
||||
else
|
||||
{
|
||||
data += ch;
|
||||
state = &TemplateParser::state_data;
|
||||
}
|
||||
}
|
||||
|
||||
void TemplateParser::state_link0(char ch)
|
||||
{
|
||||
data += ch;
|
||||
|
||||
ns = ch;
|
||||
state = &TemplateParser::state_link;
|
||||
}
|
||||
|
||||
void TemplateParser::state_link(char ch)
|
||||
{
|
||||
data += ch;
|
||||
|
||||
if (ch == '/')
|
||||
{
|
||||
token = data.size();
|
||||
state = &TemplateParser::state_title;
|
||||
}
|
||||
else
|
||||
state = &TemplateParser::state_data;
|
||||
}
|
||||
|
||||
void TemplateParser::state_title(char ch)
|
||||
{
|
||||
data += ch;
|
||||
|
||||
if (ch == '%')
|
||||
{
|
||||
token_e = data.size() - 1;
|
||||
state = &TemplateParser::state_title_end;
|
||||
}
|
||||
}
|
||||
|
||||
void TemplateParser::state_title_end(char ch)
|
||||
{
|
||||
data += ch;
|
||||
|
||||
if (ch == '>')
|
||||
{
|
||||
if (event)
|
||||
{
|
||||
event->onData(data.substr(0, save));
|
||||
event->onLink(ns, data.substr(token, token_e - token));
|
||||
}
|
||||
|
||||
data.clear();
|
||||
state = &TemplateParser::state_data;
|
||||
}
|
||||
}
|
||||
|
||||
void TemplateParser::flush()
|
||||
{
|
||||
if (event)
|
||||
event->onData(data);
|
||||
data.clear();
|
||||
state = &TemplateParser::state_data;
|
||||
}
|
||||
}
|
||||
File diff suppressed because it is too large
Load Diff
@ -1,164 +0,0 @@
|
||||
/*
|
||||
* Copyright (C) 2009 Tommi Maekitalo
|
||||
*
|
||||
* This library is free software; you can redistribute it and/or
|
||||
* modify it under the terms of the GNU Lesser General Public
|
||||
* License as published by the Free Software Foundation; either
|
||||
* version 2.1 of the License, or (at your option) any later version.
|
||||
*
|
||||
* This library is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
||||
* Lesser General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU Lesser General Public
|
||||
* License along with this library; if not, write to the Free Software
|
||||
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
|
||||
*
|
||||
*/
|
||||
|
||||
|
||||
#include <zim/unlzmastream.h>
|
||||
#include <zim/zim.h>
|
||||
#include "log.h"
|
||||
#include "config.h"
|
||||
#include <sstream>
|
||||
#include <cstring>
|
||||
#include "envvalue.h"
|
||||
|
||||
log_define("zim.lzma.uncompress")
|
||||
|
||||
namespace zim
|
||||
{
|
||||
namespace
|
||||
{
|
||||
// Returns `ret` unchanged when it is LZMA_OK or LZMA_STREAM_END.  Any other
// code is turned into a message (numeric value plus symbolic name where
// known), logged and thrown as UnlzmaError.
lzma_ret checkError(lzma_ret ret)
{
  if (ret == LZMA_OK || ret == LZMA_STREAM_END)
    return ret;

  std::ostringstream msg;
  msg << "inflate-error " << ret;
  switch (ret)
  {
    case LZMA_NO_CHECK: msg << ": LZMA_NO_CHECK"; break;
    case LZMA_UNSUPPORTED_CHECK: msg << ": LZMA_UNSUPPORTED_CHECK"; break;
    case LZMA_GET_CHECK: msg << ": LZMA_GET_CHECK"; break;
    case LZMA_MEM_ERROR: msg << ": LZMA_MEM_ERROR"; break;
    case LZMA_MEMLIMIT_ERROR: msg << ": LZMA_MEMLIMIT_ERROR"; break;
    case LZMA_FORMAT_ERROR: msg << ": LZMA_FORMAT_ERROR"; break;
    case LZMA_OPTIONS_ERROR: msg << ": LZMA_OPTIONS_ERROR"; break;
    case LZMA_DATA_ERROR: msg << ": LZMA_DATA_ERROR"; break;
    case LZMA_BUF_ERROR: msg << ": LZMA_BUF_ERROR"; break;
    case LZMA_PROG_ERROR: msg << ": LZMA_PROG_ERROR"; break;
    default: break;  // unnamed code: the numeric value is already in the message
  }

  // log the text, not the stream object
  log_error(msg.str());
  throw UnlzmaError(ret, msg.str());
}
|
||||
|
||||
}
|
||||
|
||||
// Allocates an I/O buffer of `bufsize_` characters and initialises the lzma
// stream decoder.  The decoder memory limit is read through envMemSize() from
// ZIM_LZMA_MEMORY_SIZE, with LZMA_MEMORY_SIZE MiB as fallback.
//
// Throws UnlzmaError (via checkError) when the decoder cannot be initialised.
UnlzmaStreamBuf::UnlzmaStreamBuf(std::streambuf* sinksource_, unsigned bufsize_)
  : iobuffer(new char_type[bufsize_]),
    bufsize(bufsize_),
    sinksource(sinksource_)
{
  std::memset(reinterpret_cast<void*>(&stream), 0, sizeof(stream));

  unsigned memsize = envMemSize("ZIM_LZMA_MEMORY_SIZE", LZMA_MEMORY_SIZE * 1024 * 1024);

  try
  {
    checkError(
      ::lzma_stream_decoder(&stream, memsize, 0));
  }
  catch (...)
  {
    // the destructor does not run for a partly constructed object, so the
    // buffer has to be released here
    delete[] iobuffer;
    throw;
  }
}
|
||||
|
||||
// Shuts down the lzma stream and frees the I/O buffer allocated by the
// constructor.
UnlzmaStreamBuf::~UnlzmaStreamBuf()
{
  ::lzma_end(&stream);

  delete[] iobuffer;
}
|
||||
|
||||
// Write side: feeds the content of the put area to the decoder and writes
// the decoded data to `sinksource`.  Returns eof when `sinksource` accepts
// fewer characters than were produced, otherwise 0.  Afterwards the put area
// is reset and `c` (unless eof) is stored as its first character.
UnlzmaStreamBuf::int_type UnlzmaStreamBuf::overflow(int_type c)
{
  if (pptr())
  {
    // the characters written so far are the decoder's input
    stream.next_in = reinterpret_cast<const uint8_t*>(obuffer());
    stream.avail_in = pptr() - pbase();

    for (;;)
    {
      // decode into ibuffer
      stream.next_out = reinterpret_cast<uint8_t*>(ibuffer());
      stream.avail_out = ibuffer_size();

      const lzma_ret ret = checkError(::lzma_code(&stream, LZMA_RUN));

      // hand the decoded characters to sinksource
      const std::streamsize produced = ibuffer_size() - stream.avail_out;
      const std::streamsize written = sinksource->sputn(reinterpret_cast<char*>(ibuffer()), produced);
      if (written < produced)
        return traits_type::eof();

      if (ret == LZMA_STREAM_END || stream.avail_in == 0)
        break;
    }
  }

  // start with an empty put area again
  setp(obuffer(), obuffer() + obuffer_size());
  if (c != traits_type::eof())
    sputc(traits_type::to_char_type(c));

  return 0;
}
|
||||
|
||||
// Read side: reads compressed data from `sinksource` into ibuffer and decodes
// it into obuffer until at least one decoded character is available.
// Returns eof when `sinksource` delivers no more data.
UnlzmaStreamBuf::int_type UnlzmaStreamBuf::underflow()
{
  stream.next_out = reinterpret_cast<uint8_t*>(obuffer());
  stream.avail_out = obuffer_size();

  for (;;)
  {
    // refill ibuffer once the decoder has consumed all of its input
    if (stream.avail_in == 0)
    {
      if (sinksource->in_avail() > 0)
      {
        // take what the source reports as available, at most one buffer
        stream.avail_in = sinksource->sgetn(ibuffer(), std::min(sinksource->in_avail(), ibuffer_size()));
      }
      else
      {
        // nothing reported as available - request a full buffer
        stream.avail_in = sinksource->sgetn(ibuffer(), ibuffer_size());
        if (stream.avail_in == 0)
          return traits_type::eof();
      }

      stream.next_in = (const uint8_t*)ibuffer();
    }

    // decode into obuffer
    checkError(::lzma_code(&stream, LZMA_RUN));

    // the get area covers everything decoded so far
    setg(obuffer(), obuffer(), obuffer() + obuffer_size() - stream.avail_out);

    if (gptr() != egptr())
      break;
  }

  return sgetc();
}
|
||||
|
||||
int UnlzmaStreamBuf::sync()
|
||||
{
|
||||
if (pptr() && overflow(traits_type::eof()) == traits_type::eof())
|
||||
return -1;
|
||||
return 0;
|
||||
}
|
||||
}
|
||||
@ -1,122 +0,0 @@
|
||||
/*
|
||||
* Copyright (C) 2009 Tommi Maekitalo
|
||||
*
|
||||
* This program is free software; you can redistribute it and/or
|
||||
* modify it under the terms of the GNU General Public License as
|
||||
* published by the Free Software Foundation; either version 2 of the
|
||||
* License, or (at your option) any later version.
|
||||
*
|
||||
* This program is distributed in the hope that it will be useful, but
|
||||
* is provided AS IS, WITHOUT ANY WARRANTY; without even the implied
|
||||
* warranty of MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE, and
|
||||
* NON-INFRINGEMENT. See the GNU General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU General Public License
|
||||
* along with this program; if not, write to the Free Software
|
||||
* Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
|
||||
*
|
||||
*/
|
||||
|
||||
#include <zim/uuid.h>
|
||||
#include <iostream>
|
||||
#include <time.h>
|
||||
#include <zim/zim.h> // necessary to have the new types
|
||||
#include "log.h"
|
||||
#ifdef WITH_CXXTOOLS
|
||||
#include <cxxtools/md5stream.h>
|
||||
#endif
|
||||
|
||||
#ifdef _WIN32
|
||||
|
||||
# include <time.h>
|
||||
|
||||
#define NOMINMAX
|
||||
# include <windows.h>
|
||||
#undef NOMINMAX
|
||||
|
||||
int gettimeofday(struct timeval* tp, void* tzp) {
|
||||
DWORD t;
|
||||
t = timeGetTime();
|
||||
tp->tv_sec = t / 1000;
|
||||
tp->tv_usec = t % 1000;
|
||||
return 0;
|
||||
}
|
||||
|
||||
#define getpid GetCurrentProcessId
|
||||
|
||||
#else
|
||||
# include <sys/time.h>
|
||||
# include <unistd.h>
|
||||
#endif
|
||||
|
||||
log_define("zim.uuid")
|
||||
|
||||
namespace zim
|
||||
{
|
||||
namespace
{
  // lookup table: value 0..15 -> lower-case hexadecimal digit
  char hex[] = "0123456789abcdef";

  // hexadecimal digit for the upper four bits of v
  inline char hi(char v)
  {
    const int nibble = (v >> 4) & 0xf;
    return hex[nibble];
  }

  // hexadecimal digit for the lower four bits of v
  inline char lo(char v)
  {
    const int nibble = v & 0xf;
    return hex[nibble];
  }
}
|
||||
|
||||
// Creates a new Uuid.
//
// With WITH_CXXTOOLS the 16 bytes are the MD5 digest of clock() and the
// current time of day.  Otherwise they are composed of four 32 bit values:
// bits of the address of the local Uuid object, the seconds and microseconds
// of the current time of day, and the process id.
Uuid Uuid::generate()
{
  Uuid ret;

  struct timeval tv;
  gettimeofday(&tv, 0);

#ifdef WITH_CXXTOOLS

  cxxtools::Md5stream m;

  clock_t c = clock();

  m << c << tv.tv_sec << tv.tv_usec;

  m.getDigest(reinterpret_cast<unsigned char*>(&ret.data[0]));

#else

  // reinterpret the object's address as a 32 bit number
  // NOTE(review): reads a union member other than the one written and stores
  // through int32_t pointers into ret.data - relies on the compiler/platform
  // tolerating this; confirm before porting.
  union {
    void* p;
    int32_t n;
  } u;
  u.p = &ret;

  *reinterpret_cast<int32_t*>(ret.data) = u.n;
  *reinterpret_cast<int32_t*>(ret.data + 4) = static_cast<int32_t>(tv.tv_sec);
  *reinterpret_cast<int32_t*>(ret.data + 8) = static_cast<int32_t>(tv.tv_usec);
  *reinterpret_cast<int32_t*>(ret.data + 12) = static_cast<int32_t>(getpid());

#endif

  // ret.data holds raw bytes without a terminating NUL, so it must not be
  // streamed as a C string - print the formatted Uuid instead
  log_debug("generated uuid: " << ret);

  return ret;
}
|
||||
|
||||
std::ostream& operator<< (std::ostream& out, const Uuid& uuid)
|
||||
{
|
||||
for (unsigned n = 0; n < 4; ++n)
|
||||
out << hi(uuid.data[n]) << lo(uuid.data[n]);
|
||||
out << '-';
|
||||
for (unsigned n = 4; n < 6; ++n)
|
||||
out << hi(uuid.data[n]) << lo(uuid.data[n]);
|
||||
out << '-';
|
||||
for (unsigned n = 6; n < 8; ++n)
|
||||
out << hi(uuid.data[n]) << lo(uuid.data[n]);
|
||||
out << '-';
|
||||
for (unsigned n = 6; n < 8; ++n)
|
||||
out << hi(uuid.data[n]) << lo(uuid.data[n]);
|
||||
out << '-';
|
||||
for (unsigned n = 8; n < 16; ++n)
|
||||
out << hi(uuid.data[n]) << lo(uuid.data[n]);
|
||||
return out;
|
||||
}
|
||||
|
||||
}
|
||||
@ -1,103 +0,0 @@
|
||||
/*
|
||||
* Copyright (C) 2007 Tommi Maekitalo
|
||||
*
|
||||
* This program is free software; you can redistribute it and/or
|
||||
* modify it under the terms of the GNU General Public License as
|
||||
* published by the Free Software Foundation; either version 2 of the
|
||||
* License, or (at your option) any later version.
|
||||
*
|
||||
* This program is distributed in the hope that it will be useful, but
|
||||
* is provided AS IS, WITHOUT ANY WARRANTY; without even the implied
|
||||
* warranty of MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE, and
|
||||
* NON-INFRINGEMENT. See the GNU General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU General Public License
|
||||
* along with this program; if not, write to the Free Software
|
||||
* Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
|
||||
*
|
||||
*/
|
||||
|
||||
#include <zim/zintstream.h>
|
||||
#include "log.h"
|
||||
|
||||
log_define("zim.zintstream")
|
||||
|
||||
namespace zim
|
||||
{
|
||||
// Reads one compressed integer from the input stream.
//
// The number N of leading one bits in the first byte tells how many further
// bytes follow; the remaining low bits of the first byte are the lowest bits
// of the value and the following bytes are appended least significant first.
// An offset that depends on N is added to the result.
//
// Returns 0 when nothing can be read or when the first byte is 0xff (which
// also sets failbit).  When the following bytes cannot all be read, failbit
// is set and the partial value is returned; check the stream state after
// the call.
size_type ZIntStream::get()
{
  char ch;
  if (!_istream->get(ch))
    return 0;

  if (ch == '\xff')
  {
    log_error("invalid bytestream in int decompressor");
    _istream->setstate(std::ios::failbit);
    return 0;
  }

  // examine the lead byte as unsigned so that shifting it is well defined
  unsigned char lead = static_cast<unsigned char>(ch);

  size_type uuvalue = static_cast<size_type>(lead);
  uint64_t ubound = 0x80;
  size_type add = 0;
  unsigned short s = 7;     // number of value bits carried by the lead byte
  unsigned short N = 0;     // number of bytes following the lead byte
  size_type mask = 0x7F;    // selects the value bits of the lead byte
  while (lead & 0x80)
  {
    ++N;
    lead <<= 1;
    --s;
    add += ubound;
    ubound <<= 7;
    mask >>= 1;
  }

  uuvalue &= mask;

  while (N-- && _istream->get(ch))
  {
    uuvalue |= static_cast<size_type>(static_cast<unsigned char>(ch)) << s;
    s += 8;
  }

  // test the stream state, not the pointer
  if (*_istream)
  {
    uuvalue += add;
  }
  else
  {
    log_error("incomplete bytestream in int decompressor");
    _istream->setstate(std::ios::failbit);
  }

  return uuvalue;
}
|
||||
|
||||
// Writes `value` in compressed form to the output stream: a first byte with
// N leading one bits and the lowest 7 - N bits of the reduced value, followed
// by N bytes with the remaining bits, least significant byte first.  Before
// encoding, the value is reduced by the bounds of all shorter encodings.
ZIntStream& ZIntStream::put(size_type value)
{
  size_type nmask = 0;
  size_type mask = 0x7F;
  uint64_t ubound = 0x80;
  unsigned short N = 0;

  // determine the number of additional bytes and reduce the value
  for (; value >= ubound; ++N)
  {
    value -= ubound;
    ubound <<= 7;
    nmask = (nmask >> 1) | 0x80;
    mask >>= 1;
  }

  // lead byte: length marker plus the lowest bits of the value
  _ostream->put(static_cast<char>(nmask | (value & mask)));

  // remaining bits, one byte at a time
  value >>= 7 - N;
  for (unsigned short remaining = N; remaining > 0; --remaining)
  {
    _ostream->put(static_cast<char>(value & 0xFF));
    value >>= 8;
  }

  return *this;
}
|
||||
|
||||
}
|
||||
Loading…
Reference in new issue