52 #ifndef _CACIFFILESYSTEM 53 #define _CACIFFILESYSTEM 54 #include "libGIFTAcInvertedFile/include/uses-declarations.h" 56 #include "libMRML/include/TID.h" 57 #include "libMRML/include/CSelfDestroyPointer.h" 58 #include "libMRML/include/CArraySelfDestroyPointer.h" 59 #include "libGIFTAcInvertedFile/include/CDocumentFrequencyList.h" 60 #include "libMRML/include/CMutex.h" 62 #include "libGIFTAcInvertedFile/include/CADIHash.h" 63 #include "libGIFTAcURL2FTS/include/CAcURL2FTS.h" 64 #include "libGIFTAcInvertedFile/include/CAcInvertedFile.h" 71 #define HASH_MAP hash_map 78 #include "libMRML/include/CMagic.h" 81 typedef TID TFeatureID ;
156 streampos inPosition,
157 ostream& inOpenOffsetFile);
268 double inDocumentFrequency)
const;
277 virtual pair<bool,TID>
URLToID(
const string& inURL)
const;
289 list<TID>::size_type)
const;
299 list<CAccessorElement>::size_type inSize)
const;
320 operator bool()
const;
An accessor to an inverted file.
Definition: CAcIFFileSystem.h:93
double FeatureToCollectionFrequency(TFeatureID) const
Collection frequency for a given feature.
string IDToURL(TID inID) const
Translate a DocumentID to a URL (for output)
double DIDToDFSquareSum(TID) const
Returns the document-frequency square sum for a given document ID.
CSelfDestroyPointer< istream > mInvertedFile
The inverted file.
Definition: CAcIFFileSystem.h:117
bool operator()() const
for testing if the inverted file is correctly constructed
This class captures the structure of an XML element.
Definition: CXMLElement.h:51
A list of Document Frequency Elements (the main part of an inverted file)
Definition: CDocumentFrequencyList.h:58
CAcIFFileSystem(const CXMLElement &inCollectionElement)
This opens an existing inverted file, and then initializes this structure.
void getRandomIDs(list< TID > &, list< TID >::size_type) const
get a given number of random C-AccessorElement-s
string mOffsetFileName
Name of the Offset file.
Definition: CAcIFFileSystem.h:129
virtual pair< bool, TID > URLToID(const string &inURL) const
Translate a URL to its document ID.
HASH_MAP< TID, unsigned int > mFeatureDescription
map from the feature ID to the feature description
Definition: CAcIFFileSystem.h:145
unsigned int getFeatureDescription(TID inFeatureID) const
What kind of feature is the feature with ID inFeatureID?
TID getMaximumFeatureID() const
This is interesting for browsing.
double DIDToSquareDFLogICFSum(TID) const
Returns the value of this function for a given document ID.
string mInvertedFileBuffer
A buffer, if the inverted file is to be held in ram.
Definition: CAcIFFileSystem.h:109
CADIHash.
Definition: CADIHash.h:53
An accessor to an inverted file.
Definition: CAcInvertedFile.h:83
bool checkConsistency()
Check the consistency of the inverted file system accessed by this accessor.
~CAcIFFileSystem()
Destructor.
CDocumentFrequencyList * FeatureToList(TFeatureID) const
List of documents containing the feature.
CIDToOffset mIDToOffset
map from feature id to the offset for this feature
Definition: CAcIFFileSystem.h:137
string mInvertedFileName
Name of the inverted file.
Definition: CAcIFFileSystem.h:126
CSelfDestroyPointer< CAcURL2FTS > mURL2FTS
In order to have just one parent, I have to limit myself to single inheritance.
Definition: CAcIFFileSystem.h:103
double DIDToMaxDocumentFrequency(TID) const
returns the maximum document frequency for one document ID
This class offers an abstraction from the locking method used.
Definition: CMutex.h:40
CMutex mMutex
the mutex for multi threading
Definition: CAcIFFileSystem.h:97
bool init(bool)
called by constructors
void getAllIDs(list< TID > &) const
List of the IDs of all documents present in the inverted file.
void writeOffsetFileElement(TID inFeatureID, streampos inPosition, ostream &inOpenOffsetFile)
Add a (FeatureID, Offset) pair to the open offset file (helper function for inverted file construction).
bool generateInvertedFile()
Generating an inverted File, if there is none.
void getRandomAccessorElements(list< CAccessorElement > &outResult, list< CAccessorElement >::size_type inSize) const
For drawing random sets.
void getAllAccessorElements(list< CAccessorElement > &) const
List of triplets (ID,imageURL,thumbnailURL) of all the documents present in the inverted file...
int size() const
The number of images in this accessor.
HASH_MAP< TID, streampos > CIDToOffset
map from feature id to the offset for this feature
Definition: CAcIFFileSystem.h:135
virtual pair< bool, CAccessorElement > IDToAccessorElement(TID inID) const
Translate a DocumentID to an accessor Element.
HASH_MAP< TID, double > mFeatureToCollectionFrequency
map from feature to the collection frequency
Definition: CAcIFFileSystem.h:140
list< TID > * getAllFeatureIDs() const
Getting a list of all features contained in this.
CDocumentFrequencyList * getFeatureFile(string inFileName) const
loads a *.fts file.
string mFeatureDescriptionFileName
Name for the file with the feature description.
Definition: CAcIFFileSystem.h:132
CADIHash mDocumentInformation
additional information about the document like, e.g.
Definition: CAcIFFileSystem.h:150
CDocumentFrequencyList * URLToFeatureList(string inURL) const
List of features contained by a document.
CDocumentFrequencyList * DIDToFeatureList(TID inDID) const
List of features contained by a document with ID inDID.
bool newGenerateInvertedFile()
Generating an inverted File, if there is none.
ifstream mOffsetFile
Feature -> Offset in inverted file.
Definition: CAcIFFileSystem.h:120
ifstream mFeatureDescriptionFile
File of feature descriptions.
Definition: CAcIFFileSystem.h:123
bool findWithinStream(TID inFeatureID, TID inDocumentID, double inDocumentFrequency) const
Is the Document with inDocumentID contained in the document frequency list of the feature inFeatureID...
TID mMaximumFeatureID
the maximum feature ID arising in this file
Definition: CAcIFFileSystem.h:105
string mTemporaryIndexingFileBase
Some place for putting temporary indexing data.
Definition: CAcIFFileSystem.h:115