00001
00002
00003
00004
00005
00006
00007
00008
00009
00010
00011
00012
00013
00014
00015
00016
00017
00018
00019
00020
00021
00022
00023
00024
00025 #ifndef _MERGE_SORT
00026 #define _MERGE_SORT
00027
00028
00029
00030
00031
00032 #include <memory>
00033 #include <algorithm>
00034 #include <fstream>
00035 #include <cassert>
00036
00037 using namespace std;
00038
00039
00040
00043 template<typename T>
00044 class CStreamPos{
00046 T mContent;
00047 public:
00048 CStreamPos(T inContent):mContent(inContent){
00049
00050 }
00051
00052 CStreamPos(long int inContent):mContent(inContent){
00053
00054 }
00055
00056 operator T()const{
00057 return mContent;
00058 }
00059 operator long int()const{
00060 return mContent;
00061 }
00062
00063 CStreamPos<T>& operator++ (){
00064 mContent=mContent+static_cast<T>(1);
00065 return *this;
00066 }
00067 CStreamPos<T> operator++ (int){
00068 CStreamPos<T> lReturnValue=*this;
00069 mContent=mContent+static_cast<T>(1);
00070 return lReturnValue;
00071 }
00072
00073 CStreamPos<T> operator% (int inMod)const{
00074 return mContent % inMod;
00075 }
00076 CStreamPos<T> operator/ (int inMod)const{
00077 return mContent / inMod;
00078 }
00079 bool operator< (CStreamPos<T>& inThan)const{
00080 return this->mContent < inThan.mContent;
00081 }
00082 bool operator! ()const{
00083 return !(this->mContent);
00084 }
00085 CStreamPos<T> operator+ (CStreamPos<T>& inSummand){
00086 return CStreamPos<T>(mContent + inSummand.mContent);
00087 }
00088 };
00089
00090 #if __GNUC__==2
00091 #define STREAMPOS_T long long int
00092 #else
00093 #define STREAMPOS_T CStreamPos<fstream::pos_type>
00094 #endif
00095
00096
00107 template<class T>
00108 void merge_streams(istream& in1, const STREAMPOS_T inCount1,
00109 istream& in2, const STREAMPOS_T inCount2,
00110 ostream& out,
00111 int inNumberOfPageElements=1){
00112
00113 {
00114
00115
00116
00117
00118
00119 const STREAMPOS_T lPageSize(sizeof(T)*inNumberOfPageElements);
00120
00121
00122
00123 if(!(inCount1)){
00124 for(STREAMPOS_T i=0;
00125 i<inCount2;
00126 i++){
00127 T l2;
00128 in2.read((char*)&l2,
00129 sizeof(l2));
00130 assert(in2);
00131
00132 out.write((char*)&l2,
00133 sizeof(l2));
00134 assert(out);
00135 }
00136 return;
00137 }
00138 if(!inCount2){
00139 for(STREAMPOS_T i=0;
00140 i<inCount1;
00141 i++){
00142 T l1;
00143 in1.read((char*)&l1,
00144 sizeof(l1));
00145 assert(in1);
00146 out.write((char*)&l1,
00147 sizeof(l1));
00148 assert(out);
00149 }
00150 return;
00151 }
00152
00153 STREAMPOS_T lI1(0);
00154 STREAMPOS_T lI2(0);
00155
00156
00157 T l1;
00158 in1.read((char*)&l1,
00159 sizeof(l1));
00160 assert(in1);
00161 T l2;
00162 in2.read((char*)&l2,
00163 sizeof(l2));
00164 assert(in2);
00165
00166 while((lI1<inCount1)
00167 &&(lI2<inCount2)){
00168 if(l1<l2){
00169
00170 out.write((char*)&l1,
00171 sizeof(l1));
00172 assert(out);
00173
00174
00175 lI1++;
00176 if(lI1<inCount1){
00177 in1.read((char*)&l1,
00178 sizeof(l1));
00179 assert(in1);
00180 }
00181 }else{
00182
00183 out.write((char*)&l2,
00184 sizeof(l2));
00185
00186 lI2++;
00187 if(lI2<inCount2){
00188 in2.read((char*)&l2,
00189 sizeof(l2));
00190 assert(in2);
00191 }
00192 }
00193 }
00194 while(lI1<inCount1){
00195
00196 out.write((char*)&l1,
00197 sizeof(l1));
00198
00199 lI1++;
00200 if(lI1<inCount1){
00201 in1.read((char*)&l1,
00202 sizeof(l1));
00203 assert(in1);
00204 }
00205 }
00206 while(lI2<inCount2){
00207
00208 out.write((char*)&l2,
00209 sizeof(l2));
00210 assert(out);
00211
00212 lI2++;
00213 if(lI2<inCount2){
00214 in2.read((char*)&l2,
00215 sizeof(l2));
00216 assert(in2);
00217 }
00218 }
00219 }
00220 }
00221 template<typename T>
00222 void first_level_quicksort(int inNumberOfPageElements,
00223 const char* inFileToBeSortedName,
00224 const char* inTemporaryName){
00225
00226 cout << "Starting quicksort: "
00227 << inNumberOfPageElements
00228 << " elements per page." << endl
00229 << "Sorting files " << inFileToBeSortedName << endl
00230 << "to " << inTemporaryName << endl;
00231 cout << "NOW ALLOCATING A PAGE" << inNumberOfPageElements << endl;
00232 auto_ptr<T> lPage(new T[inNumberOfPageElements]);
00233
00234 cout << "H" << flush;
00235
00236 const STREAMPOS_T lPageSize(sizeof(T)*inNumberOfPageElements);
00237
00238 cout << "I" << flush;
00239
00240 STREAMPOS_T lFileSize(0);
00241 ifstream lToBeSorted1(inFileToBeSortedName);
00242 assert(lToBeSorted1);
00243 lToBeSorted1.seekg(0,
00244 ios::end);
00245 lFileSize=lToBeSorted1.tellg();
00246 lToBeSorted1.clear();
00247 lToBeSorted1.seekg(0,
00248 ios::beg);
00249 cout << "E" << flush;
00250
00251 ofstream lTemporary(inTemporaryName);
00252 assert(lTemporary);
00253 cout << "R" << flush;
00254
00255 STREAMPOS_T lSum(0);
00256
00257 T* lBegin(lPage.get());
00258 T* lEnd(lPage.get());
00259
00260 cout << "FIRSTLEVELQUICK" << lFileSize << ";" << lSum<< endl;
00261 while((lSum<lFileSize)
00262 && lToBeSorted1){
00263
00264 cout << "." << flush;
00265
00266 int lRead(0);
00267 if(lSum+lPageSize < lFileSize){
00268 lToBeSorted1.read((char*)lPage.get(),
00269 lPageSize);
00270 lRead=lPageSize;
00271 }else{
00272 lToBeSorted1.read((char*)lPage.get(),
00273 lFileSize-lSum);
00274 lRead=lFileSize-lSum;
00275 }
00276 if(lRead){
00277 lEnd=lBegin+(lRead)/sizeof(T);
00278 sort(lBegin,lEnd);
00279 lTemporary.write((char*)lPage.get(),lRead);
00280 assert(lTemporary);
00281 }
00282 }
00283 cout << "." << endl;
00284
00285 }
00286
00297 template<class T>
00298 char* merge_sort_streams(const char* inFileToBeSortedName,
00299 const char* inTemporaryName,
00300 int inNumberOfPageElements=(1 << 20)
00301 ){
00302
00303 const char* lFileToBeSortedName(inFileToBeSortedName);
00304 const char* lTemporaryName(inTemporaryName);
00305
00306 STREAMPOS_T lFileSize(0);
00307 ifstream lToBeSorted1(inFileToBeSortedName);
00308 lToBeSorted1.seekg(0,
00309 ios::end);
00310 lFileSize=lToBeSorted1.tellg();
00311 lToBeSorted1.close();
00312
00313 ofstream lTemporary;
00314 ifstream lToBeSorted2;
00315
00316 #ifdef first_level_quick
00317 first_level_quicksort<T>(inNumberOfPageElements,
00318 inFileToBeSortedName,
00319 inTemporaryName);
00320 swap(lFileToBeSortedName,
00321 lTemporaryName);
00322 #else
00323 cout << "STARTING mit MERGESIZE1" << endl;
00324 inNumberOfPageElements=1;
00325 #endif
00326 STREAMPOS_T lCount(0);
00327 for(STREAMPOS_T iMergeSize(sizeof(T)*inNumberOfPageElements);
00328 (iMergeSize < lFileSize)
00329 || !(lCount%2)
00330
00331
00332
00333
00334 ;
00335 (iMergeSize = iMergeSize << 1),
00336 (lCount=lCount+static_cast<STREAMPOS_T>(1))){
00337
00338 cout << "MERGESORT MergeSize "
00339 << iMergeSize
00340 << endl;
00341
00342 lToBeSorted1.open(lFileToBeSortedName);
00343 lToBeSorted1.clear();
00344
00345 lToBeSorted2.open(lFileToBeSortedName);
00346 lToBeSorted2.clear();
00347
00348 lTemporary.open(lTemporaryName);
00349 lTemporary.clear();
00350
00351
00352 for(STREAMPOS_T i(0);
00353 i<lFileSize;
00354 i = i + iMergeSize*2){
00355 lToBeSorted1.seekg(i);
00356
00357 if(!lToBeSorted1){
00358 cerr << __FILE__ << ":" << __LINE__ << " lToBeSorted false, after seekg("
00359 << static_cast<long int>(i)
00360 << ")"
00361 << endl;
00362 }
00363
00364 assert(lToBeSorted1);
00365
00366 if(i+iMergeSize<lFileSize){
00367 lToBeSorted2.seekg(i+iMergeSize);
00368 assert(lToBeSorted2);
00369 }
00370
00371 STREAMPOS_T lMergeSize1=lFileSize-i;
00372 if(lFileSize < i){
00373 lMergeSize1=0;
00374 }
00375 STREAMPOS_T lMergeSize2=lFileSize-i-iMergeSize;
00376 if(lFileSize < i + iMergeSize){
00377 lMergeSize2=0;
00378 }
00379
00380
00381
00382
00383 if(lMergeSize1>iMergeSize){
00384 lMergeSize1=iMergeSize;
00385 }
00386 if(lMergeSize2>iMergeSize){
00387 lMergeSize2=iMergeSize;
00388 }
00389
00390 #if __GNUC__==2
00391 merge_streams<T>(lToBeSorted1,
00392 lMergeSize1/sizeof(T),
00393 lToBeSorted2,
00394 lMergeSize2/sizeof(T),
00395 lTemporary,
00396 inNumberOfPageElements
00397 );
00398 #else
00399 merge_streams<T>(lToBeSorted1,
00400 lMergeSize1.operator/(sizeof(T)),
00401 lToBeSorted2,
00402 lMergeSize2.operator/(sizeof(T)),
00403 lTemporary,
00404 inNumberOfPageElements
00405 );
00406 #endif
00407 }
00408
00409 lTemporary.close();
00410 lToBeSorted1.close();
00411 lToBeSorted2.close();
00412 swap(lFileToBeSortedName,
00413 lTemporaryName);
00414 cout << "endmerge" << endl;
00415 }
00416 return strdup(lFileToBeSortedName);
00417 }
00418 #endif