00001
00002
00003
00004
00005
00006
00007
00008
00009
00010
00011
00012
00013
00014
00015
00016
00017
00018
00019
00020
00021
00022
00023
00024
00025
00026
00027
00028
00029
00030
00031
00032
00033
00034
00035
00036
00037
00038
00039
00040
00041
00042
00043
00044
00045
00046
00047
00048
00049
00050
00051
00052
00053
00054
00055
00056
00057
00058
00059
00060
00061
00062
00063
00064
00065
00066
00067
00068
00069
00070
00071 #include <math.h>
00072 #include <stdio.h>
00073 #include <stdlib.h>
00074 #include <string.h>
00075 #include <time.h>
00076
00077 #include "classifier/svm/gpm.h"
00078 #include "classifier/svm/gpdt.h"
00079 #include "classifier/svm/gpdtsolve.h"
00080 #include "lib/Signal.h"
00081 #include "lib/io.h"
00082
00083 #ifndef DOXYGEN_SHOULD_SKIP_THIS
00084 #define y_in(i) y[index_in[(i)]]
00085 #define y_out(i) y[index_out[(i)]]
00086 #define alpha_in(i) alpha[index_in[(i)]]
00087 #define alpha_out(i) alpha[index_out[(i)]]
00088 #define minfty (-1.0e+30) // minus infinity
00089
00090 uint32_t Randnext = 1;
00091
00092 #define ThRand (Randnext = Randnext * 1103515245L + 12345L)
00093 #define ThRandPos ((Randnext = Randnext * 1103515245L + 12345L) & 0x7fffffff)
00094
00095 FILE *fp;
00096
00097
00098 void quick_si (int32_t a[], int32_t k);
00099 void quick_s3 (int32_t a[], int32_t k, int32_t ia[]);
00100 void quick_s2 (float64_t a[], int32_t k, int32_t ia[]);
00101
00102
00103
00104
00105 class sCache
00106 {
00107
00108 public:
00109 sCache (sKernel* sk, int32_t Mbyte, int32_t ell);
00110 ~sCache ();
00111
00112 cachetype *FillRow (int32_t row, int32_t IsC = 0);
00113 cachetype *GetRow (int32_t row);
00114
00115 int32_t DivideMP (int32_t *out, int32_t *in, int32_t n);
00116
00117
00118 void Iteration(void) { nit++; }
00119
00120
00121 int32_t CheckCycle(void)
00122 {
00123 int32_t us;
00124 cache_entry *pt = first_free->next;
00125
00126 for (us = 0; pt != first_free; us++, pt = pt->next);
00127 if (us != maxmw-1)
00128 return 1;
00129 else
00130 return 0;
00131 }
00132
00133 private:
00134
00135 struct cache_entry
00136 {
00137 int32_t row;
00138 int32_t last_access_it;
00139 cache_entry *prev, *next;
00140 cachetype *data;
00141 };
00142
00143 sKernel* KER;
00144 int32_t maxmw, ell;
00145 int32_t nit;
00146
00147 cache_entry *mw;
00148 cache_entry *first_free;
00149 cache_entry **pindmw;
00150 cachetype *onerow;
00151
00152 cachetype *FindFree(int32_t row, int32_t IsC);
00153 };
00154
00155
00156
00157
00158
00159 sCache::sCache(sKernel* sk, int32_t Mbyte, int32_t _ell) : KER(sk), ell(_ell)
00160 {
00161 int32_t i;
00162
00163
00164 maxmw = (sizeof(cache_entry) + sizeof(cache_entry *)
00165 + ell*sizeof(cachetype)) / 4;
00166
00167 maxmw = Mbyte*1024*(1024/4) / maxmw;
00168
00169
00170 mw = (cache_entry *)malloc(maxmw * sizeof(cache_entry));
00171 pindmw = (cache_entry **)malloc(ell * sizeof(cache_entry *));
00172 onerow = (cachetype *)malloc(ell * sizeof(cachetype ));
00173
00174
00175 for (i = 0; i < maxmw; i++)
00176 {
00177 mw[i].prev = (i == 0 ? &mw[maxmw-1] : &mw[i-1]);
00178 mw[i].next = (i == maxmw-1 ? &mw[0] : &mw[i+1]);
00179 mw[i].data = (cachetype *)malloc(ell*sizeof(cachetype));
00180 mw[i].row = -1;
00181 mw[i].last_access_it = -1;
00182 }
00183 for (i = 0; i < ell; i++)
00184 pindmw[i] = 0;
00185
00186 first_free = &mw[0];
00187 nit = 0;
00188 }
00189
00190
00191
00192
00193 sCache::~sCache()
00194 {
00195 int32_t i;
00196
00197 for (i = maxmw-1; i >= 0; i--)
00198 if (mw[i].data) free(mw[i].data);
00199 if (onerow) free(onerow);
00200 if (pindmw) free(pindmw);
00201 if (mw) free(mw);
00202 }
00203
00204
00205
00206
00207
00208 cachetype *sCache::GetRow(int32_t row)
00209 {
00210 cache_entry *c;
00211
00212 c = pindmw[row];
00213 if (c == NULL)
00214 return NULL;
00215
00216 c->last_access_it = nit;
00217 if (c == first_free)
00218 {
00219 first_free = first_free->next;
00220 }
00221 else
00222 {
00223
00224 c->prev->next = c->next;
00225 c->next->prev = c->prev;
00226 c->next = first_free;
00227 c->prev = first_free->prev;
00228 first_free->prev = c;
00229 c->prev->next = c;
00230 }
00231
00232 return c->data;
00233 }
00234
00235
00236
00237
00238
00239
00240 cachetype *sCache::FindFree(int32_t row, int32_t IsC)
00241 {
00242 cachetype *pt;
00243
00244 if (first_free->row != -1)
00245 {
00246 if (first_free->last_access_it == nit || IsC)
00247 return 0;
00248 else
00249 pindmw[first_free->row] = 0;
00250 }
00251 first_free->row = row;
00252 first_free->last_access_it = nit;
00253 pindmw[row] = first_free;
00254
00255 pt = first_free->data;
00256 first_free = first_free->next;
00257
00258 return pt;
00259 }
00260
00261
00262
00263
00264
00265 cachetype *sCache::FillRow(int32_t row, int32_t IsC)
00266 {
00267 int32_t j;
00268 cachetype *pt;
00269
00270 pt = GetRow(row);
00271 if (pt != NULL)
00272 return pt;
00273
00274 pt = FindFree(row, IsC);
00275 if (pt == 0)
00276 pt = onerow;
00277
00278
00279 for (j = 0; j < ell; j++)
00280 pt[j] = (cachetype)KER->Get(row, j);
00281 return pt;
00282 }
00283
00284
00285
00286
00287
00288 int32_t sCache::DivideMP(int32_t *out, int32_t *in, int32_t n)
00289 {
00290
00291
00292
00293
00294
00295
00296
00297
00298
00299
00300 int32_t *remained, nremained, k;
00301 int32_t i;
00302
00303 remained = (int32_t *) malloc(n*sizeof(int32_t));
00304
00305 nremained = 0;
00306 k = 0;
00307 for (i = 0; i < n; i++)
00308 {
00309 if (pindmw[in[i]] != NULL)
00310 out[k++] = i;
00311 else
00312 remained[nremained++] = i;
00313 }
00314 for (i = 0; i < nremained; i++)
00315 out[k++] = remained[i];
00316
00317 free(remained);
00318 return n;
00319 }
00320
00321
00322
00323
00324 int32_t QPproblem::optimal()
00325 {
00326
00327
00328
00329
00330 register int32_t i, j, margin_sv_number, z, k, s, kin, z1, znew=0, nnew;
00331
00332 float64_t gx_i, aux, s1, s2;
00333
00334
00335 for (j = 0; j < ell; j++)
00336 {
00337 grad[j] = y[j] - st[j];
00338 ing[j] = j;
00339 }
00340
00341 quick_s2(grad,ell,ing);
00342
00343
00344 margin_sv_number = 0;
00345
00346 for (i = chunk_size - 1; i >= 0; i--)
00347 if (is_free(index_in[i]))
00348 {
00349 margin_sv_number++;
00350 bee = y_in(i) - st[index_in[i]];
00351 break;
00352 }
00353
00354 if (margin_sv_number > 0)
00355 {
00356 aux=-1.0;
00357 for (i = nb-1; i >= 0; i--)
00358 {
00359 gx_i = bee + st[index_out[i]];
00360 if ((is_zero(index_out[i]) && (gx_i*y_out(i) < (1.0-delta))) ||
00361 (is_bound(index_out[i]) && (gx_i*y_out(i) > (1.0+delta))) ||
00362 (is_free(index_out[i]) &&
00363 ((gx_i*y_out(i) < 1.0-delta) || (gx_i*y_out(i) > 1.0+delta))))
00364 {
00365 if (fabs(gx_i*y_out(i) - 1.0) > aux)
00366 aux = fabs(gx_i*y_out(i) - 1.0);
00367 }
00368 }
00369 }
00370 else
00371 {
00372 for (i = ell - 1; i >= 0; i--)
00373 if (is_free(i))
00374 {
00375 margin_sv_number++;
00376 bee = y[i] - st[i];
00377 break;
00378 }
00379 if (margin_sv_number == 0)
00380 {
00381 s1 = -minfty;
00382 s2 = -minfty;
00383 for (j = 0; j < ell; j++)
00384 if ( (alpha[ing[j]] > DELTAsv) && (y[ing[j]] >= 0) )
00385 {
00386 s1 = grad[j];
00387 break;
00388 }
00389 for (j = 0; j < ell; j++)
00390 if ( (alpha[ing[j]] < c_const-DELTAsv) && (y[ing[j]] <= 0) )
00391 {
00392 s2 = grad[j];
00393 break;
00394 }
00395 if (s1 < s2)
00396 aux = s1;
00397 else
00398 aux = s2;
00399
00400 s1 = minfty;
00401 s2 = minfty;
00402 for (j = ell-1; j >=0; j--)
00403 if ( (alpha[ing[j]] > DELTAsv) && (y[ing[j]] <= 0) )
00404 {
00405 s1 = grad[j];
00406 break;
00407 }
00408 for (j = ell-1; j >=0; j--)
00409 if ( (alpha[ing[j]] < c_const-DELTAsv) && (y[ing[j]] >= 0) )
00410 {
00411 s2 = grad[j];
00412 break;
00413 }
00414 if (s2 > s1) s1 = s2;
00415
00416 bee = 0.5 * (s1+aux);
00417 }
00418
00419 aux = -1.0;
00420 for (i = ell-1; i >= 0; i--)
00421 {
00422 gx_i = bee + st[i];
00423 if ((is_zero(i) && (gx_i*y[i] < (1.0-delta))) ||
00424 (is_bound(i) && (gx_i*y[i] > (1.0+delta))) ||
00425 (is_free(i) &&
00426 ((gx_i*y[i] < 1.0-delta) || (gx_i*y[i] > 1.0+delta))))
00427 {
00428 if (fabs(gx_i*y[i] - 1.0) > aux)
00429 aux = fabs(gx_i*y[i] - 1.0);
00430 }
00431 }
00432 }
00433
00434 if (aux < 0.0)
00435 return 1;
00436 else
00437 {
00438 if (verbosity > 1)
00439 SG_INFO(" Max KKT violation: %lf\n", aux);
00440 else if (verbosity > 0)
00441 SG_INFO(" %lf\n", aux);
00442
00443 if (fabs(kktold-aux) < delta*0.01 && aux < delta*2.0)
00444 {
00445 if (DELTAvpm > InitialDELTAvpm*0.1)
00446 {
00447 DELTAvpm = (DELTAvpm*0.5 > InitialDELTAvpm*0.1 ?
00448 DELTAvpm*0.5 : InitialDELTAvpm*0.1);
00449 SG_INFO("Inner tolerance changed to: %lf\n", DELTAvpm);
00450 }
00451 }
00452
00453 kktold = aux;
00454
00455
00456
00457
00458
00459
00460 for (j = 0; j < chunk_size; j++)
00461 inold[j] = index_in[j];
00462
00463 z = -1;
00464 z1 = ell;
00465 k = 0;
00466 j = 0;
00467 while (k < q)
00468 {
00469 i = z + 1;
00470 while (i < z1)
00471 {
00472 if ( is_free(ing[i]) ||
00473 (-y[ing[i]]>=0 && is_zero(ing[i])) ||
00474 (-y[ing[i]]<=0 && is_bound(ing[i]))
00475 )
00476 {
00477 znew = i;
00478 break;
00479 }
00480 i++;
00481 }
00482 if (i == z1) break;
00483
00484 s = z1 - 1;
00485 while (znew < s)
00486 {
00487 if ( is_free(ing[s]) ||
00488 (y[ing[s]]>=0 && is_zero(ing[s])) ||
00489 (y[ing[s]]<=0 && is_bound(ing[s]))
00490 )
00491 {
00492 z1 = s;
00493 z = znew;
00494 break;
00495 }
00496 s--;
00497 }
00498 if (znew == s) break;
00499
00500 index_in[k++] = ing[z];
00501 index_in[k++] = ing[z1];
00502 }
00503
00504 if (k < q)
00505 {
00506 if (verbosity > 1)
00507 SG_INFO(" New q: %i\n", k);
00508 q = k;
00509 }
00510
00511 quick_si(index_in, q);
00512
00513 s = 0;
00514 j = 0;
00515 for (k = 0; k < chunk_size; k++)
00516 {
00517 z = inold[k];
00518 for (i = j; i < q; i++)
00519 if (z <= index_in[i])
00520 break;
00521
00522 if (i == q)
00523 {
00524 for (i = k; i < chunk_size; i++)
00525 {
00526 ing[s] = inold[i];
00527 s = s+1;
00528 }
00529 break;
00530 }
00531
00532 if (z == index_in[i])
00533 j = i + 1;
00534 else
00535 {
00536 ing[s] = z;
00537 s = s + 1;
00538 j = i;
00539 }
00540 }
00541
00542 for (i = 0; i < s; i++)
00543 {
00544 bmemrid[i] = bmem[ing[i]];
00545 pbmr[i] = i;
00546 }
00547
00548 quick_s3(bmemrid, s, pbmr);
00549
00550
00551 j = q;
00552 i = 0;
00553 while (j < chunk_size && i < s)
00554 {
00555 if (is_free(ing[pbmr[i]]))
00556 {
00557 index_in[j] = ing[pbmr[i]];
00558 j++;
00559 }
00560 i++;
00561 }
00562
00563
00564 if (j < chunk_size)
00565 {
00566 i = 0;
00567 while (j < chunk_size && i < s)
00568 {
00569 if (is_zero(ing[pbmr[i]]))
00570 {
00571 index_in[j] = ing[pbmr[i]];
00572 j++;
00573 }
00574 i++;
00575 }
00576 if (j < chunk_size)
00577 {
00578 i = 0;
00579 while (j < chunk_size && i < s)
00580 {
00581 if (is_bound(ing[pbmr[i]]))
00582 {
00583 index_in[j] = ing[pbmr[i]];
00584 j++;
00585 }
00586 i++;
00587 }
00588 }
00589 }
00590
00591 quick_si(index_in, chunk_size);
00592
00593 for (i = 0; i < chunk_size; i++)
00594 bmem[index_in[i]]++;
00595
00596 j = 0;
00597 k = 0;
00598 for (i = 0; i < chunk_size; i++)
00599 {
00600 for (z = j; z < index_in[i]; z++)
00601 {
00602 index_out[k] = z;
00603 k++;
00604 }
00605 j = index_in[i]+1;
00606 }
00607 for (z = j; z < ell; z++)
00608 {
00609 index_out[k] = z;
00610 k++;
00611 }
00612
00613 for (i = 0; i < nb; i++)
00614 bmem[index_out[i]] = 0;
00615
00616 kin = 0;
00617 j = 0;
00618 for (k = 0; k < chunk_size; k++)
00619 {
00620 z = index_in[k];
00621 for (i = j; i < chunk_size; i++)
00622 if (z <= inold[i])
00623 break;
00624 if (i == chunk_size)
00625 {
00626 for (s = k; s < chunk_size; s++)
00627 {
00628 incom[s] = -1;
00629 cec[index_in[s]]++;
00630 }
00631 kin = kin + chunk_size - k ;
00632 break;
00633 }
00634
00635 if (z == inold[i])
00636 {
00637 incom[k] = i;
00638 j = i+1;
00639 }
00640 else
00641 {
00642 incom[k] = -1;
00643 j = i;
00644 kin = kin + 1;
00645 cec[index_in[k]]++;
00646 }
00647 }
00648
00649 nnew = kin & (~1);
00650 if (nnew < 10)
00651 nnew = 10;
00652 if (nnew < chunk_size/10)
00653 nnew = chunk_size/10;
00654 if (nnew < q)
00655 {
00656 q = nnew;
00657 q = q & (~1);
00658 }
00659
00660 if (kin == 0)
00661 {
00662 DELTAkin *= 0.1;
00663 if (DELTAkin < 1.0e-6)
00664 {
00665 SG_INFO("\n***ERROR***: GPDT stops with tolerance");
00666 SG_INFO(
00667 " %lf because it is unable to change the working set.\n", kktold);
00668 return 1;
00669 }
00670 else
00671 {
00672 SG_INFO("Inner tolerance temporary changed to:");
00673 SG_INFO(" %e\n", DELTAvpm*DELTAkin);
00674 }
00675 }
00676 else
00677 DELTAkin = 1.0;
00678
00679 if (verbosity > 1)
00680 {
00681 SG_INFO(" Working set: new components: %i", kin);
00682 SG_INFO(", new parameter n: %i\n", q);
00683 }
00684
00685 return 0;
00686 }
00687 }
00688
00689
00690
00691
00692 int32_t QPproblem::Preprocess0(int32_t *aux, int32_t *sv)
00693 {
00694 int32_t i, j;
00695
00696 Randnext = 1;
00697 memset(sv, 0, ell*sizeof(int32_t));
00698 for (i = 0; i < chunk_size; i++)
00699 {
00700 do
00701 {
00702 j = ThRandPos % ell;
00703 } while (sv[j] != 0);
00704 sv[j] = 1;
00705 }
00706 return(chunk_size);
00707 }
00708
00709
00710
00711
00712 int32_t QPproblem::Preprocess1(sKernel* kernel, int32_t *aux, int32_t *sv)
00713 {
00714 int32_t s;
00715 int32_t sl;
00716 int32_t n, i, off, j, k, ll;
00717 int32_t nsv, nbsv;
00718 int32_t *sv_loc, *bsv_loc, *sp_y;
00719 float32_t *sp_D=NULL;
00720 float64_t *sp_alpha, *sp_h;
00721
00722 s = ell;
00723
00724 n = (s + preprocess_size - 1) / preprocess_size;
00725 sl = 1 + s / n;
00726
00727 if (verbosity > 0)
00728 {
00729 SG_INFO(" Preprocessing: examples = %d", s);
00730 SG_INFO(", subp. = %d", n);
00731 SG_INFO(", size = %d\n",sl);
00732 }
00733
00734 sv_loc = (int32_t *) malloc(s*sizeof(int32_t));
00735 bsv_loc = (int32_t *)malloc(s*sizeof(int32_t));
00736 sp_alpha = (float64_t *)malloc(sl*sizeof(float64_t));
00737 sp_h = (float64_t *)malloc(sl*sizeof(float64_t));
00738 sp_y = (int32_t *)malloc(sl*sizeof(int32_t));
00739
00740 if (sl < 500)
00741 sp_D = (float32_t *)malloc(sl*sl * sizeof(float32_t));
00742
00743 for (i = 0; i < sl; i++)
00744 sp_h[i] = -1.0;
00745 memset(alpha, 0, ell*sizeof(float64_t));
00746
00747
00748 for (i = 0; i < ell; i++)
00749 aux[i] = i;
00750 Randnext = 1;
00751 for (i = 0; i < ell; i++)
00752 {
00753 j = ThRandPos % ell;
00754 k = ThRandPos % ell;
00755 ll = aux[j]; aux[j] = aux[k]; aux[k] = ll;
00756 }
00757
00758 nbsv = nsv = 0;
00759 for (i = 0; i < n; i++)
00760 {
00761 if (verbosity > 0)
00762 SG_INFO("%d...", i);
00763 SplitParts(s, i, n, &ll, &off);
00764
00765 if (sl < 500)
00766 {
00767 for (j = 0; j < ll; j++)
00768 {
00769 sp_y[j] = y[aux[j+off]];
00770 for (k = j; k < ll; k++)
00771 sp_D[k*sl + j] = sp_D[j*sl + k]
00772 = y[aux[j+off]] * y[aux[k+off]]
00773 * (float32_t)kernel->Get(aux[j+off], aux[k+off]);
00774 }
00775
00776 memset(sp_alpha, 0, sl*sizeof(float64_t));
00777
00778
00779 gpm_solver(projection_solver, projection_projector, ll, sp_D, sp_h,
00780 c_const, 0.0, sp_y, sp_alpha, delta*10, NULL);
00781 }
00782 else
00783 {
00784 QPproblem p2;
00785 p2.Subproblem(*this, ll, aux + off);
00786 p2.chunk_size = (int32_t) ((float64_t)chunk_size / sqrt((float64_t)n));
00787 p2.q = (int32_t) ((float64_t)q / sqrt((float64_t)n));
00788 p2.maxmw = ll*ll*4 / (1024 * 1024);
00789 if (p2.maxmw > maxmw / 2)
00790 p2.maxmw = maxmw / 2;
00791 p2.verbosity = 0;
00792 p2.delta = delta * 10.0;
00793 p2.PreprocessMode = 0;
00794 kernel->KernelEvaluations += p2.gpdtsolve(sp_alpha);
00795 }
00796
00797 for (j = 0; j < ll; j++)
00798 {
00799
00800 if (sp_alpha[j] < (c_const-DELTAsv))
00801 sp_alpha[j] = 0.0;
00802 else
00803 sp_alpha[j] = c_const;
00804 if (sp_alpha[j] > DELTAsv)
00805 {
00806 if (sp_alpha[j] < (c_const-DELTAsv))
00807 sv_loc[nsv++] = aux[j+off];
00808 else
00809 bsv_loc[nbsv++] = aux[j+off];
00810 alpha[aux[j+off]] = sp_alpha[j];
00811 }
00812 }
00813 }
00814
00815 Randnext = 1;
00816
00817
00818 memset(sv, 0, ell*sizeof(int32_t));
00819 ll = (nsv < chunk_size ? nsv : chunk_size);
00820 for (i = 0; i < ll; i++)
00821 {
00822 do {
00823 j = sv_loc[ThRandPos % nsv];
00824 } while (sv[j] != 0);
00825 sv[j] = 1;
00826 }
00827
00828
00829 ll = ((nsv+nbsv) < chunk_size ? (nsv+nbsv) : chunk_size);
00830 for (; i < ll; i++)
00831 {
00832 do {
00833 j = bsv_loc[ThRandPos % nbsv];
00834 } while (sv[j] != 0);
00835 sv[j] = 1;
00836 }
00837
00838
00839
00840 for (; i < chunk_size; i++)
00841 {
00842 do {
00843 j = ThRandPos % ell;
00844 } while (sv[j] != 0);
00845 sv[j] = 1;
00846 }
00847
00848
00849
00850 if (sl < 500) free(sp_D);
00851 free(sp_y );
00852 free(sp_h );
00853 free(sv_loc );
00854 free(bsv_loc );
00855 free(sp_alpha);
00856
00857 if (verbosity > 0)
00858 {
00859 SG_INFO("\n Preprocessing: SV = %d", nsv);
00860 SG_INFO(", BSV = %d\n", nbsv);
00861 }
00862
00863 return(nsv);
00864 }
00865
00866
00867
00868
00869 float64_t QPproblem::gpdtsolve(float64_t *solution)
00870 {
00871 int32_t i, j, k, z, iin, jin, nit, tot_vpm_iter, lsCount;
00872 int32_t tot_vpm_secant, projCount, proximal_count;
00873 int32_t vpmWarningThreshold;
00874 int32_t nzin, nzout;
00875 int32_t *sp_y;
00876 int32_t *indnzin, *indnzout;
00877 float32_t *sp_D;
00878 float64_t *sp_h, *sp_hloc,
00879 *sp_alpha,*stloc;
00880 float64_t sp_e, aux, fval, tau_proximal_this, dfval;
00881 float64_t *vau;
00882 float64_t *weight;
00883 float64_t tot_prep_time, tot_vpm_time, tot_st_time, total_time;
00884 sCache *Cache;
00885 cachetype *ptmw;
00886 clock_t t, ti;
00887
00888 Cache = new sCache(KER, maxmw, ell);
00889 if (chunk_size > ell) chunk_size = ell;
00890
00891 if (chunk_size <= 20)
00892 vpmWarningThreshold = 30*chunk_size;
00893 else if (chunk_size <= 200)
00894 vpmWarningThreshold = 20*chunk_size + 200;
00895 else
00896 vpmWarningThreshold = 10*chunk_size + 2200;
00897
00898 kktold = 10000.0;
00899 if (delta <= 5e-3)
00900 {
00901 if ( (chunk_size <= 20) | ((float64_t)chunk_size/ell <= 0.001) )
00902 DELTAvpm = delta * 0.1;
00903 else if ( (chunk_size <= 200) | ((float64_t)chunk_size/ell <= 0.005) )
00904 DELTAvpm = delta * 0.5;
00905 else
00906 DELTAvpm = delta;
00907 }
00908 else
00909 {
00910 if ( (chunk_size <= 200) | ((float64_t)chunk_size/ell <= 0.005) )
00911 DELTAvpm = (1e-3 < delta*0.1) ? 1e-3 : delta*0.1;
00912 else
00913 DELTAvpm = 5e-3;
00914 }
00915
00916 InitialDELTAvpm = DELTAvpm;
00917 DELTAsv = EPS_SV * c_const;
00918 DELTAkin = 1.0;
00919
00920 q = q & (~1);
00921 nb = ell - chunk_size;
00922 tot_vpm_iter = 0;
00923 tot_vpm_secant = 0;
00924
00925 tot_prep_time = tot_vpm_time = tot_st_time = total_time = 0.0;
00926
00927 ti = clock();
00928
00929
00930 SG_DEBUG("ell:%d, chunk_size:%d, nb:%d dim:%d\n", ell, chunk_size,nb, dim);
00931 ing = (int32_t *) malloc(ell*sizeof(int32_t));
00932 inaux = (int32_t *) malloc(ell*sizeof(int32_t));
00933 index_in = (int32_t *) malloc(chunk_size*sizeof(int32_t));
00934 index_out = (int32_t *) malloc(ell*sizeof(int32_t));
00935 indnzout = (int32_t *) malloc(nb*sizeof(int32_t));
00936 alpha = (float64_t *)malloc(ell*sizeof(float64_t ));
00937
00938 memset(alpha, 0, ell*sizeof(float64_t));
00939 memset(ing, 0, ell*sizeof(int32_t));
00940
00941 if (verbosity > 0 && PreprocessMode != 0)
00942 SG_INFO("\n*********** Begin setup step...\n");
00943 t = clock();
00944
00945 switch(PreprocessMode)
00946 {
00947 case 1: Preprocess1(KER, inaux, ing); break;
00948 case 0:
00949 default:
00950 Preprocess0(inaux, ing); break;
00951 }
00952
00953 for (j = k = i = 0; i < ell; i++)
00954 if (ing[i] == 0)
00955 index_out[j++] = i;
00956 else
00957 index_in[k++] = i;
00958
00959 t = clock() - t;
00960 if (verbosity > 0 && PreprocessMode != 0)
00961 {
00962 SG_INFO( " Time for setup: %.2lf\n", (float64_t)t/CLOCKS_PER_SEC);
00963 SG_INFO(
00964 "\n\n*********** Begin decomposition technique...\n");
00965 }
00966
00967
00968 bmem = (int32_t *)malloc(ell*sizeof(int32_t));
00969 bmemrid = (int32_t *)malloc(chunk_size*sizeof(int32_t));
00970 pbmr = (int32_t *)malloc(chunk_size*sizeof(int32_t));
00971 cec = (int32_t *)malloc(ell*sizeof(int32_t));
00972 indnzin = (int32_t *)malloc(chunk_size*sizeof(int32_t));
00973 inold = (int32_t *)malloc(chunk_size*sizeof(int32_t));
00974 incom = (int32_t *)malloc(chunk_size*sizeof(int32_t));
00975 vau = (float64_t *)malloc(ell*sizeof(float64_t ));
00976 grad = (float64_t *)malloc(ell*sizeof(float64_t ));
00977 weight = (float64_t *)malloc(dim*sizeof(float64_t ));
00978 st = (float64_t *)malloc(ell*sizeof(float64_t ));
00979 stloc = (float64_t *)malloc(ell*sizeof(float64_t ));
00980
00981 for (i = 0; i < ell; i++)
00982 {
00983 bmem[i] = 0;
00984 cec[i] = 0;
00985 st[i] = 0;
00986 }
00987
00988 sp_y = (int32_t *)malloc(chunk_size*sizeof(int32_t));
00989 sp_D = (float32_t *)malloc(chunk_size*chunk_size * sizeof(float32_t));
00990 sp_alpha = (float64_t *)malloc(chunk_size*sizeof(float64_t ));
00991 sp_h = (float64_t *)malloc(chunk_size*sizeof(float64_t ));
00992 sp_hloc = (float64_t *)malloc(chunk_size*sizeof(float64_t ));
00993
00994 for (i = 0; i < chunk_size; i++)
00995 cec[index_in[i]] = cec[index_in[i]]+1;
00996
00997 for (i = chunk_size-1; i >= 0; i--)
00998 {
00999 incom[i] = -1;
01000 sp_alpha[i] = 0.0;
01001 bmem[index_in[i]] = 1;
01002 }
01003
01004 if (verbosity == 1)
01005 {
01006 SG_INFO( " IT | Prep Time | Solver IT | Solver Time |");
01007 SG_INFO( " Grad Time | KKT violation\n");
01008 SG_INFO( "------+-----------+-----------+-------------+");
01009 SG_INFO( "-----------+--------------\n");
01010 }
01011
01012
01013
01014 nit = 0;
01015 do
01016 {
01017 t = clock();
01018 if ((nit % 10) == 9)
01019 {
01020 if ((t-ti) > 0)
01021 total_time += (float64_t)(t-ti) / CLOCKS_PER_SEC;
01022 else
01023 total_time += (float64_t)(ti-t) / CLOCKS_PER_SEC;
01024 ti = t;
01025 }
01026
01027 if (verbosity > 1)
01028 SG_INFO("\n*********** ITERATION: %d\n", nit + 1);
01029 else if (verbosity > 0)
01030 SG_INFO( "%5d |", nit + 1);
01031 else
01032 SG_INFO( ".");
01033 fflush(stdout);
01034
01035 nzout = 0;
01036 for (k = 0; k < nb; k++)
01037 if (alpha_out(k)>DELTAsv)
01038 {
01039 indnzout[nzout] = index_out[k];
01040 nzout++;
01041 }
01042
01043 sp_e = 0.;
01044 for (j = 0; j < nzout; j++)
01045 {
01046 vau[j] = y[indnzout[j]]*alpha[indnzout[j]];
01047 sp_e += vau[j];
01048 }
01049
01050 if (verbosity > 1)
01051 SG_INFO( " spe: %e ", sp_e);
01052
01053 for (i = 0; i < chunk_size; i++)
01054 sp_y[i] = y_in(i);
01055
01056
01057 for (i = 0; i < chunk_size; i++)
01058 {
01059 iin = index_in[i];
01060 ptmw = Cache->GetRow(iin);
01061 if (ptmw != 0)
01062 {
01063 for (j = 0; j <= i; j++)
01064 sp_D[i*chunk_size + j] = sp_y[i]*sp_y[j] * ptmw[index_in[j]];
01065 }
01066 else if (incom[i] == -1)
01067 for (j = 0; j <= i; j++)
01068 sp_D[i*chunk_size + j] = sp_y[i]*sp_y[j]
01069 * (float32_t)KER->Get(iin, index_in[j]);
01070 else
01071 {
01072 for (j = 0; j < i; j++)
01073 if (incom[j] == -1)
01074 sp_D[i*chunk_size + j]
01075 = sp_y[i]*sp_y[j] * (float32_t)KER->Get(iin, index_in[j]);
01076 else
01077 sp_D[i*chunk_size + j]
01078 = sp_D[incom[j]*chunk_size + incom[i]];
01079 sp_D[i*chunk_size + i]
01080 = sp_y[i]*sp_y[i] * (float32_t)KER->Get(iin, index_in[i]);
01081 }
01082 }
01083 for (i = 0; i < chunk_size; i++)
01084 {
01085 for (j = 0; j < i; j++)
01086 sp_D[j*chunk_size + i] = sp_D[i*chunk_size + j];
01087 }
01088
01089 if (nit == 0 && PreprocessMode > 0)
01090 {
01091 for (i = 0; i < chunk_size; i++)
01092 {
01093 iin = index_in[i];
01094 aux = 0.;
01095 ptmw = Cache->GetRow(iin);
01096 if (ptmw == NULL)
01097 for (j = 0; j < nzout; j++)
01098 aux += vau[j] * KER->Get(iin, indnzout[j]);
01099 else
01100 for (j = 0; j < nzout; j++)
01101 aux += vau[j] * ptmw[indnzout[j]];
01102 sp_h[i] = y_in(i) * aux - 1.0;
01103 }
01104 }
01105 else
01106 {
01107 for (i = 0; i < chunk_size; i++)
01108 vau[i] = alpha_in(i);
01109 for (i = 0; i < chunk_size; i++)
01110 {
01111 aux = 0.0;
01112 for (j = 0; j < chunk_size; j++)
01113 aux += sp_D[i*chunk_size + j] * vau[j];
01114 sp_h[i] = st[index_in[i]] * y_in(i) - 1.0 - aux;
01115 }
01116 }
01117
01118 t = clock() - t;
01119 if (verbosity > 1)
01120 SG_INFO(
01121 " Preparation Time: %.2lf\n", (float64_t)t/CLOCKS_PER_SEC);
01122 else if (verbosity > 0)
01123 SG_INFO( " %8.2lf |", (float64_t)t/CLOCKS_PER_SEC);
01124 tot_prep_time += (float64_t)t/CLOCKS_PER_SEC;
01125
01126
01127
01128 if (tau_proximal < 0.0)
01129 tau_proximal_this = 0.0;
01130 else
01131 tau_proximal_this = tau_proximal;
01132 proximal_count = 0;
01133 do {
01134 t = clock();
01135 for (i = 0; i < chunk_size; i++)
01136 {
01137 vau[i] = sp_D[i*chunk_size + i];
01138 sp_h[i] -= tau_proximal_this * alpha_in(i);
01139 sp_D[i*chunk_size + i] += (float32_t)tau_proximal_this;
01140 }
01141
01142 if (kktold < delta*100)
01143 for (i = 0; i < chunk_size; i++)
01144 sp_alpha[i] = alpha_in(i);
01145 else
01146 for (i = 0; i < chunk_size; i++)
01147 sp_alpha[i] = 0.0;
01148
01149
01150 i = gpm_solver(projection_solver, projection_projector, chunk_size,
01151 sp_D, sp_h, c_const, sp_e, sp_y, sp_alpha,
01152 DELTAvpm*DELTAkin, &lsCount, &projCount);
01153
01154 if (i > vpmWarningThreshold)
01155 {
01156 if (ker_type == 2)
01157 {
01158 SG_INFO("\n WARNING: inner subproblem hard to solve;");
01159 SG_INFO(" setting a smaller -q or");
01160 SG_INFO(" tuning -c and -g options might help.\n");
01161 }
01162 else
01163 {
01164 SG_INFO("\n WARNING: inner subproblem hard to solve;");
01165 SG_INFO(" set a smaller -q or");
01166 SG_INFO(" try a better data scaling.\n");
01167 }
01168 }
01169
01170 t = clock() - t;
01171 tot_vpm_iter += i;
01172 tot_vpm_secant += projCount;
01173 tot_vpm_time += (float64_t)t/CLOCKS_PER_SEC;
01174 if (verbosity > 1)
01175 {
01176 SG_INFO(" Solver it: %d", i);
01177 SG_INFO(", ls: %d", lsCount);
01178 SG_INFO(", time: %.2lf\n", (float64_t)t/CLOCKS_PER_SEC);
01179 }
01180 else if (verbosity > 0)
01181 {
01182 SG_INFO(" %6d", i);
01183 SG_INFO(" | %8.2lf |", (float64_t)t/CLOCKS_PER_SEC);
01184 }
01185
01186
01187
01188 for (i = 0; i < chunk_size; i++)
01189 sp_D[i*chunk_size + i] = (float32_t)vau[i];
01190 tau_proximal_this = 0.0;
01191 if (tau_proximal < 0.0)
01192 {
01193 dfval = 0.0;
01194 for (i = 0; i < chunk_size; i++)
01195 {
01196 aux = 0.0;
01197 for (j = 0; j < chunk_size; j++)
01198 aux += sp_D[i*chunk_size + j]*(alpha_in(j) - sp_alpha[j]);
01199 dfval += (0.5*aux - st[index_in[i]]*y_in(i) + 1.0) * (alpha_in(i) - sp_alpha[i]);
01200 }
01201
01202 aux=0.0;
01203 for (i = 0; i < chunk_size; i++)
01204 aux += (alpha_in(i) - sp_alpha[i])*(alpha_in(i) - sp_alpha[i]);
01205
01206 if ((-dfval/aux) < -0.5*tau_proximal)
01207 {
01208 tau_proximal_this = -tau_proximal;
01209 if (verbosity > 0)
01210 SG_DEBUG("tau threshold: %lf ", -dfval/aux);
01211 }
01212 }
01213 proximal_count++;
01214 } while (tau_proximal_this != 0.0 && proximal_count < 2);
01215
01216 t = clock();
01217
01218 nzin = 0;
01219 for (j = 0; j < chunk_size; j++)
01220 {
01221 if (nit == 0)
01222 aux = sp_alpha[j];
01223 else
01224 aux = sp_alpha[j] - alpha_in(j);
01225 if (fabs(aux) > DELTAsv)
01226 {
01227 indnzin[nzin] = index_in[j];
01228 grad[nzin] = aux * y_in(j);
01229 nzin++;
01230 }
01231 }
01232
01233
01234 if (KER->get_kernel()->has_property(KP_LINADD) && get_linadd_enabled())
01235 {
01236 KER->get_kernel()->clear_normal() ;
01237
01238 for (j = 0; j < nzin; j++)
01239 KER->get_kernel()->add_to_normal(indnzin[j], grad[j]);
01240
01241 if (nit == 0 && PreprocessMode > 0)
01242 {
01243 for (j = 0; j < nzout; j++)
01244 {
01245 jin = indnzout[j];
01246 KER->get_kernel()->add_to_normal(jin, alpha[jin] * y[jin]);
01247 }
01248 }
01249
01250 for (i = 0; i < ell; i++)
01251 st[i] += KER->get_kernel()->compute_optimized(i);
01252 }
01253 else
01254 {
01255 k = Cache->DivideMP(ing, indnzin, nzin);
01256 for (j = 0; j < k; j++)
01257 {
01258 ptmw = Cache->FillRow(indnzin[ing[j]]);
01259 for (i = 0; i < ell; i++)
01260 st[i] += grad[ing[j]] * ptmw[i];
01261 }
01262
01263 if (PreprocessMode > 0 && nit == 0)
01264 {
01265 clock_t ti2;
01266
01267 ti2 = clock();
01268 for (j = 0; j < nzout; j++)
01269 {
01270 jin = indnzout[j];
01271 ptmw = Cache->FillRow(jin);
01272 for (i = 0; i < ell; i++)
01273 st[i] += alpha[jin] * y[jin] * ptmw[i];
01274 }
01275 if (verbosity > 1)
01276 SG_INFO(
01277 " G*x0 time: %.2lf\n", (float64_t)(clock()-ti2)/CLOCKS_PER_SEC);
01278 }
01279 }
01280
01281
01282
01283 t = clock() - t;
01284 if (verbosity > 1)
01285 SG_INFO(
01286 " Gradient updating time: %.2lf\n", (float64_t)t/CLOCKS_PER_SEC);
01287 else if (verbosity > 0)
01288 SG_INFO( " %8.2lf |", (float64_t)t/CLOCKS_PER_SEC);
01289 tot_st_time += (float64_t)t/CLOCKS_PER_SEC;
01290
01291
01292 for (i = 0; i < chunk_size; i++)
01293 alpha_in(i) = sp_alpha[i];
01294
01295 if (verbosity > 1)
01296 {
01297 j = k = 0;
01298 for (i = 0; i < ell; i++)
01299 {
01300 if (is_free(i)) j++;
01301 if (is_bound(i)) k++;
01302 }
01303 SG_INFO(" SV: %d", j+k);
01304 SG_INFO(", BSV: %d\n", k);
01305 }
01306 Cache->Iteration();
01307 nit = nit+1;
01308 } while (!optimal() && !(CSignal::cancel_computations()));
01309
01310
01311
01312 t = clock();
01313 if ((t-ti) > 0)
01314 total_time += (float64_t)(t-ti) / CLOCKS_PER_SEC;
01315 else
01316 total_time += (float64_t)(ti-t) / CLOCKS_PER_SEC;
01317 ti = t;
01318
01319 memcpy(solution, alpha, ell * sizeof(float64_t));
01320
01321
01322 fval = 0.0;
01323 for (i = 0; i < ell; i++)
01324 fval += alpha[i]*(y[i]*st[i]*0.5 - 1.0);
01325
01326 SG_INFO("\n------+-----------+-----------+-------------+");
01327 SG_INFO("-----------+--------------\n");
01328 SG_INFO(
01329 "\n- TOTAL ITERATIONS: %i\n", nit);
01330
01331 if (verbosity > 1)
01332 {
01333 j = 0;
01334 k = 0;
01335 z = 0;
01336 for (i = ell - 1; i >= 0; i--)
01337 {
01338 if (cec[i] > 0) j++;
01339 if (cec[i] > 1) k++;
01340 if (cec[i] > 2) z++;
01341 }
01342 SG_INFO(
01343 "- Variables entering the working set at least one time: %i\n", j);
01344 SG_INFO(
01345 "- Variables entering the working set at least two times: %i\n", k);
01346 SG_INFO(
01347 "- Variables entering the working set at least three times: %i\n", z);
01348 }
01349
01350
01351 free(bmem);
01352 free(bmemrid);
01353 free(pbmr);
01354 free(cec);
01355 free(ing);
01356 free(inaux);
01357 free(indnzin);
01358 free(index_in);
01359 free(inold);
01360 free(incom);
01361 free(indnzout);
01362 free(index_out);
01363 free(vau);
01364 free(alpha);
01365 free(weight);
01366 free(grad);
01367 free(stloc);
01368 free(st);
01369 free(sp_h);
01370 free(sp_hloc);
01371 free(sp_y);
01372 free(sp_D);
01373 free(sp_alpha);
01374 delete Cache;
01375
01376 aux = KER->KernelEvaluations;
01377 SG_INFO( "- Total CPU time: %lf\n", total_time);
01378 if (verbosity > 0)
01379 {
01380 SG_INFO(
01381 "- Total kernel evaluations: %.0lf\n", aux);
01382 SG_INFO(
01383 "- Total inner solver iterations: %i\n", tot_vpm_iter);
01384 if (projection_projector == 1)
01385 SG_INFO(
01386 "- Total projector iterations: %i\n", tot_vpm_secant);
01387 SG_INFO(
01388 "- Total preparation time: %lf\n", tot_prep_time);
01389 SG_INFO(
01390 "- Total inner solver time: %lf\n", tot_vpm_time);
01391 SG_INFO(
01392 "- Total gradient updating time: %lf\n", tot_st_time);
01393 }
01394 SG_INFO( "- Objective function value: %lf\n", fval);
01395 objective_value=fval;
01396 return aux;
01397 }
01398
01399
01400
01401
01402 void quick_si(int32_t a[], int32_t n)
01403 {
01404 int32_t i, j, s, d, l, x, w, ps[20], pd[20];
01405
01406 l = 0;
01407 ps[0] = 0;
01408 pd[0] = n-1;
01409 do
01410 {
01411 s = ps[l];
01412 d = pd[l];
01413 l--;
01414 do
01415 {
01416 i = s;
01417 j = d;
01418 x = a[(s+d)/2];
01419 do
01420 {
01421 while (a[i] < x) i++;
01422 while (a[j] > x) j--;
01423 if (i <= j)
01424 {
01425 w = a[i];
01426 a[i] = a[j];
01427 i++;
01428 a[j] = w;
01429 j--;
01430 }
01431 } while(i<=j);
01432 if (j-s > d-i)
01433 {
01434 l++;
01435 ps[l] = s;
01436 pd[l] = j;
01437 s = i;
01438 }
01439 else
01440 {
01441 if (i < d)
01442 {
01443 l++;
01444 ps[l] = i;
01445 pd[l] = d;
01446 }
01447 d = j;
01448 }
01449 } while (s < d);
01450 } while (l >= 0);
01451 }
01452
01453
01454
01455
01456 void quick_s2(float64_t a[], int32_t n, int32_t ia[])
01457 {
01458 int32_t i, j, s, d, l, iw, ps[20], pd[20];
01459 float64_t x, w;
01460
01461 l = 0;
01462 ps[0] = 0;
01463 pd[0] = n-1;
01464 do
01465 {
01466 s = ps[l];
01467 d = pd[l];
01468 l--;
01469 do
01470 {
01471 i = s;
01472 j = d;
01473 x = a[(s+d)/2];
01474 do
01475 {
01476 while (a[i] < x) i++;
01477 while (a[j] > x) j--;
01478 if (i <= j)
01479 {
01480 iw = ia[i];
01481 w = a[i];
01482 ia[i] = ia[j];
01483 a[i] = a[j];
01484 i++;
01485 ia[j] = iw;
01486 a[j] = w;
01487 j--;
01488 }
01489 } while (i <= j);
01490 if (j-s > d-i)
01491 {
01492 l++;
01493 ps[l] = s;
01494 pd[l] = j;
01495 s = i;
01496 }
01497 else
01498 {
01499 if (i < d)
01500 {
01501 l++;
01502 ps[l] = i;
01503 pd[l] = d;
01504 }
01505 d = j;
01506 }
01507 } while (s < d);
01508 } while(l>=0);
01509 }
01510
01511
01512
01513
01514 void quick_s3(int32_t a[], int32_t n, int32_t ia[])
01515 {
01516 int32_t i, j, s, d, l, iw, w, x, ps[20], pd[20];
01517
01518 l = 0;
01519 ps[0] = 0;
01520 pd[0] = n-1;
01521 do
01522 {
01523 s = ps[l];
01524 d = pd[l];
01525 l--;
01526 do
01527 {
01528 i = s;
01529 j = d;
01530 x = a[(s+d)/2];
01531 do
01532 {
01533 while (a[i] < x) i++;
01534 while (a[j] > x) j--;
01535 if (i <= j)
01536 {
01537 iw = ia[i];
01538 w = a[i];
01539 ia[i] = ia[j];
01540 a[i] = a[j];
01541 i++;
01542 ia[j] = iw;
01543 a[j] = w;
01544 j--;
01545 }
01546 } while (i <= j);
01547 if (j-s > d-i)
01548 {
01549 l++;
01550 ps[l] = s;
01551 pd[l] = j;
01552 s = i;
01553 }
01554 else
01555 {
01556 if (i < d)
01557 {
01558 l++;
01559 ps[l] = i;
01560 pd[l] = d;
01561 }
01562 d = j;
01563 }
01564 } while (s < d);
01565 } while (l >= 0);
01566 }
01567
01568 #endif // DOXYGEN_SHOULD_SKIP_THIS
01569
01570
01571
01572