
svm.cpp

00001 #include <math.h>
00002 #include <stdio.h>
00003 #include <stdlib.h>
00004 #include <ctype.h>
00005 #include <float.h>
00006 #include <string.h>
00007 #include <stdarg.h>
00008 #include <algorithm>    // std::max and std::swap are used below
00008 #include <utility>
00008 #include "svm.h"
00009 typedef float Qfloat;
00010 typedef signed char schar;
00011 #ifndef min
00012 template <class T> inline T min(T x,T y) { return (x<y)?x:y; }
00013 #endif
00014 #ifndef max
00015 template <class T> inline T max(T x,T y) { return (x>y)?x:y; }
00016 #endif
00017 template <class T> inline void swap(T& x, T& y) { T t=x; x=y; y=t; }
00018 template <class S, class T> inline void clone(T*& dst, S* src, int n)
00019 {
00020         dst = new T[n];
00021         memcpy((void *)dst,(void *)src,sizeof(T)*n);
00022 }
00023 #define INF HUGE_VAL
00024 #define Malloc(type,n) (type *)malloc((n)*sizeof(type))
00025 #if 1
00026 void info(const char *fmt,...)
00027 {
00028         va_list ap;
00029         va_start(ap,fmt);
00030         vprintf(fmt,ap);
00031         va_end(ap);
00032 }
00033 void info_flush()
00034 {
00035         fflush(stdout);
00036 }
00037 #else
00038 void info(const char *fmt,...) {}
00039 void info_flush() {}
00040 #endif
00041 
00042 //
00043 // Kernel Cache
00044 //
00045 // l is the total number of data items
00046 // size is the cache size limit in bytes
00047 //
00048 class Cache
00049 {
00050 public:
00051         Cache(int l,int size);
00052         ~Cache();
00053 
00054         // request data [0,len)
00055 // return some position p where [p,len) needs to be filled
00056         // (p >= len if nothing needs to be filled)
00057         int get_data(const int index, Qfloat **data, int len);
00058         void swap_index(int i, int j);  // future_option
00059 private:
00060         int l;
00061         int size;
00062         struct head_t
00063         {
00064                 head_t *prev, *next;    // a circular list
00065                 Qfloat *data;
00066                 int len;                // data[0,len) is cached in this entry
00067         };
00068 
00069         head_t* head;
00070         head_t lru_head;
00071         void lru_delete(head_t *h);
00072         void lru_insert(head_t *h);
00073 };
00074 
00075 Cache::Cache(int l_,int size_):l(l_),size(size_)
00076 {
00077         head = (head_t *)calloc(l,sizeof(head_t));      // initialized to 0
00078         size /= sizeof(Qfloat);
00079         size -= l * sizeof(head_t) / sizeof(Qfloat);
00080         lru_head.next = lru_head.prev = &lru_head;
00081 }
00082 
00083 Cache::~Cache()
00084 {
00085         for(head_t *h = lru_head.next; h != &lru_head; h=h->next)
00086                 free(h->data);
00087         free(head);
00088 }
00089 
00090 void Cache::lru_delete(head_t *h)
00091 {
00092         // delete from current location
00093         h->prev->next = h->next;
00094         h->next->prev = h->prev;
00095 }
00096 
00097 void Cache::lru_insert(head_t *h)
00098 {
00099         // insert at the last (most recently used) position
00100         h->next = &lru_head;
00101         h->prev = lru_head.prev;
00102         h->prev->next = h;
00103         h->next->prev = h;
00104 }
00105 
00106 int Cache::get_data(const int index, Qfloat **data, int len)
00107 {
00108         head_t *h = &head[index];
00109         if(h->len) lru_delete(h);
00110         int more = len - h->len;
00111 
00112         if(more > 0)
00113         {
00114                 // free old space
00115                 while(size < more)
00116                 {
00117                         head_t *old = lru_head.next;
00118                         lru_delete(old);
00119                         free(old->data);
00120                         size += old->len;
00121                         old->data = 0;
00122                         old->len = 0;
00123                 }
00124 
00125                 // allocate new space
00126                 h->data = (Qfloat *)realloc(h->data,sizeof(Qfloat)*len);
00127                 size -= more;
00128                 std::swap(h->len,len);
00129         }
00130 
00131         lru_insert(h);
00132         *data = h->data;
00133         return len;
00134 }
00135 
00136 void Cache::swap_index(int i, int j)
00137 {
00138         if(i==j) return;
00139 
00140         if(head[i].len) lru_delete(&head[i]);
00141         if(head[j].len) lru_delete(&head[j]);
00142         std::swap(head[i].data,head[j].data);
00143         std::swap(head[i].len,head[j].len);
00144         if(head[i].len) lru_insert(&head[i]);
00145         if(head[j].len) lru_insert(&head[j]);
00146 
00147         if(i>j) std::swap(i,j);
00148         for(head_t *h = lru_head.next; h!=&lru_head; h=h->next)
00149         {
00150                 if(h->len > i)
00151                 {
00152                         if(h->len > j)
00153                                 std::swap(h->data[i],h->data[j]);
00154                         else
00155                         {
00156                                 // give up
00157                                 lru_delete(h);
00158                                 free(h->data);
00159                                 size += h->len;
00160                                 h->data = 0;
00161                                 h->len = 0;
00162                         }
00163                 }
00164         }
00165 }
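// A minimal usage sketch of the cache above (illustration only, not part of
// the original file; the Q-matrix classes further below do exactly this with
// real kernel values): get_data() returns the first index of the column that
// still has to be computed, so only the missing tail [start,len) is filled.

static void example_fill_column(Cache *cache, int i, int len)
{
        Qfloat *data;
        int start = cache->get_data(i, &data, len);     // [0,start) is already cached
        for(int j = start; j < len; j++)
                data[j] = (Qfloat)0;                    // placeholder for K(i,j)
}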
00166 
00167 //
00168 // Kernel evaluation
00169 //
00170 // the static method k_function performs a single kernel evaluation
00171 // the constructor of Kernel prepares to calculate the l*l kernel matrix
00172 // the member function get_Q returns one column of the Q matrix
00173 //
00174 class Kernel {
00175 public:
00176         Kernel(int l, svm_node * const * x, const svm_parameter& param);
00177         virtual ~Kernel();
00178 
00179         static double k_function(const svm_node *x, const svm_node *y,
00180                                  const svm_parameter& param);
00181         virtual Qfloat *get_Q(int column, int len) const = 0;
00182         virtual void swap_index(int i, int j) const     // not really const...
00183         {
00184                 std::swap(x[i],x[j]);
00185                 if(x_square) std::swap(x_square[i],x_square[j]);
00186         }
00187 protected:
00188 
00189         double (Kernel::*kernel_function)(int i, int j) const;
00190 
00191 private:
00192         const svm_node **x;
00193         double *x_square;
00194 
00195         // svm_parameter
00196         const int kernel_type;
00197         const double degree;
00198         const double gamma;
00199         const double coef0;
00200 
00201         static double dot(const svm_node *px, const svm_node *py);
00202         double kernel_linear(int i, int j) const
00203         {
00204                 return dot(x[i],x[j]);
00205         }
00206         double kernel_poly(int i, int j) const
00207         {
00208                 return pow(gamma*dot(x[i],x[j])+coef0,degree);
00209         }
00210         double kernel_rbf(int i, int j) const
00211         {
00212                 return exp(-gamma*(x_square[i]+x_square[j]-2*dot(x[i],x[j])));
00213         }
00214         double kernel_sigmoid(int i, int j) const
00215         {
00216                 return tanh(gamma*dot(x[i],x[j])+coef0);
00217         }
00218 };
00219 
00220 Kernel::Kernel(int l, svm_node * const * x_, const svm_parameter& param)
00221 :kernel_type(param.kernel_type), degree(param.degree),
00222  gamma(param.gamma), coef0(param.coef0)
00223 {
00224         switch(kernel_type)
00225         {
00226                 case SVM_LINEAR:
00227                         kernel_function = &Kernel::kernel_linear;
00228                         break;
00229                 case POLY:
00230                         kernel_function = &Kernel::kernel_poly;
00231                         break;
00232                 case RBF:
00233                         kernel_function = &Kernel::kernel_rbf;
00234                         break;
00235                 case SIGMOID:
00236                         kernel_function = &Kernel::kernel_sigmoid;
00237                         break;
00238         }
00239 
00240         clone(x,x_,l);
00241 
00242         if(kernel_type == RBF)
00243         {
00244                 x_square = new double[l];
00245                 for(int i=0;i<l;i++)
00246                         x_square[i] = dot(x[i],x[i]);
00247         }
00248         else
00249                 x_square = 0;
00250 }
00251 
00252 Kernel::~Kernel()
00253 {
00254         delete[] x;
00255         delete[] x_square;
00256 }
00257 
00258 double Kernel::dot(const svm_node *px, const svm_node *py)
00259 {
00260         double sum = 0;
00261         while(px->index != -1 && py->index != -1)
00262         {
00263                 if(px->index == py->index)
00264                 {
00265                         sum += px->value * py->value;
00266                         ++px;
00267                         ++py;
00268                 }
00269                 else
00270                 {
00271                         if(px->index > py->index)
00272                                 ++py;
00273                         else
00274                                 ++px;
00275                 }                       
00276         }
00277         return sum;
00278 }
00279 
00280 double Kernel::k_function(const svm_node *x, const svm_node *y,
00281                           const svm_parameter& param)
00282 {
00283         switch(param.kernel_type)
00284         {
00285                 case SVM_LINEAR:
00286                         return dot(x,y);
00287                 case POLY:
00288                         return pow(param.gamma*dot(x,y)+param.coef0,param.degree);
00289                 case RBF:
00290                 {
00291                         double sum = 0;
00292                         while(x->index != -1 && y->index !=-1)
00293                         {
00294                                 if(x->index == y->index)
00295                                 {
00296                                         double d = x->value - y->value;
00297                                         sum += d*d;
00298                                         ++x;
00299                                         ++y;
00300                                 }
00301                                 else
00302                                 {
00303                                         if(x->index > y->index)
00304                                         {       
00305                                                 sum += y->value * y->value;
00306                                                 ++y;
00307                                         }
00308                                         else
00309                                         {
00310                                                 sum += x->value * x->value;
00311                                                 ++x;
00312                                         }
00313                                 }
00314                         }
00315 
00316                         while(x->index != -1)
00317                         {
00318                                 sum += x->value * x->value;
00319                                 ++x;
00320                         }
00321 
00322                         while(y->index != -1)
00323                         {
00324                                 sum += y->value * y->value;
00325                                 ++y;
00326                         }
00327                         
00328                         return exp(-param.gamma*sum);
00329                 }
00330                 case SIGMOID:
00331                         return tanh(param.gamma*dot(x,y)+param.coef0);
00332                 default:
00333                         return 0;       /* Unreachable */
00334         }
00335 }
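// A minimal sketch of a single kernel evaluation through the static entry
// point above (assuming svm_node is the usual {int index; double value;} pair
// from svm.h, terminated by index == -1; only kernel_type and gamma are read
// in the RBF case):

static double example_rbf_k()
{
        svm_node x[] = { {1, 0.5}, {3, 1.0}, {-1, 0.0} };
        svm_node y[] = { {1, 0.4}, {2, 2.0}, {-1, 0.0} };
        svm_parameter param;
        param.kernel_type = RBF;
        param.gamma = 0.5;
        return Kernel::k_function(x, y, param);         // exp(-gamma*||x-y||^2)
}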
00336 
00337 // Generalized SMO+SVMlight algorithm
00338 // Solves:
00339 //
00340 //      min 0.5(\alpha^T Q \alpha) + b^T \alpha
00341 //
00342 //              y^T \alpha = \delta
00343 //              y_i = +1 or -1
00344 //              0 <= alpha_i <= Cp for y_i = 1
00345 //              0 <= alpha_i <= Cn for y_i = -1
00346 //
00347 // Given:
00348 //
00349 //      Q, b, y, Cp, Cn, and an initial feasible point \alpha
00350 //      l is the size of vectors and matrices
00351 //      eps is the stopping criterion
00352 //
00353 // solution will be put in \alpha, objective value will be put in obj
00354 //
00355 class Solver {
00356 public:
00357         Solver() {};
00358         virtual ~Solver() {};
00359 
00360         struct SolutionInfo {
00361                 double obj;
00362                 double rho;
00363                 double upper_bound_p;
00364                 double upper_bound_n;
00365                 double r;       // for Solver_NU
00366         };
00367 
00368         void Solve(int l, const Kernel& Q, const double *b_, const schar *y_,
00369                    double *alpha_, double Cp, double Cn, double eps,
00370                    SolutionInfo* si, int shrinking);
00371 protected:
00372         int active_size;
00373         schar *y;
00374         double *G;              // gradient of objective function
00375         enum { LOWER_BOUND, UPPER_BOUND, FREE };
00376         char *alpha_status;     // LOWER_BOUND, UPPER_BOUND, FREE
00377         double *alpha;
00378         const Kernel *Q;
00379         double eps;
00380         double Cp,Cn;
00381         double *b;
00382         int *active_set;
00383         double *G_bar;          // gradient, if we treat free variables as 0
00384         int l;
00385         bool unshrinked;        // XXX
00386 
00387         double get_C(int i)
00388         {
00389                 return (y[i] > 0)? Cp : Cn;
00390         }
00391         void update_alpha_status(int i)
00392         {
00393                 if(alpha[i] >= get_C(i))
00394                         alpha_status[i] = UPPER_BOUND;
00395                 else if(alpha[i] <= 0)
00396                         alpha_status[i] = LOWER_BOUND;
00397                 else alpha_status[i] = FREE;
00398         }
00399         bool is_upper_bound(int i) { return alpha_status[i] == UPPER_BOUND; }
00400         bool is_lower_bound(int i) { return alpha_status[i] == LOWER_BOUND; }
00401         bool is_free(int i) { return alpha_status[i] == FREE; }
00402         void swap_index(int i, int j);
00403         void reconstruct_gradient();
00404         virtual int select_working_set(int &i, int &j);
00405         virtual double calculate_rho();
00406         virtual void do_shrinking();
00407 };
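// Invariants maintained by the solver (a restatement of the member comments
// above, matching the initialization in Solve() and reconstruct_gradient()):
//
//      G[i]     = \sum_j Q_ij \alpha_j + b_i                 (full gradient)
//      G_bar[i] = \sum_{j : \alpha_j = C_j} C_j Q_ij         (upper-bounded terms only)
//
// so the gradient of a shrunken index i can be rebuilt from G_bar[i] + b[i]
// plus the contributions of the currently free variables.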
00408 
00409 void Solver::swap_index(int i, int j)
00410 {
00411         Q->swap_index(i,j);
00412         std::swap(y[i],y[j]);
00413         std::swap(G[i],G[j]);
00414         std::swap(alpha_status[i],alpha_status[j]);
00415         std::swap(alpha[i],alpha[j]);
00416         std::swap(b[i],b[j]);
00417         std::swap(active_set[i],active_set[j]);
00418         std::swap(G_bar[i],G_bar[j]);
00419 }
00420 
00421 void Solver::reconstruct_gradient()
00422 {
00423         // reconstruct inactive elements of G from G_bar and free variables
00424 
00425         if(active_size == l) return;
00426 
00427         int i;
00428         for(i=active_size;i<l;i++)
00429                 G[i] = G_bar[i] + b[i];
00430         
00431         for(i=0;i<active_size;i++)
00432                 if(is_free(i))
00433                 {
00434                         const Qfloat *Q_i = Q->get_Q(i,l);
00435                         double alpha_i = alpha[i];
00436                         for(int j=active_size;j<l;j++)
00437                                 G[j] += alpha_i * Q_i[j];
00438                 }
00439 }
00440 
00441 void Solver::Solve(int l, const Kernel& Q, const double *b_, const schar *y_,
00442                    double *alpha_, double Cp, double Cn, double eps,
00443                    SolutionInfo* si, int shrinking)
00444 {
00445         this->l = l;
00446         this->Q = &Q;
00447         clone(b, b_,l);
00448         clone(y, y_,l);
00449         clone(alpha,alpha_,l);
00450         this->Cp = Cp;
00451         this->Cn = Cn;
00452         this->eps = eps;
00453         unshrinked = false;
00454 
00455         // initialize alpha_status
00456         {
00457                 alpha_status = new char[l];
00458                 for(int i=0;i<l;i++)
00459                         update_alpha_status(i);
00460         }
00461 
00462         // initialize active set (for shrinking)
00463         {
00464                 active_set = new int[l];
00465                 for(int i=0;i<l;i++)
00466                         active_set[i] = i;
00467                 active_size = l;
00468         }
00469 
00470         // initialize gradient
00471         {
00472                 G = new double[l];
00473                 G_bar = new double[l];
00474                 int i;
00475                 for(i=0;i<l;i++)
00476                 {
00477                         G[i] = b[i];
00478                         G_bar[i] = 0;
00479                 }
00480                 for(i=0;i<l;i++)
00481                         if(!is_lower_bound(i))
00482                         {
00483                                 Qfloat *Q_i = Q.get_Q(i,l);
00484                                 double alpha_i = alpha[i];
00485                                 int j;
00486                                 for(j=0;j<l;j++)
00487                                         G[j] += alpha_i*Q_i[j];
00488                                 if(is_upper_bound(i))
00489                                         for(j=0;j<l;j++)
00490                                                 G_bar[j] += get_C(i) * Q_i[j];
00491                         }
00492         }
00493 
00494         // optimization step
00495 
00496         int iter = 0;
00497         int counter = min(l,1000)+1;
00498 
00499         while(1)
00500         {
00501                 // show progress and do shrinking
00502 
00503                 if(--counter == 0)
00504                 {
00505                         counter = min(l,1000);
00506                         if(shrinking) do_shrinking();
00507                         info("."); info_flush();
00508                 }
00509 
00510                 int i,j;
00511                 if(select_working_set(i,j)!=0)
00512                 {
00513                         // reconstruct the whole gradient
00514                         reconstruct_gradient();
00515                         // reset active set size and check
00516                         active_size = l;
00517                         info("*"); info_flush();
00518                         if(select_working_set(i,j)!=0)
00519                                 break;
00520                         else
00521                                 counter = 1;    // do shrinking next iteration
00522                 }
00523                 
00524                 ++iter;
00525 
00526                 // update alpha[i] and alpha[j], handle bounds carefully
00527                 
00528                 const Qfloat *Q_i = Q.get_Q(i,active_size);
00529                 const Qfloat *Q_j = Q.get_Q(j,active_size);
00530 
00531                 double C_i = get_C(i);
00532                 double C_j = get_C(j);
00533 
00534                 double old_alpha_i = alpha[i];
00535                 double old_alpha_j = alpha[j];
00536 
00537                 if(y[i]!=y[j])
00538                 {
00539                         double delta = (-G[i]-G[j])/std::max(Q_i[i]+Q_j[j]+2*Q_i[j],(Qfloat)0);
00540                         double diff = alpha[i] - alpha[j];
00541                         alpha[i] += delta;
00542                         alpha[j] += delta;
00543                         
00544                         if(diff > 0)
00545                         {
00546                                 if(alpha[j] < 0)
00547                                 {
00548                                         alpha[j] = 0;
00549                                         alpha[i] = diff;
00550                                 }
00551                         }
00552                         else
00553                         {
00554                                 if(alpha[i] < 0)
00555                                 {
00556                                         alpha[i] = 0;
00557                                         alpha[j] = -diff;
00558                                 }
00559                         }
00560                         if(diff > C_i - C_j)
00561                         {
00562                                 if(alpha[i] > C_i)
00563                                 {
00564                                         alpha[i] = C_i;
00565                                         alpha[j] = C_i - diff;
00566                                 }
00567                         }
00568                         else
00569                         {
00570                                 if(alpha[j] > C_j)
00571                                 {
00572                                         alpha[j] = C_j;
00573                                         alpha[i] = C_j + diff;
00574                                 }
00575                         }
00576                 }
00577                 else
00578                 {
00579                         double delta = (G[i]-G[j])/std::max(Q_i[i]+Q_j[j]-2*Q_i[j],(Qfloat)0);
00580                         double sum = alpha[i] + alpha[j];
00581                         alpha[i] -= delta;
00582                         alpha[j] += delta;
00583                         if(sum > C_i)
00584                         {
00585                                 if(alpha[i] > C_i)
00586                                 {
00587                                         alpha[i] = C_i;
00588                                         alpha[j] = sum - C_i;
00589                                 }
00590                         }
00591                         else
00592                         {
00593                                 if(alpha[j] < 0)
00594                                 {
00595                                         alpha[j] = 0;
00596                                         alpha[i] = sum;
00597                                 }
00598                         }
00599                         if(sum > C_j)
00600                         {
00601                                 if(alpha[j] > C_j)
00602                                 {
00603                                         alpha[j] = C_j;
00604                                         alpha[i] = sum - C_j;
00605                                 }
00606                         }
00607                         else
00608                         {
00609                                 if(alpha[i] < 0)
00610                                 {
00611                                         alpha[i] = 0;
00612                                         alpha[j] = sum;
00613                                 }
00614                         }
00615                 }
00616 
00617                 // update G
00618 
00619                 double delta_alpha_i = alpha[i] - old_alpha_i;
00620                 double delta_alpha_j = alpha[j] - old_alpha_j;
00621                 
00622                 for(int k=0;k<active_size;k++)
00623                 {
00624                         G[k] += Q_i[k]*delta_alpha_i + Q_j[k]*delta_alpha_j;
00625                 }
00626 
00627                 // update alpha_status and G_bar
00628 
00629                 {
00630                         bool ui = is_upper_bound(i);
00631                         bool uj = is_upper_bound(j);
00632                         update_alpha_status(i);
00633                         update_alpha_status(j);
00634                         int k;
00635                         if(ui != is_upper_bound(i))
00636                         {
00637                                 Q_i = Q.get_Q(i,l);
00638                                 if(ui)
00639                                         for(k=0;k<l;k++)
00640                                                 G_bar[k] -= C_i * Q_i[k];
00641                                 else
00642                                         for(k=0;k<l;k++)
00643                                                 G_bar[k] += C_i * Q_i[k];
00644                         }
00645 
00646                         if(uj != is_upper_bound(j))
00647                         {
00648                                 Q_j = Q.get_Q(j,l);
00649                                 if(uj)
00650                                         for(k=0;k<l;k++)
00651                                                 G_bar[k] -= C_j * Q_j[k];
00652                                 else
00653                                         for(k=0;k<l;k++)
00654                                                 G_bar[k] += C_j * Q_j[k];
00655                         }
00656                 }
00657         }
00658 
00659         // calculate rho
00660 
00661         si->rho = calculate_rho();
00662 
00663         // calculate objective value
00664         {
00665                 double v = 0;
00666                 int i;
00667                 for(i=0;i<l;i++)
00668                         v += alpha[i] * (G[i] + b[i]);
00669 
00670                 si->obj = v/2;
00671         }
00672 
00673         // put back the solution
00674         {
00675                 for(int i=0;i<l;i++)
00676                         alpha_[active_set[i]] = alpha[i];
00677         }
00678 
00679         // juggle everything back
00680         /*{
00681                 for(int i=0;i<l;i++)
00682                         while(active_set[i] != i)
00683                                 swap_index(i,active_set[i]);
00684                                 // or Q.swap_index(i,active_set[i]);
00685         }*/
00686 
00687         si->upper_bound_p = Cp;
00688         si->upper_bound_n = Cn;
00689 
00690         info("\noptimization finished, #iter = %d\n",iter);
00691 
00692         delete[] b;
00693         delete[] y;
00694         delete[] alpha;
00695         delete[] alpha_status;
00696         delete[] active_set;
00697         delete[] G;
00698         delete[] G_bar;
00699 }
00700 
00701 // return 1 if already optimal, return 0 otherwise
00702 int Solver::select_working_set(int &out_i, int &out_j)
00703 {
00704         // return i,j which maximize -grad(f)^T d , under constraint
00705         // if alpha_i == C, d != +1
00706         // if alpha_i == 0, d != -1
00707 
00708         double Gmax1 = -INF;            // max { -grad(f)_i * d | y_i*d = +1 }
00709         int Gmax1_idx = -1;
00710 
00711         double Gmax2 = -INF;            // max { -grad(f)_i * d | y_i*d = -1 }
00712         int Gmax2_idx = -1;
00713 
00714         for(int i=0;i<active_size;i++)
00715         {
00716                 if(y[i]==+1)    // y = +1
00717                 {
00718                         if(!is_upper_bound(i))  // d = +1
00719                         {
00720                                 if(-G[i] > Gmax1)
00721                                 {
00722                                         Gmax1 = -G[i];
00723                                         Gmax1_idx = i;
00724                                 }
00725                         }
00726                         if(!is_lower_bound(i))  // d = -1
00727                         {
00728                                 if(G[i] > Gmax2)
00729                                 {
00730                                         Gmax2 = G[i];
00731                                         Gmax2_idx = i;
00732                                 }
00733                         }
00734                 }
00735                 else            // y = -1
00736                 {
00737                         if(!is_upper_bound(i))  // d = +1
00738                         {
00739                                 if(-G[i] > Gmax2)
00740                                 {
00741                                         Gmax2 = -G[i];
00742                                         Gmax2_idx = i;
00743                                 }
00744                         }
00745                         if(!is_lower_bound(i))  // d = -1
00746                         {
00747                                 if(G[i] > Gmax1)
00748                                 {
00749                                         Gmax1 = G[i];
00750                                         Gmax1_idx = i;
00751                                 }
00752                         }
00753                 }
00754         }
00755 
00756         if(Gmax1+Gmax2 < eps)
00757                 return 1;
00758 
00759         out_i = Gmax1_idx;
00760         out_j = Gmax2_idx;
00761         return 0;
00762 }
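// In the notation of the LIBSVM papers, the loop above picks the maximal
// violating pair (a summary, using the code's variable names):
//
//      I_up  = { t | y_t = +1, alpha_t < C_t } \cup { t | y_t = -1, alpha_t > 0 }
//      I_low = { t | y_t = +1, alpha_t > 0   } \cup { t | y_t = -1, alpha_t < C_t }
//
//      Gmax1 = \max_{t \in I_up}  -y_t G_t          (attained at Gmax1_idx)
//      Gmax2 = \max_{t \in I_low}  y_t G_t          (attained at Gmax2_idx)
//
// and the eps-approximate KKT conditions hold exactly when Gmax1 + Gmax2 < eps,
// which is the stopping test above.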
00763 
00764 void Solver::do_shrinking()
00765 {
00766         int i,j,k;
00767         if(select_working_set(i,j)!=0) return;
00768         double Gm1 = -y[j]*G[j];
00769         double Gm2 = y[i]*G[i];
00770 
00771         // shrink
00772         
00773         for(k=0;k<active_size;k++)
00774         {
00775                 if(is_lower_bound(k))
00776                 {
00777                         if(y[k]==+1)
00778                         {
00779                                 if(-G[k] >= Gm1) continue;
00780                         }
00781                         else    if(-G[k] >= Gm2) continue;
00782                 }
00783                 else if(is_upper_bound(k))
00784                 {
00785                         if(y[k]==+1)
00786                         {
00787                                 if(G[k] >= Gm2) continue;
00788                         }
00789                         else    if(G[k] >= Gm1) continue;
00790                 }
00791                 else continue;
00792 
00793                 --active_size;
00794                 swap_index(k,active_size);
00795                 --k;    // look at the newcomer
00796         }
00797 
00798         // unshrink, check all variables again before final iterations
00799 
00800         if(unshrinked || -(Gm1 + Gm2) > eps*10) return;
00801         
00802         unshrinked = true;
00803         reconstruct_gradient();
00804 
00805         for(k=l-1;k>=active_size;k--)
00806         {
00807                 if(is_lower_bound(k))
00808                 {
00809                         if(y[k]==+1)
00810                         {
00811                                 if(-G[k] < Gm1) continue;
00812                         }
00813                         else    if(-G[k] < Gm2) continue;
00814                 }
00815                 else if(is_upper_bound(k))
00816                 {
00817                         if(y[k]==+1)
00818                         {
00819                                 if(G[k] < Gm2) continue;
00820                         }
00821                         else    if(G[k] < Gm1) continue;
00822                 }
00823                 else continue;
00824 
00825                 swap_index(k,active_size);
00826                 active_size++;
00827                 ++k;    // look at the newcomer
00828         }
00829 }
00830 
00831 double Solver::calculate_rho()
00832 {
00833         double r;
00834         int nr_free = 0;
00835         double ub = INF, lb = -INF, sum_free = 0;
00836         for(int i=0;i<active_size;i++)
00837         {
00838                 double yG = y[i]*G[i];
00839 
00840                 if(is_lower_bound(i))
00841                 {
00842                         if(y[i] > 0)
00843                                 ub = min(ub,yG);
00844                         else
00845                                 lb = std::max(lb,yG);
00846                 }
00847                 else if(is_upper_bound(i))
00848                 {
00849                         if(y[i] < 0)
00850                                 ub = min(ub,yG);
00851                         else
00852                                 lb = std::max(lb,yG);
00853                 }
00854                 else
00855                 {
00856                         ++nr_free;
00857                         sum_free += yG;
00858                 }
00859         }
00860 
00861         if(nr_free>0)
00862                 r = sum_free/nr_free;
00863         else
00864                 r = (ub+lb)/2;
00865 
00866         return r;
00867 }
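// For the classification formulations, rho is the threshold of the decision
// function  sgn( \sum_i y_i \alpha_i K(x_i, x) - rho ).  It is taken as the
// average of y_i*G[i] over the free (0 < alpha_i < C_i) variables when any
// exist, and otherwise as the midpoint of the feasible interval [lb, ub]
// implied by the variables at their bounds.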
00868 
00869 //
00870 // Solver for nu-svm classification and regression
00871 //
00872 // additional constraint: e^T \alpha = constant
00873 //
00874 class Solver_NU : public Solver
00875 {
00876 public:
00877         Solver_NU() {}
00878         void Solve(int l, const Kernel& Q, const double *b, const schar *y,
00879                    double *alpha, double Cp, double Cn, double eps,
00880                    SolutionInfo* si, int shrinking)
00881         {
00882                 this->si = si;
00883                 Solver::Solve(l,Q,b,y,alpha,Cp,Cn,eps,si,shrinking);
00884         }
00885 private:
00886         SolutionInfo *si;
00887         int select_working_set(int &i, int &j);
00888         double calculate_rho();
00889         void do_shrinking();
00890 };
00891 
00892 int Solver_NU::select_working_set(int &out_i, int &out_j)
00893 {
00894         // return i,j which maximize -grad(f)^T d , under constraint
00895         // if alpha_i == C, d != +1
00896         // if alpha_i == 0, d != -1
00897 
00898         double Gmax1 = -INF;    // max { -grad(f)_i * d | y_i = +1, d = +1 }
00899         int Gmax1_idx = -1;
00900 
00901         double Gmax2 = -INF;    // max { -grad(f)_i * d | y_i = +1, d = -1 }
00902         int Gmax2_idx = -1;
00903 
00904         double Gmax3 = -INF;    // max { -grad(f)_i * d | y_i = -1, d = +1 }
00905         int Gmax3_idx = -1;
00906 
00907         double Gmax4 = -INF;    // max { -grad(f)_i * d | y_i = -1, d = -1 }
00908         int Gmax4_idx = -1;
00909 
00910         for(int i=0;i<active_size;i++)
00911         {
00912                 if(y[i]==+1)    // y == +1
00913                 {
00914                         if(!is_upper_bound(i))  // d = +1
00915                         {
00916                                 if(-G[i] > Gmax1)
00917                                 {
00918                                         Gmax1 = -G[i];
00919                                         Gmax1_idx = i;
00920                                 }
00921                         }
00922                         if(!is_lower_bound(i))  // d = -1
00923                         {
00924                                 if(G[i] > Gmax2)
00925                                 {
00926                                         Gmax2 = G[i];
00927                                         Gmax2_idx = i;
00928                                 }
00929                         }
00930                 }
00931                 else            // y == -1
00932                 {
00933                         if(!is_upper_bound(i))  // d = +1
00934                         {
00935                                 if(-G[i] > Gmax3)
00936                                 {
00937                                         Gmax3 = -G[i];
00938                                         Gmax3_idx = i;
00939                                 }
00940                         }
00941                         if(!is_lower_bound(i))  // d = -1
00942                         {
00943                                 if(G[i] > Gmax4)
00944                                 {
00945                                         Gmax4 = G[i];
00946                                         Gmax4_idx = i;
00947                                 }
00948                         }
00949                 }
00950         }
00951 
00952         if(std::max(Gmax1+Gmax2,Gmax3+Gmax4) < eps)
00953                 return 1;
00954 
00955         if(Gmax1+Gmax2 > Gmax3+Gmax4)
00956         {
00957                 out_i = Gmax1_idx;
00958                 out_j = Gmax2_idx;
00959         }
00960         else
00961         {
00962                 out_i = Gmax3_idx;
00963                 out_j = Gmax4_idx;
00964         }
00965         return 0;
00966 }
00967 
00968 void Solver_NU::do_shrinking()
00969 {
00970         double Gmax1 = -INF;    // max { -grad(f)_i * d | y_i = +1, d = +1 }
00971         double Gmax2 = -INF;    // max { -grad(f)_i * d | y_i = +1, d = -1 }
00972         double Gmax3 = -INF;    // max { -grad(f)_i * d | y_i = -1, d = +1 }
00973         double Gmax4 = -INF;    // max { -grad(f)_i * d | y_i = -1, d = -1 }
00974 
00975         int k;
00976         for(k=0;k<active_size;k++)
00977         {
00978                 if(!is_upper_bound(k))
00979                 {
00980                         if(y[k]==+1)
00981                         {
00982                                 if(-G[k] > Gmax1) Gmax1 = -G[k];
00983                         }
00984                         else    if(-G[k] > Gmax3) Gmax3 = -G[k];
00985                 }
00986                 if(!is_lower_bound(k))
00987                 {
00988                         if(y[k]==+1)
00989                         {       
00990                                 if(G[k] > Gmax2) Gmax2 = G[k];
00991                         }
00992                         else    if(G[k] > Gmax4) Gmax4 = G[k];
00993                 }
00994         }
00995 
00996         double Gm1 = -Gmax2;
00997         double Gm2 = -Gmax1;
00998         double Gm3 = -Gmax4;
00999         double Gm4 = -Gmax3;
01000 
01001         for(k=0;k<active_size;k++)
01002         {
01003                 if(is_lower_bound(k))
01004                 {
01005                         if(y[k]==+1)
01006                         {
01007                                 if(-G[k] >= Gm1) continue;
01008                         }
01009                         else    if(-G[k] >= Gm3) continue;
01010                 }
01011                 else if(is_upper_bound(k))
01012                 {
01013                         if(y[k]==+1)
01014                         {
01015                                 if(G[k] >= Gm2) continue;
01016                         }
01017                         else    if(G[k] >= Gm4) continue;
01018                 }
01019                 else continue;
01020 
01021                 --active_size;
01022                 swap_index(k,active_size);
01023                 --k;    // look at the newcomer
01024         }
01025 
01026         // unshrink, check all variables again before final iterations
01027 
01028         if(unshrinked || std::max(-(Gm1+Gm2),-(Gm3+Gm4)) > eps*10) return;
01029         
01030         unshrinked = true;
01031         reconstruct_gradient();
01032 
01033         for(k=l-1;k>=active_size;k--)
01034         {
01035                 if(is_lower_bound(k))
01036                 {
01037                         if(y[k]==+1)
01038                         {
01039                                 if(-G[k] < Gm1) continue;
01040                         }
01041                         else    if(-G[k] < Gm3) continue;
01042                 }
01043                 else if(is_upper_bound(k))
01044                 {
01045                         if(y[k]==+1)
01046                         {
01047                                 if(G[k] < Gm2) continue;
01048                         }
01049                         else    if(G[k] < Gm4) continue;
01050                 }
01051                 else continue;
01052 
01053                 swap_index(k,active_size);
01054                 active_size++;
01055                 ++k;    // look at the newcomer
01056         }
01057 }
01058 
01059 double Solver_NU::calculate_rho()
01060 {
01061         int nr_free1 = 0,nr_free2 = 0;
01062         double ub1 = INF, ub2 = INF;
01063         double lb1 = -INF, lb2 = -INF;
01064         double sum_free1 = 0, sum_free2 = 0;
01065 
01066         for(int i=0;i<active_size;i++)
01067         {
01068                 if(y[i]==+1)
01069                 {
01070                         if(is_lower_bound(i))
01071                                 ub1 = min(ub1,G[i]);
01072                         else if(is_upper_bound(i))
01073                                 lb1 = std::max(lb1,G[i]);
01074                         else
01075                         {
01076                                 ++nr_free1;
01077                                 sum_free1 += G[i];
01078                         }
01079                 }
01080                 else
01081                 {
01082                         if(is_lower_bound(i))
01083                                 ub2 = min(ub2,G[i]);
01084                         else if(is_upper_bound(i))
01085                                 lb2 = std::max(lb2,G[i]);
01086                         else
01087                         {
01088                                 ++nr_free2;
01089                                 sum_free2 += G[i];
01090                         }
01091                 }
01092         }
01093 
01094         double r1,r2;
01095         if(nr_free1 > 0)
01096                 r1 = sum_free1/nr_free1;
01097         else
01098                 r1 = (ub1+lb1)/2;
01099         
01100         if(nr_free2 > 0)
01101                 r2 = sum_free2/nr_free2;
01102         else
01103                 r2 = (ub2+lb2)/2;
01104         
01105         si->r = (r1+r2)/2;
01106         return (r1-r2)/2;
01107 }
01108 
01109 //
01110 // Q matrices for various formulations
01111 //
01112 class SVC_Q: public Kernel
01113 { 
01114 public:
01115         SVC_Q(const svm_problem& prob, const svm_parameter& param, const schar *y_)
01116         :Kernel(prob.l, prob.x, param)
01117         {
01118                 clone(y,y_,prob.l);
01119                 cache = new Cache(prob.l,(int)(param.cache_size*(1<<20)));
01120         }
01121         
01122         Qfloat *get_Q(int i, int len) const
01123         {
01124                 Qfloat *data;
01125                 int start;
01126                 if((start = cache->get_data(i,&data,len)) < len)
01127                 {
01128                         for(int j=start;j<len;j++)
01129                                 data[j] = (Qfloat)(y[i]*y[j]*(this->*kernel_function)(i,j));
01130                 }
01131                 return data;
01132         }
01133 
01134         void swap_index(int i, int j) const
01135         {
01136                 cache->swap_index(i,j);
01137                 Kernel::swap_index(i,j);
01138                 std::swap(y[i],y[j]);
01139         }
01140 
01141         ~SVC_Q()
01142         {
01143                 delete[] y;
01144                 delete cache;
01145         }
01146 private:
01147         schar *y;
01148         Cache *cache;
01149 };
01150 
01151 class ONE_CLASS_Q: public Kernel
01152 {
01153 public:
01154         ONE_CLASS_Q(const svm_problem& prob, const svm_parameter& param)
01155         :Kernel(prob.l, prob.x, param)
01156         {
01157                 cache = new Cache(prob.l,(int)(param.cache_size*(1<<20)));
01158         }
01159         
01160         Qfloat *get_Q(int i, int len) const
01161         {
01162                 Qfloat *data;
01163                 int start;
01164                 if((start = cache->get_data(i,&data,len)) < len)
01165                 {
01166                         for(int j=start;j<len;j++)
01167                                 data[j] = (Qfloat)(this->*kernel_function)(i,j);
01168                 }
01169                 return data;
01170         }
01171 
01172         void swap_index(int i, int j) const
01173         {
01174                 cache->swap_index(i,j);
01175                 Kernel::swap_index(i,j);
01176         }
01177 
01178         ~ONE_CLASS_Q()
01179         {
01180                 delete cache;
01181         }
01182 private:
01183         Cache *cache;
01184 };
01185 
01186 class SVR_Q: public Kernel
01187 { 
01188 public:
01189         SVR_Q(const svm_problem& prob, const svm_parameter& param)
01190         :Kernel(prob.l, prob.x, param)
01191         {
01192                 l = prob.l;
01193                 cache = new Cache(l,(int)(param.cache_size*(1<<20)));
01194                 sign = new schar[2*l];
01195                 index = new int[2*l];
01196                 for(int k=0;k<l;k++)
01197                 {
01198                         sign[k] = 1;
01199                         sign[k+l] = -1;
01200                         index[k] = k;
01201                         index[k+l] = k;
01202                 }
01203                 buffer[0] = new Qfloat[2*l];
01204                 buffer[1] = new Qfloat[2*l];
01205                 next_buffer = 0;
01206         }
01207 
01208         void swap_index(int i, int j) const
01209         {
01210                 std::swap(sign[i],sign[j]);
01211                 std::swap(index[i],index[j]);
01212         }
01213         
01214         Qfloat *get_Q(int i, int len) const
01215         {
01216                 Qfloat *data;
01217                 int real_i = index[i];
01218                 if(cache->get_data(real_i,&data,l) < l)
01219                 {
01220                         for(int j=0;j<l;j++)
01221                                 data[j] = (Qfloat)(this->*kernel_function)(real_i,j);
01222                 }
01223 
01224                 // reorder and copy
01225                 Qfloat *buf = buffer[next_buffer];
01226                 next_buffer = 1 - next_buffer;
01227                 schar si = sign[i];
01228                 for(int j=0;j<len;j++)
01229                         buf[j] = si * sign[j] * data[index[j]];
01230                 return buf;
01231         }
01232 
01233         ~SVR_Q()
01234         {
01235                 delete cache;
01236                 delete[] sign;
01237                 delete[] index;
01238                 delete[] buffer[0];
01239                 delete[] buffer[1];
01240         }
01241 private:
01242         int l;
01243         Cache *cache;
01244         schar *sign;
01245         int *index;
01246         mutable int next_buffer;
01247         Qfloat* buffer[2];
01248 };
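// How SVR_Q folds the 2*l regression variables onto the l kernel columns
// (a summary of get_Q() above): before any swap_index() reordering,
//
//      Q^SVR_{ij} = sign[i] * sign[j] * K( x_index[i], x_index[j] ),
//
// i.e.
//
//      Q^SVR = [  K  -K ]
//              [ -K   K ]
//
// so only the l cached columns K(x_r, .) are ever computed, and each requested
// column is expanded into the 2*l buffer on the fly.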
01249 
01250 //
01251 // construct and solve various formulations
01252 //
01253 static void solve_c_svc(
01254         const svm_problem *prob, const svm_parameter* param,
01255         double *alpha, Solver::SolutionInfo* si, double Cp, double Cn)
01256 {
01257         int l = prob->l;
01258         double *minus_ones = new double[l];
01259         schar *y = new schar[l];
01260 
01261         int i;
01262 
01263         for(i=0;i<l;i++)
01264         {
01265                 alpha[i] = 0;
01266                 minus_ones[i] = -1;
01267                 if(prob->y[i] > 0) y[i] = +1; else y[i]=-1;
01268         }
01269 
01270         Solver s;
01271         s.Solve(l, SVC_Q(*prob,*param,y), minus_ones, y,
01272                 alpha, Cp, Cn, param->eps, si, param->shrinking);
01273 
01274         double sum_alpha=0;
01275         for(i=0;i<l;i++)
01276                 sum_alpha += alpha[i];
01277 
01278         if (Cp==Cn)
01279                 info("nu = %f\n", sum_alpha/(Cp*prob->l));
01280 
01281         for(i=0;i<l;i++)
01282                 alpha[i] *= y[i];
01283 
01284         delete[] minus_ones;
01285         delete[] y;
01286 }
01287 
01288 static void solve_nu_svc(
01289         const svm_problem *prob, const svm_parameter *param,
01290         double *alpha, Solver::SolutionInfo* si)
01291 {
01292         int i;
01293         int l = prob->l;
01294         double nu = param->nu;
01295 
01296         schar *y = new schar[l];
01297 
01298         for(i=0;i<l;i++)
01299                 if(prob->y[i]>0)
01300                         y[i] = +1;
01301                 else
01302                         y[i] = -1;
01303 
01304         double sum_pos = nu*l/2;
01305         double sum_neg = nu*l/2;
01306 
01307         for(i=0;i<l;i++)
01308                 if(y[i] == +1)
01309                 {
01310                         alpha[i] = min(1.0,sum_pos);
01311                         sum_pos -= alpha[i];
01312                 }
01313                 else
01314                 {
01315                         alpha[i] = min(1.0,sum_neg);
01316                         sum_neg -= alpha[i];
01317                 }
01318 
01319         double *zeros = new double[l];
01320 
01321         for(i=0;i<l;i++)
01322                 zeros[i] = 0;
01323 
01324         Solver_NU s;
01325         s.Solve(l, SVC_Q(*prob,*param,y), zeros, y,
01326                 alpha, 1.0, 1.0, param->eps, si,  param->shrinking);
01327         double r = si->r;
01328 
01329         info("C = %f\n",1/r);
01330 
01331         for(i=0;i<l;i++)
01332                 alpha[i] *= y[i]/r;
01333 
01334         si->rho /= r;
01335         si->obj /= (r*r);
01336         si->upper_bound_p = 1/r;
01337         si->upper_bound_n = 1/r;
01338 
01339         delete[] y;
01340         delete[] zeros;
01341 }
01342 
01343 static void solve_one_class(
01344         const svm_problem *prob, const svm_parameter *param,
01345         double *alpha, Solver::SolutionInfo* si)
01346 {
01347         int l = prob->l;
01348         double *zeros = new double[l];
01349         schar *ones = new schar[l];
01350         int i;
01351 
01352         int n = (int)(param->nu*prob->l);       // # of alpha's at upper bound
01353 
01354         for(i=0;i<n;i++)
01355                 alpha[i] = 1;
01356         if(n<l) alpha[n] = param->nu * prob->l - n;     // n can equal l when nu == 1
01357         for(i=n+1;i<l;i++)
01358                 alpha[i] = 0;
01359 
01360         for(i=0;i<l;i++)
01361         {
01362                 zeros[i] = 0;
01363                 ones[i] = 1;
01364         }
01365 
01366         Solver s;
01367         s.Solve(l, ONE_CLASS_Q(*prob,*param), zeros, ones,
01368                 alpha, 1.0, 1.0, param->eps, si, param->shrinking);
01369 
01370         delete[] zeros;
01371         delete[] ones;
01372 }
01373 
01374 static void solve_epsilon_svr(
01375         const svm_problem *prob, const svm_parameter *param,
01376         double *alpha, Solver::SolutionInfo* si)
01377 {
01378         int l = prob->l;
01379         double *alpha2 = new double[2*l];
01380         double *linear_term = new double[2*l];
01381         schar *y = new schar[2*l];
01382         int i;
01383 
01384         for(i=0;i<l;i++)
01385         {
01386                 alpha2[i] = 0;
01387                 linear_term[i] = param->p - prob->y[i];
01388                 y[i] = 1;
01389 
01390                 alpha2[i+l] = 0;
01391                 linear_term[i+l] = param->p + prob->y[i];
01392                 y[i+l] = -1;
01393         }
01394 
01395         Solver s;
01396         s.Solve(2*l, SVR_Q(*prob,*param), linear_term, y,
01397                 alpha2, param->C, param->C, param->eps, si, param->shrinking);
01398 
01399         double sum_alpha = 0;
01400         for(i=0;i<l;i++)
01401         {
01402                 alpha[i] = alpha2[i] - alpha2[i+l];
01403                 sum_alpha += fabs(alpha[i]);
01404         }
01405         info("nu = %f\n",sum_alpha/(param->C*l));
01406 
01407         delete[] alpha2;
01408         delete[] linear_term;
01409         delete[] y;
01410 }
01411 
01412 static void solve_nu_svr(
01413         const svm_problem *prob, const svm_parameter *param,
01414         double *alpha, Solver::SolutionInfo* si)
01415 {
01416         int l = prob->l;
01417         double C = param->C;
01418         double *alpha2 = new double[2*l];
01419         double *linear_term = new double[2*l];
01420         schar *y = new schar[2*l];
01421         int i;
01422 
01423         double sum = C * param->nu * l / 2;
01424         for(i=0;i<l;i++)
01425         {
01426                 alpha2[i] = alpha2[i+l] = min(sum,C);
01427                 sum -= alpha2[i];
01428 
01429                 linear_term[i] = - prob->y[i];
01430                 y[i] = 1;
01431 
01432                 linear_term[i+l] = prob->y[i];
01433                 y[i+l] = -1;
01434         }
01435 
01436         Solver_NU s;
01437         s.Solve(2*l, SVR_Q(*prob,*param), linear_term, y,
01438                 alpha2, C, C, param->eps, si, param->shrinking);
01439 
01440         info("epsilon = %f\n",-si->r);
01441 
01442         for(i=0;i<l;i++)
01443                 alpha[i] = alpha2[i] - alpha2[i+l];
01444 
01445         delete[] alpha2;
01446         delete[] linear_term;
01447         delete[] y;
01448 }
01449 
01450 //
01451 // decision_function
01452 //
01453 struct decision_function
01454 {
01455         double *alpha;
01456         double rho;     
01457 };
01458 
01459 decision_function svm_train_one(
01460         const svm_problem *prob, const svm_parameter *param,
01461         double Cp, double Cn)
01462 {
01463         double *alpha = Malloc(double,prob->l);
01464         Solver::SolutionInfo si;
01465         switch(param->svm_type)
01466         {
01467                 case C_SVC:
01468                         solve_c_svc(prob,param,alpha,&si,Cp,Cn);
01469                         break;
01470                 case NU_SVC:
01471                         solve_nu_svc(prob,param,alpha,&si);
01472                         break;
01473                 case ONE_CLASS:
01474                         solve_one_class(prob,param,alpha,&si);
01475                         break;
01476                 case EPSILON_SVR:
01477                         solve_epsilon_svr(prob,param,alpha,&si);
01478                         break;
01479                 case NU_SVR:
01480                         solve_nu_svr(prob,param,alpha,&si);
01481                         break;
01482         }
01483 
01484         info("obj = %f, rho = %f\n",si.obj,si.rho);
01485 
01486         // output SVs
01487 
01488         int nSV = 0;
01489         int nBSV = 0;
01490         for(int i=0;i<prob->l;i++)
01491         {
01492                 if(fabs(alpha[i]) > 0)
01493                 {
01494                         ++nSV;
01495                         if(prob->y[i] > 0)
01496                         {
01497                                 if(fabs(alpha[i]) >= si.upper_bound_p)
01498                                         ++nBSV;
01499                         }
01500                         else
01501                         {
01502                                 if(fabs(alpha[i]) >= si.upper_bound_n)
01503                                         ++nBSV;
01504                         }
01505                 }
01506         }
01507 
01508         info("nSV = %d, nBSV = %d\n",nSV,nBSV);
01509 
01510         decision_function f;
01511         f.alpha = alpha;
01512         f.rho = si.rho;
01513         return f;
01514 }
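// A minimal end-to-end sketch of svm_train_one() on a two-point toy problem
// (illustration only; the field names are those used above, the remaining
// svm_parameter fields are left untouched because the C_SVC path does not
// read them, and f.alpha must eventually be free()d by the caller):

static decision_function example_train_one()
{
        static svm_node x0[] = { {1, +1.0}, {-1, 0.0} };
        static svm_node x1[] = { {1, -1.0}, {-1, 0.0} };
        static svm_node *x[] = { x0, x1 };
        static double y[] = { +1, -1 };

        svm_problem prob;
        prob.l = 2; prob.x = x; prob.y = y;

        svm_parameter param;
        param.svm_type = C_SVC;
        param.kernel_type = SVM_LINEAR;
        param.degree = 3; param.gamma = 0.5; param.coef0 = 0;
        param.cache_size = 1;           // in MB, scaled to bytes by the Q classes
        param.C = 1; param.eps = 1e-3; param.shrinking = 1;

        return svm_train_one(&prob, &param, param.C, param.C);
}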
01515 
01516 //
01517 // svm_model
01518 //
01519 struct svm_model
01520 {
01521         svm_parameter param;    // parameter
01522         int nr_class;           // number of classes, = 2 in regression/one class svm
01523         int l;                  // total #SV
01524         svm_node **SV;          // SVs (SV[l])
01525         double **sv_coef;       // coefficients for SVs in decision functions (sv_coef[n-1][l])
01526         double *rho;            // constants in decision functions (rho[n*(n-1)/2])
01527         double *probA;          // pairwise probability information
01528         double *probB;
01529 
01530         // for classification only
01531 
01532         int *label;             // label of each class (label[n])
01533         int *nSV;               // number of SVs for each class (nSV[n])
01534                                 // nSV[0] + nSV[1] + ... + nSV[n-1] = l
01535         // XXX
01536         int free_sv;            // 1 if svm_model is created by svm_load_model
01537                                 // 0 if svm_model is created by svm_train
01538 };
01539 
01540 // Platt's binary SVM probabilistic output: an improvement by Lin et al.
01541 void sigmoid_train(
01542         int l, const double *dec_values, const double *labels, 
01543         double& A, double& B)
01544 {
01545         double prior1=0, prior0 = 0;
01546         int i;
01547 
01548         for (i=0;i<l;i++)
01549                 if (labels[i] > 0) prior1+=1;
01550                 else prior0+=1;
01551         
01552         int max_iter=100;       // Maximal number of iterations
01553         double min_step=1e-10;  // Minimal step taken in line search
01554         double sigma=1e-3;      // For numerically strict PD of Hessian
01555         double eps=1e-5;
01556         double hiTarget=(prior1+1.0)/(prior1+2.0);
01557         double loTarget=1/(prior0+2.0);
01558         double *t=Malloc(double,l);
01559         double fApB,p,q,h11,h22,h21,g1,g2,det,dA,dB,gd,stepsize;
01560         double newA,newB,newf,d1,d2;
01561         int iter; 
01562         
01563         // Initial point and initial function value
01564         A=0.0; B=log((prior0+1.0)/(prior1+1.0));
01565         double fval = 0.0;
01566 
01567         for (i=0;i<l;i++)
01568         {
01569                 if (labels[i]>0) t[i]=hiTarget;
01570                 else t[i]=loTarget;
01571                 fApB = dec_values[i]*A+B;
01572                 if (fApB>=0)
01573                         fval += t[i]*fApB + log(1+exp(-fApB));
01574                 else
01575                         fval += (t[i] - 1)*fApB +log(1+exp(fApB));
01576         }
01577         for (iter=0;iter<max_iter;iter++)
01578         {
01579                 // Update Gradient and Hessian (use H' = H + sigma I)
01580                 h11=sigma; // numerically ensures strict PD
01581                 h22=sigma;
01582                 h21=0.0;g1=0.0;g2=0.0;
01583                 for (i=0;i<l;i++)
01584                 {
01585                         fApB = dec_values[i]*A+B;
01586                         if (fApB >= 0)
01587                         {
01588                                 p=exp(-fApB)/(1.0+exp(-fApB));
01589                                 q=1.0/(1.0+exp(-fApB));
01590                         }
01591                         else
01592                         {
01593                                 p=1.0/(1.0+exp(fApB));
01594                                 q=exp(fApB)/(1.0+exp(fApB));
01595                         }
01596                         d2=p*q;
01597                         h11+=dec_values[i]*dec_values[i]*d2;
01598                         h22+=d2;
01599                         h21+=dec_values[i]*d2;
01600                         d1=t[i]-p;
01601                         g1+=dec_values[i]*d1;
01602                         g2+=d1;
01603                 }
01604 
01605                 // Stopping Criteria
01606                 if (fabs(g1)<eps && fabs(g2)<eps)
01607                         break;
01608 
01609                 // Finding Newton direction: -inv(H') * g
01610                 det=h11*h22-h21*h21;
01611                 dA=-(h22*g1 - h21 * g2) / det;
01612                 dB=-(-h21*g1+ h11 * g2) / det;
01613                 gd=g1*dA+g2*dB;
01614 
01615 
01616                 stepsize = 1;           // Line Search
01617                 while (stepsize >= min_step)
01618                 {
01619                         newA = A + stepsize * dA;
01620                         newB = B + stepsize * dB;
01621 
01622                         // New function value
01623                         newf = 0.0;
01624                         for (i=0;i<l;i++)
01625                         {
01626                                 fApB = dec_values[i]*newA+newB;
01627                                 if (fApB >= 0)
01628                                         newf += t[i]*fApB + log(1+exp(-fApB));
01629                                 else
01630                                         newf += (t[i] - 1)*fApB +log(1+exp(fApB));
01631                         }
01632                         // Check sufficient decrease
01633                         if (newf<fval+0.0001*stepsize*gd)
01634                         {
01635                                 A=newA;B=newB;fval=newf;
01636                                 break;
01637                         }
01638                         else
01639                                 stepsize = stepsize / 2.0;
01640                 }
01641 
01642                 if (stepsize < min_step)
01643                 {
01644                         info("Line search fails in two-class probability estimates\n");
01645                         break;
01646                 }
01647         }
01648 
01649         if (iter>=max_iter)
01650                 info("Reaching maximal iterations in two-class probability estimates\n");
01651         free(t);
01652 }
01653 
01654 double sigmoid_predict(double decision_value, double A, double B)
01655 {
01656         double fApB = decision_value*A+B;
01657         if (fApB >= 0)
01658                 return exp(-fApB)/(1.0+exp(-fApB));
01659         else
01660                 return 1.0/(1+exp(fApB)) ;
01661 }
01662 
01663 // Method 2 from the multiclass_prob paper by Wu, Lin, and Weng
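      // Given pairwise probabilities r[i][j] ~ P(y=i | y=i or j, x), finds
      // p >= 0 with sum(p) = 1 that approximately minimizes p'Qp, where Q is
      // built from r as below; each sweep applies a coordinate-wise fixed-point
      // update to p[t] followed by a renormalization that keeps sum(p) = 1.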
01664 void multiclass_probability(int k, double **r, double *p)
01665 {
01666         int t;
01667         int iter = 0, max_iter=100;
01668         double **Q=Malloc(double *,k);
01669         double *Qp=Malloc(double,k);
01670         double pQp, eps=0.001;
01671         
01672         for (t=0;t<k;t++)
01673         {
01674                 p[t]=1.0/k;  // Valid if k = 1
01675                 Q[t]=Malloc(double,k);
01676                 Q[t][t]=0;
01677                 int j;
01678                 for ( j=0;j<t;j++)
01679                 {
01680                         Q[t][t]+=r[j][t]*r[j][t];
01681                         Q[t][j]=Q[j][t];
01682                 }
01683                 for ( j=t+1;j<k;j++)
01684                 {
01685                         Q[t][t]+=r[j][t]*r[j][t];
01686                         Q[t][j]=-r[j][t]*r[t][j];
01687                 }
01688         }
01689         for (iter=0;iter<max_iter;iter++)
01690         {
01691                 // stopping condition: recalculate Qp and pQp for numerical accuracy
01692                 pQp=0;
01693                 for (t=0;t<k;t++)
01694                 {
01695                         Qp[t]=0;
01696                         for (int j=0;j<k;j++)
01697                                 Qp[t]+=Q[t][j]*p[j];
01698                         pQp+=p[t]*Qp[t];
01699                 }
01700                 double max_error=0;
01701                 for (t=0;t<k;t++)
01702                 {
01703                         double error=fabs(Qp[t]-pQp);
01704                         if (error>max_error)
01705                                 max_error=error;
01706                 }
01707                 if (max_error<eps) break;
01708                 
01709                 for (t=0;t<k;t++)
01710                 {
01711                         double diff=(-Qp[t]+pQp)/Q[t][t];
01712                         p[t]+=diff;
01713                         pQp=(pQp+diff*(diff*Q[t][t]+2*Qp[t]))/(1+diff)/(1+diff);
01714                         for (int j=0;j<k;j++)
01715                         {
01716                                 Qp[j]=(Qp[j]+diff*Q[t][j])/(1+diff);
01717                                 p[j]/=(1+diff);
01718                         }
01719                 }
01720         }
01721         if (iter>=max_iter)
01722                 info("Exceeds max_iter in multiclass_prob\n");
01723         for(t=0;t<k;t++) free(Q[t]);
01724         free(Q);
01725         free(Qp);
01726 }
01727 
01728 // Cross-validation decision values for probability estimates
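      // Runs 5-fold cross-validation on the binary subproblem (C fixed to 1,
      // class weights Cp/Cn) to obtain out-of-sample decision values, then fits
      // the sigmoid parameters probA/probB with sigmoid_train().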
01729 void svm_binary_svc_probability(
01730         const svm_problem *prob, const svm_parameter *param,
01731         double Cp, double Cn, double& probA, double& probB)
01732 {
01733         int i;
01734         int nr_fold = 5;
01735         int *perm = Malloc(int,prob->l);
01736         double *dec_values = Malloc(double,prob->l);
01737 
01738         // random shuffle
01739         for(i=0;i<prob->l;i++) perm[i]=i;
01740         for(i=0;i<prob->l;i++)
01741         {
01742                 int j = i+rand()%(prob->l-i);
01743                 swap(perm[i],perm[j]);  // file-local swap; <algorithm> is not included
01744         }
01745         for(i=0;i<nr_fold;i++)
01746         {
01747                 int begin = i*prob->l/nr_fold;
01748                 int end = (i+1)*prob->l/nr_fold;
01749                 int j,k;
01750                 struct svm_problem subprob;
01751 
01752                 subprob.l = prob->l-(end-begin);
01753                 subprob.x = Malloc(struct svm_node*,subprob.l);
01754                 subprob.y = Malloc(double,subprob.l);
01755                         
01756                 k=0;
01757                 for(j=0;j<begin;j++)
01758                 {
01759                         subprob.x[k] = prob->x[perm[j]];
01760                         subprob.y[k] = prob->y[perm[j]];
01761                         ++k;
01762                 }
01763                 for(j=end;j<prob->l;j++)
01764                 {
01765                         subprob.x[k] = prob->x[perm[j]];
01766                         subprob.y[k] = prob->y[perm[j]];
01767                         ++k;
01768                 }
01769                 int p_count=0,n_count=0;
01770                 for(j=0;j<k;j++)
01771                         if(subprob.y[j]>0)
01772                                 p_count++;
01773                         else
01774                                 n_count++;
01775 
01776                 if(p_count==0 && n_count==0)
01777                         for(j=begin;j<end;j++)
01778                                 dec_values[perm[j]] = 0;
01779                 else if(p_count > 0 && n_count == 0)
01780                         for(j=begin;j<end;j++)
01781                                 dec_values[perm[j]] = 1;
01782                 else if(p_count == 0 && n_count > 0)
01783                         for(j=begin;j<end;j++)
01784                                 dec_values[perm[j]] = -1;
01785                 else
01786                 {
01787                         svm_parameter subparam = *param;
01788                         subparam.probability=0;
01789                         subparam.C=1.0;
01790                         subparam.nr_weight=2;
01791                         subparam.weight_label = Malloc(int,2);
01792                         subparam.weight = Malloc(double,2);
01793                         subparam.weight_label[0]=+1;
01794                         subparam.weight_label[1]=-1;
01795                         subparam.weight[0]=Cp;
01796                         subparam.weight[1]=Cn;
01797                         struct svm_model *submodel = svm_train(&subprob,&subparam);
01798                         for(j=begin;j<end;j++)
01799                         {
01800                                 svm_predict_values(submodel,prob->x[perm[j]],&(dec_values[perm[j]])); 
01801                                 // ensure the +1/-1 label order; this is why svm_cross_validation is not reused here
01802                                 dec_values[perm[j]] *= submodel->label[0];
01803                         }               
01804                         svm_destroy_model(submodel);
01805                         svm_destroy_param(&subparam);
01806                         free(subprob.x);
01807                         free(subprob.y);
01808                 }
01809         }               
01810         sigmoid_train(prob->l,dec_values,prob->y,probA,probB);
01811         free(dec_values);
01812         free(perm);
01813 }
01814 
01815 // Return parameter of a Laplace distribution 
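      // Uses 5-fold cross-validation residuals z = y - f(x): residuals larger
      // than 5*sqrt(2)*MAE are discarded as outliers, and the mean absolute
      // residual of the rest is returned as sigma of the Laplace noise model.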
01816 double svm_svr_probability(
01817         const svm_problem *prob, const svm_parameter *param)
01818 {
01819         int i;
01820         int nr_fold = 5;
01821         double *ymv = Malloc(double,prob->l);
01822         double mae = 0;
01823 
01824         svm_parameter newparam = *param;
01825         newparam.probability = 0;
01826         svm_cross_validation(prob,&newparam,nr_fold,ymv);
01827         for(i=0;i<prob->l;i++)
01828         {
01829                 ymv[i]=prob->y[i]-ymv[i];
01830                 mae += fabs(ymv[i]);
01831         }               
01832         mae /= prob->l;
01833         double std=sqrt(2*mae*mae);
01834         int count=0;
01835         mae=0;
01836         for(i=0;i<prob->l;i++)
01837                 if (fabs(ymv[i]) > 5*std) 
01838                         count=count+1;
01839                 else 
01840                         mae+=fabs(ymv[i]);
01841         mae /= (prob->l-count);
01842         info("Prob. model for test data: target value = predicted value + z,\nz: Laplace distribution e^(-|z|/sigma)/(2sigma),sigma= %g\n",mae);
01843         free(ymv);
01844         return mae;
01845 }
01846 
01847 //
01848 // Interface functions
01849 //
01850 svm_model *svm_train(const svm_problem *prob, const svm_parameter *param)
01851 {
01852         svm_model *model = Malloc(svm_model,1);
01853         model->param = *param;
01854         model->free_sv = 0;     // XXX
01855 
01856         if(param->svm_type == ONE_CLASS ||
01857            param->svm_type == EPSILON_SVR ||
01858            param->svm_type == NU_SVR)
01859         {
01860                 // regression or one-class-svm
01861                 model->nr_class = 2;
01862                 model->label = NULL;
01863                 model->nSV = NULL;
01864                 model->probA = NULL; model->probB = NULL;
01865                 model->sv_coef = Malloc(double *,1);
01866 
01867                 if(param->probability && 
01868                    (param->svm_type == EPSILON_SVR ||
01869                     param->svm_type == NU_SVR))
01870                 {
01871                         model->probA = Malloc(double,1);
01872                         model->probA[0] = svm_svr_probability(prob,param);
01873                 }
01874 
01875                 decision_function f = svm_train_one(prob,param,0,0);
01876                 model->rho = Malloc(double,1);
01877                 model->rho[0] = f.rho;
01878 
01879                 int nSV = 0;
01880                 int i;
01881                 for(i=0;i<prob->l;i++)
01882                         if(fabs(f.alpha[i]) > 0) ++nSV;
01883                 model->l = nSV;
01884                 model->SV = Malloc(svm_node *,nSV);
01885                 model->sv_coef[0] = Malloc(double,nSV);
01886                 int j = 0;
01887                 for(i=0;i<prob->l;i++)
01888                         if(fabs(f.alpha[i]) > 0)
01889                         {
01890                                 model->SV[j] = prob->x[i];
01891                                 model->sv_coef[0][j] = f.alpha[i];
01892                                 ++j;
01893                         }               
01894 
01895                 free(f.alpha);
01896         }
01897         else
01898         {
01899                 // classification
01900                 // find out the number of classes
01901                 int l = prob->l;
01902                 int max_nr_class = 16;
01903                 int nr_class = 0;
01904                 int *label = Malloc(int,max_nr_class);
01905                 int *count = Malloc(int,max_nr_class);
01906                 int *index = Malloc(int,l);
01907 
01908                 int i;
01909                 for(i=0;i<l;i++)
01910                 {
01911                         int this_label = (int)prob->y[i];
01912                         int j;
01913                         for(j=0;j<nr_class;j++)
01914                                 if(this_label == label[j])
01915                                 {
01916                                         ++count[j];
01917                                         break;
01918                                 }
01919                         index[i] = j;
01920                         if(j == nr_class)
01921                         {
01922                                 if(nr_class == max_nr_class)
01923                                 {
01924                                         max_nr_class *= 2;
01925                                         label = (int *)realloc(label,max_nr_class*sizeof(int));
01926                                         count = (int *)realloc(count,max_nr_class*sizeof(int));
01927                                 }
01928                                 label[nr_class] = this_label;
01929                                 count[nr_class] = 1;
01930                                 ++nr_class;
01931                         }
01932                 }
01933 
01934                 // group training data of the same class
01935 
01936                 int *start = Malloc(int,nr_class);
01937                 start[0] = 0;
01938                 for(i=1;i<nr_class;i++)
01939                         start[i] = start[i-1]+count[i-1];
01940 
01941                 svm_node **x = Malloc(svm_node *,l);
01942                 
01943                 for(i=0;i<l;i++)
01944                 {
01945                         x[start[index[i]]] = prob->x[i];
01946                         ++start[index[i]];
01947                 }
01948                 
01949                 start[0] = 0;
01950                 for(i=1;i<nr_class;i++)
01951                         start[i] = start[i-1]+count[i-1];
01952 
01953                 // calculate weighted C
01954 
01955                 double *weighted_C = Malloc(double, nr_class);
01956                 for(i=0;i<nr_class;i++)
01957                         weighted_C[i] = param->C;
01958                 for(i=0;i<param->nr_weight;i++)
01959                 {       
01960                         int j;
01961                         for(j=0;j<nr_class;j++)
01962                                 if(param->weight_label[i] == label[j])
01963                                         break;
01964                         if(j == nr_class)
01965                                 fprintf(stderr,"warning: class label %d specified in weight is not found\n", param->weight_label[i]);
01966                         else
01967                                 weighted_C[j] *= param->weight[i];
01968                 }
01969 
01970                 // train k*(k-1)/2 models
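                      // one-vs-one: for each pair (i,j) build a binary subproblem
                      // with class i relabelled +1 and class j relabelled -1 and
                      // train it with the weighted C values; the resulting
                      // k*(k-1)/2 decision functions are combined by voting in
                      // svm_predict()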
01971                 
01972                 bool *nonzero = Malloc(bool,l);
01973                 for(i=0;i<l;i++)
01974                         nonzero[i] = false;
01975                 decision_function *f = Malloc(decision_function,nr_class*(nr_class-1)/2);
01976 
01977                 double *probA=NULL,*probB=NULL;
01978                 if (param->probability)
01979                 {
01980                         probA=Malloc(double,nr_class*(nr_class-1)/2);
01981                         probB=Malloc(double,nr_class*(nr_class-1)/2);
01982                 }
01983 
01984                 int p = 0;
01985                 for(i=0;i<nr_class;i++)
01986                         for(int j=i+1;j<nr_class;j++)
01987                         {
01988                                 svm_problem sub_prob;
01989                                 int si = start[i], sj = start[j];
01990                                 int ci = count[i], cj = count[j];
01991                                 sub_prob.l = ci+cj;
01992                                 sub_prob.x = Malloc(svm_node *,sub_prob.l);
01993                                 sub_prob.y = Malloc(double,sub_prob.l);
01994                                 int k;
01995                                 for(k=0;k<ci;k++)
01996                                 {
01997                                         sub_prob.x[k] = x[si+k];
01998                                         sub_prob.y[k] = +1;
01999                                 }
02000                                 for(k=0;k<cj;k++)
02001                                 {
02002                                         sub_prob.x[ci+k] = x[sj+k];
02003                                         sub_prob.y[ci+k] = -1;
02004                                 }
02005 
02006                                 if(param->probability)
02007                                         svm_binary_svc_probability(&sub_prob,param,weighted_C[i],weighted_C[j],probA[p],probB[p]);
02008 
02009                                 f[p] = svm_train_one(&sub_prob,param,weighted_C[i],weighted_C[j]);
02010                                 for(k=0;k<ci;k++)
02011                                         if(!nonzero[si+k] && fabs(f[p].alpha[k]) > 0)
02012                                                 nonzero[si+k] = true;
02013                                 for(k=0;k<cj;k++)
02014                                         if(!nonzero[sj+k] && fabs(f[p].alpha[ci+k]) > 0)
02015                                                 nonzero[sj+k] = true;
02016                                 free(sub_prob.x);
02017                                 free(sub_prob.y);
02018                                 ++p;
02019                         }
02020 
02021                 // build output
02022 
02023                 model->nr_class = nr_class;
02024                 
02025                 model->label = Malloc(int,nr_class);
02026                 for(i=0;i<nr_class;i++)
02027                         model->label[i] = label[i];
02028                 
02029                 model->rho = Malloc(double,nr_class*(nr_class-1)/2);
02030                 for(i=0;i<nr_class*(nr_class-1)/2;i++)
02031                         model->rho[i] = f[i].rho;
02032 
02033                 if(param->probability)
02034                 {
02035                         model->probA = Malloc(double,nr_class*(nr_class-1)/2);
02036                         model->probB = Malloc(double,nr_class*(nr_class-1)/2);
02037                         for(i=0;i<nr_class*(nr_class-1)/2;i++)
02038                         {
02039                                 model->probA[i] = probA[i];
02040                                 model->probB[i] = probB[i];
02041                         }
02042                 }
02043                 else
02044                 {
02045                         model->probA=NULL;
02046                         model->probB=NULL;
02047                 }
02048 
02049                 int total_sv = 0;
02050                 int *nz_count = Malloc(int,nr_class);
02051                 model->nSV = Malloc(int,nr_class);
02052                 for(i=0;i<nr_class;i++)
02053                 {
02054                         int nSV = 0;
02055                         for(int j=0;j<count[i];j++)
02056                                 if(nonzero[start[i]+j])
02057                                 {       
02058                                         ++nSV;
02059                                         ++total_sv;
02060                                 }
02061                         model->nSV[i] = nSV;
02062                         nz_count[i] = nSV;
02063                 }
02064                 
02065                 info("Total nSV = %d\n",total_sv);
02066 
02067                 model->l = total_sv;
02068                 model->SV = Malloc(svm_node *,total_sv);
02069                 p = 0;
02070                 for(i=0;i<l;i++)
02071                         if(nonzero[i]) model->SV[p++] = x[i];
02072 
02073                 int *nz_start = Malloc(int,nr_class);
02074                 nz_start[0] = 0;
02075                 for(i=1;i<nr_class;i++)
02076                         nz_start[i] = nz_start[i-1]+nz_count[i-1];
02077 
02078                 model->sv_coef = Malloc(double *,nr_class-1);
02079                 for(i=0;i<nr_class-1;i++)
02080                         model->sv_coef[i] = Malloc(double,total_sv);
02081 
02082                 p = 0;
02083                 for(i=0;i<nr_class;i++)
02084                         for(int j=i+1;j<nr_class;j++)
02085                         {
02086                                 // classifier (i,j): coefficients with
02087                                 // i are in sv_coef[j-1][nz_start[i]...],
02088                                 // j are in sv_coef[i][nz_start[j]...]
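                                      // e.g. with nr_class = 3: sv_coef[0] holds the class-0 and
                                      // class-1 coefficients of classifier (0,1) followed by the
                                      // class-2 coefficients of classifier (0,2); sv_coef[1] holds
                                      // the class-0 coefficients of (0,2) followed by the class-1
                                      // and class-2 coefficients of (1,2)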
02089 
02090                                 int si = start[i];
02091                                 int sj = start[j];
02092                                 int ci = count[i];
02093                                 int cj = count[j];
02094                                 
02095                                 int q = nz_start[i];
02096                                 int k;
02097                                 for(k=0;k<ci;k++)
02098                                         if(nonzero[si+k])
02099                                                 model->sv_coef[j-1][q++] = f[p].alpha[k];
02100                                 q = nz_start[j];
02101                                 for(k=0;k<cj;k++)
02102                                         if(nonzero[sj+k])
02103                                                 model->sv_coef[i][q++] = f[p].alpha[ci+k];
02104                                 ++p;
02105                         }
02106                 
02107                 free(label);
02108                 free(probA);
02109                 free(probB);
02110                 free(count);
02111                 free(index);
02112                 free(start);
02113                 free(x);
02114                 free(weighted_C);
02115                 free(nonzero);
02116                 for(i=0;i<nr_class*(nr_class-1)/2;i++)
02117                         free(f[i].alpha);
02118                 free(f);
02119                 free(nz_count);
02120                 free(nz_start);
02121         }
02122         return model;
02123 }
02124 
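      // k-fold cross-validation: the indices are randomly shuffled, for each
      // fold a model is trained on the remaining nr_fold-1 folds, and the
      // held-out predictions (probability-based when requested) are written
      // to target[perm[j]]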
02125 void svm_cross_validation(const svm_problem *prob, const svm_parameter *param, int nr_fold, double *target)
02126 {
02127         int i;
02128         int *perm = Malloc(int,prob->l);
02129 
02130         // random shuffle
02131         for(i=0;i<prob->l;i++) perm[i]=i;
02132         for(i=0;i<prob->l;i++)
02133         {
02134                 int j = i+rand()%(prob->l-i);
02135                 swap(perm[i],perm[j]);  // file-local swap; <algorithm> is not included
02136         }
02137         for(i=0;i<nr_fold;i++)
02138         {
02139                 int begin = i*prob->l/nr_fold;
02140                 int end = (i+1)*prob->l/nr_fold;
02141                 int j,k;
02142                 struct svm_problem subprob;
02143 
02144                 subprob.l = prob->l-(end-begin);
02145                 subprob.x = Malloc(struct svm_node*,subprob.l);
02146                 subprob.y = Malloc(double,subprob.l);
02147                         
02148                 k=0;
02149                 for(j=0;j<begin;j++)
02150                 {
02151                         subprob.x[k] = prob->x[perm[j]];
02152                         subprob.y[k] = prob->y[perm[j]];
02153                         ++k;
02154                 }
02155                 for(j=end;j<prob->l;j++)
02156                 {
02157                         subprob.x[k] = prob->x[perm[j]];
02158                         subprob.y[k] = prob->y[perm[j]];
02159                         ++k;
02160                 }
02161                 struct svm_model *submodel = svm_train(&subprob,param);
02162                 if(param->probability && 
02163                    (param->svm_type == C_SVC || param->svm_type == NU_SVC))
02164                 {
02165                         double *prob_estimates=Malloc(double,svm_get_nr_class(submodel));
02166                         for(j=begin;j<end;j++)
02167                                 target[perm[j]] = svm_predict_probability(submodel,prob->x[perm[j]],prob_estimates);
02168                         free(prob_estimates);                   
02169                 }
02170                 else
02171                         for(j=begin;j<end;j++)
02172                                 target[perm[j]] = svm_predict(submodel,prob->x[perm[j]]);
02173                 svm_destroy_model(submodel);
02174                 free(subprob.x);
02175                 free(subprob.y);
02176         }               
02177         free(perm);     
02178 }
02179 
02180 int svm_get_svm_type(const svm_model *model)
02181 {
02182         return model->param.svm_type;
02183 }
02184 
02185 int svm_get_nr_class(const svm_model *model)
02186 {
02187         return model->nr_class;
02188 }
02189 
02190 void svm_get_labels(const svm_model *model, int* label)
02191 {
02192         if (model->label != NULL)
02193                 for(int i=0;i<model->nr_class;i++)
02194                         label[i] = model->label[i];
02195 }
02196 
02197 double svm_get_svr_probability(const svm_model *model)
02198 {
02199         if ((model->param.svm_type == EPSILON_SVR || model->param.svm_type == NU_SVR) &&
02200             model->probA!=NULL)
02201                 return model->probA[0];
02202         else
02203         {
02204                 info("Model doesn't contain information for SVR probability inference\n");
02205                 return 0;
02206         }
02207 }
02208 
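      // Computes the uncalibrated decision values: sum_i sv_coef[i]*K(x,SV[i]) - rho
      // for one-class/regression models, and one such value per class pair for
      // classification models (pairs ordered (0,1),(0,2),...,(1,2),...).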
02209 void svm_predict_values(const svm_model *model, const svm_node *x, double* dec_values)
02210 {
02211         if(model->param.svm_type == ONE_CLASS ||
02212            model->param.svm_type == EPSILON_SVR ||
02213            model->param.svm_type == NU_SVR)
02214         {
02215                 double *sv_coef = model->sv_coef[0];
02216                 double sum = 0;
02217                 for(int i=0;i<model->l;i++)
02218                         sum += sv_coef[i] * Kernel::k_function(x,model->SV[i],model->param);
02219                 sum -= model->rho[0];
02220                 *dec_values = sum;
02221         }
02222         else
02223         {
02224                 int i;
02225                 int nr_class = model->nr_class;
02226                 int l = model->l;
02227                 
02228                 double *kvalue = Malloc(double,l);
02229                 for(i=0;i<l;i++)
02230                         kvalue[i] = Kernel::k_function(x,model->SV[i],model->param);
02231 
02232                 int *start = Malloc(int,nr_class);
02233                 start[0] = 0;
02234                 for(i=1;i<nr_class;i++)
02235                         start[i] = start[i-1]+model->nSV[i-1];
02236 
02237                 int p=0;
02238                 int pos=0;
02239                 for(i=0;i<nr_class;i++)
02240                         for(int j=i+1;j<nr_class;j++)
02241                         {
02242                                 double sum = 0;
02243                                 int si = start[i];
02244                                 int sj = start[j];
02245                                 int ci = model->nSV[i];
02246                                 int cj = model->nSV[j];
02247                                 
02248                                 int k;
02249                                 double *coef1 = model->sv_coef[j-1];
02250                                 double *coef2 = model->sv_coef[i];
02251                                 for(k=0;k<ci;k++)
02252                                         sum += coef1[si+k] * kvalue[si+k];
02253                                 for(k=0;k<cj;k++)
02254                                         sum += coef2[sj+k] * kvalue[sj+k];
02255                                 sum -= model->rho[p++];
02256                                 dec_values[pos++] = sum;
02257                         }
02258 
02259                 free(kvalue);
02260                 free(start);
02261         }
02262 }
02263 
02264 double svm_predict(const svm_model *model, const svm_node *x)
02265 {
02266         if(model->param.svm_type == ONE_CLASS ||
02267            model->param.svm_type == EPSILON_SVR ||
02268            model->param.svm_type == NU_SVR)
02269         {
02270                 double res;
02271                 svm_predict_values(model, x, &res);
02272                 
02273                 if(model->param.svm_type == ONE_CLASS)
02274                         return (res>0)?1:-1;
02275                 else
02276                         return res;
02277         }
02278         else
02279         {
02280                 int i;
02281                 int nr_class = model->nr_class;
02282                 double *dec_values = Malloc(double, nr_class*(nr_class-1)/2);
02283                 svm_predict_values(model, x, dec_values);
02284 
02285                 int *vote = Malloc(int,nr_class);
02286                 for(i=0;i<nr_class;i++)
02287                         vote[i] = 0;
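                      // one-vs-one voting: dec_values follows the pair order
                      // (0,1),(0,2),...,(1,2),...; a positive value votes for
                      // class i, otherwise for class j, and the label with the
                      // most votes is returned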
02288                 int pos=0;
02289                 for(i=0;i<nr_class;i++)
02290                         for(int j=i+1;j<nr_class;j++)
02291                         {
02292                                 if(dec_values[pos++] > 0)
02293                                         ++vote[i];
02294                                 else
02295                                         ++vote[j];
02296                         }
02297 
02298                 int vote_max_idx = 0;
02299                 for(i=1;i<nr_class;i++)
02300                         if(vote[i] > vote[vote_max_idx])
02301                                 vote_max_idx = i;
02302                 free(vote);
02303                 free(dec_values);
02304                 return model->label[vote_max_idx];
02305         }
02306 }
02307 
02308 double svm_predict_probability(
02309         const svm_model *model, const svm_node *x, double *prob_estimates)
02310 {
02311         if ((model->param.svm_type == C_SVC || model->param.svm_type == NU_SVC) &&
02312             model->probA!=NULL && model->probB!=NULL)
02313         {
02314                 int i;
02315                 int nr_class = model->nr_class;
02316                 double *dec_values = Malloc(double, nr_class*(nr_class-1)/2);
02317                 svm_predict_values(model, x, dec_values);
02318 
02319                 double min_prob=1e-7;
02320                 double **pairwise_prob=Malloc(double *,nr_class);
02321                 for(i=0;i<nr_class;i++)
02322                         pairwise_prob[i]=Malloc(double,nr_class);
02323                 int k=0;
02324                 for(i=0;i<nr_class;i++)
02325                         for(int j=i+1;j<nr_class;j++)
02326                         {
02327                                 pairwise_prob[i][j]=min(max(sigmoid_predict(dec_values[k],model->probA[k],model->probB[k]),min_prob),1-min_prob);
02328                                 pairwise_prob[j][i]=1-pairwise_prob[i][j];
02329                                 k++;
02330                         }
02331                 multiclass_probability(nr_class,pairwise_prob,prob_estimates);
02332 
02333                 int prob_max_idx = 0;
02334                 for(i=1;i<nr_class;i++)
02335                         if(prob_estimates[i] > prob_estimates[prob_max_idx])
02336                                 prob_max_idx = i;
02337                 for(i=0;i<nr_class;i++)
02338                         free(pairwise_prob[i]);
02339                 free(dec_values);
02340                 free(pairwise_prob);         
02341                 return model->label[prob_max_idx];
02342         }
02343         else 
02344                 return svm_predict(model, x);
02345 }
02346 
02347 const char *svm_type_table[] =
02348 {
02349         "c_svc","nu_svc","one_class","epsilon_svr","nu_svr",NULL
02350 };
02351 
02352 const char *kernel_type_table[]=
02353 {
02354         "linear","polynomial","rbf","sigmoid",NULL
02355 };
02356 
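      // Writes the model in the text format read back by svm_load_model(): a
      // header of "keyword value(s)" lines ending with "SV", then one line per
      // support vector holding its nr_class-1 coefficients followed by the
      // sparse index:value pairs.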
02357 int svm_save_model(const char *model_file_name, const svm_model *model)
02358 {
02359         FILE *fp = fopen(model_file_name,"w");
02360         if(fp==NULL) return -1;
02361 
02362         const svm_parameter& param = model->param;
02363 
02364         fprintf(fp,"svm_type %s\n", svm_type_table[param.svm_type]);
02365         fprintf(fp,"kernel_type %s\n", kernel_type_table[param.kernel_type]);
02366 
02367         if(param.kernel_type == POLY)
02368                 fprintf(fp,"degree %g\n", param.degree);
02369 
02370         if(param.kernel_type == POLY || param.kernel_type == RBF || param.kernel_type == SIGMOID)
02371                 fprintf(fp,"gamma %g\n", param.gamma);
02372 
02373         if(param.kernel_type == POLY || param.kernel_type == SIGMOID)
02374                 fprintf(fp,"coef0 %g\n", param.coef0);
02375 
02376         int nr_class = model->nr_class;
02377         int l = model->l;
02378         fprintf(fp, "nr_class %d\n", nr_class);
02379         fprintf(fp, "total_sv %d\n",l);
02380         
02381         {
02382                 fprintf(fp, "rho");
02383                 for(int i=0;i<nr_class*(nr_class-1)/2;i++)
02384                         fprintf(fp," %g",model->rho[i]);
02385                 fprintf(fp, "\n");
02386         }
02387         
02388         if(model->label)
02389         {
02390                 fprintf(fp, "label");
02391                 for(int i=0;i<nr_class;i++)
02392                         fprintf(fp," %d",model->label[i]);
02393                 fprintf(fp, "\n");
02394         }
02395 
02396         if(model->probA) // regression has probA only
02397         {
02398                 fprintf(fp, "probA");
02399                 for(int i=0;i<nr_class*(nr_class-1)/2;i++)
02400                         fprintf(fp," %g",model->probA[i]);
02401                 fprintf(fp, "\n");
02402         }
02403         if(model->probB)
02404         {
02405                 fprintf(fp, "probB");
02406                 for(int i=0;i<nr_class*(nr_class-1)/2;i++)
02407                         fprintf(fp," %g",model->probB[i]);
02408                 fprintf(fp, "\n");
02409         }
02410 
02411         if(model->nSV)
02412         {
02413                 fprintf(fp, "nr_sv");
02414                 for(int i=0;i<nr_class;i++)
02415                         fprintf(fp," %d",model->nSV[i]);
02416                 fprintf(fp, "\n");
02417         }
02418 
02419         fprintf(fp, "SV\n");
02420         const double * const *sv_coef = model->sv_coef;
02421         const svm_node * const *SV = model->SV;
02422 
02423         for(int i=0;i<l;i++)
02424         {
02425                 for(int j=0;j<nr_class-1;j++)
02426                         fprintf(fp, "%.16g ",sv_coef[j][i]);
02427 
02428                 const svm_node *p = SV[i];
02429                 while(p->index != -1)
02430                 {
02431                         fprintf(fp,"%d:%.8g ",p->index,p->value);
02432                         p++;
02433                 }
02434                 fprintf(fp, "\n");
02435         }
02436 
02437         fclose(fp);
02438         return 0;
02439 }
02440 
02441 svm_model *svm_load_model(const char *model_file_name)
02442 {
02443         FILE *fp = fopen(model_file_name,"rb");
02444         if(fp==NULL) return NULL;
02445         
02446         // read parameters
02447 
02448         svm_model *model = Malloc(svm_model,1);
02449         svm_parameter& param = model->param;
02450         model->rho = NULL;
02451         model->probA = NULL;
02452         model->probB = NULL;
02453         model->label = NULL;
02454         model->nSV = NULL;
02455 
02456         char cmd[81];
02457         while(1)
02458         {
02459                 fscanf(fp,"%80s",cmd);
02460 
02461                 if(strcmp(cmd,"svm_type")==0)
02462                 {
02463                         fscanf(fp,"%80s",cmd);
02464                         int i;
02465                         for(i=0;svm_type_table[i];i++)
02466                         {
02467                                 if(strcmp(svm_type_table[i],cmd)==0)
02468                                 {
02469                                         param.svm_type=i;
02470                                         break;
02471                                 }
02472                         }
02473                         if(svm_type_table[i] == NULL)
02474                         {
02475                                 fprintf(stderr,"unknown svm type.\n");
02476                                 free(model->rho);
02477                                 free(model->label);
02478                                 free(model->nSV);
02479                                 free(model);
02480                                 return NULL;
02481                         }
02482                 }
02483                 else if(strcmp(cmd,"kernel_type")==0)
02484                 {               
02485                         fscanf(fp,"%80s",cmd);
02486                         int i;
02487                         for(i=0;kernel_type_table[i];i++)
02488                         {
02489                                 if(strcmp(kernel_type_table[i],cmd)==0)
02490                                 {
02491                                         param.kernel_type=i;
02492                                         break;
02493                                 }
02494                         }
02495                         if(kernel_type_table[i] == NULL)
02496                         {
02497                                 fprintf(stderr,"unknown kernel function.\n");
02498                                 free(model->rho);
02499                                 free(model->label);
02500                                 free(model->nSV);
02501                                 free(model);
02502                                 return NULL;
02503                         }
02504                 }
02505                 else if(strcmp(cmd,"degree")==0)
02506                         fscanf(fp,"%lf",&param.degree);
02507                 else if(strcmp(cmd,"gamma")==0)
02508                         fscanf(fp,"%lf",&param.gamma);
02509                 else if(strcmp(cmd,"coef0")==0)
02510                         fscanf(fp,"%lf",&param.coef0);
02511                 else if(strcmp(cmd,"nr_class")==0)
02512                         fscanf(fp,"%d",&model->nr_class);
02513                 else if(strcmp(cmd,"total_sv")==0)
02514                         fscanf(fp,"%d",&model->l);
02515                 else if(strcmp(cmd,"rho")==0)
02516                 {
02517                         int n = model->nr_class * (model->nr_class-1)/2;
02518                         model->rho = Malloc(double,n);
02519                         for(int i=0;i<n;i++)
02520                                 fscanf(fp,"%lf",&model->rho[i]);
02521                 }
02522                 else if(strcmp(cmd,"label")==0)
02523                 {
02524                         int n = model->nr_class;
02525                         model->label = Malloc(int,n);
02526                         for(int i=0;i<n;i++)
02527                                 fscanf(fp,"%d",&model->label[i]);
02528                 }
02529                 else if(strcmp(cmd,"probA")==0)
02530                 {
02531                         int n = model->nr_class * (model->nr_class-1)/2;
02532                         model->probA = Malloc(double,n);
02533                         for(int i=0;i<n;i++)
02534                                 fscanf(fp,"%lf",&model->probA[i]);
02535                 }
02536                 else if(strcmp(cmd,"probB")==0)
02537                 {
02538                         int n = model->nr_class * (model->nr_class-1)/2;
02539                         model->probB = Malloc(double,n);
02540                         for(int i=0;i<n;i++)
02541                                 fscanf(fp,"%lf",&model->probB[i]);
02542                 }
02543                 else if(strcmp(cmd,"nr_sv")==0)
02544                 {
02545                         int n = model->nr_class;
02546                         model->nSV = Malloc(int,n);
02547                         for(int i=0;i<n;i++)
02548                                 fscanf(fp,"%d",&model->nSV[i]);
02549                 }
02550                 else if(strcmp(cmd,"SV")==0)
02551                 {
02552                         while(1)
02553                         {
02554                                 int c = getc(fp);
02555                                 if(c==EOF || c=='\n') break;    
02556                         }
02557                         break;
02558                 }
02559                 else
02560                 {
02561                         fprintf(stderr,"unknown text in model file\n");
02562                         free(model->rho);
02563                         free(model->label);
02564                         free(model->nSV);
02565                         free(model);
02566                         return NULL;
02567                 }
02568         }
02569 
02570         // read sv_coef and SV
02571 
02572         int elements = 0;
02573         long pos = ftell(fp);
02574 
02575         while(1)
02576         {
02577                 int c = fgetc(fp);
02578                 switch(c)
02579                 {
02580                         case '\n':
02581                                 // count the '-1' element
02582                         case ':':
02583                                 ++elements;
02584                                 break;
02585                         case EOF:
02586                                 goto out;
02587                         default:
02588                                 ;
02589                 }
02590         }
02591 out:
02592         fseek(fp,pos,SEEK_SET);
02593 
02594         int m = model->nr_class - 1;
02595         int l = model->l;
02596         model->sv_coef = Malloc(double *,m);
02597         int i;
02598         for(i=0;i<m;i++)
02599                 model->sv_coef[i] = Malloc(double,l);
02600         model->SV = Malloc(svm_node*,l);
02601         svm_node *x_space = Malloc(svm_node,elements);
02602 
02603         int j=0;
02604         for(i=0;i<l;i++)
02605         {
02606                 model->SV[i] = &x_space[j];
02607                 for(int k=0;k<m;k++)
02608                         fscanf(fp,"%lf",&model->sv_coef[k][i]);
02609                 while(1)
02610                 {
02611                         int c;
02612                         do {
02613                                 c = getc(fp);
02614                                 if(c=='\n') goto out2;
02615                         } while(isspace(c));
02616                         ungetc(c,fp);
02617                         fscanf(fp,"%d:%lf",&(x_space[j].index),&(x_space[j].value));
02618                         ++j;
02619                 }       
02620 out2:
02621                 x_space[j++].index = -1;
02622         }
02623 
02624         fclose(fp);
02625 
02626         model->free_sv = 1;     // XXX
02627         return model;
02628 }
02629 
02630 void svm_destroy_model(svm_model* model)
02631 {
02632         if(model->free_sv)
02633                 free((void *)(model->SV[0]));
02634         for(int i=0;i<model->nr_class-1;i++)
02635                 free(model->sv_coef[i]);
02636         free(model->SV);
02637         free(model->sv_coef);
02638         free(model->rho);
02639         free(model->label);
02640         free(model->probA);
02641         free(model->probB);
02642         free(model->nSV);
02643         free(model);
02644 }
02645 
02646 void svm_destroy_param(svm_parameter* param)
02647 {
02648         free(param->weight_label);
02649         free(param->weight);
02650 }
02651 
02652 const char *svm_check_parameter(const svm_problem *prob, const svm_parameter *param)
02653 {
02654         // svm_type
02655 
02656         int svm_type = param->svm_type;
02657         if(svm_type != C_SVC &&
02658            svm_type != NU_SVC &&
02659            svm_type != ONE_CLASS &&
02660            svm_type != EPSILON_SVR &&
02661            svm_type != NU_SVR)
02662                 return "unknown svm type";
02663         
02664         // kernel_type
02665         
02666         int kernel_type = param->kernel_type;
02667         if(kernel_type != SVM_LINEAR &&
02668            kernel_type != POLY &&
02669            kernel_type != RBF &&
02670            kernel_type != SIGMOID)
02671                 return "unknown kernel type";
02672 
02673         // cache_size,eps,C,nu,p,shrinking
02674 
02675         if(param->cache_size <= 0)
02676                 return "cache_size <= 0";
02677 
02678         if(param->eps <= 0)
02679                 return "eps <= 0";
02680 
02681         if(svm_type == C_SVC ||
02682            svm_type == EPSILON_SVR ||
02683            svm_type == NU_SVR)
02684                 if(param->C <= 0)
02685                         return "C <= 0";
02686 
02687         if(svm_type == NU_SVC ||
02688            svm_type == ONE_CLASS ||
02689            svm_type == NU_SVR)
02690                 if(param->nu < 0 || param->nu > 1)
02691                         return "nu < 0 or nu > 1";
02692 
02693         if(svm_type == EPSILON_SVR)
02694                 if(param->p < 0)
02695                         return "p < 0";
02696 
02697         if(param->shrinking != 0 &&
02698            param->shrinking != 1)
02699                 return "shrinking != 0 and shrinking != 1";
02700 
02701         if(param->probability != 0 &&
02702            param->probability != 1)
02703                 return "probability != 0 and probability != 1";
02704 
02705         if(param->probability == 1 &&
02706            svm_type == ONE_CLASS)
02707                 return "one-class SVM probability output not supported yet";
02708 
02709 
02710         // check whether nu-svc is feasible
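              // for every pair of classes the constraint nu*(n1+n2)/2 <= min(n1,n2)
              // must hold, i.e. nu cannot exceed 2*min(n1,n2)/(n1+n2)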
02711         
02712         if(svm_type == NU_SVC)
02713         {
02714                 int l = prob->l;
02715                 int max_nr_class = 16;
02716                 int nr_class = 0;
02717                 int *label = Malloc(int,max_nr_class);
02718                 int *count = Malloc(int,max_nr_class);
02719 
02720                 int i;
02721                 for(i=0;i<l;i++)
02722                 {
02723                         int this_label = (int)prob->y[i];
02724                         int j;
02725                         for(j=0;j<nr_class;j++)
02726                                 if(this_label == label[j])
02727                                 {
02728                                         ++count[j];
02729                                         break;
02730                                 }
02731                         if(j == nr_class)
02732                         {
02733                                 if(nr_class == max_nr_class)
02734                                 {
02735                                         max_nr_class *= 2;
02736                                         label = (int *)realloc(label,max_nr_class*sizeof(int));
02737                                         count = (int *)realloc(count,max_nr_class*sizeof(int));
02738                                 }
02739                                 label[nr_class] = this_label;
02740                                 count[nr_class] = 1;
02741                                 ++nr_class;
02742                         }
02743                 }
02744         
02745                 for(i=0;i<nr_class;i++)
02746                 {
02747                         int n1 = count[i];
02748                         for(int j=i+1;j<nr_class;j++)
02749                         {
02750                                 int n2 = count[j];
02751                                 if(param->nu*(n1+n2)/2 > min(n1,n2))
02752                                 {
02753                                         free(label);
02754                                         free(count);
02755                                         return "specified nu is infeasible";
02756                                 }
02757                         }
02758                 }
02759                 free(label);
02760                 free(count);
02761         }
02762 
02763         return NULL;
02764 }
02765 
02766 int svm_check_probability_model(const svm_model *model)
02767 {
02768         return ((model->param.svm_type == C_SVC || model->param.svm_type == NU_SVC) &&
02769                 model->probA!=NULL && model->probB!=NULL) ||
02770                 ((model->param.svm_type == EPSILON_SVR || model->param.svm_type == NU_SVR) &&
02771                  model->probA!=NULL);
02772 }
