% Active classification of handwritten digits
% 18th Machine Learning Summer School
%
% Author: Ruben Martinez-Cantin

clear all, close all

% Fix the random seed so runs are reproducible.
% (Legacy seeding syntax kept to match the rest of the course material.)
seed = 43;
randn('seed',seed), rand('seed',seed)

load mnist_0to2;

% Select training, pool and testing sets.
% We assume binary classification: digit0 = +1 / others = -1.
trainset = [train0(1:100,:);   train1(1:50,:);   train2(1:50,:)];
ytr      = [ones(100,1); -ones(100,1)];
poolset  = [train0(101:980,:); train1(51:550,:); train2(51:550,:)];
ypo      = [ones(880,1); -ones(1000,1)];
testset  = [test0(1:980,:);    test1(1:500,:);  test2(1:500,:)];
yte      = [ones(980,1); -ones(1000,1)];

% MATLAB only knows how to do operations with type "double" but the data is
% stored as "uint8". mat2gray converts uint8 -> double (scaled to [0,1]).
xtrain = mat2gray(trainset);
xpool  = mat2gray(poolset);
xtest  = mat2gray(testset);

% Dimensionality reduction using PCA (basis fit on all training digits).
pcaset   = mat2gray([train0; train1; train2]);
nPCAComp = 16;
[~,v,m] = reduceusingpca(pcaset,nPCAComp);
xtr = project2pca(xtrain,v,m);
xpo = project2pca(xpool,v,m);
xte = project2pca(xtest,v,m);

% Set up the GP. The function-handle variables are named covfunc/meanfunc/
% likfunc/inffunc so they do not shadow the MATLAB builtins "cov" and "inf".
covfunc = {@covSEiso};
sf  = 1;
ell = 0.7;
hyp0.cov = log([ell;sf]);     % covSEiso hypers are log-transformed
meanfunc = {@meanConst};
% BUGFIX: GPML mean hyperparameters are plain values, not log-transformed.
% The original code used log(0.5) (= -0.69) where a constant mean of 0.5
% was presumably intended. This only changes the optimizer's starting point.
hyp0.mean = 0.5;
likfunc = 'likLogistic';
inffunc = 'infLaplace';

% Optimize hyperparameters on the initial labelled set.
Ncg = 50;   % number of conjugate gradient steps
hyp = minimize(hyp0,'gp', -Ncg, inffunc, meanfunc, covfunc, likfunc, xtr, ytr);

% ---- Passive baseline: add Npool randomly chosen pool points -------------
Npool = 100;             % number of pool points added per experiment
nRuns = 10;              % number of random repetitions to average over
results = zeros(nRuns,1);
for ii = 1:nRuns
    j = randperm(size(xpo,1));
    xnew = [xtr; xpo(j(1:Npool),:)];
    ynew = [ytr; ypo(j(1:Npool),:)];
    hyp = minimize(hyp0,'gp', -Ncg, inffunc, meanfunc, covfunc, likfunc, xnew, ynew); % opt hypers
    [ymu, ys2] = gp(hyp, inffunc, meanfunc, covfunc, likfunc, xnew, ynew, xte);       % predict
    % Threshold the predictive mean at 0 to get labels in {-1,+1}.
    % (Parentheses, not brackets: (ymu>0) is the logical test, not concatenation.)
    y = 2*(ymu>0) - 1;
    results(ii) = sum(y==yte)/length(yte)*100;
    fprintf(1,'Passive classification results\n');
    fprintf(1,'%2.2f %% success rate\n',results(ii));
end;
fprintf(1,'Average passive classification results\n');
fprintf(1,'%2.2f %% success rate\n',mean(results));

% ---- Active version: pick the Npool most uncertain pool points -----------
[ymu, ys2] = gp(hyp, inffunc, meanfunc, covfunc, likfunc, xtr, ytr, xpo);  % predict
% Uncertainty criterion: small |mean|/std means the point lies close to the
% decision boundary, so an ascending sort puts the most informative first.
crit = abs(ymu) ./ sqrt(ys2);
[~,i] = sort(crit);
xnew = [xtr; xpo(i(1:Npool),:)];
ynew = [ytr; ypo(i(1:Npool),:)];
hyp = minimize(hyp0,'gp', -Ncg, inffunc, meanfunc, covfunc, likfunc, xnew, ynew); % opt hypers
[ymu, ys2] = gp(hyp, inffunc, meanfunc, covfunc, likfunc, xnew, ynew, xte);       % predict
y = 2*(ymu>0) - 1;
fprintf(1,'Active classification results\n');
fprintf(1,'%2.2f %% success rate\n',sum(y==yte)/length(yte)*100);