function sele = seleSIM(x, y); % This is a Matlab-code for variable selection in the Single-Index model: % y = g(x \theta) + e % % input x: nxp % x: nx1 % output sele: candinal of selected variables in x % % Example: % n = 50; % x = randn(n, 10); % beta = [1 0 -1 0 1 0 1 0 -1 0]'; % y = (x*beta).^2 + randn(n,1); % sele = DCVsim(x,y) % % The correct output should be [1 3 5 7 9] indicating variables at % those positions, i.e. x1, x3, x5, x7, x9 are selected. [n,p] = size(x); x = x - repmat(mean(x),n,1); x = x./repmat(std(x),n,1); D = p; y = y/std(y); if p == 1 if mean((y-mean(y)).^2) < cvm(x,y) sele = []; else sele = [1]; end return end SS = 1:p; xx = x(:,SS); xx = xx./repmat(std(xx), n, 1); sigma = inv(xx'*xx/n)^0.5; xx = xx*sigma; cv0a = 1.0e20; for id = 1:1 [B, cv] = estSIM(xx, y, mean(std(xx))/n^(1/(id+4)), id); if cv < cv0a id0 = id; cv0a = cv; B0 = sigma*B; else break end end D = id0; while length(SS)>1; xx = x(:,SS)*B0; sigma = inv(xx'*xx/n)^0.5; xx = xx*sigma; caa = cvm(xx, y); v = sqrt(mean(abs(B0).^2, 2)); vsort = sort(v); ps = min(3,length(SS)); nd = 0; for id = 1:ps di = min(find(v==vsort(id))); si = find(abs( (1:length(SS))-di)>0); Sis = SS(si); Sid = SS(di); cv0a = 1.0e20; xx = x(:,Sis); sigma = inv(xx'*xx/n)^0.5; xx = xx*sigma; if length(Sis) > 1 if D >= length(Sis) D = length(Sis); B0 = sigma*eye(D); else iid = D; B = estSIM(xx, y, mean(std(xx))/n^(1/(iid+4)), iid); id0 = iid; B0 = sigma*B; end else B0 = 1; end xx = x(:,Sis)*B0; sigma = inv(xx'*xx/n)^0.5; xx = xx*sigma; ca = cvm(xx,y); if ca < caa SS = Sis; D = size(B0,2); break end idc = min(D, length(Sis)); if length(Sis)>=1 z = x(:,Sid); xx = x(:,Sis); z = z - xx*(inv(xx'*xx+eye(length(Sis))/n)*xx'*z); z = z/std(z); sigma = inv(xx'*xx/n)^0.5; xx = xx*sigma; Bi = estSIM1(xx, z, y, mean(std(xx))/n^(1/(idc+4+1)), idc); xx = [xx*Bi z]; sigma = inv(xx'*xx/n)^0.5; xx = xx*sigma; cc = cvm(xx, y); end if ca < cc SS = Sis; D = size(B0,2); cv0a = 1.0e20; xx = x(:,SS); sigma = inv(xx'*xx/n)^0.5; xx = xx*sigma; [B, cv] = estSIM(xx, y, mean(std(xx))/n^(1/(id+4)), id); if cv < cv0a id0 = id; cv0a = cv; B0 = sigma*B; else break end D = id0; break end nd = nd + 1; end if nd == ps break end end sele = SS; %%%%%%%%%%%%%%%%%% function e = cvm(x, y) [n, p] = size(x); h = mean(std(x))/n^(1/(p+4))+0.001; onen = ones(n,1); err = 1.0e20; h2 = 2*h*h; for j = 1:n; xij = x - repmat(x(j,:),n,1); ker = exp(-sum(xij.*xij, 2)/h2); ker(j) = 0; f(j,:) = ker'*y/(sum(ker) + 1.0e-10); end e = abs(y-f(:,1)); e = mean(e); %%%%%%%%%%%%%%%%% function [B, cv] = estSIM(xx, y, h, nd) [n,p] = size(xx); [ny, py] = size(y); onep = ones(p,1); onen = ones(n,1); B = eye(p); eyep = eye(p+1)/n^3; ab = ones(p,n); niter = max(p, 25); step = ceil(2*p/niter); h2 = 2*h*h; for iter = 1:niter; for i = 1:n; xi = xx - repmat(xx(i,:),n,1); kernel = exp(-sum((xi*B).^2,2)/h2 ); onexi = [xi onen]; xk = onexi.*repmat(kernel, 1, p+1); abi = inv(xk'*onexi+eyep)*(xk'*y); ab(:,i) = abi(1:p); end; [B0 D] = eig(ab*ab'); [D I] = sort(diag(D)); B = B0(:,I); B = B(:,p:p); end; xB = xx*B; ye = y; for i = 1:n; xi = xB - repmat(xB(i,:), n,1); kx = exp(-sum(xi.^2,2)/h2); kx(i) = 0; ye(i) = kx'*y/(sum(kx)+1/n); end cv = mean(abs(y-ye)); %%%%%%%%%%%%%%%%% function [B, cv] = estSIM1(xx, z, y, h, nd) [n,p] = size(xx); [ny, py] = size(y); onep = ones(p,1); onen = ones(n,1); B = eye(p); eyep = eye(p+1)/n^3; ab = ones(p,n); niter = max(p, 25); step = ceil(2*p/niter); h2 = 2*h*h; for iter = 1:niter; for i = 1:n; xi = xx - repmat(xx(i,:),n,1); kernel = exp(-(sum((xi*B).^2,2)+(z-z(i)).^2)/h2 ); onexi = [xi onen]; xk = onexi.*repmat(kernel, 1, p+1); abi = inv(xk'*onexi+eyep)*(xk'*y); ab(:,i) = abi(1:p); end; [B0 D] = eig(ab*ab'); [D I] = sort(diag(D)); B = B0(:,I); B = B(:,p:p); end; cv = 0;