Two-class problem.
Assumption underlying this hypothesis-test design for feature selection in classification:
a feature with a larger difference between the class means has greater class separability, and is therefore a better feature.
# Synthetic two-class dataset: N samples per class, two features.
# Feature 1 is strongly separated (means 5 vs 15); Feature 2 weakly (4 vs 6).
N <- 200
w1 <- cbind(rnorm(N, mean = 5, sd = 1), rnorm(N, mean = 4, sd = 1))   # class 1
w2 <- cbind(rnorm(N, mean = 15, sd = 1), rnorm(N, mean = 6, sd = 1))  # class 2
X <- rbind(w1, w2)
cat(" Glimpse of Dataset X: \n")
print(X[1:5, ])
cat("\n Dimension of Dataset: \t Samples:", dim(X)[1], "\t Features:", dim(X)[2])
# Ground-truth labels: rows 1..N are class 1, rows N+1..2N are class 2.
true <- c(rep(1, N), rep(2, N))
colvec <- c("coral3", "darkseagreen3")[true]
pchs <- c(22, 24)[true]
plot(X, col = "black", bg = colvec, pch = pchs,
     xlab = "Feature1", ylab = "Feature2", main = "Scatter plot of Data")
The test statistic is $q = \dfrac{\bar{x} - \bar{y}}{s\,\sqrt{2/N}}$, where
$s^2 = \dfrac{1}{2N-2}\left(\sum_{i=1}^{N}(x_i-\bar{x})^2 + \sum_{i=1}^{N}(y_i-\bar{y})^2\right)$ is the pooled variance.
Under the null hypothesis (equal class means), $q$ follows a $t$-distribution with $2N-2$ degrees of freedom.
# Two-sample t-test statistic for Feature 1.
f <- X[, 1]
fw1 <- f[1:N]               # class-1 values
fw2 <- f[(N + 1):(2 * N)]   # class-2 values
xbar <- mean(fw1)
ybar <- mean(fw2)
# Pooled standard deviation: the WHOLE sum of squares is divided by 2N-2.
# (The original divided only the second sum, because / binds tighter than +.)
s <- sqrt((sum((fw1 - xbar)^2) + sum((fw2 - ybar)^2)) / (2 * N - 2))
q <- (xbar - ybar) / (s * sqrt(2 / N))
cat("\n xbar=", xbar, " ybar=", ybar)
cat("\n\n Test statistic q= ", q)
cat("\n q follows t- distribution with ", 2 * N - 2, " degrees of freedom")
Compute the p-value.
If p-value < 0.05: the feature has a significant difference between the class means.
If p-value >= 0.05: the difference between the class means along the feature is not significant.
# Two-sided p-value: under H0, q ~ t(2N-2), so take both tails.
# (The original used the lower tail only, which gives p ~ 1 whenever q > 0
# even for a perfectly separable feature.)
p.val <- 2 * pt(-abs(q), 2 * N - 2)
cat("\n p-value for hypothesis test on Feature 1= ", p.val)
alpha <- 0.05
if (p.val < alpha) {
  cat("\n Feature is relevant")
} else {
  cat("\n Feature is not relevant, lacks class separability")
}
# Repeat the two-sample t-test for Feature 2 (weakly separated: means 4 vs 6).
f <- X[, 2]
fw1 <- f[1:N]               # class-1 values
fw2 <- f[(N + 1):(2 * N)]   # class-2 values
xbar <- mean(fw1)
ybar <- mean(fw2)
# Pooled sd: divide the TOTAL sum of squares by 2N-2 (precedence bug fixed).
s <- sqrt((sum((fw1 - xbar)^2) + sum((fw2 - ybar)^2)) / (2 * N - 2))
q <- (xbar - ybar) / (s * sqrt(2 / N))
cat("\n xbar=", xbar, " ybar=", ybar)
cat("\n\n Test statistic q= ", q)
# Two-sided p-value under t(2N-2); the lower tail alone misreads positive q.
p.val <- 2 * pt(-abs(q), 2 * N - 2)
cat("\n p-value for hypothesis test on Feature 2= ", p.val)
if (p.val < alpha) {
  cat("\n Feature is relevant")
} else {
  cat("\n Feature is not relevant, lacks class separability")
}
# Four-feature, two-class dataset: N samples per class, unit variance.
N <- 200
# One rnorm call per feature column, in column order, so the random draws
# occur in the same sequence as building each cbind(...) directly.
gen_class <- function(means) {
  do.call(cbind, lapply(means, function(m) rnorm(N, mean = m, sd = 1)))
}
w1 <- gen_class(c(4, 7, 10, -3.5))   # class 1
w2 <- gen_class(c(7, 16, 12, 2))     # class 2
X <- rbind(w1, w2)
colnames(X) <- c("Feature1","Feature2","Feature3","Feature4")
cat(" Glimpse of Dataset X: \n")
print(X[1:5, ])
cat("\n Dimension of Dataset: \t Samples:", dim(X)[1], "\t Features:", dim(X)[2])
# Labels: first N rows class 1, next N rows class 2.
true <- c(rep(1, N), rep(2, N))
colvec <- c("deepskyblue3", "orange2")[true]
pchs <- c(22, 24)[true]
pairs(X, col = colvec, pch = pchs)
# Run the two-sample t-test on every feature and rank features by |q|
# (larger |q| => greater class separability).
d <- ncol(X)
q.val <- rep(0, d)
for (i in seq_len(d)) {
  f <- X[, i]
  fw1 <- f[1:N]               # class-1 values
  fw2 <- f[(N + 1):(2 * N)]   # class-2 values
  xbar <- mean(fw1)
  ybar <- mean(fw2)
  # Pooled sd: divide the TOTAL sum of squares by 2N-2 (precedence bug fixed;
  # the original divided only the second sum).
  s <- sqrt((sum((fw1 - xbar)^2) + sum((fw2 - ybar)^2)) / (2 * N - 2))
  q <- (xbar - ybar) / (s * sqrt(2 / N))
  # Two-sided p-value (the original used the lower tail only).
  p.val <- 2 * pt(-abs(q), 2 * N - 2)
  cat("\n\n Test statistic q for Feature ", i, " = ", q, "p-value=", p.val)
  q.val[i] <- q
}
ord <- sort(abs(q.val), decreasing = TRUE, index.return = TRUE)$ix
cat("\n\n Ordering of features based on t-Test=", ord)
# Real data: the built-in iris dataset (4 numeric features, 3 species).
cat("IRIS dataset\n")
print(iris[1:5, ])
X <- iris[, -5]                     # drop the Species column; keep features
class <- as.numeric(iris$Species)   # species factor -> labels 1, 2, 3
cat("\n Samples: ", dim(X)[1], "\t Features: ", dim(X)[2],
    "\t Classes: 3:- ", paste0(levels(iris$Species), collapse = ", "))
colvec <- c("coral3", "darkseagreen3", "darkgoldenrod2")[class]
pchs <- c(22, 23, 24)[class]
pairs(X, col = colvec, pch = pchs)
# Fisher's Discriminant Ratio per feature, summed over all ordered class
# pairs (each unordered pair therefore contributes twice; the ranking is
# unaffected since the factor of two is common to every feature).
nclass <- length(unique(class))
FDR <- rep(0, ncol(X))
for (feat in 1:ncol(X)) {
  vals <- X[, feat]
  acc <- 0
  for (a in 1:nclass) {
    for (b in 1:nclass) {
      if (a != b) {
        va <- vals[which(class == a)]
        vb <- vals[which(class == b)]
        acc <- acc + ((mean(va) - mean(vb))^2) / (var(va) + var(vb))
      }
    }
  }
  cat("\n FDR for Feature ", feat, " = ", acc)
  FDR[feat] <- acc
}
ord <- sort(abs(FDR), decreasing = TRUE, index.return = TRUE)$ix
cat("\n\n Ordering of features based on FDR: \n", paste0(colnames(X)[ord], collapse = ", "))
# Three-class, two-feature dataset: N samples per class with distinct
# means and spreads per class.
N <- 250
w1 <- cbind(rnorm(N, mean = 5, sd = 1.5), rnorm(N, mean = 4, sd = 1.2))  # class 1
w2 <- cbind(rnorm(N, mean = 9, sd = 1.3), rnorm(N, mean = 8, sd = 1.8))  # class 2
w3 <- cbind(rnorm(N, mean = 15, sd = 1), rnorm(N, mean = 1, sd = 1))     # class 3
X <- rbind(w1, w2, w3)
cat(" Glimpse of Dataset X: \n")
print(X[1:5, ])
cat("\n Dimension of Dataset: \t Samples:", dim(X)[1], "\t Features:", dim(X)[2])
# Labels 1, 2, 3 in blocks of N rows each.
true <- rep(1:3, each = N)
colvec <- c("lightpink2", "turquoise2", "darkolivegreen")[true]
pchs <- c(22, 24, 21)[true]
plot(X, col = "black", bg = colvec, pch = pchs,
     xlab = "Feature1", ylab = "Feature2", main = "Scatter plot of Data")
Three classes.
Class-wise prior probability: $P_i = n_i / N$, where $n_i$ is the number of samples in class $\omega_i$.
Class-specific covariance matrix: $\Sigma_i = \operatorname{cov}(\omega_i)$, with class mean vector $\mu_i$.
Global mean vector: $\mu_0 = \sum_i P_i \mu_i$ (the mean over all samples).
# Per-class prior, mean vector, and covariance matrix for dataset X.
# BUG FIX: the labels for this dataset are in `true` (one entry per row of X);
# the original indexed with `class`, which still held the 150 iris labels
# from the previous section, so the wrong rows were selected.
N <- nrow(X)
nclass <- length(unique(true))
mu0 <- colMeans(X)   # global mean vector
S <- list()          # class covariance matrices
Mu <- list()         # class mean vectors
P <- list()          # class prior probabilities
for (i in 1:nclass) {
  wi <- X[which(true == i), ]   # rows belonging to class i
  Ni <- nrow(wi)
  P[[i]] <- Ni / N
  Mu[[i]] <- colMeans(wi)
  S[[i]] <- cov(wi)
  cat("\n\n Class ", i, ": ")
  cat("\n Prior P: ", P[[i]])
  cat("\n Mean Mu: ", Mu[[i]])
  cat("\n Covariance Matrix: \n")
  print(S[[i]])
}
# Accumulate the within-class (Sw) and between-class (Sb) scatter matrices;
# the mixture scatter matrix is their sum, Sm = Sw + Sb.
Sw <- 0
Sb <- 0
for (k in 1:nclass) {
  dev <- Mu[[k]] - mu0                 # class-mean deviation from global mean
  Sw <- Sw + P[[k]] * S[[k]]           # prior-weighted covariance
  Sb <- Sb + P[[k]] * outer(dev, dev)  # prior-weighted mean scatter
}
Sm <- Sw + Sb
cat("\n\n Within-class scatter matrix: \n")
print(Sw)
cat("\n\n Between-class scatter matrix: \n")
print(Sb)
cat("\n\n Mixture scatter matrix: \n")
print(Sm)
$|A|$ denotes the determinant of matrix $A$. The separability criteria are $J_1 = \operatorname{tr}(S_m)/\operatorname{tr}(S_w)$, $J_2 = |S_w^{-1} S_m|$, and $J_3 = \operatorname{tr}(S_w^{-1} S_m)$.
# Scatter-matrix separability criteria (larger => better separated classes).
# Compute Sw^{-1} Sm once via solve(Sw, Sm): a single linear solve is cheaper
# and numerically more stable than forming the explicit inverse and
# multiplying, and the original computed it twice.
SwInvSm <- solve(Sw, Sm)
J1 <- sum(diag(Sm)) / sum(diag(Sw))  # trace(Sm) / trace(Sw)
J2 <- det(SwInvSm)                   # |Sw^{-1} Sm|
J3 <- sum(diag(SwInvSm))             # trace(Sw^{-1} Sm)
cat("\n J1= ",J1,"\n J2= ",J2,"\n J3= ",J3)