Does variance maximization always work?
N=250
Ca<-cbind(rnorm(N,mean=5,sd=6.85),rnorm(N,mean=8,sd=0.3))
Cb<-cbind(rnorm(N,mean=11,sd=7.5),rnorm(N,mean=2,sd=0.6))
X<-rbind(Ca,Cb)
Theta=0.26
R=matrix(c(cos(Theta),-sin(Theta),sin(Theta),cos(Theta)),2,2)
X=X%*%R
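Note that matrix() fills column-major, so $R = \begin{pmatrix}\cos\theta & \sin\theta \\ -\sin\theta & \cos\theta\end{pmatrix}$; right-multiplying the row vectors of X by this matrix rotates every sample counter-clockwise by $\theta = 0.26$ rad.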
cat(" Glimpse of Dataset X: \n")
print(X[1:5,])
cat("\n Dimension of Dataset: \t Samples:",dim(X)[1],"\t Features:",dim(X)[2])
class=c(rep(1,N),rep(2,N))
colvec = c("darkseagreen3","coral3")[class]
pchs= c(24,22)[class]
plot(X,col="black",bg=colvec,pch=pchs,xlab="Feature1",ylab="Feature2",main="Scatter plot of Data")
pc=prcomp(X, center=TRUE, scale=FALSE, retx=TRUE)
cat("\n Directions/Eigenvectors: \n")
print(pc$rotation)
cat("\n Principal Components: \n")
print(pc$x[1:5,])
cat("\n Variance along principal components: ")
cat(pc$sdev^2)
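These variances are simply the eigenvalues of the sample covariance matrix, which is easy to confirm:
# pc$sdev^2 should match eigen(cov(X))$values (both use the n-1 convention)
cat("\n Eigenvalues of cov(X):", eigen(cov(X))$values, "\n")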
xm=min(X[,1])
ym=min(X[,2])
ProjPC1=cbind(pc$x[,1],rep(ym,nrow(X)))
plot(X,col="black",bg=colvec,pch=pchs,xlab="Feature1",ylab="Feature2",main="Projection of Data along PC1")
points(ProjPC1,col="black",bg=colvec,pch=pchs)
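For contrast, a minimal sketch reusing pc from above: projecting onto PC2, the minimum-variance direction, keeps the two classes well apart, so the direction of maximum variance is not necessarily the most discriminative one.
# Project onto PC2 instead: the low-variance direction separates the classes
ProjPC2=cbind(pc$x[,2],rep(ym,nrow(X)))
plot(X,col="black",bg=colvec,pch=pchs,xlab="Feature1",ylab="Feature2",main="Projection of Data along PC2")
points(ProjPC2,col="black",bg=colvec,pch=pchs)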
Project onto a lower dimension that is a linear combination of the original features and also retains class information.
LDA Objective
Find a direction $v$, a linear combination of the original features, that maximizes class separability of the projected data.
Projection of sample $x_i$: $y_i = v^\top x_i$.
Let $\mu_1, \mu_2$ be the class means and $S_1, S_2$ the class covariance matrices; define the within-class scatter $S_w = S_1 + S_2$ and the between-class scatter $S_b = (\mu_1 - \mu_2)(\mu_1 - \mu_2)^\top$.
Maximize the Fisher Discriminant Ratio (FDR) on the projected data:
$$ J(v) = \frac{v^\top S_b v}{v^\top S_w v} $$
Solution: $v$ is given by the eigenvector of $S_w^{-1} S_b$ corresponding to the largest eigenvalue.
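Why an eigenvector? Setting the gradient of $J(v)$ to zero supplies the missing step:
$$ \frac{\partial J}{\partial v} = \frac{2\,S_b v\,(v^\top S_w v) - 2\,S_w v\,(v^\top S_b v)}{(v^\top S_w v)^2} = 0 \;\Longrightarrow\; S_b v = J(v)\, S_w v \;\Longrightarrow\; S_w^{-1} S_b\, v = J(v)\, v $$
so every stationary $v$ is an eigenvector of $S_w^{-1} S_b$ with eigenvalue $J(v)$, and the eigenvector with the largest eigenvalue maximizes the ratio.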
N=nrow(X)
nclass=length(unique(class))
mu0<-colMeans(X)
S<-list()
Mu<-list()
Sw=0
for(i in 1:nclass)
{
wi=X[which(class==i),]
Ni=nrow(wi)
Mu[[i]]<-colMeans(wi)
S[[i]]<-cov(wi)
cat("\n\n Class ",i,": ")
cat("\n Mean Mu: ",Mu[[i]])
cat("\n Covariance Matrix: \n")
print(S[[i]])
Sw=Sw+S[[i]]
}
MuDiff=Mu[[1]]-Mu[[2]]
Sb=outer(MuDiff,MuDiff)
cat("\n Within cluster scatter Sw: \n")
print(Sw)
cat("\n Between cluster scatter Sb: \n")
print(Sb)
J=solve(Sw)%*%Sb
eigJ=eigen(J)
cat(" Eigenvalues of J: ",eigJ$values)
cat("\n Eigenvectors of J: \n\n")
print(eigJ$vectors)
v=eigJ$vectors[,1,drop=FALSE]
cat("\n LDA direction v= \n")
print(v)
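As a cross-check, a sketch assuming the MASS package is available: MASS::lda normalizes its discriminant differently, so the two directions should agree only up to sign and scale.
library(MASS)   # assumes MASS is installed
fit <- lda(X, grouping = factor(class))
cat("\n MASS::lda scaling (first discriminant):\n")
print(fit$scaling[,1])
cat("\n Elementwise ratio v / scaling (roughly constant if the directions match):\n")
print(v[,1] / fit$scaling[,1])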
Eigenvalues of $J = S_w^{-1} S_b$
In the two-class case $S_b = (\mu_1 - \mu_2)(\mu_1 - \mu_2)^\top$, where $\mu_1, \mu_2$ are the class means.
Rank of $S_b$ is 1 (outer product of two vectors).
Rank of $S_w^{-1} S_b$ is also at most 1, so at most 1 eigenvalue of $J$ will be non-zero.
Between-class scatter matrix for $c$ classes: $S_b = \sum_{i=1}^{c} P_i (\mu_i - \mu_0)(\mu_i - \mu_0)^\top$.
Additional constraint on the global mean: $\mu_0 = \sum_{i=1}^{c} P_i \mu_i$, so the centered means $\mu_i - \mu_0$ are linearly dependent.
Rank of $S_b$ is at most $c - 1$, so at most $c - 1$ eigenvalues of $J$ will be non-zero.
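A quick numerical check of this rank argument on the two-class example above (Sb and J are still in memory):
# Sb is an outer product of two vectors, so both it and J have rank 1
cat("\n rank(Sb) =", qr(Sb)$rank, "\t rank(J) =", qr(J)$rank, "\n")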
First LDA component
y=X%*%v
cat("\n LDA projection y= \n ")
cat(y[1:8],sep="\n")
ldaMat=cbind(y,rep(0,nrow(X)))
plot(ldaMat,col="black",bg=colvec,pch=pchs,ylab="",xlab="",main="LDA projection")
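To quantify how much better this direction is than the maximum-variance one, a small sketch (the helper fdr below is ad hoc, not part of the notes) comparing the Fisher ratio along PC1 with the ratio along the LDA direction:
# Fisher ratio of a 1-D projection z: (gap between class means)^2 / (sum of class variances)
fdr <- function(z, cls) {
  m <- tapply(z, cls, mean)
  s2 <- tapply(z, cls, var)
  as.numeric((m[1] - m[2])^2 / (s2[1] + s2[2]))
}
cat("\n FDR along PC1:", fdr(pc$x[,1], class),
    "\n FDR along LDA direction:", fdr(y[,1], class), "\n")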
cat("IRIS dataset\n")
head(iris)
X<-iris[,-5]
class=as.numeric(iris$Species)
cat("\n Samples: ",dim(X)[1],"\t Features: ",dim(X)[2],"\t Classes: ",levels(iris$Species))
colvec = c("coral3","darkseagreen3","darkgoldenrod2")[class]
pchs= c(22,23,24)[class]
pairs(X, col=colvec, pch=pchs)
N=nrow(X)
nclass=length(unique(class))
mu0<-colMeans(X)
cat("\n Global mean: ", mu0)
S<-list()
Mu<-list()
P<-list()
for(i in 1:nclass)
{
wi=X[which(class==i),]
Ni=nrow(wi)
P[[i]]<-Ni/N
Mu[[i]]<-colMeans(wi)
S[[i]]<-cov(wi)
cat("\n\n Class ",i,": ")
cat("\n Prior P: ",P[[i]])
cat("\n Mean Mu: ",Mu[[i]])
cat("\n Covariance Matrix: \n")
print(S[[i]])
}
Sw=0
Sb=0
for(i in 1:nclass)
{
mui0=Mu[[i]]-mu0
Sw=Sw+ P[[i]]*S[[i]]
Sb=Sb+ P[[i]]*outer(mui0,mui0)
}
J=solve(Sw)%*%Sb
eigJ=eigen(J)
cat(" Eigenvalues of J: ",eigJ$values)
cat("\n Eigenvectors of J: \n\n")
print(eigJ$vectors)
v=eigJ$vectors[,1:2,drop=FALSE]
cat("\n dim X= ",dim(X)," dim v= ",dim(v))
y=as.matrix(X)%*%v
plot(y,col="black",bg=colvec,pch=pchs,xlab="LDA component 1",ylab="LDA component 2",main="LDA projection")
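The rank bound can be verified on iris: with $c = 3$ classes, at most two eigenvalues of $J$ should be non-zero. Re() is a precaution here, since eigen() on the non-symmetric J can return values with tiny imaginary parts.
# Expect at most c - 1 = 2 non-negligible eigenvalues for 3 classes
ev <- Re(eigJ$values)
cat("\n Non-negligible eigenvalues (>1e-8):", sum(abs(ev) > 1e-8), "of", length(ev), "\n")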
library(mlbench)
data(Ionosphere)
Dataset<-Ionosphere
cat("\n Predict the onset of diabetes in female Pima Indians from medical record data.")
cat("\n Dimension of dataset: ",dim(Dataset))
cat("\n Classes: ",levels(Dataset$Class))
head(Dataset)
class=as.numeric(Dataset$Class)
X<-Dataset[,-c(1,2,ncol(Dataset))]   # drop V1 (a factor), V2 (constant zero, would make Sw singular) and the Class column
X<-as.matrix(as.data.frame(lapply(X, as.numeric)))
colvec = c("cyan3","plum3")[class]
pchs= c(22,24)[class]
pairs(X[,1:4], col=colvec, pch=pchs)
show=5
N=nrow(X)
nclass=length(unique(class))
mu0<-colMeans(X)
cat("\n Global mean: ", mu0[1:show])
S<-list()
Mu<-list()
P<-list()
for(i in 1:nclass)
{
wi=X[which(class==i),]
Ni=nrow(wi)
P[[i]]<-Ni/N
Mu[[i]]<-colMeans(wi)
S[[i]]<-cov(wi)
cat("\n\n Class ",i,": ")
cat("\n Prior P: ",P[[i]])
cat("\n Mean Mu: ",Mu[[i]][1:show])
cat("\n Covariance Matrix: \n")
print(S[[i]][1:show,1:show])
}
Sw=0
Sb=0
for(i in 1:nclass)
{
mui0=Mu[[i]]-mu0
Sw=Sw+ P[[i]]*S[[i]]
Sb=Sb+ P[[i]]*outer(mui0,mui0)
}
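Before inverting Sw it is worth confirming it is well-conditioned; dropping the constant V2 column earlier is what makes the inversion possible at all. A quick check:
# kappa() estimates the condition number; a huge value would make solve(Sw) unreliable
cat("\n Condition number of Sw:", kappa(Sw), "\n")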
J=solve(Sw)%*%Sb
eigJ=eigen(J)
cat(" Eigenvalues of J: \n",eigJ$values[1:show])
cat("\n Eigenvectors of J: \n\n")
print(eigJ$vectors[1:show,1:show])
v=eigJ$vectors[,1:2,drop=FALSE]
cat("\n dim X= ",dim(X)," dim v= ",dim(v))
y=as.matrix(X)%*%v
plot(y,col="black",bg=colvec,pch=pchs,xlab="LDA component 1",ylab="LDA component 2",main="LDA projection")
# For comparison: use the between-class scatter Sb alone, ignoring the within-class scatter Sw
J=Sb
eigJ=eigen(J)
cat(" Eigenvalues of J: \n",eigJ$values[1:show])
cat("\n Eigenvectors of J: \n\n")
print(eigJ$vectors[1:show,1:show])
v=eigJ$vectors[,1:2,drop=FALSE]
cat("\n dim X= ",dim(X)," dim v= ",dim(v))
y=as.matrix(X)%*%v
plot(y,col="black",bg=colvec,pch=pchs,xlab="component 1",ylab="component 2",main="Projection using Sb only")
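To compare the two projections numerically rather than visually, a sketch (the sep helper is ad hoc) that recomputes both leading directions and their Fisher ratios; the Sw-aware direction should separate the classes at least as well:
# Leading direction of each criterion; Re() guards against tiny imaginary parts
vW <- Re(eigen(solve(Sw)%*%Sb)$vectors[,1])   # full LDA: Sw^{-1} Sb
vB <- Re(eigen(Sb)$vectors[,1])               # between-class scatter only
sep <- function(z, cls) {
  m <- tapply(z, cls, mean)
  s2 <- tapply(z, cls, var)
  as.numeric((m[1] - m[2])^2 / (s2[1] + s2[2]))
}
cat("\n FDR, Sw^{-1}Sb direction:", sep(drop(X %*% vW), class),
    "\n FDR, Sb-only direction:", sep(drop(X %*% vB), class), "\n")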