# KL divergence in R (draft)
# Key point: zero-probability cells must be excluded, otherwise the divergence blows up
# For each variable named in list1, compare its distribution between the
# y == 1 and y == 0 rows of data frame `qb`.  (Loop body continues below;
# the closing brace is outside this excerpt.)
# NOTE(review): `1:length(list1)` yields c(1, 0) when list1 is empty;
# seq_along(list1) would be safer.
for(i in 1:length(list1)){
# Current variable name and its column (presumably qb is a data frame
# and list1 holds column names — verify against the caller).
list_name<-list1[i]
v1<-qb[[list_name]]
# Split the column values by the binary outcome y.
a1<-v1[qb$y==1]
a2<-v1[qb$y==0]
# Frequency tables of the observed values within each group.
aa1<-table(a1)
aa2<-table(a2)
# Union of the levels seen in either group, so both count vectors can be
# aligned on the same support.
ll<-unique(c(names(aa1),names(aa2)))
# Accumulators for the aligned counts (filled element-by-element below).
aaa1<-NULL
aaa2<-NULL
# Align both per-group count tables on the shared level set `ll`.
# Indexing a table by an absent name yields NA, which is converted to a
# true zero count further down.
# Fix: use a dedicated index `j` instead of reusing the outer loop
# variable `i` (shadowing the outer index is fragile and confusing), and
# seq_along() instead of the 1:length() footgun (1:length(ll) yields
# c(1, 0) when ll is empty).
for(j in seq_along(ll)){
aaa1[j]<-aa1[ll[j]]
aaa2[j]<-aa2[ll[j]]
}
# Drop table attributes/names and keep plain count vectors.
aaa1<-as.vector(aaa1)
aaa2<-as.vector(aaa2)
# Levels missing from one group produced NA above; they are genuine
# zero counts.
aaa1[is.na(aaa1)]<-0
aaa2[is.na(aaa2)]<-0
#aaa2[is.na(aaa2)]<-0
#m<-matrix(c(aaa1,aaa2),nrow=2)
# Pooled empirical distribution over the union of levels; used as the
# reference distribution for the chi-squared test below (chisq.test
# requires all p > 0).
prob<-(aaa1+aaa2)/sum(aaa1+aaa2)
# Guard: drop any level with zero pooled probability.  NOTE(review):
# every level in `ll` was observed in at least one group, so prob should
# never be 0 here — this filter looks defensive; confirm.
aaa1<-aaa1[which(prob!=0)]
aaa2<-aaa2[which(prob!=0)]
prob<-prob[prob!=0]
# Shannon entropy of each group's empirical distribution,
# H = -sum(p * log(p)) with p = counts/total, written via the identity
# p*log(p) = log(p^p) so that zero counts contribute 0 (0^0 == 1 in R).
# Fix: the original divided by the total OUTSIDE the power,
#   -sum(log(c^(c/N)/N)) = -sum((c/N)*log(c)) + length(c)*log(N),
# which adds log(N) once PER LEVEL instead of once overall and is not
# the entropy.  Normalize to probabilities before exponentiating.
entropy1<--sum(log((aaa1/sum(aaa1))^(aaa1/sum(aaa1))))
entropy2<--sum(log((aaa2/sum(aaa2))^(aaa2/sum(aaa2))))
# KL divergence KL(q || p) with q = group-0 counts (aaa2) and
# p = group-1 counts (aaa1).  Keep only levels where p != 0, or where
# both counts are zero (those contribute 0 via 0^0 == 1); excluding the
# p == 0, q != 0 levels avoids an infinite divergence.
# NOTE(review): those excluded levels are silently discarded from the
# divergence rather than reported — confirm that is intended.
aaa2b<-aaa2[(aaa1==0 & aaa2==0 )| (aaa1!=0)]
aaa1b<-aaa1[(aaa1==0 & aaa2==0) | (aaa1!=0)]
# v1 = q*log(q) and v2 = q*log(p), both via the log(x^w) = w*log(x)
# trick so zero-q terms vanish instead of producing -Inf.
v1<-log(((aaa2b/sum(aaa2b))^(aaa2b/sum(aaa2b))))
v2<-log(((aaa1b/sum(aaa1b))^(aaa2b/sum(aaa2b))))
# info1 = sum(q*log(q/p)) = KL(q || p), computed by hand.
info1<- sum(v1-v2)
# Cross-check with KL.empirical() (entropy package), passing raw counts
# in what is presumably the same (q, p) order — verify against the
# package documentation.
info2<-KL.empirical(aaa2b,aaa1b)
#chisq.test()
cat(list_name,"\n")
# Chi-squared goodness-of-fit of the group-1 counts against the pooled
# distribution `prob`.  NOTE(review): `str` shadows base R's str();
# a name like `test_res` would be safer.
str<-chisq.test(aaa1,p=prob)
cat(str$p.value,"\n")
cat("ent1",entropy1,"\n")
cat("ent2",entropy2,"\n")
cat("info1",info1,"\n")
cat("info2",info2,"\n")