# KL divergence in R (provisional).
# Key point: zero-probability cells must be removed before taking logs,
# otherwise the divergence blows up to infinity.
#
# For each variable named in `list1`, compare its empirical distribution
# between the y == 1 and y == 0 subsets of data frame `qb`:
#   - chi-squared goodness-of-fit of the y == 1 counts vs the pooled
#     distribution,
#   - Shannon entropy of each group,
#   - KL(group0 || group1), computed both directly and via
#     entropy::KL.empirical as a cross-check.

# Shannon entropy -sum(p * log(p)) of a count vector's empirical
# distribution. Zero-count cells contribute 0 (the p -> 0 limit of
# p * log(p)). Fixes the original formula, which divided only the log's
# argument -- not the whole power -- by the total count.
shannon_entropy <- function(counts) {
  p <- counts / sum(counts)
  p <- p[p > 0]
  -sum(p * log(p))
}

for (i in seq_along(list1)) {
  list_name <- list1[i]
  v1 <- qb[[list_name]]

  # Tabulate the variable separately for each outcome group.
  aa1 <- table(v1[qb$y == 1])
  aa2 <- table(v1[qb$y == 0])

  # Align both tables on the union of observed levels; levels absent from
  # one group index as NA and are treated as zero counts.
  # (Vectorized replacement for the original inner loop, which also
  # shadowed the outer loop variable `i`.)
  ll <- unique(c(names(aa1), names(aa2)))
  aaa1 <- as.vector(aa1[ll])
  aaa2 <- as.vector(aa2[ll])
  aaa1[is.na(aaa1)] <- 0
  aaa2[is.na(aaa2)] <- 0

  # Drop cells that are empty in BOTH groups: they carry no information
  # and would give chisq.test a zero expected probability.
  prob <- (aaa1 + aaa2) / sum(aaa1 + aaa2)
  keep <- prob != 0
  aaa1 <- aaa1[keep]
  aaa2 <- aaa2[keep]
  prob <- prob[keep]

  entropy1 <- shannon_entropy(aaa1)
  entropy2 <- shannon_entropy(aaa2)

  # KL(group0 || group1): keep only cells where group1 has support
  # (plus cells empty in both, which contribute nothing); a cell with
  # q > 0 but p == 0 would make the divergence infinite -- this is the
  # "remove the zeros" point noted in the header.
  support <- (aaa1 == 0 & aaa2 == 0) | aaa1 != 0
  aaa1b <- aaa1[support]
  aaa2b <- aaa2[support]
  q <- aaa2b / sum(aaa2b)
  p <- aaa1b / sum(aaa1b)
  info1 <- sum(ifelse(q > 0, q * log(q / p), 0))
  # Cross-check against entropy::KL.empirical on the raw counts.
  info2 <- KL.empirical(aaa2b, aaa1b)

  cat(list_name, "\n")
  # Goodness-of-fit of the y == 1 counts against the pooled distribution.
  str <- chisq.test(aaa1, p = prob)
  cat(str$p.value, "\n")
  cat("ent1", entropy1, "\n")
  cat("ent2", entropy2, "\n")
  cat("info1", info1, "\n")
  cat("info2", info2, "\n")
}