scalaとRの速度比較

タスク:

ランダムウォークを2つ発生させて,それらをいくつかの時間スケールでbox平均をとり,
各時間スケールでグレンジャー因果検定をし,すべての時間スケールでもっとも小さい値を調べる.
上記を100回繰り返す.

  • 計測結果

R 369.96秒
scala 6.655秒

約55倍scalaのが速い.

import scala.util.Random
import scala.math._
import org.apache.commons.math3.stat.regression.OLSMultipleLinearRegression;
import org.apache.commons.math3.distribution.FDistribution;
import scala.io.Source
import org.apache.commons.math3.stat.correlation.SpearmansCorrelation;

object GrangerTest2{

        var reg1=new OLSMultipleLinearRegression();
        var reg2=new OLSMultipleLinearRegression();

        var corcor= new SpearmansCorrelation();
        def granger_func(x:Array[Double], y:Array[Double],p:Int=1):Array[Double]={
                var n=x.length
                var x1=Array.ofDim[Double](n-p,p);
                var x2=Array.ofDim[Double](n-p,2*p);
                var y1=y.slice(p,n)

                (0 to (p-1)).foreach{i=>

                        ((0) to (n-p-1)).foreach{j=>


                                x1(j)(i)=y(j+i)

                                x2(j)(2*i)=y(j+i)

                                x2(j)(2*i+1)=x(j+i)
                        }
                }


                reg1.newSampleData(y1,x1)
                reg2.newSampleData(y1,x2)

                var resi1=reg1.estimateResiduals();
                var resi2=reg2.estimateResiduals();
                var ssqU=resi2.map{a=> a*a}.sum;
                var ssqR=resi1.map{a=> a*a}.sum;
                //println(ssqR,ssqU)
                var ftest=((ssqR-ssqU)/p)/(ssqU/(y1.length-2*p-1))
                var f=new FDistribution(p,y1.length-2*p-1);
                var pvalue=1-f.cumulativeProbability(ftest);
                //println("ftest:"+ftest.toString," pvalue:"+pvalue.toString);
                Array(ftest,pvalue)


        }

        def diff_boxmean(x:Array[Double],n:Int=2):Array[Double]={


                var y=List[Double]();
                var k:Int=0;
                var nn=n-1;
                var sum=0.0;

                x.foreach{ a =>
                        k match{
                                case k if k%n==n-1 => sum=sum+a/n;y=sum::y;sum=0;
                                case _ => sum=sum+a/n;
                        }

                        k=k+1;
                }
               var a2=y.reverse
                var tmp=a2(0);
                a2.map{b=> var c=b-tmp;tmp=b;c}.slice(1,a2.length).toArray

                //Array(0.1,0.1)
        }



         def main(args: Array[String]){
                //var x=Array(1.0,2.0,3.0,4.0,5.0,4.0,5.0)
                //var y=Array(1.0,2.0,3.0,4.0,6.0,9.0,3.0)
                var f=Source.fromFile("test.txt","utf-8").getLines.toArray

                var x=f.map{s=> s.split(" ")(0).toDouble}
                var y=f.map{s=> s.split(" ")(1).toDouble}


                (0 to 100-1).foreach{i=>
                        var tmp=0.0;
                        var r1=(0 to 2157).map{b=> tmp=tmp+Random.nextGaussian();tmp}

                        tmp=0.0;
                        var r2=(0 to 2157).map{b=> tmp=tmp+Random.nextGaussian();tmp}

                        //r1.foreach{println}
                        var mins=(0 to 199).map{j=>
                                var rr1=diff_boxmean(r1.toArray,j+1)
                                var rr2=diff_boxmean(r2.toArray,j+1)
                                var j2=granger_func(rr1,rr2,1);

                                j2(1)
                                //println(j2(1));
                        }.slice(10,81).min

                        println(i.toString+" "+mins.toString);

                }
        }





}


  • R
box_median<-function(x,merge=30){

  dd<-seq(1,length(x),by=merge)
  s1<-dd[1:(length(dd)-1)]
  f1<-dd[2:length(dd)]-1
  m1<-0
  for(i in 1:length(s1)){
   #m1[i]<-median(x[s1[i]:f1[i]])
   m1[i]<-mean(x[s1[i]:f1[i]])


   }
m1

}



qqq2<-0
a2b<-0

for(jj in 1:100){
j1<-cumsum(rnorm(length(j1)))
j2<-cumsum(rnorm(length(j2)))
for(i in 1:200){
  jj1<-diff(box_median(j2,merge=i))
   jj2<-diff(box_median(j1,merge=i))

 qqq2[i]<-grangertest((jj1),(jj2),order=1)[,4][[2]]

}
   
  a2b[jj]<-min(qqq2[10:80])
   cat(jj,"\n")
}