2016-11-29 73 views
0

我使用附带的代码集成版本Fitzhugh-Nagumo型号:的Python:加快我的龙格 - 库塔集成代码挑战

In [8]: import cProfile 

In [9]: %timeit integrate(fhn_rhs,np.stack((0.1,0.2)),np.linspace(0,100,1000),args=(P,)) 
10 loops, best of 3: 36.4 ms per loop 

In [10]: %timeit odeint(fhn_rhs,np.stack((0.1,0.2)),np.linspace(0,100,1000),args=(P,)) 
100 loops, best of 3: 3.45 ms per loop 

In [11]: cProfile.run('integrate(fhn_rhs,np.stack((0.1,0.2)),np.linspace(0,100,1000),args=(P,))') 
     45972 function calls in 0.098 seconds 

    Ordered by: standard name 

    ncalls tottime percall cumtime percall filename:lineno(function) 
     1 0.000 0.000 0.098 0.098 <string>:1(<module>) 
    3996 0.011 0.000 0.078 0.000 fhn.py:20(fhn_rhs) 
     1 0.002 0.002 0.097 0.097 fhn.py:42(integrate) 
     999 0.016 0.000 0.094 0.000 fhn.py:52(RK4step) 
     1 0.000 0.000 0.000 0.000 function_base.py:9(linspace) 
    7994 0.011 0.000 0.021 0.000 numeric.py:484(asanyarray) 
    3997 0.029 0.000 0.067 0.000 shape_base.py:282(stack) 
    11991 0.008 0.000 0.008 0.000 shape_base.py:337(<genexpr>) 
    3997 0.002 0.000 0.002 0.000 {len} 
     999 0.001 0.000 0.001 0.000 {method 'append' of 'list' objects} 
     1 0.000 0.000 0.000 0.000 {method 'astype' of 'numpy.ndarray' objects} 
     1 0.000 0.000 0.000 0.000 {method 'disable' of '_lsprof.Profiler' objects} 
     1 0.000 0.000 0.000 0.000 {numpy.core.multiarray.arange} 
    7995 0.010 0.000 0.010 0.000 {numpy.core.multiarray.array} 
    3997 0.006 0.000 0.006 0.000 {numpy.core.multiarray.concatenate} 
     1 0.000 0.000 0.000 0.000 {numpy.core.multiarray.result_type} 

from scipy.integrate import odeint 
import numpy as np 
import time 

P = {'epsilon':0.1, 
    'a1':1.0, 
    'a2':1.0, 
    'b':2.0, 
    'c':0.2} 

def fhn_rhs(V,t,P): 
    u,v = V[0],V[1] 
    u_t = u - u**3 - v 
    v_t = P['epsilon']*(u - P['b']*v - P['c']) 
    return np.stack((u_t,v_t)) 

def integrate(func,V0,t,args,step='RK4'): 
    start = time.clock() 
    P = args[0] 
    result=[V0] 
    for i,tmp in enumerate(t[1:]): 
     result.append(RK4step(func,result[i],tmp,P,(t[i+1]-t[i]))) 
    print "Integration took ",time.clock() - start, " s" 
    return np.array(result) 


def RK4step(rhs,V,t,P,dt): 
    k_1 = dt*rhs(V,t,P) 
    k_2 = dt*rhs((V+(1.0/2.0)*k_1),t,P) 
    k_3 = dt*rhs((V+(1.0/2.0)*k_2),t,P) 
    k_4 = dt*rhs((V+k_3),t,P) 
    return V+(1.0/6.0)*k_1+(1.0/3.0)*k_2+(1.0/3.0)*k_3+(1.0/6.0)*k_4 

之间我integratescipy.integrate.odeint提供了以下比较

关于如何让我的代码运行速度更快的建议?我能以某种方式使用numba来加速它吗?

+1

一般最慢的操作控制台和文件输出。在使用'timeit'之前你删除了它们吗? – LutzL

+0

是的,我在比较 – Ohm

+0

之前删除了控制台输出。然后就像我写的一样,差别太大了。编译与解释,隐式多步与显式一步,自适应与固定步长。 - 下一步可能是将该方法更改为嵌入式RK方法,如Runge-Kutta-Fehlberg或Dormand-Price。使用RK4,您可以通过将大小为'h'的两个步骤与大小为'2h'的一个并行步骤结合并使用Richardson外推法来模拟嵌入式方法。 – LutzL

回答

2

你没有比较相同的东西。若要查看odeint实际评估ODE函数的哪些点,请输入print t语句(当然,不计时)。并且通常具有自适应时间步骤的方法产生积分样本的稀疏列表并从其中内插期望的输出。

您将不得不使用RK4方法的错误估计器,并基于该错误估计器复制此自适应方案。


当然,并且使用矢量对象将永远不会与lsoda的已编译代码FORTRAN其执行期间用简单的阵列从odeint称为竞争性解释Python代码。


用于在自适应步长方式,采用与RK4插值的一个例子:

def RK4Step(f, x, y, h, k1): 
    k2=f(x+0.5*h, y+0.5*h*k1) 
    k3=f(x+0.5*h, y+0.5*h*k2) 
    k4=f(x+ h, y+ h*k3) 
    return (k1+2*(k2+k3)+k4)/6.0 

def RK4TwoStep(f, x, y, h, k1): 
    step1 = RK4Step(f, x , y , 0.5*h, k1  ) 
    x1, y1 = x+0.5*h, y+0.5*h*step1; 
    step2 = RK4Step(f, x1, y1, 0.5*h, f(x1, y1)) 
    return (step1+step2)/2 

def RK4odeint(fin,times,y0, tol): 
    # numpy-ify the inputs 
    f = lambda t,y : np.array(fin(t,y)) 
    y0 = np.array(y0) 
    # allocate output structure 
    yout = np.zeros_like([y0]*times.shape[0]); 
    yout[0] = y0; 
    # initialize integrator variables 
    h = times[1]-times[0]; 
    hmax = abs(times[-1]-times[0]); 

    # last and current point of the numerical integration 
    ycurr = ylast = qcurr = qlast = y0; 
    tcurr = tlast = times[0]; 
    fcurr = flast = f(tcurr, ycurr); 
    totalerr = 0.0 
    totalvar = 0.0 
    for i, t in enumerate(times[1:]): 
     # remember that t == t[i+1], result goes to yout[i+1] 
     while (t-tcurr)*h>0: 
      # advance the integration     
      k1, k2 = RK4Step(f,tcurr,ycurr,h, fcurr), RK4TwoStep(f,tcurr,ycurr,h, fcurr); 
      # RK4 is of fourth order, that is, 
      # k1 = (y(x+h)-y(x))/h + C*h^4 
      # k2 = (y(x+h)-y(x))/h + C*h^4/16 
      # Using the double step k2 gives 
      # C*h^4/16 = (k2-k1)/15 as local error density 
      # change h to match the global relative error density tol 
      # use |k2| as scale for the absolute error 
      # |k1-k2|/15*hfac^4 = tol*|k2|, h <- h*hfac 

      scale = max(abs(k2)) 
      steperr = max(abs(k1-k2))/2 
      # compute the ideal step size factor and sanitize the result to prevent ridiculous changes 
      hfac = ( tol*scale/(1e-16+steperr) )**0.25 
      hfac = min(10, max(0.01, hfac)) 

      # repeat the step if there is a significant step size correction 
      if (abs(h*hfac)<hmax and (0.6 > hfac or hfac > 3)): 
       # recompute with new step size 
       h *= hfac; 
       k2 = RK4TwoStep(f, tcurr, ycurr, h, fcurr) ; 
      # update and cycle the integration points 
      ylast = ycurr; ycurr = ycurr + h*k2; 
      tlast = tcurr; tcurr += h; 
      flast = fcurr; fcurr = f(tcurr, ycurr); 
      # cubic Bezier control points 
      qlast = ylast + (tcurr-tlast)/3*flast; 
      qcurr = ycurr - (tcurr-tlast)/3*fcurr; 

      totalvar += h*scale; 
      totalerr = (1+h*scale)*totalerr + h*steperr; 
      reportstr = "internal step to t=%12.8f \t" % tcurr; 

     #now tlast <= t <= tcurr, can interpolate the value for yout[i+1] using the cubic Bezier formula 
     s = (t - tlast)/(tcurr - tlast); 
     yout[i+1] = (1-s)**2*((1-s)*ylast + 3*s*qlast) + s**2*(3*(1-s)*qcurr + s*ycurr) 

    return np.array(yout)