2016-04-15 97 views
3

我的问题不在算法本身,而是在F#推到极限时的性能。在这个例子中,我试图用蛮力解决25个城市的问题(动态编程)F#旅行推销员的表现

(该算法似乎对一个小例子很好,我相信它能完成它的工作)

我在控制台输出窗口中打印一个计数变量来监视进度。 与第一次达到m=12迭代期望的一样,运行时间呈指数增长。

m=2 
1887ms; 
m=3 
1870ms; 
m=4 
1902ms; 
m=5 
2074ms; 
m=6 
2954ms; 
m=7 
6261ms; 
m=8 
16746ms; 
m=9 
38442ms; 
m=10 
80396ms; 
m=11 
140985ms; 
m=12 
207950ms; 

所以虽然第13次迭代之前的表现并不是很好,至少它看起来是“可管理的”。实际上,如果我的理解是正确的,那么迭代12和13是CPU最重的。尽管如此,我还是会从执行模式中预期,该迭代的执行时间大约为300000ms,而不是这种情况。

放大在MacOS X上运行的我的(新)iMAC Retina的控制显示器10.11.3 El Capitan配备了3.3Ghz i7四核,16 GB RAM和一个SSD,运行在Xamarin.Studio 6.0 。我看到该程序的内存使用量是一个很大的3 GB。它没有被优化,但它应该在机器的能力范围之内,并且不应该成为负担?

m=13非常非常非常非常缓慢的进展,按照速度,似乎需要几个小时来计算。在这个阶段,在显示器的CPU侧,它表示该进程使用CPU的105%(左侧的第一列)。 1小时(和完成迭代的2/3)后坠毁以下消息:

Error: Garbage collector could not allocate 16384 bytes of memory for major heap section.

我有点惊讶,我需要做垃圾回收,因为内存看起来并不像的主要问题?

我正在与2^24项和24列= 17M限定巨大Array2D *使用32 + 8 = 40位= 5个字节每个所以2 GB

也许这就是(FLOAT32 *为sbyte)的24项问题在于我做了一个for S in Subset_of_size_m循环?

这在迭代13(停在1,860,000)时必须完成2,700,000次,在这个循环中我做了一些列表的分配。那里可能需要垃圾收集?我在F#中从来没有做过这样的事情(只有在R的地方,它可以随时编写命令GC()remove(object))。在控制监视器中进行监视,与可用RAM相比,内存似乎永远不成问题?发生了什么?

我知道我也许应该找到一个更好的算法,它的内存密集程度较低,但无论如何它是学习这个问题的好机会,并且与其他解决问题的人相比,按计划,第一次(野蛮)尝试并不是完全荒谬的。

这里是源代码

//////// Travelling Salesman problem //////// 


open System 
open System.Collections 
open System.Collections.Generic 
open System.IO 

open System.Windows 

open FSharp.Charting 

exception InnerError of string 


let stopWatch = System.Diagnostics.Stopwatch.StartNew() 

///////////////// preparing the data ///////////////// 

// format of the files 
//[number_of_cities] 
//[x_1] [y_1] // coordinate 

let x = File.ReadAllLines "/Users/francois-guillaume.rideau/Documents/MOOC/TSP.txt" 

let split (text:string)= 
    text.Split [|'\t';' '|] 

let splitInto2Values (A: string []) = 
    (float A.[0],float A.[1]) 

let parseLine (line:string) = 
    line 
    |> split 
    |> splitInto2Values 



let num_cities = int x.[0] // 25 

let cities = x |> Array.tail |> Array.map parseLine // [x_1][y_1] 

let dist_matrix = Array2D.create num_cities num_cities 0.0f 
for i in 0..(num_cities-1)do 
    for j in 0..(num_cities-1) do 
     dist_matrix.[i,j]<- float32 (sqrt ((fst cities.[i] - fst cities.[j])*(fst cities.[i] - fst cities.[j]) + (snd cities.[i] - snd cities.[j])*(snd cities.[i] - snd cities.[j]))) 

let arrayOfArrays = [| [| 0.0f; 2.0f;1.0f;4.0f |]; [|2.0f;0.0f; 3.0f;5.0f |]; [|1.0f;3.0f; 0.0f;6.0f |]; [|4.0f;5.0f; 6.0f;0.0f |] |] 
let example = Array2D.init 4 4 (fun i j -> arrayOfArrays.[i].[j]) 

// Dynamic programming algorithm 

// Subproblems: 
// For every destination j in {1,2,......n} every subset S in {1,2,....n} that contains 1 and j, let 
// A(S,j) = minimum length of a path of a path from 1 to j that visits precisely the vertices of S [exactly once each] 

// create A = Array2D indexed by subsets S that contain 1 and destinations j 
// Base Case A[S,1] = 0 if S = {1} , +infinity otherwise 
// for m = 2,3,4,.... n (m = subproblem size) 
//  for each Set S in {1,2,...n} of size m that contains 1 
//   for each j in S, j different from 1: 
//    A[S,j] = min (k in S, k <> j) {A[S-{j},k]+Ckj} 
// Return min (j=2 to n) A[{1,2,3,....,n},j]+Cj1 

let limit = 100000000.0f 

//// the first city is city 0 in array D. we put it apart, 
//// then we represents S as integers. 
//// we take the binary representation of integer, and the pth bit indicates whether city p+1 belongs to S 
//// for example S =11 = 1+2+8 contains cities 2,3 and 9 (members 11 will return [(0, 1); (1, 2); (3, 8)]) 

/////////////////////////////// with path /////////////////////////////////////////// 

let TSP_all_c_Dynamic_Programming_with_path_main(D:float32 [,]) = // solves the TSP problem when ALL cities are connected together with an exhaustive search in exponential time O(n^2 2^n) 
                // the input is a distance matrix in float32 
                // memory usage is not optimized at all.... 

    let num_cities = Array2D.length1 D 
    let l2 = Array2D.length2 D 
    if (l2<>num_cities) then failwith "Distance matrix is not a squared matrix" 
    let powers_of_2 = [|1;2;4;8;16;32;64;128;256;512;1024;2048;4096;8192;16384;32768;65536;131072;262144;524288;1048576;2097152;4194304;8388608;16777216|] 
    let power2 k =  
     if ((k >= 25) || (k<0)) then raise (InnerError("power exponent not allowed")) 
      else powers_of_2.[k] 

    let num_subsets = power2 (num_cities-1) 
    let S_full = num_subsets-1 

    let A = Array2D.create num_subsets (num_cities-1) (limit,-1y) 

    A.[0,0]<-(-0.0f,-2y) 

    let rec sumbits (n:int):int= 
     let rec helper acc m = 
     match m with 
      | 0 -> acc 
      | 1 -> acc+1 // remove this ? 
      | _ -> let r = m%2 
        helper (acc+r) (m>>>1) 
     helper 0 n 

    let hashtable = Array2D.create (num_cities-1) num_subsets false // hashtable.[i-1,n]=true if (sumbits n) = i 
    for k in 1..(num_subsets-1) do hashtable.[(sumbits k)-1,k]<-true 
    // member returns [(p,2^p);....] if the p+1th city is in S 
    let members S = [for j in 0..(num_cities-2) do let a= powers_of_2.[j] &&& S 
                if (a<>0) then yield (j,a)] // list length = num_cities-1 


    for m in 2..num_cities do // S size m 
     printfn "m=%A" m 
     let stopWatch = System.Diagnostics.Stopwatch.StartNew() 

     let mutable count = 1 

     let Subset_of_size_m = hashtable.[m-2,0..] |> Seq.mapi (fun i x -> (i,x)) |> Seq.filter (fun (a,b)-> (b=true)) |> Seq.map fst |> Seq.toList 
     for S in Subset_of_size_m do   
      if m = 2 then let (i,S') = (members S).Head 
          A.[S',i]<- (D.[0,i+1],-1y) // distance from 0 to city i+1 
        else 
          let S'_list = List.fold (fun acc x -> let a = (((snd x)^^^S_full)&&&S)    // list of subsets of size m-1 
                   if a = S then acc else (fst x,a)::acc) [] (members S) 
          for (j,S') in S'_list do 
           A.[S,j] <- ([for (k,expk) in (members S') do 
               yield (fst A.[S',k]+D.[j+1,k+1],k) ]|> List.min |> fun (a,b)->(a,sbyte b))// can do faster than that if we remember from above ? 
          count <- count + 1 // to check progress in the console 
          if count%10000 =0 then printfn "count=%A" count 

     printfn "%f" stopWatch.Elapsed.TotalMilliseconds 

    // printfn "%A" A 
    // A.[num_subsets-1,0..] 
    A 

let calculate_path_TSP_all_c_Dynamic_Programming_with_path (D:float32 [,]) = 
    // calls the core subroutine defined above 
    let A' = TSP_all_c_Dynamic_Programming_with_path_main D 
                // memory usage is not optimized at all.... 

    // from here its smooth sailing, just analyzing the results. 

    let num_cities = Array2D.length1 D 
    let l2 = Array2D.length2 D 
    if (l2<>num_cities) then failwith "Distance matrix is not a squared matrix" 

    let powers_of_2 = [|1;2;4;8;16;32;64;128;256;512;1024;2048;4096;8192;16384;32768;65536;131072;262144;524288;1048576;2097152;4194304;8388608;16777216|] 
    let power2 k =  
     if ((k >= 25) || (k<0)) then raise (InnerError("power exponent not allowed")) 
      else powers_of_2.[k] 

    let num_subsets = power2 (num_cities-1) 
    let S_full = num_subsets-1 

    let A'' = A'.[S_full,0..] 

    let res' = [for i in 0..num_cities-2 do yield (fst A''.[i]+ example.[0,i+1]) ] |> Seq.mapi (fun i x -> (i, x)) |> Seq.minBy snd 
    printfn "TSP answer = %A" res' 

    let path = Array.create (num_cities+1) -1y 

    let mutable current_city = sbyte (fst res') 
    path.[num_cities-1]<- current_city// the last city 

    let mutable current_S = S_full 
    let mutable next_S = -2 
    let mutable next_city = -2y 

    for k in num_cities-2..-1..1 do 
     next_city <- snd A'.[current_S,int current_city] 
     next_S <- (S_full^^^powers_of_2.[int current_city]) &&& current_S 
     //printfn "k=%A current_S=%A next_city=%A" k current_S next_city 
     path.[k]<-next_city 
     current_S<-next_S 
     current_city<-next_city 

    for i in 0..num_cities do path.[i]<-path.[i]+1y 
    printfn "path=%A" path 


////// main program ///// 

calculate_path_TSP_all_c_Dynamic_Programming_with_path dist_matrix 
+0

你正在编译64位或32位应用程序吗? –

+0

我不知道这件事。我看到的是,即时通讯使用目标框架MONO/.NET4.5和调试/ x86版本与MS生成 –

+0

你有什么版本的单声道? –

回答

3

我还没有分析你的代码在所有的,但因为你的目标Debug/x86配置就意味着你应用程序:

  • 没有一定的优化(可能会增加内存使用量)以提供更多调试信息
  • 仅使用x86处理器指令集的32位指令,并且因此它不能使用超过4GB的内存(或le ss,depending on your OS

尝试将其更改为像Release/x64(*)。您还可以调整Mono内置的参数garbage collector

可以预计的是,当你的程序使用的数据占用大部分可用内存为一个过程,你的算法处理单元的数量将大大减少。在这种情况下,你的程序大部分时间都在做垃圾回收(每个GC只能释放少量的内存,因为大部分对象是活着的),而且只有一小部分时间做“真正”的计算(并且计算需要内存分配触发GC)。

当您调查内存使用情况时可能有帮助的其他内容是内存分析器。

你提到在R中你可以强制垃圾收集;在.NET中,你也可以通过调用GC.Collect()来做到这一点,然而,在大多数情况下使用它是不鼓励的(垃圾收集在必要时自动执行)。

*您需要一个编译器生成64位程序集和64位公共语言运行时来运行您的代码。 Mono 2.10 on MacOS X doesn't support 64-bits unless it is built from sources

Support for 64-bit VMs as of Mono 2.10 is only available if you build Mono from source code and install your own copy of the VM. In the future we will ship both mono and mono64 binaries for our users.

新版本具有普遍的安装,我想,他们支持64位。

+0

在Xamarin菜单中,我看到的唯一的其他选项是Release/x86 –

+0

在Visual Studio中,您可以在“配置管理器”中添加新配置;我的电脑上没有Xamarin Studio,所以我不知道如何添加一个。尝试[在此博客]中描述的对话框之一(https://xmonodev.wordpress.com/2013/12/02/setting-the-active-configuration-in-xamarin-studio/)。 –

+0

我看到我可以添加一个名为Debug/AnyCPU和Release/AnyCPU的配置,并且这就是VS2015上的 –