什么可能使非优化F＃代码比优化代码更快？

所以我决定给F＃一个尝试，并将我用C＃编写的算法移植到它。有一点，我注意到调试版本比发布版本运行速度更快。然后我使用优化设置进行游戏并获得了以下结果：什么可能使非优化F＃代码比优化代码更快？

时间显示算法的总执行时间超过100000次运行。我正在使用Visual Studio 2010 SP1附带的F＃编译器。目标平台是任何CPU。

Opt off, tail calls off: 5.81s 
Opt off, tail calls on : 5.79s 
Opt on , tail calls off: 6.48s 
Opt on , tail calls on : 6.40s

我对此非常疑惑 - 为什么优化会让代码运行速度变慢？该算法的C＃版本没有表现出这种行为（即使它以稍微不同的方式实现）

这是一个精简版的F＃代码，它是一种在分子中查找模式的算法。 F＃程序依赖的所有代码都是用F＃编写的。

namespace Motives 
    module Internal = 
    type Motive = 
     { ResidueSet: Set<Residue>; AtomSet: Set<IAtom> } 
     member this.Atoms : IAtom seq = 
     seq { 
      for r in this.ResidueSet do yield! r.Atoms 
      yield! this.AtomSet 
     } 
     static member fromResidues (residues : Residue seq) = residues |> Seq.fold (fun (m: Set<Residue>) r -> m.Add(r)) Set.empty |> fun rs -> { ResidueSet = rs; AtomSet = Set.empty } 
     static member fromAtoms (atoms : IAtom seq) = atoms |> Seq.fold (fun (m: Set<IAtom>) a -> m.Add(a)) Set.empty |> fun atoms -> { ResidueSet = Set.empty; AtomSet = atoms } 
     static member merge (m1: Motive) (m2: Motive) = { ResidueSet = Set.union m1.ResidueSet m2.ResidueSet; AtomSet = Set.union m1.AtomSet m2.AtomSet } 
     static member distance (m1: Motive) (m2: Motive) = Seq.min (seq { for a in m1.Atoms do for b in m2.Atoms -> a.Position.DistanceTo(b.Position) }) 

    type Structure with 
     static member fromMotive (m: Motive) (parent: IStructure) (addBonds: bool) : IStructure = 
     let atoms = AtomCollection.FromUniqueAtoms(m.Atoms) 
     let bonds = 
      match addBonds with 
      | true -> BondCollection.Create(atoms |> Seq.map (fun a -> parent.Bonds.[a]) |> Seq.concat) 
      | _ -> BondCollection.Empty 
     Structure.Create (parent.Id + "_" + atoms.[0].Id.ToString(), atoms, bonds) 

    // KDTree used for range queries 
    // AminoChains used for regex queries 
    type StructureContext = 
     { Structure: IStructure; KDTree: Lazy<KDAtomTree>; AminoChains: Lazy<(Residue array * string) list> } 
     static member create (structure: IStructure) = 
     match structure.IsPdbStructure() with 
     | false -> { Structure = structure; KDTree = Lazy.Create(fun() -> structure.Atoms.ToKDTree()); AminoChains = Lazy.CreateFromValue([]) } 
     | true -> 
      let aminoChains = new System.Func<(Residue array * string) list> (fun() -> 
      let residues = structure.PdbResidues() |> Seq.filter (fun r -> r.IsAmino) 
      residues 
      |> Seq.groupBy (fun r -> r.ChainIdentifier) 
      |> Seq.map (fun (k,rs) -> rs |> Array.ofSeq, String.concat "" (rs |> Seq.map (fun r -> r.ShortName))) 
      |> List.ofSeq) 
      { Structure = structure; KDTree = Lazy.Create(fun() -> structure.Atoms.ToKDTree()); AminoChains = Lazy.Create(aminoChains) } 

    // Remember the named motives from named patterns 
    type MatchContext = 
     { StructureContext: StructureContext; NamedMotives: Map<string, Motive> } 
     static member merge (c1: MatchContext) (c2: MatchContext) = 
     { StructureContext = c1.StructureContext; NamedMotives = c2.NamedMotives |> Map.fold (fun m k v -> m.Add(k,v)) c1.NamedMotives } 

    type MatchedMotive = Motive * MatchContext 

    type Pattern = 
     | EmptyPattern 
     | GeneratingPattern of (StructureContext -> MatchedMotive seq) 
     | ConstraintPattern of (MatchedMotive -> MatchedMotive option) * Pattern 
     static member matches (p: Pattern) (context: StructureContext) : MatchedMotive seq = 
     match p with 
     | GeneratingPattern generator -> generator context 
     | ConstraintPattern (transform, pattern) -> 
      Pattern.matches pattern context 
      |> Seq.choose (fun m -> transform m) 
     | _ -> Seq.empty 

    let ringPattern (names: string list) = 
     let fingerprint = 
     names 
     |> Seq.map (fun s -> ElementSymbol.Create(s).ToString()) 
     |> Seq.sort 
     |> String.concat "" 
     let generator (context: StructureContext) = 
     let rings = context.Structure.Rings().GetRingsByFingerprint(fingerprint) 
     rings |> Seq.map (fun r -> Motive.fromAtoms r.Atoms, { StructureContext = context; NamedMotives = Map.empty }) 
     GeneratingPattern generator 

    open Internal 

    type MotiveFinder (pattern: string) = 
    // I am using a hard coded pattern here for testing purposes 
    let pattern = ringPattern ["C"; "C"; "C"; "C"; "C"; "O"] 
    member this.Matches (structure: IStructure) = 
     Pattern.matches pattern (StructureContext.create structure) 
     |> Seq.map (fun (m, mc) -> Structure.fromMotive m mc.StructureContext.Structure false) 
     |> List.ofSeq 
     |> List.sortBy (fun s -> s.Atoms.[0].Id) 

/////////////////////////////////////////////////////////////////// 
// performance test 

let warmUp = (new MotiveFinder("")).Matches (StructureReader.ReadPdb(filename, computeBonds = true)) 
printfn "%i" (List.length warmUp) 

let structure = StructureReader.ReadPdb(filename, computeBonds = true) 
let stopWatch = System.Diagnostics.Stopwatch.StartNew() 
let nruns = 100000 
let result = 
    seq { 
    for i in 1 .. nruns do 
     yield (new MotiveFinder("")).Matches structure 
    } |> Seq.nth (nruns-1) 

stopWatch.Stop() 
printfn "Time elapsed: %f seconds" stopWatch.Elapsed.TotalSeconds

EDIT2：

我似乎已经缩小的问题设置类型的实现。

对于此代码：

let stopWatch = System.Diagnostics.Stopwatch.StartNew() 
let runs = 1000000 
let result = 
    seq { 
    for i in 1 .. runs do 
     let setA = [ 1 .. (i % 10) + 5 ] |> Set.ofList 
     let setB = [ 1 .. (i % 10) + 5 ] |> Set.ofList 
     yield Set.union setA setB 
    } |> Seq.nth (runs - 1) 

stopWatch.Stop() 
printfn "Time elapsed: %f seconds" stopWatch.Elapsed.TotalSeconds 
printfn "%A" result

我得到〜7.5S与优化功能，并〜8.0s与优化。仍然目标=任何CPU（并且我有i7-860处理器）。

EDIT3： 而且在我发布之前的编辑之后，我想我应该只在列表上试试。

所以对于

let stopWatch = System.Diagnostics.Stopwatch.StartNew() 
let runs = 1000000 
let result1 = 
    seq { 
    for i in 1 .. runs do 
     let list = [ 1 .. i % 100 + 5 ] 
     yield list 
    } |> Seq.nth (runs - 1) 

stopWatch.Stop() 
printfn "Time elapsed: %f seconds" stopWatch.Elapsed.TotalSeconds 
printfn "%A" result1

我得到〜有选择三分。与选择约3.5秒。上。

EDIT4：

如果我删除序列生成器，只是做两个断断续续

let stopWatch = System.Diagnostics.Stopwatch.StartNew() 
let runs = 1000000 
let mutable ret : int list = [] 
for i in 1 .. runs do 
    let list = [ 1 .. i % 100 + 5 ] 
    ret <- list 

stopWatch1.Stop() 
printfn "Time elapsed: %f seconds" stopWatch.Elapsed.TotalSeconds 
printfn "%A" ret

我得到〜3秒与优化。所以看起来问题在于优化seq构建器代码。

奇怪的是，我写在C＃中测试应用程序：

var watch = Stopwatch.StartNew(); 

int runs = 1000000; 

var result = Enumerable.Range(1, runs) 
    .Select(i => Microsoft.FSharp.Collections.ListModule.OfSeq(Enumerable.Range(1, i % 100 + 5))) 
    .ElementAt(runs - 1); 

watch.Stop(); 

Console.WriteLine(result); 
Console.WriteLine("Time: {0}s", watch.Elapsed.TotalSeconds);

和代码恰好为F＃的解决方案几乎两倍的速度运行〜1.7S。

EDIT5：

基于我已经发现了导致运行速度较慢的优化代码的东西与乔恩·哈罗普讨论（我仍然不知道为什么寿）。

如果我改变Motive.Atoms从

member this.Atoms : IAtom seq = 
    seq { 
    for r in this.ResidueSet do yield! r.Atoms 
     yield! this.AtomSet 
    }

到

member this.Atoms : IAtom seq = 
    Seq.append (this.ResidueSet |> Seq.collect (fun r -> r.Atoms)) this.AtomSet

然后在程序运行时〜7.1s两个优化和非优化版本。这比seq版本慢，但至少一致。

因此，似乎F＃编译器无法优化计算表达式，并且通过尝试这样做实际上使得它们变慢。

来源

2011-12-31 Dave

看起来编译器只是在优化特定算法时失败。 – 2011-12-31 01:13:42

@Niklas：恩，是的。但是，必须有一个原因，我不知道它是什么... – Dave 2011-12-31 01:19:53

不可能说没有工作代码的任何东西。你的'Residue'和'IAtom'类型是未定义的。你的'Structure'类型定义甚至不是有效的F＃代码。你能提供一个有效的例子吗？ – 2011-12-31 03:23:23

我也可以观察到你的封装代码和倒数第二个例子，在启用优化的情况下运行速度稍慢，但差异小于10％，虽然反常，但我并不惊讶优化有时会略微降低性能。

我应该注意到，您的编码风格留下了很多优化的空间，但没有整个源代码，我不可能帮助优化它。您的示例使用下面的代码：

let result1 = 
    seq { 
    for i in 1 .. runs do 
     let list = [ 1 .. i % 100 + 5 ] 
     yield list 
    } |> Seq.nth (runs - 1)

时，这是更短，更地道和幅度较快的订单：

let result1 = 
    Seq.init runs (fun i -> List.init ((i+1) % 100 + 5) ((+) 1)) 
    |> Seq.nth (runs - 1)

编辑

在您的评论下面你说你要执行函数参数，在这种情况下，我不会假定Seq.nth会为您做这个，所以我会使用for循环代替：

let mutable list = [] 
for i=1 to runs do 
    list <- List.init (i % 100 + 5) ((+) 1) 
list

这还是9 ×比原来快。

来源

2011-12-31 13:07:52

我对编码风格有所了解，我对F＃相当陌生。我只是感到惊讶，优化器使代码变得很简单，因为这个例子实际上比较慢，至少和未优化的版本一样快。 – Dave 2011-12-31 14:26:29

另外，似乎'Seq.init f |> Seq.nth i''实际上只执行一次函数'f'。这不是我想要的行为... – Dave 2011-12-31 15:20:49

作为我的先行例子。评论：'let t = ref 0; let x = Seq.init 1000（fun i - > t：=！t + i;！t）|> Seq.nth 999; printfn“％A％A”！t x'打印999 999。我错过了什么吗？ – Dave 2011-12-31 15:23:29

什么可能使非优化F＃代码比优化代码更快？

回答

相关问题