2011-05-25 95 views
1

我有一个像素着色器(见下文),与 XNA 一起使用。在我的笔记本电脑(显卡很差)上运行时有点卡顿,但还可以接受。我刚刚尝试在 Xbox 上运行它,结果糟糕透了!(标题:Xbox 上的像素着色器性能)

游戏中没有其他任何内容(它只是一个分形渲染器),所以导致问题的一定是像素着色器。我也认为问题出在像素着色器(PS)代码上,因为降低迭代次数之后就没问题了。我还检查过,GC 增量为零。

是否有哪些 HLSL 函数在 Xbox 上是不应该使用的?我一定是哪里做错了,性能不可能这么差!

#include "FractalBase.fxh" 

// Real exponent applied to z on every iteration (passed to ComPow); the pixel 
// shader also uses ZPower * ZPower as its bailout threshold. 
float ZPower; 

// Multiplied per channel into the shade of pixels that escape. 
float3 Colour; 
// Additional per-channel multiplier applied together with Colour. 
float3 ColourScale; 

// Euclidean length of Arg, i.e. the modulus when Arg holds a complex
// number as its (x, y) components.
float ComAbs(float2 Arg)
{
    float magSq = dot(Arg, Arg);
    return sqrt(magSq);
}

// Raises the complex number Arg (x = real part, y = imaginary part) to the
// real exponent Power using polar form:
//   modulus = |Arg|^Power, angle = atan2(Arg.y, Arg.x) * Power.
// Returns the result as (real, imaginary).
float2 ComPow(float2 Arg, float Power)
{
    float MagSq = Arg.x * Arg.x + Arg.y * Arg.y;

    // (|Arg|^2)^(Power/2) == |Arg|^Power, which avoids a separate sqrt.
    float Mod = pow(MagSq, Power/2);

    // atan2 is not well-defined at the origin, and the fractal iteration
    // starts from z = 0, so every pixel hits this case on its first pass.
    // Use an angle of 0 there so an undefined angle cannot leak a NaN into
    // the result. This is a value select, not flow control.
    float Ang = (MagSq > 0) ? atan2(Arg.y, Arg.x) * Power : 0;

    return float2(Mod * cos(Ang), Mod * sin(Ang));
}

// Escape-time fractal pixel shader.
//   texCoord   - the complex constant c for this pixel.
//   Iterations - uniform upper bound on the number of iterations.
// Iterates z = z^ZPower + c from z = 0 and stops early once |z|^2 reaches
// ZPower * ZPower. Pixels that use up (or nearly use up) the iteration
// budget are drawn opaque black; the rest are shaded from the iteration
// count and tinted by Colour * ColourScale.
float4 FractalPixelShader(float2 texCoord : TEXCOORD0, uniform float Iterations) : COLOR0
{
    float escapeLimit = ZPower * ZPower;

    float2 seed = texCoord.xy;
    float2 orbit = 0;

    // Squared magnitude from the current and the previous iteration.
    float magSq = 0;
    float prevMagSq = 0;

    float n;
    for (n = 0; n < Iterations; n++)
    {
        orbit = ComPow(orbit, ZPower) + seed;
        magSq = orbit.x * orbit.x + orbit.y * orbit.y;

        if (magSq >= escapeLimit)
            break;

        prevMagSq = magSq;
    }

    // Fraction of the iteration budget used, plus a correction term derived
    // from how far the final step overshot the bailout threshold.
    float progress = n/Iterations;
    float overshoot = (magSq - prevMagSq)/(escapeLimit - prevMagSq);
    float shade = progress + (1/overshoot/Iterations);

    float4 interior = float4(0.0, 0.0, 0.0, 1.0);
    float4 escaped = float4(shade * Colour.r * ColourScale.x,
                            shade * Colour.g * ColourScale.y,
                            shade * Colour.b * ColourScale.z,
                            1);

    return (n >= Iterations - 1) ? interior : escaped;
}

// Single-pass technique: compiles SpriteVertexShader for vs_3_0 and 
// FractalPixelShader for ps_3_0 with its uniform Iterations argument set to 128. 
technique Technique1 
{ 
    pass 
    { 
     VertexShader = compile vs_3_0 SpriteVertexShader(); 
     PixelShader = compile ps_3_0 FractalPixelShader(128); 
    } 
} 

下面是FractalBase.fxh:

// Matrix applied to the incoming vertex position in SpriteVertexShader; 
// bound to vertex-shader constant register c0. 
float4x4 MatrixTransform : register(vs, c0); 

// Offset subtracted from the scaled position (its y component is negated first). 
float2 Pan; 
// Scale applied to both components of the transformed position. 
float Zoom; 
// Extra scale applied to the y component only. 
float Aspect; 

// Vertex shader shared by the fractal effects.
// Transforms the vertex by MatrixTransform, then overwrites texCoord with the
// transformed x/y mapped into complex-plane coordinates using Zoom, Aspect
// and Pan. Colour is passed through unchanged.
void SpriteVertexShader(inout float4 Colour : COLOR0,
         inout float2 texCoord : TEXCOORD0,
         inout float4 position : SV_Position)
{
    position = mul(position, MatrixTransform);

    // Convert the position from screen space into complex coordinates:
    // scale x by Zoom and y by Zoom * Aspect, then shift by (Pan.x, -Pan.y).
    float2 scaled = position.xy * Zoom * float2(1, Aspect);
    float2 shift = float2(Pan.x, -Pan.y);

    texCoord = scaled - shift;
}

编辑:我曾尝试用大量 lerp 来去除条件分支,但这样做之后出现了大量伪影(artifacts——可不是那种“belong in a museum”的文物!)。我调整了一些地方,并修正了一些逻辑错误,但关键是将 GreaterThan 的结果乘以 1 + ε,以应对舍入误差导致 0.9999 在转换为整数时变成 0 的情况。修正后的代码见下:

#include "FractalBase.fxh" 

// Real exponent applied to z on every iteration (passed to ComPow); the pixel 
// shader also uses ZPower * ZPower as its bailout threshold. 
float ZPower; 

// Multiplied per channel into the shade of pixels that escape. 
float3 Colour; 
// Additional per-channel multiplier applied together with Colour. 
float3 ColourScale; 

// Euclidean length of Arg, i.e. the modulus when Arg holds a complex
// number as its (x, y) components.
float ComAbs(float2 Arg)
{
    float magSq = dot(Arg, Arg);
    return sqrt(magSq);
}

// Raises the complex number Arg (x = real part, y = imaginary part) to the
// real exponent Power using polar form:
//   modulus = |Arg|^Power, angle = atan2(Arg.y, Arg.x) * Power.
// Returns the result as (real, imaginary).
float2 ComPow(float2 Arg, float Power)
{
    float MagSq = Arg.x * Arg.x + Arg.y * Arg.y;

    // (|Arg|^2)^(Power/2) == |Arg|^Power, which avoids a separate sqrt.
    float Mod = pow(MagSq, Power/2);

    // atan2 is not well-defined at the origin, and the fractal iteration
    // starts from z = 0, so every pixel hits this case on its first pass.
    // Use an angle of 0 there so an undefined angle cannot leak a NaN into
    // the result. This is a value select, not flow control.
    float Ang = (MagSq > 0) ? atan2(Arg.y, Arg.x) * Power : 0;

    return float2(Mod * cos(Ang), Mod * sin(Ang));
}

// Comparison flag for use as a lerp weight in place of an if-statement.
// Returns 1.001 when x >= y and 0 otherwise.
//
// The result is deliberately slightly above 1: the caller stores values
// derived from it in an int, and the small margin keeps a "true" result
// from converting to 0.
//
// The previous arithmetic form, (x - y) / (2 * abs(x - y)) + 0.5, divided
// zero by zero when x == y and so produced NaN. Equality now counts as true.
float GreaterThan(float x, float y)
{
    return (x >= y) ? 1.001 : 0.0;
}

// Variant of the fractal pixel shader with no if/break inside the loop: every 
// iteration executes, and each state update is blended in or out with a lerp 
// weight instead of being skipped. 
//   texCoord   - the complex constant c for this pixel. 
//   Iterations - uniform loop count. 
// Returns opaque black when DoneIterations >= Iterations - 1, otherwise a 
// shade tinted per channel by Colour * ColourScale. 
float4 FractalPixelShader(float2 texCoord : TEXCOORD0, uniform float Iterations) : COLOR0 
{ 
    float2 c = texCoord.xy; 
    float2 z = 0; 

    int i; 

    // Squared magnitude of z from the previous and the current iteration. 
    float oldBailoutTest = 0; 
    float bailoutTest = 0; 

    // Lerp weight for the state updates: starts at 1 and is lerp'd towards 0 
    // by Bailout. Declared int, so each lerp result is converted on assignment. 
    int KeepGoing = 1; 

    // Stays at Iterations unless a bailout is recorded inside the loop. 
    int DoneIterations = Iterations; 

    // Bailout flag; starts at 0. 
    int Bailout = 0; 

    for(i = 0; i < Iterations; i++) 
    { 
     // With KeepGoing at 0 the lerp returns the old z, freezing the orbit. 
     z = lerp(z, ComPow(z, ZPower) + c, KeepGoing); 

     bailoutTest = lerp(bailoutTest, z.x * z.x + z.y * z.y, KeepGoing); 

     // The weight -abs(Bailout) + 1 is 1 while Bailout is 0 and 0 once it is 1, 
     // so the comparison result is only taken until the flag is first set. 
     // NOTE(review): relies on Bailout only ever holding 0 or 1 after the 
     // float-to-int conversion - confirm on the target hardware. 
     Bailout = lerp(Bailout, GreaterThan(bailoutTest, ZPower * ZPower), -abs(Bailout) + 1); 

     KeepGoing = lerp(KeepGoing, 0.0, Bailout); 
     // With Bailout at 1 this takes min(i, DoneIterations), so the earliest 
     // bailing iteration index is kept; with Bailout at 0 it is unchanged. 
     DoneIterations = lerp(DoneIterations, min(i, DoneIterations), Bailout); 

     // KeepGoing was already updated above, so on the iteration that bails out 
     // oldBailoutTest keeps the value from the iteration before. 
     oldBailoutTest = lerp(oldBailoutTest, bailoutTest, KeepGoing); 
    } 

    float normalisedIterations = DoneIterations/Iterations; 
    float factor = (bailoutTest - oldBailoutTest)/(ZPower * ZPower - oldBailoutTest); 

    // The scalar is broadcast to all four components of Result. 
    float4 Result = normalisedIterations + (1/factor/Iterations); 

    Result = (DoneIterations >= Iterations - 1) ? float4(0.0, 0.0, 0.0, 1.0) : float4(Result.x * Colour.r * ColourScale.x, Result.y * Colour.g * ColourScale.y, Result.z * Colour.b * ColourScale.z, 1); 

    return Result; 
} 

// Single-pass technique: compiles SpriteVertexShader for vs_3_0 and 
// FractalPixelShader for ps_3_0 with its uniform Iterations argument set to 128. 
technique Technique1 
{ 
    pass 
    { 
     VertexShader = compile vs_3_0 SpriteVertexShader(); 
     PixelShader = compile ps_3_0 FractalPixelShader(128); 
    } 
} 

回答

1

Xbox 的块尺寸(block size)相当大,所以在 Xbox 上使用分支的效果并不总是很好。此外,你的代码似乎使用了动态分支,而编译器在生成动态分支时并不总是最高效的。

请查看 branch 属性:http://msdn.microsoft.com/en-us/library/bb313972%28v=xnagamestudio.31%29.aspx

此外,如果把提前跳出(early bailout)移走,PC 上的表现会不会变得与 Xbox 更接近?

请记住,如今的现代显卡实际上比 Xenon(Xbox 360)设备要快得多。

+0

感谢您的快速回复。添加 [branch] 属性似乎没有任何效果。有趣的是,删除 if 和 break 之后,Xbox 上运行得非常流畅。所以,看来我得用 lerp 做些取巧的处理了? – 2011-05-25 21:22:44

+0

代码已更新为使用 'step' 和 'lerp',但还是一样卡顿;去掉 'step' 之后速度就上来了,所以现在我需要一个替代函数! – 2011-05-25 21:40:34

+0

step 会产生很多相互依赖的指令。更有可能的是,去掉 step 之后,编译器才得以执行一大批原本无法进行的优化。目前还不清楚该如何改进这一点……我会再考虑一下。 – Arelius 2011-05-26 00:27:29