Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
18 changes: 11 additions & 7 deletions D3D12ComputeAdd/D3D12Sample.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,7 @@

//#define USE_STRUCTURED_BUFFERS
//#define USE_VEC4
#define USE_INT
#define PRINT_DATA

namespace
Expand Down Expand Up @@ -43,7 +44,7 @@ D3D12Sample::D3D12Sample() :
m_cbSrvDescriptorSize(0),
m_constantBufferData{},
m_dataSize(1024*1024),
m_workGroupSizeX(128),
m_workGroupSizeX(64),
m_componentSize(1)
{
#ifdef USE_VEC4
Expand Down Expand Up @@ -202,6 +203,9 @@ void D3D12Sample::LoadAssets()
#endif
#ifdef USE_VEC4
"USE_VEC4", "1",
#endif
#ifdef USE_INT
"USE_INT", "1",
#endif
nullptr, nullptr
};
Expand Down Expand Up @@ -290,7 +294,7 @@ void D3D12Sample::LoadSizeDependentResources()
const UINT elementCount = m_dataSize;
for ( int i = 0; i < elementCount; ++i )
{
buf1Data.push_back((float) rand() / float(RAND_MAX));
buf1Data.push_back(rand() % 200);
}
const UINT bufferSize = buf1Data.size() * sizeof(float);

Expand Down Expand Up @@ -342,7 +346,7 @@ void D3D12Sample::LoadSizeDependentResources()
const UINT elementCount = m_dataSize;
for ( int i = 0; i < elementCount; ++i )
{
buf2Data.push_back((float) rand() / float(RAND_MAX));
buf2Data.push_back(rand() % 200);
}
const UINT bufferSize = buf2Data.size() * sizeof(float);

Expand Down Expand Up @@ -548,20 +552,20 @@ void D3D12Sample::RunCompute()
float result = 0.0;
int m = rand() % m_dataSize;
D3D12_RANGE readbackBufferRange{ 0, outputBufferSize };
FLOAT * pReadbackBufferData{};
int * pReadbackBufferData{};
ThrowIfFailed(readbackBuffer->Map(
0,
&readbackBufferRange,
reinterpret_cast<void**>(&pReadbackBufferData)));
bool hasError = false;
for (int i = 0; i < m_dataSize; i++)
{
float gpuResult = pReadbackBufferData[i];
float cpuResult = buf1Data[i] + buf2Data[i];
int gpuResult = pReadbackBufferData[i];
int cpuResult = buf1Data[i] + buf2Data[i];
if (abs(gpuResult - cpuResult) > 0.003)
{
hasError = true;
printf("The result is not correct at %d. Expected %f, actual %f", i, cpuResult, gpuResult);
printf("The result is not correct at %d. Expected %d, actual %d", i, cpuResult, gpuResult);
break;
}
}
Expand Down
4 changes: 2 additions & 2 deletions D3D12ComputeAdd/D3D12Sample.h
Original file line number Diff line number Diff line change
Expand Up @@ -94,8 +94,8 @@ class D3D12Sample
UINT m_workGroupSizeX;
UINT m_componentSize;
UINT m_computeCount = 2000;
std::vector<float> buf1Data;
std::vector<float> buf2Data;
std::vector<int> buf1Data;
std::vector<int> buf2Data;

void GetHardwareAdapter(IDXGIFactory2* pFactory, IDXGIAdapter1** ppAdapter);
void CreateDevice(const ComPtr<IDXGIFactory4>& factory);
Expand Down
72 changes: 64 additions & 8 deletions D3D12ComputeAdd/SLM_4X4_16X16.hlsl
Original file line number Diff line number Diff line change
Expand Up @@ -74,31 +74,87 @@ void mm_write(int index, float4 value) {
dst.Store4(4 * (index * 4), asuint(value));
}
#else
float mm_readA(int index) {
float result = asfloat(src0.Load(4 * index));
#ifdef USE_INT
// Loads the value stored in src0 at offset 4 * index and returns its bits
// reinterpreted as a signed int.
int mm_readA(int index) {
  const int offset = 4 * index;
  return asint(src0.Load(offset));
}

float mm_readB(int index) {
float result = asfloat(src1.Load(4 * index));
// Loads the value stored in src1 at offset 4 * index and returns its bits
// reinterpreted as a signed int.
int mm_readB(int index) {
  const int offset = 4 * index;
  return asint(src1.Load(offset));
}

void mm_write(int index, float value) {
// Stores `value` into dst at offset 4 * index, passing its bits through
// asuint first.
void mm_write(int index, int value) {
  const uint bits = asuint(value);
  dst.Store(4 * index, bits);
}

// Signed division with a guarded divisor.
// When rhs is 0, or when lhs is -2147483648 and rhs is -1, the divisor is
// replaced by 1, so the function returns lhs unchanged for those inputs.
int tint_div(int lhs, int rhs) {
  const bool zero_divisor = (rhs == 0);
  const bool min_by_neg_one = ((lhs == -2147483648) & (rhs == -1));
  const int divisor = ((zero_divisor | min_by_neg_one) ? 1 : rhs);
  return (lhs / divisor);
}
// Signed remainder with a guarded divisor.
// When rhs is 0, or when lhs is -2147483648 and rhs is -1, the divisor is
// replaced by 1, so the result for those inputs is 0.
int tint_mod(int lhs, int rhs) {
const int rhs_or_one = (((rhs == 0) | ((lhs == -2147483648) & (rhs == -1))) ? 1 : rhs);
// If either operand has its sign bit set, derive the remainder from the
// division (lhs - (lhs / d) * d) instead of using `%`.
// NOTE(review): presumably this avoids relying on `%` with negative operands
// in the shader compiler — confirm against the generator that emitted this.
if (any(((uint((lhs | rhs_or_one)) & 2147483648u) != 0u))) {
return (lhs - ((lhs / rhs_or_one) * rhs_or_one));
}
else {
// Neither operand has the sign bit set: use `%` directly.
return (lhs % rhs_or_one);
}
}
#else
// Loads the value stored in src0 at offset 4 * index and returns it as a
// uint (via asuint).
uint mm_readA(int index) {
  const int offset = 4 * index;
  return asuint(src0.Load(offset));
}

// Loads the value stored in src1 at offset 4 * index and returns it as a
// uint (via asuint).
uint mm_readB(int index) {
  const int offset = 4 * index;
  return asuint(src1.Load(offset));
}

// Stores `value` into dst at offset 4 * index, passing it through asuint
// first.
void mm_write(int index, uint value) {
  const uint bits = asuint(value);
  dst.Store(4 * index, bits);
}
// Unsigned division with a guarded divisor: a zero rhs is replaced by 1, so
// dividing by zero returns lhs unchanged.
uint tint_div(uint lhs, uint rhs) {
  const uint divisor = ((rhs == 0u) ? 1u : rhs);
  return (lhs / divisor);
}

// Unsigned remainder with a guarded divisor: a zero rhs is replaced by 1, so
// the remainder for a zero divisor is 0.
uint tint_mod(uint lhs, uint rhs) {
  const uint divisor = ((rhs == 0u) ? 1u : rhs);
  return (lhs % divisor);
}
#endif // USE_INT
#endif // USE_VEC4
#endif // USE_STRUCTURED_BUFFERS

// Compute shader entry point.
// For the invocation's global x index it reads one element through mm_readA
// and one through mm_readB, accumulates tint_div + tint_mod of each value
// against every i in [1, 200), and writes the result through mm_write.
//
// NOTE(review): this text comes from a rendered diff without +/- markers.
// Two numthreads attributes, a `float result` line directly before the
// USE_INT branch, and two mm_write calls appear side by side — presumably
// the removed and added sides of the same change. Confirm against the real
// shader file before relying on this listing.
[numthreads(128, 1, 1)]
[numthreads(64, 1, 1)]
void main(CS_INPUT input)
{
initGLBuiltins(input);
// One element per invocation, selected by the global x id.
int index = int(gl_GlobalInvocationID.x);
#ifdef USE_VEC4
float4 result = mm_readA(index) + mm_readB(index);
#else
float result = mm_readA(index) + mm_readB(index);
#ifdef USE_INT
// Signed path: a and b are the two input elements, c the accumulator.
const int a = mm_readA(index);
const int b = mm_readB(index);
int c = 0;
{
// i starts at 1, so the divisor passed to tint_div/tint_mod is never 0 here.
for (int i = 1; (i < 200); i = (i + 1)) {
c = (c + (tint_div(a, i) + tint_mod(a, i)));
c = (c + (tint_div(b, i) + tint_mod(b, i)));
}
}
#else
// Unsigned path: same accumulation using the uint helpers.
const uint a = mm_readA(index);
const uint b = mm_readB(index);
uint c = 0u;
{
for (uint i = 1u; (i < 200u); i = (i + 1u)) {
c = (c + (tint_div(a, i) + tint_mod(a, i)));
c = (c + (tint_div(b, i) + tint_mod(b, i)));
}
}
#endif // USE_INT
#endif // USE_VEC4
mm_write(index, result);
mm_write(index, c);
}