ML-Decoder/proj_code.py at master · avinsrid/ML-Decoder · GitHub

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
#!/usr/bin/env python

"""
Conditional Optimization with Complex Numbers / M-QAM Symboles Input using PyOpenCL.
By Avinash Sridhar (as4626@columbia.edu) & Sareena Abdul Razak (sta2378@columbia.edu)

Note: Open README.md to see brief details of algorithms
"""

import time
#import pyopencl as cl
import numpy as np
from numpy import linalg as LA
import pyopencl as cl

# Selecting OpenCL platform.
NAME = 'NVIDIA CUDA'
platforms = cl.get_platforms()
devs = None
for platform in platforms:
	if platform.name == NAME:
		devs = platform.get_devices()

# Setting Command Queue.
ctx = cl.Context(devs)
queue = cl.CommandQueue(ctx)

# You need to set the flags of the buffers you create properly; otherwise,
# you might not be able to read or write them as needed:
mf = cl.mem_flags

# Maximum range for M-QAM is M i.e. 0,1,2,.....,M-1
M = 4
# Number of received input symbols is N
N = 4

kernel = """
#include <pyopencl-complex.h>
__kernel void ml_decoder(__global cfloat_t* decoded, __global cfloat_t* r, __global cfloat_t* MatP, __global cfloat_t* MatQ, __global cfloat_t* MatAdjP, __global cfloat_t* QAMconstell, const int N,const int sizeQAM, const float frobNorm) {
	/* Pick any two symbols from the constellation. this is x3 and x4 */

	cfloat_t s_bar[2][1];
	cfloat_t c_bar[2][1];
	cfloat_t Pc[2][1];
	cfloat_t Qs[2][1];
	cfloat_t rQs[2][1];
	cfloat_t cbar_temp[2][1];
	cfloat_t tempMs[2][1];
	cfloat_t matq[2][2];
	cfloat_t decoded_sbar[2][1], decoded_cbar[2][1];
	float ceil1, ceil2, floor1, floor2;
	float temp = 10000.0 ;
	float Ms;
	for (int i = 0; i< 2; i++)
	{
		for (int j = 0; j < 2; j++)
		matq[i][j] = MatQ[i+j];
	}
	unsigned int indexr = get_global_id(0);
	for (int i = 0 ; i <  sizeQAM ; i++)
	{
		for(int j = 0 ; j < sizeQAM ; j++)
		{
           s_bar[0][0] = QAMconstell[i];
           s_bar[1][0] = QAMconstell[j];

           /* Calculating c_bar from s_bar */

           /* Multiplying Q and s_bar matrices */
           Qs[0][0] = cfloat_add(cfloat_mul(MatQ[0 + 0] , s_bar[0][0]), cfloat_mul( MatQ[0 + 1] , s_bar[1][0]));
           Qs[1][0] = cfloat_add(cfloat_mul(MatQ[0 + 2] , s_bar[0][0]), cfloat_mul( MatQ[0 + 3] , s_bar[1][0]));

           /* r - Qs */
           rQs[0][0] = cfloat_add(r[indexr + 0] ,-Qs[0][0]);
           rQs[1][0] = cfloat_add(r[indexr + N] , -Qs[1][0]);

           /* Calculate c_bar */
           cbar_temp[0][0] =  cfloat_add(cfloat_mul(MatAdjP[0 + 0] , rQs[0][0]) , cfloat_mul(MatAdjP[0 + 1] , rQs[1][0])) ;
           cbar_temp[1][0] =  cfloat_add(cfloat_mul(MatAdjP[0 + 2] , rQs[0][0]) , cfloat_mul(MatAdjP[1 + 3] , rQs[1][0])) ;
		   c_bar[0][0] =  cfloat_mul((2/frobNorm) , cbar_temp[0][0]);
		   c_bar[1][0] = cfloat_mul((2/frobNorm) , cbar_temp[1][0]);


		   /* Multiplying P and c matrices */
		   Pc[0][0] = cfloat_add(cfloat_mul(MatP[0 + 0], c_bar[0][0]), cfloat_mul(MatP[0 + 1], c_bar[1][0]));
		   Pc[1][0] = cfloat_add(cfloat_mul(MatP[0 + 2], c_bar[0][0]), cfloat_mul(MatP[0 + 3], c_bar[1][0]));

		   /* Calculate ceiling of c_bar[0][0] and floor of c_bar[1][0] */
		   ceil1 = ((cfloat_real(c_bar[0][0]))/fabs(cfloat_real(c_bar[0][0])))*ceil(fabs(cfloat_real(c_bar[0][0])));
		   floor1 = ((cfloat_real(c_bar[1][0]))/fabs(cfloat_real(c_bar[1][0])))*floor(fabs(cfloat_real(c_bar[1][0])));
		   ceil2 = ((cfloat_imag(c_bar[0][0]))/fabs(cfloat_imag(c_bar[0][0])))*ceil(fabs(cfloat_imag(c_bar[0][0])));
		   floor2 = ((cfloat_imag(c_bar[1][0]))/fabs(cfloat_imag(c_bar[1][0])))*floor(fabs(cfloat_imag(c_bar[1][0])));

		   c_bar[0][0] = cfloat_new(ceil1, ceil2);
		   c_bar[1][0] = cfloat_new(floor1, floor2);

		   /* Calculate Ms */
		   /* First, we calculate the complex numbers' abs and then proceed with over all ||Ms|| calculation */
		   tempMs[0][0] = cfloat_add(r[indexr + 0], cfloat_add(-Pc[0][0], -Qs[0][0]));
		   tempMs[1][0] = cfloat_add(r[indexr + N], cfloat_add(-Pc[1][0], -Qs[1][0]));
		   Ms = pow(sqrt(pow((cfloat_real(tempMs[0][0])),2) + pow((cfloat_imag(tempMs[0][0])),2)) + sqrt(pow((cfloat_real(tempMs[1][0])),2) + pow((cfloat_imag(tempMs[1][0])),2)),2);

 		   /* Check if Ms < temp, if TRUE, then store Ms in temp and store decoded_sbar and decoded_cbar with their respective values */
 		   if (Ms < temp)
 		   {
 		   	decoded_cbar[0][0] = c_bar[0][0];
 		   	decoded_cbar[1][0] = c_bar[1][0];
 		   	decoded_sbar[0][0] = s_bar[0][0];
 		   	decoded_sbar[1][0] = s_bar[1][0];
 		   	temp = Ms;
 		   }
 		}
	}
	decoded[2*indexr] = decoded_cbar[0][0];
	decoded[2*indexr+1] = decoded_cbar[1][0];
	decoded[2*indexr + 2*N] = decoded_sbar[0][0];
	decoded[2*indexr + 2*N +1] = decoded_sbar[1][0];
}
"""

# These are the symbols transmitted on Tx1 and Tx2
# Format of symbols is :
# [x11 x12 x21 x22 x31 x32 ........; x13 x14 x23 x24 x33 x34] (Similiar to MATLAB notation for column separation)
#x = np.array([[0 + 0j,0 + 0j],[0 + 0j,0 + 0j]])
x = np.random.randint(0, M, size = (2,N*2)).astype(np.complex64)
x_mod = np.zeros_like(x)

# Silver Code Matrix (Constant throughout code)
V = (1/7**(0.5))*np.array([[1-1j, 1-2j],[1+2j, -1-1j]])
#testprint
#print "Silver Code Matrix V is ", V

# Generate array constellation for M-QAM, in our case M = 4
constellation = np.array([-1-1j, -1+1j, 1+1j, 1-1j]).astype(np.complex64)

# We will do modulation of the symbols, stored in x_mod
for i in range(2) :
	for j in range(N*2) :
		if x[i,j] == 0 :
			x_mod[i,j] = -1 - 1j
		elif x[i,j] == 1 :
			x_mod[i,j] = -1 + 1j
		elif x[i,j] == 2 :
			x_mod[i,j] = 1 + 1j
		elif x[i,j] == 3 :
			x_mod[i,j] = 1 - 1j
		else: pass

#testprint
#print M, " -QAM Modulated Input is ", x_mod

# Generate channel matrix h = [h1 h2]
h = np.array([[0.2254 - 0.9247j, -0.3066 + 0.2423j]]) #(1/(2**(0.5)))*np.random.random_sample((1,2)).astype(np.complex64) + (1/(2**(0.5)))*1j*np.random.random_sample((1,2)).astype(np.complex64)
# Generate channel matrix g = [g1 g2]
g =  np.array([[2.5303 + 1.9583j, -0.9545 + 2.1460j]]) # (1/(2**(0.5)))*np.random.random_sample((1,2)).astype(np.complex64) + (1/(2**(0.5)))*1j*np.random.random_sample((1,2)).astype(np.complex64)
# Generate h_bar = hV
h_bar = np.dot(h,V)
# Generate g_bar = gV
g_bar = np.dot(g, V)
# Generate noise matrix n1 = [n11, n12]
n1 = 0.3802 + 1.2968j #(1/(2**(0.5)))*np.random.random_sample((1,2)).astype(np.complex64) + (1/(2**(0.5)))*1j*np.random.random_sample((1,2)).astype(np.complex64)
# Generate noise matrix n1 = [n11, n12]
n2 = -1.5972 + 0.6096j #(1/(2**(0.5)))*np.random.random_sample((1,2)).astype(np.complex64) + (1/(2**(0.5)))*1j*np.random.random_sample((1,2)).astype(np.complex64)
n = np.array([[0+0j, 0+0j]])
#testprint
print "h is ", h
print "g is ", g
print "h bar is ", h_bar
print "g bar is ", g_bar
print "Noise matrix n1 is ", n1
print "Noise matrix n2 is ", n2

rx_sym = np.zeros((2,N)).astype(np.complex64)

#testprint
#print "Initialization of rx_sym = ", rx_sym

# Calculate the received symbols rx_sym on receiver 1 and 2 after input symbols passed through above channel matrices with noise
"""
for i in range(N):
	HG = np.array([[h[0,0], h[0,1]], [np.conjugate(h[0,1]), -np.conjugate(h[0,0])]])
	HG_bar = np.array([[h_bar[0,0], h_bar[0,1]], [np.conjugate(h_bar[0,1]), -np.conjugate(h_bar[0,1])]])
	x1x2 = np.array([[x_mod[0, 2*i]], [x_mod[0, (2*i)+1]]])
	x3x4 = np.array([[x_mod[1, 2*i]], [x_mod[1, (2*i)+1]]])
	r_temp = np.dot(HG,x1x2) + np.dot(HG_bar, x3x4) + n
	rx_sym[0,i] = r_temp[0,0]
	rx_sym[1,i] = r_temp[1,0]
"""

for i in range(N) :
	# Store matrix [x1 -x2*, x2 x1*] in x1_temp
	# Store matrix [x3 -x4*, x4 x3*] in x2_temp
	x1_temp = np.zeros((2,2)).astype(np.complex64)
	x2_temp = np.zeros((2,2)).astype(np.complex64)
	x1_temp[0,0] = x_mod[0,2*i]
	x1_temp[0,1] = -np.conjugate(x_mod[0,(2*i)+1])
	x1_temp[1,0] = x_mod[0,(2*i)+1]
	x1_temp[1,1] = np.conjugate(x_mod[0,2*i])
	x2_temp[0,0] = x_mod[1,2*i]
	x2_temp[0,1] = -np.conjugate(x_mod[1,(2*i)+1])
	x2_temp[1,0] = x_mod[1,(2*i)+1]
	x2_temp[1,1] = np.conjugate(x_mod[1,2*i])
	#testprint
	#print "x1_temp = ", x1_temp
	#print "x2_temp = ", x2_temp

	# r1 = [r11 r12], r2 = [r21 r22]
	r1 = np.dot(h, x1_temp) + np.dot(h_bar, x2_temp) + n
	r2 = np.dot(g, x1_temp) + np.dot(g_bar, x2_temp) + n

	#testprint
	#print "r1 is ", r1
	#print "r2 is ", r2

	# rx_sym will store r1 and r2 as [rx1_1, rx1_2, rx1_3,.....; rx2_1, rx2_2 rx2_3,......] rx1 is receiver 1
	# and rx2 is receiver 2. rx1_1 is total received 1st symbol on rx1 and rx2_1 is total received 1st symbol on rx2,
	# and so on
	# Combine [r11, r12] and [r21, r22] as [r1;r2], note that ; implies next row (Similar tor MATLAB manipulations)
	rx_sym[0,i] = r1[0,0] + r1[0,1]
	rx_sym[1,i] = r2[0,0] + r2[0,1]

#testprint
print "Received matrix of 10 symbols is ", rx_sym

# Generate P and Q matrices, P = tranpose([h;g]) and Q = transpose([h_bar;g_bar])
P = np.array([[h[0,0], h[0,1]], [g[0,0], g[0,1]]]).astype(np.complex64)
#Q = np.array([[h_bar[0,0], h_bar[0,1]], [g_bar[0,0], g_bar[0,1]]])

#P = np.array([[h[0,0], h[0,1]], [h_conj[0,1], -h_conj[0,0]]])
Q = np.array([[h_bar[0,0], h_bar[0,1]], [g_bar[0,0], g_bar[0,1]]]).astype(np.complex64)
#test print

print "P matrix is ", P
print "Q matrix is ", Q


#Calculate (|H|f)^2 + (|G|f)^2 where |H|f and |G|f are frobenius norms of h and g matrices
HfGf_sqr = (LA.norm(h, 'fro'))**2 + (LA.norm(g, 'fro'))**2
print LA.norm(h, 'fro')**2
print LA.norm(g, 'fro')**2
#testprint
print "(|H|f)^2 + (|G|f)^2 =  ", HfGf_sqr
print "hbar square + gbar square = ", (LA.norm(h_bar, 'fro'))**2 + (LA.norm(g_bar, 'fro'))**2

# Calculate adjoint(P). I could not find a function for this in numpy and hence manually performing it
P_adj = np.array([[g[0,1], -h[0,1]],[-g[0,0], h[0,0]]]).astype(np.complex64)
#P_adj = np.array([[-np.conjugate(h[0,0]), -np.conjugate(h[0,1])],[-h[0,1], h[0,0]]])
Q_adj = np.array([[g_bar[0,1], -h_bar[0,1]], [-g_bar[0,0], h_bar[0,0]]]).astype(np.complex64)
#testprint
print "adjoint(P) = ", P_adj
print "adjoint(Q) = ", Q_adj

print np.dot(P_adj, P) # as verification for P_adj.P = 1/2(HfGf_sqr).I
print np.dot(Q_adj, Q)
# Initialize decoded matrix with all 0s (2x2 matrix)
result_decode = np.zeros((2,N*2)).astype(np.complex64)
opencl_recvsymbols = np.zeros_like(result_decode)

# Perform Conditional Optimization here algorithm here and decode the received symbols correctly
for i in range(N) :
	r = np.array([[rx_sym[0,i]],[rx_sym[1,i]]])
	mean_sqr = 0
	temp= 100000 + 100000j
	temp_index = 0
	temp_s_bar = np.zeros((2,1)).astype(np.complex64)
	temp_c_bar = np.zeros((2,1)).astype(np.complex64)
	# Iterate through every combination of x3' and x4' from constellation of M symbols
	for j in range(M) :
		for k in range(M):
			# s_bar is tranpose([j k])
			Cs = np.zeros((2,1)).astype(np.complex64)
			s_bar = np.array([[constellation[j]],[constellation[k]]], np.complex64)
			Cs = np.dot(((2*P_adj)/(HfGf_sqr)),(r - np.dot(Q,s_bar)))
			# Take ceiling of x3'
			x3_real = ((Cs[0,0].real)/(np.absolute(Cs[0,0].real)))*np.ceil(np.absolute(Cs[0,0].real))
			x3_complex = ((Cs[0,0].imag)/(np.absolute(Cs[0,0].imag)))*np.ceil(np.absolute(Cs[0,0].imag))
			# Take floor of x4'
			x4_real = ((Cs[1,0].real)/(np.absolute(Cs[1,0].real)))*np.floor(np.absolute(Cs[1,0].real))
			x4_complex = ((Cs[1,0].imag)/(np.absolute(Cs[1,0].imag)))*np.floor(np.absolute(Cs[1,0].imag))

			Cs[0,0] = x3_real + 1j*x3_complex
			Cs[1,0] = x4_real + 1j*x4_complex
			mean_sqr = (LA.norm(r - np.dot(P,Cs) - np.dot(Q,s_bar), 2))**2
			#print mean_sqr
			if mean_sqr < temp :
				temp = mean_sqr
				temp_s_bar = s_bar
				temp_c_bar = Cs
	#print "Least Sqr Metric = ", temp
	result_decode[0,2*i] = temp_c_bar[0,0]
	result_decode[0,(2*i)+1] = temp_c_bar[1,0]
	result_decode[1,2*i] = temp_s_bar[0,0]
	result_decode[1, (2*i)+1] = temp_s_bar[1,0]

#print Cs_array
print 'Input = ', x_mod
print 'Decoded Result = ', result_decode

opencl_recvsymbols = np.zeros_like(result_decode)

# Create buffers
r_buf =cl.Buffer(ctx, mf.WRITE_ONLY, opencl_recvsymbols.nbytes)
input_received_buf = cl.Buffer(ctx, mf.READ_ONLY | mf.COPY_HOST_PTR, hostbuf=rx_sym)
P_buf = cl.Buffer(ctx, mf.READ_ONLY | mf.COPY_HOST_PTR, hostbuf=P)
Q_buf = cl.Buffer(ctx, mf.READ_ONLY | mf.COPY_HOST_PTR, hostbuf=Q)
P_adj_buf = cl.Buffer(ctx, mf.READ_ONLY | mf.COPY_HOST_PTR, hostbuf=P_adj)
constellation_buf = cl.Buffer(ctx, mf.READ_ONLY | mf.COPY_HOST_PTR, hostbuf=constellation)

# Launch Kernel
# Build kernel for decoding
prg = cl.Program(ctx, kernel).build()

prg.ml_decoder(queue, (N,) , None, r_buf, input_received_buf, P_buf, Q_buf, P_adj_buf, constellation_buf, np.int32(N), np.int32(M), np.float32(HfGf_sqr))

# Copy output back from buffer
cl.enqueue_copy(queue, opencl_recvsymbols, r_buf)

print opencl_recvsymbols