/* swar-add-2x16b-pragma.c */
/* Assembler needs option --has-swar */
//A: '--has-swar'
#include <stdint.h>
/* Unsigned int carrying the toolchain-specific `subword(16)` attribute.
 * NOTE(review): presumably this marks the value as packed 16-bit lanes for
 * the SWAR-enabled compiler -- confirm against the toolchain documentation. */
typedef unsigned int s2x16b __attribute__((subword(16)));
/* Number of uint16_t elements in each test array. */
#define ALEN 20
/* First operand array. Every value in input_a and input_b is below 32768,
 * so no element-wise sum exceeds the 16-bit range. */
uint16_t input_a[ALEN] = {17554, 30725, 25860, 2562, 8722, 23094, 16793, 12791,
4507, 14944, 8703, 28534, 22887, 32420, 15943, 10643,
26325, 18166, 20856, 15838};
/* Second operand array. */
uint16_t input_b[ALEN] = {12996, 32492, 15993, 30426, 13600, 14398, 15600, 11328,
8375, 14670, 14411, 85, 985, 12091, 11686, 6561,
19045, 9275, 5777, 17885};
/* Sums produced by the plain uint16_t loop (do_as_uint16). */
uint16_t output_zU16[ALEN];
/* Sums produced through s2x16b pointers (do_as_s2x16b and the pragma variants). */
uint16_t output_zS2x16b[ALEN];
/* Reference implementation: adds input_a and input_b element by element as
 * plain uint16_t values and stores the ALEN sums in output_zU16. */
void do_as_uint16(void)
{
    const uint16_t *lhs = input_a;
    const uint16_t *rhs = input_b;
    uint16_t *sum = output_zU16;
    int idx = 0;

    while (idx < ALEN) {
        sum[idx] = lhs[idx] + rhs[idx];
        ++idx;
    }
}
void do_as_s2x16b(void)
{
s2x16b *pa = (s2x16b *)input_a;
s2x16b *pb = (s2x16b *)input_b;
s2x16b *pz = (s2x16b *)output_zS2x16b;
for(int i=0;i<ALEN/2;++i) {
pz[i] = pa[i] + pb[i];
}
}
/* Runs the same s2x16b addition loop as do_as_s2x16b, with
 * `#pragma swar saturate` placed at the top of a nested block.
 * Results overwrite output_zS2x16b.
 * NOTE(review): the pragma presumably selects saturating lane arithmetic;
 * its exact semantics are defined by the toolchain -- confirm there. */
void do_as_s2x16b_sat(void)
{
{ /* nested block: workaround so the pragma is not shared with other functions (see the note in do_as_s2x16b_norm) */
#pragma swar saturate
s2x16b *pa = (s2x16b *)input_a;
s2x16b *pb = (s2x16b *)input_b;
s2x16b *pz = (s2x16b *)output_zS2x16b;
/* ALEN/2 iterations, one s2x16b element per step */
for(int i=0;i<ALEN/2;++i) {
pz[i] = pa[i] + pb[i];
}
}
}
/* Runs the same s2x16b addition loop as do_as_s2x16b, with
 * `#pragma swar reduce` placed at the top of a nested block.
 * Results overwrite output_zS2x16b.
 * NOTE(review): the meaning of the `reduce` mode is defined by the
 * toolchain and is not visible in this file -- confirm there. */
void do_as_s2x16b_red(void)
{
{ /* nested block: workaround so the pragma is not shared with other functions (see the note in do_as_s2x16b_norm) */
#pragma swar reduce
s2x16b *pa = (s2x16b *)input_a;
s2x16b *pb = (s2x16b *)input_b;
s2x16b *pz = (s2x16b *)output_zS2x16b;
/* ALEN/2 iterations, one s2x16b element per step */
for(int i=0;i<ALEN/2;++i) {
pz[i] = pa[i] + pb[i];
}
}
}
/* Runs the same s2x16b addition loop as do_as_s2x16b, with
 * `#pragma swar normalize` placed at the top of a nested block.
 * Results overwrite output_zS2x16b.
 * NOTE(review): the meaning of the `normalize` mode is defined by the
 * toolchain and is not visible in this file -- confirm there. */
void do_as_s2x16b_norm(void)
{
{ /* The extra inner block is a workaround for `#pragma swar`: without it, the pragma is shared with other functions. */
#pragma swar normalize
s2x16b *pa = (s2x16b *)input_a;
s2x16b *pb = (s2x16b *)input_b;
s2x16b *pz = (s2x16b *)output_zS2x16b;
/* ALEN/2 iterations, one s2x16b element per step */
for(int i=0;i<ALEN/2;++i) {
pz[i] = pa[i] + pb[i];
}
}
}
/* Runs the same s2x16b addition loop as do_as_s2x16b, with
 * `#pragma swar manual` placed directly in the function body.
 * Results overwrite output_zS2x16b.
 * NOTE(review): unlike the saturate/reduce/normalize variants, this function
 * has no nested block around the pragma. According to the note in
 * do_as_s2x16b_norm, a pragma without that block is shared with other
 * functions -- confirm whether that is intentional here. The meaning of the
 * `manual` mode is defined by the toolchain; verify it there as well. */
void do_as_s2x16b_dis(void)
{
#pragma swar manual
s2x16b *pa = (s2x16b *)input_a;
s2x16b *pb = (s2x16b *)input_b;
s2x16b *pz = (s2x16b *)output_zS2x16b;
/* ALEN/2 iterations, one s2x16b element per step */
for(int i=0;i<ALEN/2;++i) {
pz[i] = pa[i] + pb[i];
}
}
/* Test driver.
 *
 * Computes the element-wise sums once with the plain uint16_t loop and once
 * through s2x16b pointers, then counts the positions where the two result
 * arrays differ. Afterwards the pragma-controlled variants are executed; they
 * overwrite output_zS2x16b, but their results are not compared.
 *
 * Returns the number of mismatching elements (0 means the test passed). */
int main(void)
{
    int mismatches = 0;
    int idx;

    /* start from zeroed result buffers */
    for (idx = 0; idx < ALEN; idx++) {
        output_zU16[idx] = 0;
        output_zS2x16b[idx] = 0;
    }

    /* scalar reference result, then the s2x16b result */
    do_as_uint16();
    do_as_s2x16b();

    /* debug dump, kept disabled:
     *   printf("Data before (a,b, z1, z2):\n");
     *   for(int i=0;i<ALEN;++i) {
     *     printf(" #%d: %u , %u , %u , %u\n", i, input_a[i], input_b[i], output_zU16[i], output_zS2x16b[i]);
     *   }
     */

    /* every differing element counts as one failure */
    for (idx = 0; idx < ALEN; idx++) {
        if (output_zS2x16b[idx] == output_zU16[idx]) {
            continue;
        }
        mismatches += 1;
    }

    /* exercise the pragma-controlled swar variants (results are not checked) */
    do_as_s2x16b_sat();
    do_as_s2x16b_red();
    do_as_s2x16b_norm();
    do_as_s2x16b_dis();

    return mismatches;
}