swar_10x3b.c
1.03 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
//A: --has-swar
//C: -daiteq-swar-enable
// Test case: element-wise arithmetic on arrays whose element type carries
// the `subword` attribute.

// Number of unsigned int words in each of the data arrays defined below.
#define DATA_SIZE 10
// unsigned int tagged with the compiler-specific attribute subword(3).
// NOTE(review): the name suggests ten 3-bit lanes packed into one word --
// confirm against the documentation of the compiler providing `subword`.
typedef unsigned int s10x3b __attribute__((subword(3)));
/*
 * Element-wise addition over packed-subword arrays.
 *
 * Stores a[i] + b[i] into z[i] for every i in [0, n), in ascending order.
 * Nothing is written when n <= 0.
 *
 * n - number of s10x3b elements to process
 * a - left-hand operands
 * b - right-hand operands
 * z - destination array
 */
void do_10x3b_add(int n, s10x3b a[], s10x3b b[], s10x3b z[])
{
    int idx = 0;

    while (idx < n) {
        z[idx] = a[idx] + b[idx];
        ++idx;
    }
}
/*
 * Element-wise subtraction over packed-subword arrays.
 *
 * Stores a[i] - b[i] into z[i] for every i in [0, n), in ascending order.
 * Nothing is written when n <= 0.
 *
 * n - number of s10x3b elements to process
 * a - minuends
 * b - subtrahends
 * z - destination array
 */
void do_10x3b_sub(int n, s10x3b a[], s10x3b b[], s10x3b z[])
{
    for (int k = 0; k < n; k++) {
        /* Both operands are read before the store, as in a[k] - b[k]. */
        const s10x3b minuend = a[k];
        const s10x3b subtrahend = b[k];

        z[k] = minuend - subtrahend;
    }
}
/*
 * Element-wise multiplication over packed-subword arrays.
 *
 * Stores a[i] * b[i] into z[i] for every i in [0, n), in ascending order.
 * Nothing is written when n <= 0.
 *
 * n - number of s10x3b elements to process
 * a - left-hand factors
 * b - right-hand factors
 * z - destination array
 */
void do_10x3b_mul(int n, s10x3b a[], s10x3b b[], s10x3b z[])
{
    /* Guard the empty case so the do/while below never runs for n <= 0. */
    if (n <= 0) {
        return;
    }

    int pos = 0;
    do {
        z[pos] = a[pos] * b[pos];
        ++pos;
    } while (pos < n);
}
/*
 * Operand and result storage; main() reinterprets these arrays as s10x3b.
 *
 * Each array holds exactly DATA_SIZE (10) words. The initializer lists
 * previously carried an 11th value, which does not fit the declared size
 * (excess initializer); it is dropped here so the lists match the arrays.
 */
unsigned int data_a[DATA_SIZE] = {0, 1, 2, 3, 4, 5, 6, 7, 8, 9};
unsigned int data_b[DATA_SIZE] = {1, 2, 3, 4, 5, 6, 7, 8, 9, 10};
/* Result buffer; zero-initialised because it has static storage duration. */
unsigned int data_z[DATA_SIZE];
int main(void)
{
// s8x4b a[NUM_PIXELS], b[NUM_PIXELS], z[NUM_PIXELS];
s10x3b *a, *b, *z;
a = (s10x3b *)data_a;
b = (s10x3b *)data_b;
z = (s10x3b *)data_z;
// for (int i=0;i<NUM_PIXELS;++i) {
// z[i] = a[i] + b[i];
// }
// z = a + b;
do_10x3b_add((DATA_SIZE+9)/10,a,b,z);
do_10x3b_sub((DATA_SIZE+9)/10,a,b,z);
do_10x3b_mul((DATA_SIZE+9)/10,a,b,z);
return 0;
}