diff --git a/libclc/generic/include/clc/clc.h b/libclc/generic/include/clc/clc.h index b9062455c41f977b65279befbbcba327d1594e0c..38f71dbdee0c5b8185069c917cbf7da566721b33 100644 --- a/libclc/generic/include/clc/clc.h +++ b/libclc/generic/include/clc/clc.h @@ -65,6 +65,7 @@ #include #include #include +#include #include #include #include diff --git a/libclc/generic/include/clc/integer/hadd.h b/libclc/generic/include/clc/integer/hadd.h new file mode 100644 index 0000000000000000000000000000000000000000..37304e26cc2d628d580e2cfb810732a26898b3b9 --- /dev/null +++ b/libclc/generic/include/clc/integer/hadd.h @@ -0,0 +1,2 @@ +#define __CLC_BODY +#include diff --git a/libclc/generic/include/clc/integer/hadd.inc b/libclc/generic/include/clc/integer/hadd.inc new file mode 100644 index 0000000000000000000000000000000000000000..f698989cef2026b8e8868175ece6060389adc13c --- /dev/null +++ b/libclc/generic/include/clc/integer/hadd.inc @@ -0,0 +1 @@ +_CLC_OVERLOAD _CLC_DECL __CLC_GENTYPE hadd(__CLC_GENTYPE x, __CLC_GENTYPE y); diff --git a/libclc/generic/lib/SOURCES b/libclc/generic/lib/SOURCES index 9ac08bd9d2419433c6145aa01d7f9bd3e533273d..e936fbc6d94bd9d1b5e616d789344d6e5eb178ae 100644 --- a/libclc/generic/lib/SOURCES +++ b/libclc/generic/lib/SOURCES @@ -11,6 +11,7 @@ integer/add_sat_impl.ll integer/clz.cl integer/clz_if.ll integer/clz_impl.ll +integer/hadd.cl integer/mad24.cl integer/mul24.cl integer/rotate.cl diff --git a/libclc/generic/lib/integer/hadd.cl b/libclc/generic/lib/integer/hadd.cl new file mode 100644 index 0000000000000000000000000000000000000000..749026e5a8ad8183fff80fd81c01c4aa167e32b7 --- /dev/null +++ b/libclc/generic/lib/integer/hadd.cl @@ -0,0 +1,4 @@ +#include + +#define __CLC_BODY +#include diff --git a/libclc/generic/lib/integer/hadd.inc b/libclc/generic/lib/integer/hadd.inc new file mode 100644 index 0000000000000000000000000000000000000000..ea59d9bd7db5f83b3d8195b1280109d0f0578e0c --- /dev/null +++ b/libclc/generic/lib/integer/hadd.inc @@ -0,0 +1,6 @@ 
+// hadd(x, y) is (x + y) >> 1 computed without overflow: halve both operands,
+// then add back the carry lost when x and y both have their lowest bit set.
+_CLC_OVERLOAD _CLC_DEF __CLC_GENTYPE hadd(__CLC_GENTYPE x, __CLC_GENTYPE y) {
+  const __CLC_GENTYPE one = (__CLC_GENTYPE)1;
+  return (x >> one) + (y >> one) + (x & y & one);
+}