92
111
#define HST_XCACHE_LD128 2 //!< Store the SUBH_X cache as groups of float2/float4 numbers to reduce the number of memory instructions
93
112
//#define HST_SQUARE_PPT //!< NO. Little effect on performance (slightly reduces register usage, but doesn't affect computations as the loops are unrolled anyway)
95
118
#define HST_NEWCACHE //!< YES! New way of caching that reduces the number of shmem reads/writes
102
126
//#define HST_NEWCACHE_UNPAD //!< Add an `if` check and remove the padding in the shmem cache (low efficiency)