3
// File: fft_internal.h
7
// Disclaimer: IMPORTANT: This Apple software is supplied to you by Apple Inc. ("Apple")
8
// in consideration of your agreement to the following terms, and your use,
9
// installation, modification or redistribution of this Apple software
10
// constitutes acceptance of these terms. If you do not agree with these
11
// terms, please do not use, install, modify or redistribute this Apple
14
// In consideration of your agreement to abide by the following terms, and
15
// subject to these terms, Apple grants you a personal, non - exclusive
16
// license, under Apple's copyrights in this original Apple software ( the
17
// "Apple Software" ), to use, reproduce, modify and redistribute the Apple
18
// Software, with or without modifications, in source and / or binary forms;
19
// provided that if you redistribute the Apple Software in its entirety and
20
// without modifications, you must retain this notice and the following text
21
// and disclaimers in all such redistributions of the Apple Software. Neither
22
// the name, trademarks, service marks or logos of Apple Inc. may be used to
23
// endorse or promote products derived from the Apple Software without specific
24
// prior written permission from Apple. Except as expressly stated in this
25
// notice, no other rights or licenses, express or implied, are granted by
26
// Apple herein, including but not limited to any patent rights that may be
27
// infringed by your derivative works or by other works in which the Apple
28
// Software may be incorporated.
30
// The Apple Software is provided by Apple on an "AS IS" basis. APPLE MAKES NO
31
// WARRANTIES, EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION THE IMPLIED
32
// WARRANTIES OF NON - INFRINGEMENT, MERCHANTABILITY AND FITNESS FOR A
33
// PARTICULAR PURPOSE, REGARDING THE APPLE SOFTWARE OR ITS USE AND OPERATION
34
// ALONE OR IN COMBINATION WITH YOUR PRODUCTS.
36
// IN NO EVENT SHALL APPLE BE LIABLE FOR ANY SPECIAL, INDIRECT, INCIDENTAL OR
37
// CONSEQUENTIAL DAMAGES ( INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
38
// SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
39
// INTERRUPTION ) ARISING IN ANY WAY OUT OF THE USE, REPRODUCTION, MODIFICATION
40
// AND / OR DISTRIBUTION OF THE APPLE SOFTWARE, HOWEVER CAUSED AND WHETHER
41
// UNDER THEORY OF CONTRACT, TORT ( INCLUDING NEGLIGENCE ), STRICT LIABILITY OR
42
// OTHERWISE, EVEN IF APPLE HAS BEEN ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
44
// Copyright ( C ) 2008 Apple Inc. All Rights Reserved.
46
////////////////////////////////////////////////////////////////////////////////////////////////////
49
#ifndef __CLFFT_INTERNAL_H
50
#define __CLFFT_INTERNAL_H
57
typedef enum kernel_dir_t
64
typedef struct kernel_info_t
69
size_t num_workgroups;
70
size_t num_xforms_per_workgroup;
71
size_t num_workitems_per_workgroup;
72
cl_fft_kernel_dir dir;
73
int in_place_possible;
79
// context in which fft resources are created and kernels are executed
85
// dimension of transform ... must be either 1D, 2D or 3D
88
// data format ... must be either interleaved or planar
89
clFFT_DataFormat format;
91
// string containing kernel source. Generated at runtime based on
92
// n, dim, format and other parameters
93
string *kernel_string;
95
// CL program containing source and kernels for this particular
96
// n, dim, data format
99
// linked list of kernels which need to be executed for this fft
100
cl_fft_kernel_info *kernel_info;
105
// twist kernel for virtualizing fft of very large sizes that do not
106
// fit in GPU global memory
107
cl_kernel twist_kernel;
109
// flag indicating if temporary intermediate buffer is needed or not.
110
// this depends on fft kernels being executed and if transform is
111
// in-place or out-of-place. e.g. Local memory fft (say 1D 1024 ...
112
// one that does not require global transpose) does not need a temporary buffer.
113
// A 2D 1024x1024 out-of-place fft, however, does require an intermediate buffer.
114
// If temp buffer is needed, its allocation is lazy i.e. it is not allocated
116
cl_int temp_buffer_needed;
118
// Batch size is runtime parameter and size of temporary buffer (if needed)
119
// depends on batch size. Allocation of temporary buffer is lazy i.e. it is
120
// only created when needed. Once it is created at first call of clFFT_Executexxx
121
// it is not reallocated unless clFFT_Executexxx is subsequently called with a
122
// batch size different than the first call. last_batch_size caches the last
123
// batch size with which this plan is used so that we don't keep allocating/deallocating
124
// temp buffer if same batch size is used again and again.
125
size_t last_batch_size;
127
// temporary buffer for interleaved plan
130
// temporary buffer for planar plan. Only one of tempmemobj or
131
// (tempmemobj_real, tempmemobj_imag) pair is valid (allocated) depending
132
// on the data format of the plan (planar or interleaved)
133
cl_mem tempmemobj_real, tempmemobj_imag;
135
// Maximum size of signal for which local memory transpose based
136
// fft is sufficient i.e. no global mem transpose (communication)
138
size_t max_localmem_fft_size;
140
// Maximum work items per work group allowed. This, along with max_radix below controls
141
// maximum local memory being used by fft kernels of this plan. Set to 256 by default
142
size_t max_work_item_per_workgroup;
144
// Maximum base radix for local memory fft ... this controls the maximum register
145
// space used by work items. Currently defaults to 16
148
// Device dependent parameter that tells how many work-items need to read consecutive
149
// values to make sure global memory access by work-items of a work-group result in
150
// coalesced memory access to utilize full bandwidth e.g. on NVidia tesla, this is 16
151
size_t min_mem_coalesce_width;
153
// Number of local memory banks. This is used to generate kernels with local memory
154
// transposes with appropriate padding to avoid bank conflicts to local memory
155
// e.g. on NVidia it is 16.
156
size_t num_local_mem_banks;
159
// Declaration of FFT1D; the definition lives outside this header.
//   plan - pointer to the cl_fft_plan to operate on.
//   dir  - kernel direction selector (a cl_fft_kernel_dir value).
// Returns nothing.
// NOTE(review): presumably performs the 1D FFT kernel setup/generation for
// `plan` along direction `dir` -- confirm against the definition.
void FFT1D(cl_fft_plan *plan, cl_fft_kernel_dir dir);