/*
 * bzr branch: http://darksoft.org/webbzr/alps/fastwriter
 * 12.1.1 by Suren A. Chilingaryan
 * Use memcpy implementation by Daniel Vik
 */
/********************************************************************
|
2 |
** File: memcpy.c
|
|
3 |
**
|
|
4 |
** Copyright (C) 1999-2010 Daniel Vik
|
|
5 |
**
|
|
6 |
** This software is provided 'as-is', without any express or implied
|
|
7 |
** warranty. In no event will the authors be held liable for any
|
|
8 |
** damages arising from the use of this software.
|
|
9 |
** Permission is granted to anyone to use this software for any
|
|
10 |
** purpose, including commercial applications, and to alter it and
|
|
11 |
** redistribute it freely, subject to the following restrictions:
|
|
12 |
**
|
|
13 |
** 1. The origin of this software must not be misrepresented; you
|
|
14 |
** must not claim that you wrote the original software. If you
|
|
15 |
** use this software in a product, an acknowledgment in the
|
|
16 |
** use this software in a product, an acknowledgment in the
|
|
17 |
** product documentation would be appreciated but is not
|
|
18 |
** required.
|
|
19 |
**
|
|
20 |
** 2. Altered source versions must be plainly marked as such, and
|
|
21 |
** must not be misrepresented as being the original software.
|
|
22 |
**
|
|
23 |
** 3. This notice may not be removed or altered from any source
|
|
24 |
** distribution.
|
|
25 |
**
|
|
26 |
**
|
|
27 |
** Description: Implementation of the standard library function memcpy.
|
|
28 |
** This implementation of memcpy() is ANSI-C89 compatible.
|
|
29 |
**
|
|
30 |
** The following configuration options can be set:
|
|
31 |
**
|
|
32 |
** LITTLE_ENDIAN - Uses processor with little endian
|
|
33 |
** addressing. Default is big endian.
|
|
34 |
**
|
|
35 |
** PRE_INC_PTRS - Use pre increment of pointers.
|
|
36 |
** Default is post increment of
|
|
37 |
** pointers.
|
|
38 |
**
|
|
39 |
** INDEXED_COPY - Copying data using array indexing.
|
|
40 |
** Using this option, disables the
|
|
41 |
** PRE_INC_PTRS option.
|
|
42 |
**
|
|
43 |
** MEMCPY_64BIT - Compiles memcpy for 64 bit
|
|
44 |
** architectures
|
|
45 |
**
|
|
46 |
**
|
|
47 |
** Best Settings:
|
|
48 |
**
|
|
49 |
** Intel x86: LITTLE_ENDIAN and INDEXED_COPY
|
|
50 |
**
|
|
51 |
*******************************************************************/
/********************************************************************
** Altered source version (relative to the memcpy.c this is based on):
**  - the entry point is named fast_memcpy;
**  - pointer/integer arithmetic is carried out in size_t instead of
**    UIntN, so pointers are no longer truncated when UIntN is
**    narrower than a pointer;
**  - the multi-statement macros are wrapped in do { } while (0).
*******************************************************************/


/********************************************************************
** Configuration definitions.
**
** Both options are always enabled here. The #ifndef guards only
** prevent a redefinition when a macro of the same name is already
** defined before this point.
*******************************************************************/

#ifndef LITTLE_ENDIAN
#define LITTLE_ENDIAN
#endif

#ifndef INDEXED_COPY
#define INDEXED_COPY
#endif


/********************************************************************
** Includes for size_t definition
*******************************************************************/

#include <stddef.h>


/********************************************************************
** Typedefs
*******************************************************************/

typedef unsigned char       UInt8;
typedef unsigned short      UInt16;
typedef unsigned int        UInt32;
#ifdef _WIN32
typedef unsigned __int64    UInt64;
#else
typedef unsigned long long  UInt64;
#endif

/* UIntN is the word type used by the bulk copy loops; TYPE_WIDTH is
** its size in bytes. */
#ifdef MEMCPY_64BIT
typedef UInt64              UIntN;
#define TYPE_WIDTH          8L
#else
typedef UInt32              UIntN;
#define TYPE_WIDTH          4L
#endif


/********************************************************************
** Remove definitions when INDEXED_COPY is defined.
*******************************************************************/

#if defined (INDEXED_COPY)
#if defined (PRE_INC_PTRS)
#undef PRE_INC_PTRS
#endif /*PRE_INC_PTRS*/
#endif /*INDEXED_COPY*/



/********************************************************************
** Definitions for pre and post increment of pointers.
**
**  START_VAL(x)       - prepares pointer x before the first INC_VAL.
**  INC_VAL(x)         - lvalue for the next element, advancing x.
**  CAST_TO_U8(p, o)   - byte pointer corresponding to word pointer p,
**                       offset by o bytes.
**  WHILE_DEST_BREAK   - value of (dst8 & (TYPE_WIDTH - 1)) at which
**                       the destination alignment loop stops.
**  PRE_LOOP_ADJUST    - adjustment applied to a byte pointer before
**                       it is turned into a word pointer.
**  PRE_SWITCH_ADJUST  - adjustment applied to the source address
**                       before its misalignment is computed.
*******************************************************************/

#if defined (PRE_INC_PTRS)

#define START_VAL(x)            (x)--
#define INC_VAL(x)              *++(x)
#define CAST_TO_U8(p, o)        ((UInt8*)(p) + (o) + TYPE_WIDTH)
#define WHILE_DEST_BREAK        (TYPE_WIDTH - 1)
#define PRE_LOOP_ADJUST         - (TYPE_WIDTH - 1)
#define PRE_SWITCH_ADJUST       + 1

#else /*PRE_INC_PTRS*/

#define START_VAL(x)
#define INC_VAL(x)              *(x)++
#define CAST_TO_U8(p, o)        ((UInt8*)(p) + (o))
#define WHILE_DEST_BREAK        0
#define PRE_LOOP_ADJUST
#define PRE_SWITCH_ADJUST

#endif /*PRE_INC_PTRS*/



/********************************************************************
** Definitions for endians
**
** SHL/SHR are the shift operators used when a destination word is
** assembled from two consecutive source words; their direction is
** swapped for little endian.
*******************************************************************/

#if defined (LITTLE_ENDIAN)

#define SHL >>
#define SHR <<

#else /* LITTLE_ENDIAN */

#define SHL <<
#define SHR >>

#endif /* LITTLE_ENDIAN */



/********************************************************************
** Macros for copying words of different alignment.
** Uses incrementing pointers.
**
** These expand inside fast_memcpy() and use its local variables
** dstN, srcN, srcWord and dstWord.
*******************************************************************/

#define CP_INCR() do {                                              \
    INC_VAL(dstN) = INC_VAL(srcN);                                  \
} while (0)

#define CP_INCR_SH(shl, shr) do {                                   \
    dstWord   = srcWord SHL (shl);                                  \
    srcWord   = INC_VAL(srcN);                                      \
    dstWord  |= srcWord SHR (shr);                                  \
    INC_VAL(dstN) = dstWord;                                        \
} while (0)



/********************************************************************
** Macros for copying words of different alignment.
** Uses array indexes.
*******************************************************************/

#define CP_INDEX(idx) do {                                          \
    dstN[idx] = srcN[idx];                                          \
} while (0)

#define CP_INDEX_SH(x, shl, shr) do {                               \
    dstWord   = srcWord SHL (shl);                                  \
    srcWord   = srcN[x];                                            \
    dstWord  |= srcWord SHR (shr);                                  \
    dstN[x]   = dstWord;                                            \
} while (0)



/********************************************************************
** Macros for copying words of different alignment.
** Uses incrementing pointers or array indexes depending on
** configuration.
*******************************************************************/

#if defined (INDEXED_COPY)

#define CP(idx)               CP_INDEX(idx)
#define CP_SH(idx, shl, shr)  CP_INDEX_SH(idx, shl, shr)

#define INC_INDEX(p, o)       ((p) += (o))

#else /* INDEXED_COPY */

#define CP(idx)               CP_INCR()
#define CP_SH(idx, shl, shr)  CP_INCR_SH(shl, shr)

#define INC_INDEX(p, o)

#endif /* INDEXED_COPY */


/********************************************************************
** COPY_REMAINING(n): copies n bytes (0..7) from src8 to dst8 one
** byte at a time. The case labels fall through on purpose, so
** entering at case k copies exactly k bytes. Any other value of n
** copies nothing.
*******************************************************************/

#define COPY_REMAINING(n) do {                                      \
    START_VAL(dst8);                                                \
    START_VAL(src8);                                                \
                                                                    \
    switch (n) {                                                    \
    case 7: INC_VAL(dst8) = INC_VAL(src8);                          \
    case 6: INC_VAL(dst8) = INC_VAL(src8);                          \
    case 5: INC_VAL(dst8) = INC_VAL(src8);                          \
    case 4: INC_VAL(dst8) = INC_VAL(src8);                          \
    case 3: INC_VAL(dst8) = INC_VAL(src8);                          \
    case 2: INC_VAL(dst8) = INC_VAL(src8);                          \
    case 1: INC_VAL(dst8) = INC_VAL(src8);                          \
    case 0:                                                         \
    default: break;                                                 \
    }                                                               \
} while (0)

/********************************************************************
** COPY_NO_SHIFT(): word copy used when source and destination have
** the same alignment. Copies count / TYPE_WIDTH words (single words
** until the word count is a multiple of 8, then 8 words per
** iteration), copies the remaining count % TYPE_WIDTH bytes and
** returns dest from the enclosing function.
*******************************************************************/

#define COPY_NO_SHIFT() do {                                        \
    UIntN* dstN = (UIntN*)(dst8 PRE_LOOP_ADJUST);                   \
    UIntN* srcN = (UIntN*)(src8 PRE_LOOP_ADJUST);                   \
    size_t length = count / TYPE_WIDTH;                             \
                                                                    \
    while (length & 7) {                                            \
        CP_INCR();                                                  \
        length--;                                                   \
    }                                                               \
                                                                    \
    length /= 8;                                                    \
                                                                    \
    while (length--) {                                              \
        CP(0);                                                      \
        CP(1);                                                      \
        CP(2);                                                      \
        CP(3);                                                      \
        CP(4);                                                      \
        CP(5);                                                      \
        CP(6);                                                      \
        CP(7);                                                      \
                                                                    \
        INC_INDEX(dstN, 8);                                         \
        INC_INDEX(srcN, 8);                                         \
    }                                                               \
                                                                    \
    src8 = CAST_TO_U8(srcN, 0);                                     \
    dst8 = CAST_TO_U8(dstN, 0);                                     \
                                                                    \
    COPY_REMAINING(count & (TYPE_WIDTH - 1));                       \
                                                                    \
    return dest;                                                    \
} while (0)



/********************************************************************
** COPY_SHIFT(shift): word copy used when the source is 'shift' bytes
** past a word boundary while the destination is word aligned. The
** source pointer is rounded down to a word boundary and every
** destination word is assembled from two consecutive source words.
** Copies the remaining count % TYPE_WIDTH bytes afterwards and
** returns dest from the enclosing function.
**
** The addresses are manipulated as size_t with a full-width mask.
** Casting them to UIntN instead drops the upper address bits when
** UIntN is narrower than a pointer (64-bit target built without
** MEMCPY_64BIT).
**
** Note: because whole aligned source words are loaded, bytes of the
** first and last source word that lie outside [src, src + count)
** are read as well (they are never written to the destination).
*******************************************************************/

#define COPY_SHIFT(shift) do {                                      \
    UIntN* dstN  = (UIntN*)((((size_t)dst8) PRE_LOOP_ADJUST) &      \
                            ~(size_t)(TYPE_WIDTH - 1));             \
    UIntN* srcN  = (UIntN*)((((size_t)src8) PRE_LOOP_ADJUST) &      \
                            ~(size_t)(TYPE_WIDTH - 1));             \
    size_t length  = count / TYPE_WIDTH;                            \
    UIntN  srcWord = INC_VAL(srcN);                                 \
    UIntN  dstWord;                                                 \
                                                                    \
    while (length & 7) {                                            \
        CP_INCR_SH(8 * (shift), 8 * (TYPE_WIDTH - (shift)));        \
        length--;                                                   \
    }                                                               \
                                                                    \
    length /= 8;                                                    \
                                                                    \
    while (length--) {                                              \
        CP_SH(0, 8 * (shift), 8 * (TYPE_WIDTH - (shift)));          \
        CP_SH(1, 8 * (shift), 8 * (TYPE_WIDTH - (shift)));          \
        CP_SH(2, 8 * (shift), 8 * (TYPE_WIDTH - (shift)));          \
        CP_SH(3, 8 * (shift), 8 * (TYPE_WIDTH - (shift)));          \
        CP_SH(4, 8 * (shift), 8 * (TYPE_WIDTH - (shift)));          \
        CP_SH(5, 8 * (shift), 8 * (TYPE_WIDTH - (shift)));          \
        CP_SH(6, 8 * (shift), 8 * (TYPE_WIDTH - (shift)));          \
        CP_SH(7, 8 * (shift), 8 * (TYPE_WIDTH - (shift)));          \
                                                                    \
        INC_INDEX(dstN, 8);                                         \
        INC_INDEX(srcN, 8);                                         \
    }                                                               \
                                                                    \
    src8 = CAST_TO_U8(srcN, ((shift) - TYPE_WIDTH));                \
    dst8 = CAST_TO_U8(dstN, 0);                                     \
                                                                    \
    COPY_REMAINING(count & (TYPE_WIDTH - 1));                       \
                                                                    \
    return dest;                                                    \
} while (0)


/********************************************************************
**
** void *fast_memcpy(void *dest, const void *src, size_t count)
**
** Args:     dest        - pointer to destination buffer
**           src         - pointer to source buffer
**           count       - number of bytes to copy
**
** Return:   A pointer to destination buffer
**
** Purpose:  Copies count bytes from src to dest.
**           No overlap check is performed.
**
** Method:   Fewer than 8 bytes are copied byte by byte. Otherwise
**           bytes are copied until the destination is aligned to
**           TYPE_WIDTH, and the rest is copied in UIntN words by
**           COPY_NO_SHIFT / COPY_SHIFT, selected by the remaining
**           misalignment of the source.
**
** NOTE(review): the buffers are accessed through UIntN pointers;
**           confirm this is acceptable under the aliasing rules of
**           the compilers and optimisation levels in use.
**
*******************************************************************/

void *fast_memcpy(void *dest, const void *src, size_t count)
{
    UInt8* dst8 = (UInt8*)dest;
    UInt8* src8 = (UInt8*)src;   /* only ever read through */

    if (count < 8) {
        COPY_REMAINING(count);
        return dest;
    }

    START_VAL(dst8);
    START_VAL(src8);

    /* Consumes at most TYPE_WIDTH - 1 bytes; count >= 8 here, so it
    ** cannot underflow. */
    while (((size_t)dst8 & (TYPE_WIDTH - 1)) != WHILE_DEST_BREAK) {
        INC_VAL(dst8) = INC_VAL(src8);
        count--;
    }

    /* Every COPY_* macro below returns dest itself. */
    switch ((((size_t)src8) PRE_SWITCH_ADJUST) & (TYPE_WIDTH - 1)) {
    case 0: COPY_NO_SHIFT(); break;
    case 1: COPY_SHIFT(1);   break;
    case 2: COPY_SHIFT(2);   break;
    case 3: COPY_SHIFT(3);   break;
#if TYPE_WIDTH > 4
    case 4: COPY_SHIFT(4);   break;
    case 5: COPY_SHIFT(5);   break;
    case 6: COPY_SHIFT(6);   break;
    case 7: COPY_SHIFT(7);   break;
#endif
    }

    /* Not reached: the switch covers every value of the masked
    ** address. Kept so that no path leaves the function without a
    ** return value. */
    return dest;
}