/alps/fastwriter

To get this branch, use:
bzr branch http://darksoft.org/webbzr/alps/fastwriter
12.1.1 by Suren A. Chilingaryan
Use memcpy implementation by Daniel Vik
1
/********************************************************************
2
 ** File:     memcpy.c
3
 **
4
 ** Copyright (C) 1999-2010 Daniel Vik
5
 ** 
6
 ** This software is provided 'as-is', without any express or implied
7
 ** warranty. In no event will the authors be held liable for any
8
 ** damages arising from the use of this software.
9
 ** Permission is granted to anyone to use this software for any
10
 ** purpose, including commercial applications, and to alter it and
11
 ** redistribute it freely, subject to the following restrictions:
12
 ** 
13
 ** 1. The origin of this software must not be misrepresented; you
14
 **    must not claim that you wrote the original software. If you
15
 **    use this software in a product, an acknowledgment in the
16
 **    use this software in a product, an acknowledgment in the
17
 **    product documentation would be appreciated but is not
18
 **    required.
19
 ** 
20
 ** 2. Altered source versions must be plainly marked as such, and
21
 **    must not be misrepresented as being the original software.
22
 ** 
23
 ** 3. This notice may not be removed or altered from any source
24
 **    distribution.
25
 ** 
26
 ** 
27
 ** Description: Implementation of the standard library function memcpy.
28
 **             This implementation of memcpy() is ANSI-C89 compatible.
29
 **
30
 **             The following configuration options can be set:
31
 **
32
 **           LITTLE_ENDIAN   - Uses processor with little endian
33
 **                             addressing. Default is big endian.
34
 **
35
 **           PRE_INC_PTRS    - Use pre increment of pointers.
36
 **                             Default is post increment of
37
 **                             pointers.
38
 **
39
 **           INDEXED_COPY    - Copying data using array indexing.
40
 **                             Using this option, disables the
41
 **                             PRE_INC_PTRS option.
42
 **
43
 **           MEMCPY_64BIT    - Compiles memcpy for 64 bit 
44
 **                             architectures
45
 **
46
 **
47
 ** Best Settings:
48
 **
49
 ** Intel x86:  LITTLE_ENDIAN and INDEXED_COPY
50
 **
51
 *******************************************************************/
52
53
54
55
/********************************************************************
 ** Configuration definitions.
 **
 ** This copy is configured for little endian targets using indexed
 ** copying. The guards only avoid redefinition warnings when the
 ** same names are already defined (e.g. on the command line).
 *******************************************************************/

#ifndef LITTLE_ENDIAN
#define LITTLE_ENDIAN
#endif

#ifndef INDEXED_COPY
#define INDEXED_COPY
#endif


/********************************************************************
 ** Includes for size_t definition
 *******************************************************************/

#include <stddef.h>


/********************************************************************
 ** Typedefs
 *******************************************************************/

typedef unsigned char       UInt8;
typedef unsigned short      UInt16;
typedef unsigned int        UInt32;
#ifdef _WIN32
typedef unsigned __int64    UInt64;
#else
typedef unsigned long long  UInt64;
#endif

/* UIntN is the word type used for bulk copying; TYPE_WIDTH is its
 * size in bytes and must stay a plain integer literal because it is
 * also evaluated by the preprocessor (#if TYPE_WIDTH > 4). */
#ifdef MEMCPY_64BIT
typedef UInt64              UIntN;
#define TYPE_WIDTH          8L
#else
typedef UInt32              UIntN;
#define TYPE_WIDTH          4L
#endif

/* Integer type used whenever an address is inspected or rounded.
 * It is deliberately independent of UIntN: casting a pointer to a
 * 32 bit UIntN on a 64 bit target truncates the address.
 * NOTE(review): size_t stands in for uintptr_t to avoid <stdint.h>;
 * this assumes size_t can hold a pointer value on the target. */
typedef size_t              UIntPtr;

/* Compile-time check: the build fails (negative array size) if UIntN
 * is not exactly TYPE_WIDTH bytes wide on this compiler. */
typedef char UIntNWidthCheck[(sizeof(UIntN) == (size_t)TYPE_WIDTH) ? 1 : -1];


/********************************************************************
 ** Remove definitions when INDEXED_COPY is defined.
 *******************************************************************/

#if defined (INDEXED_COPY)
#if defined (PRE_INC_PTRS)
#undef PRE_INC_PTRS
#endif /*PRE_INC_PTRS*/
#endif /*INDEXED_COPY*/


/********************************************************************
 ** Definitions for pre and post increment of pointers.
 **
 ** START_VAL          - one-time pointer adjustment before copying
 ** INC_VAL            - dereference combined with pointer stepping
 ** CAST_TO_U8         - word pointer -> writable byte pointer
 ** CAST_TO_CONST_U8   - word pointer -> read-only byte pointer
 ** WHILE_DEST_BREAK   - low address bits of dst8 at which the byte
 **                      wise alignment loop stops
 ** PRE_LOOP_ADJUST    - offset applied when byte pointers are turned
 **                      into word pointers
 ** PRE_SWITCH_ADJUST  - offset applied to src8 before its alignment
 **                      is examined
 *******************************************************************/

#if defined (PRE_INC_PTRS)

#define START_VAL(x)            (x)--
#define INC_VAL(x)              *++(x)
#define CAST_TO_U8(p, o)        ((UInt8*)(p) + (o) + TYPE_WIDTH)
#define CAST_TO_CONST_U8(p, o)  ((const UInt8*)(p) + (o) + TYPE_WIDTH)
#define WHILE_DEST_BREAK        (TYPE_WIDTH - 1)
#define PRE_LOOP_ADJUST         - (TYPE_WIDTH - 1)
#define PRE_SWITCH_ADJUST       + 1

#else /*PRE_INC_PTRS*/

#define START_VAL(x)
#define INC_VAL(x)              *(x)++
#define CAST_TO_U8(p, o)        ((UInt8*)(p) + (o))
#define CAST_TO_CONST_U8(p, o)  ((const UInt8*)(p) + (o))
#define WHILE_DEST_BREAK        0
#define PRE_LOOP_ADJUST
#define PRE_SWITCH_ADJUST

#endif /*PRE_INC_PTRS*/


/********************************************************************
 ** Definitions for endians
 **
 ** SHL moves the bytes of a word towards lower addresses, SHR moves
 ** them towards higher addresses. On little endian targets the byte
 ** at the lowest address is the least significant one, hence the
 ** swapped operators.
 *******************************************************************/

#if defined (LITTLE_ENDIAN)

#define SHL >>
#define SHR <<

#else /* LITTLE_ENDIAN */

#define SHL <<
#define SHR >>

#endif /* LITTLE_ENDIAN */


/********************************************************************
 ** Macros for copying words of different alignment.
 ** Uses incrementing pointers.
 **
 ** All copy macros expect dstN/srcN (and srcWord/dstWord for the
 ** shifting variants) to be in scope at the point of use.
 *******************************************************************/

#define CP_INCR() do {                      \
    INC_VAL(dstN) = INC_VAL(srcN);          \
} while (0)

/* Builds one destination word from the tail of the previously loaded
 * source word and the head of the next one. */
#define CP_INCR_SH(shl, shr) do {           \
    dstWord   = srcWord SHL (shl);          \
    srcWord   = INC_VAL(srcN);              \
    dstWord  |= srcWord SHR (shr);          \
    INC_VAL(dstN) = dstWord;                \
} while (0)


/********************************************************************
 ** Macros for copying words of different alignment.
 ** Uses array indexes.
 *******************************************************************/

#define CP_INDEX(idx) do {                  \
    dstN[(idx)] = srcN[(idx)];              \
} while (0)

#define CP_INDEX_SH(x, shl, shr) do {       \
    dstWord   = srcWord SHL (shl);          \
    srcWord   = srcN[(x)];                  \
    dstWord  |= srcWord SHR (shr);          \
    dstN[(x)] = dstWord;                    \
} while (0)


/********************************************************************
 ** Macros for copying words of different alignment.
 ** Uses incrementing pointers or array indexes depending on
 ** configuration.
 *******************************************************************/

#if defined (INDEXED_COPY)

#define CP(idx)               CP_INDEX(idx)
#define CP_SH(idx, shl, shr)  CP_INDEX_SH(idx, shl, shr)

#define INC_INDEX(p, o)       ((p) += (o))

#else /* INDEXED_COPY */

#define CP(idx)               CP_INCR()
#define CP_SH(idx, shl, shr)  CP_INCR_SH(shl, shr)

#define INC_INDEX(p, o)

#endif /* INDEXED_COPY */


/********************************************************************
 ** COPY_REMAINING(count)
 **
 ** Copies the last 0..7 bytes one at a time through dst8/src8.
 ** The switch enters at the number of bytes left and intentionally
 ** falls through every following case.
 *******************************************************************/

#define COPY_REMAINING(count) do {                                  \
    START_VAL(dst8);                                                \
    START_VAL(src8);                                                \
                                                                    \
    switch (count) {                                                \
    case 7: INC_VAL(dst8) = INC_VAL(src8);  /* fallthrough */       \
    case 6: INC_VAL(dst8) = INC_VAL(src8);  /* fallthrough */       \
    case 5: INC_VAL(dst8) = INC_VAL(src8);  /* fallthrough */       \
    case 4: INC_VAL(dst8) = INC_VAL(src8);  /* fallthrough */       \
    case 3: INC_VAL(dst8) = INC_VAL(src8);  /* fallthrough */       \
    case 2: INC_VAL(dst8) = INC_VAL(src8);  /* fallthrough */       \
    case 1: INC_VAL(dst8) = INC_VAL(src8);  /* fallthrough */       \
    case 0:                                                         \
    default: break;                                                 \
    }                                                               \
} while (0)


/********************************************************************
 ** COPY_NO_SHIFT()
 **
 ** Source and destination share the same word alignment: copies
 ** count / TYPE_WIDTH whole words (single words until the number of
 ** words left is a multiple of 8, then 8 words per iteration) and
 ** finishes with the trailing bytes.
 *******************************************************************/

#define COPY_NO_SHIFT() do {                                        \
    UIntN*       dstN = (UIntN*)(dst8 PRE_LOOP_ADJUST);             \
    const UIntN* srcN = (const UIntN*)(src8 PRE_LOOP_ADJUST);       \
    size_t length = count / TYPE_WIDTH;                             \
                                                                    \
    while (length & 7) {                                            \
        CP_INCR();                                                  \
        length--;                                                   \
    }                                                               \
                                                                    \
    length /= 8;                                                    \
                                                                    \
    while (length--) {                                              \
        CP(0);                                                      \
        CP(1);                                                      \
        CP(2);                                                      \
        CP(3);                                                      \
        CP(4);                                                      \
        CP(5);                                                      \
        CP(6);                                                      \
        CP(7);                                                      \
                                                                    \
        INC_INDEX(dstN, 8);                                         \
        INC_INDEX(srcN, 8);                                         \
    }                                                               \
                                                                    \
    src8 = CAST_TO_CONST_U8(srcN, 0);                               \
    dst8 = CAST_TO_U8(dstN, 0);                                     \
                                                                    \
    COPY_REMAINING(count & (TYPE_WIDTH - 1));                       \
} while (0)


/********************************************************************
 ** COPY_SHIFT(shift)
 **
 ** The source is `shift` bytes past a word boundary while the
 ** destination is word aligned. Whole aligned source words are
 ** loaded and every destination word is stitched together from two
 ** neighbouring source words.
 **
 ** Addresses are rounded through UIntPtr, never through UIntN, so
 ** that no address bits are lost when UIntN is narrower than a
 ** pointer.
 **
 ** Because whole aligned words are loaded, up to TYPE_WIDTH - 1
 ** bytes before src and after src + count may be read (never
 ** written); each such word also contains at least one byte of the
 ** requested source range.
 *******************************************************************/

#define COPY_SHIFT(shift) do {                                      \
    UIntN*       dstN =                                             \
        (UIntN*)((((UIntPtr)dst8) PRE_LOOP_ADJUST) &                \
                 ~(UIntPtr)(TYPE_WIDTH - 1));                       \
    const UIntN* srcN =                                             \
        (const UIntN*)((((UIntPtr)src8) PRE_LOOP_ADJUST) &          \
                       ~(UIntPtr)(TYPE_WIDTH - 1));                 \
    size_t length  = count / TYPE_WIDTH;                            \
    UIntN srcWord = INC_VAL(srcN);                                  \
    UIntN dstWord;                                                  \
                                                                    \
    while (length & 7) {                                            \
        CP_INCR_SH(8 * (shift), 8 * (TYPE_WIDTH - (shift)));        \
        length--;                                                   \
    }                                                               \
                                                                    \
    length /= 8;                                                    \
                                                                    \
    while (length--) {                                              \
        CP_SH(0, 8 * (shift), 8 * (TYPE_WIDTH - (shift)));          \
        CP_SH(1, 8 * (shift), 8 * (TYPE_WIDTH - (shift)));          \
        CP_SH(2, 8 * (shift), 8 * (TYPE_WIDTH - (shift)));          \
        CP_SH(3, 8 * (shift), 8 * (TYPE_WIDTH - (shift)));          \
        CP_SH(4, 8 * (shift), 8 * (TYPE_WIDTH - (shift)));          \
        CP_SH(5, 8 * (shift), 8 * (TYPE_WIDTH - (shift)));          \
        CP_SH(6, 8 * (shift), 8 * (TYPE_WIDTH - (shift)));          \
        CP_SH(7, 8 * (shift), 8 * (TYPE_WIDTH - (shift)));          \
                                                                    \
        INC_INDEX(dstN, 8);                                         \
        INC_INDEX(srcN, 8);                                         \
    }                                                               \
                                                                    \
    /* srcN is one word ahead of the bytes consumed so far. */      \
    src8 = CAST_TO_CONST_U8(srcN, ((shift) - TYPE_WIDTH));          \
    dst8 = CAST_TO_U8(dstN, 0);                                     \
                                                                    \
    COPY_REMAINING(count & (TYPE_WIDTH - 1));                       \
} while (0)


/********************************************************************
 **
 ** void *fast_memcpy(void *dest, const void *src, size_t count)
 **
 ** Args:     dest        - pointer to destination buffer
 **           src         - pointer to source buffer
 **           count       - number of bytes to copy
 **
 ** Return:   A pointer to destination buffer
 **
 ** Purpose:  Copies count bytes from src to dest.
 **           No overlap check is performed.
 **
 ** Method:   Fewer than 8 bytes are copied byte by byte. Otherwise
 **           bytes are copied until dest is word aligned, then whole
 **           words are copied with the variant matching the
 **           remaining misalignment of src, and finally the
 **           trailing bytes are copied.
 **
 ** Notes:    Altered from Daniel Vik's original memcpy.c (see the
 **           license at the top of this file): addresses are handled
 **           through a pointer-sized integer instead of UIntN, the
 **           source is accessed through const pointers, the copy
 **           macros no longer contain hidden return statements, and
 **           the function ends in an explicit return.
 **
 **           NOTE(review): the buffers are accessed through UIntN
 **           pointers regardless of their declared type; confirm the
 **           build's aliasing settings tolerate this.
 **
 *******************************************************************/

void *fast_memcpy(void *dest, const void *src, size_t count)
{
    UInt8*       dst8 = (UInt8*)dest;
    const UInt8* src8 = (const UInt8*)src;

    if (count < 8) {
        COPY_REMAINING(count);
        return dest;
    }

    START_VAL(dst8);
    START_VAL(src8);

    /* Byte copy until the destination reaches a word boundary. This
     * consumes at most TYPE_WIDTH - 1 (<= 7) of the >= 8 bytes, so
     * count cannot wrap around. */
    while (((UIntPtr)dst8 & (TYPE_WIDTH - 1)) != WHILE_DEST_BREAK) {
        INC_VAL(dst8) = INC_VAL(src8);
        count--;
    }

    /* Dispatch on how far the source is from a word boundary. */
    switch ((((UIntPtr)src8) PRE_SWITCH_ADJUST) & (TYPE_WIDTH - 1)) {
    case 0: COPY_NO_SHIFT(); break;
    case 1: COPY_SHIFT(1);   break;
    case 2: COPY_SHIFT(2);   break;
    case 3: COPY_SHIFT(3);   break;
#if TYPE_WIDTH > 4
    case 4: COPY_SHIFT(4);   break;
    case 5: COPY_SHIFT(5);   break;
    case 6: COPY_SHIFT(6);   break;
    case 7: COPY_SHIFT(7);   break;
#endif
    default: break;
    }

    return dest;
}