1
/* bitlex - Lexical Item Stream Module.
2
Copyright (c) 2007, 2008, Robert D. Cameron.
3
Licensed to the public under the Open Software License 3.0.
4
Licensed to International Characters, Inc., under the Academic
10
#include "../lib/lib_simd.h"
11
#include "xml_error.h"
15
#include "../codeclocker/clocker/code_clocker_session.h"
16
/* Code_Clocker handles for the instrumented lexer phases.
   WS_Control_clocker, MarkupStreams_clocker and char_validation_clocker are
   assigned from register_Code_Clocker() in Lexer<ASCII>::LexerFactory.
   NOTE(review): no assignment to transpose_clocker is visible in this file
   section -- confirm where it is registered. */
Code_Clocker * transpose_clocker;
17
Code_Clocker * WS_Control_clocker;
18
Code_Clocker * MarkupStreams_clocker;
19
Code_Clocker * char_validation_clocker;
24
/* Set up the state shared by all lexers: remember the lexical stream set l
   and allocate the bit-basis buffer (x8basis) and the validation stream
   with simd_new. */
Lexer_Interface::Lexer_Interface(Entity_Info * e, LexicalStreamSet *l) {
26
parsing_engine_data = l;
28
x8basis = (BitBlockBasis *) simd_new(BUFFER_SIZE/PACKSIZE);
29
validation_stream = (BitBlock *) simd_new(BUFFER_BLOCKS+SENTINEL_BLOCKS);
/* Initialise the block at index BUFFER_BLOCKS, i.e. the first block past
   the buffer proper, with the "constant 1" value of whichever SIMD library
   flavour is compiled in.  Presumably this is an all-ones sentinel that
   stops scans of the validation stream -- TODO confirm. */
30
#ifdef TEMPLATED_SIMD_LIB
31
validation_stream[BUFFER_BLOCKS] = simd<1>::constant<1>();
33
#ifndef TEMPLATED_SIMD_LIB
34
validation_stream[BUFFER_BLOCKS] = simd_const_1(1);
39
/* Release the validation stream allocated by the constructor.
   NOTE(review): x8basis is also obtained from simd_new in the constructor,
   but no matching simd_delete is visible here -- confirm whether it is
   freed (or owned) elsewhere, otherwise it leaks. */
Lexer_Interface::~Lexer_Interface() {
40
simd_delete((SIMD_type *) validation_stream);
45
/* Factory for ASCII-family input.  Chooses the concrete lexer from the
   entity information in *e: whether an encoding declaration is present,
   the code unit size, the byte order mark / byte order, and the declared
   encoding name.  Each successful path returns a lexer allocated with new;
   unsupported combinations are reported through NoEncodingError or
   EncodingError. */
Lexer_Interface * Lexer<ASCII>::LexerFactory(Entity_Info * e, LexicalStreamSet *l) {
/* Register the timing probes used by the lexer phases. */
48
WS_Control_clocker = register_Code_Clocker("WS", "WhiteSpace/Control\n");
49
MarkupStreams_clocker = register_Code_Clocker("bitlex", "Markup streams\n");
50
char_validation_clocker = register_Code_Clocker("charcheck", "Character validation\n");
/* No encoding declaration: only the code unit size and BOM can be used. */
53
if (!(e->has_encoding_decl)) {
54
// Must be UTF-8 or UTF-16; UTF-16 requires a ByteOrderMark.
55
if (e->code_unit_size == SingleByte) return new UTF_8_Lexer(e, l);
56
else if ((e->code_unit_size == DoubleByte))
57
if (e->BOM_units == 1) return new UTF_16_Lexer(e, l);
58
else NoEncodingError("UTF-16 implied but no byte order found.");
59
else NoEncodingError("UTF-32 without an encoding declaration.\n");
/* Encoding declaration present: match the declared name.  The length is
   checked first so each at_* test is only applied to a name of the
   expected size. */
62
int lgth = strlen((const char *) e->encoding);
63
CodeUnit_ByteOrder order = e->byte_order;
64
switch (e->code_unit_size) {
/* 8-bit names: UTF-8, ASCII, Latin-1. */
66
if ((lgth == 5) && at_UTF_8(e->encoding))
67
return new UTF_8_Lexer(e, l);
68
else if ((lgth == 5) && at_ASCII(e->encoding))
69
return new ASCII_7_Lexer(e, l);
70
else if ((lgth == 6) && at_Latin1(e->encoding))
71
return new EASCII_8_Lexer(e, l);
72
/* Really need a table-based lookup here */
73
else EncodingError("8-bit", e->encoding, lgth);
/* 16-bit names: with a BOM the generic UTF-16 / UCS-2 names are accepted;
   without one the name must carry the byte order matching e->byte_order. */
75
if (e->BOM_units == 1)
76
if ((lgth == 6) && at_UTF_16(e->encoding))
77
return new UTF_16_Lexer(e, l);
78
else if ((lgth == 5) && at_UCS_2(e->encoding))
79
return new UCS_2_Lexer(e, l);
80
else EncodingError("16-bit", e->encoding, lgth);
81
else if (order == BigEndian)
82
if ((lgth == 8) && at_UTF_16BE(e->encoding))
83
return new UTF_16_Lexer(e, l);
84
else if ((lgth == 7) && at_UCS_2BE(e->encoding))
85
return new UCS_2_Lexer(e, l);
86
else EncodingError("16BE", e->encoding, lgth);
87
else /*if (order == LittleEndian)*/
88
if ((lgth == 8) && at_UTF_16LE(e->encoding))
89
return new UTF_16_Lexer(e, l);
90
else if ((lgth == 7) && at_UCS_2LE(e->encoding))
91
return new UCS_2_Lexer(e, l);
92
else EncodingError("16LE", e->encoding, lgth);
/* 32-bit names: UTF-32 and UCS-4 variants all map to UTF_32_Lexer.  Unlike
   the 16-bit group, the little-endian branch is tested explicitly, so any
   other byte order falls through to the final EncodingError. */
94
if (e->BOM_units == 1)
95
if ((lgth == 6) && at_UTF_32(e->encoding))
96
return new UTF_32_Lexer(e, l);
97
else if ((lgth == 5) && at_UCS_4(e->encoding))
98
return new UTF_32_Lexer(e, l);
99
else EncodingError("32-bit", e->encoding, lgth);
100
else if (order == BigEndian)
101
if ((lgth == 8) && at_UTF_32BE(e->encoding))
102
return new UTF_32_Lexer(e, l);
103
else if ((lgth == 7) && at_UCS_4BE(e->encoding))
104
return new UTF_32_Lexer(e, l);
105
else EncodingError("32BE", e->encoding, lgth);
106
else if (order == LittleEndian)
107
if ((lgth == 8) && at_UTF_32LE(e->encoding))
108
return new UTF_32_Lexer(e, l);
109
else if ((lgth == 7) && at_UCS_4LE(e->encoding))
110
return new UTF_32_Lexer(e, l);
111
else EncodingError("32LE", e->encoding, lgth);
112
else EncodingError("32-bit", e->encoding, lgth);
118
/* Factory for EBCDIC-family input.  An encoding declaration is mandatory;
   the only declared name accepted here is the 6-character one recognised
   by at_EBCDIC, which yields an EBCDIC_Lexer.  Anything else is reported
   through NoEncodingError or EncodingError. */
Lexer_Interface * Lexer<EBCDIC>::LexerFactory(Entity_Info * e, LexicalStreamSet *l) {
119
if (!(e->has_encoding_decl)) {
120
// An EBCDIC-family entity cannot be identified without an encoding declaration.
121
NoEncodingError("EBCDIC-family inferred, but no encoding declaration present.\n");
124
int lgth = strlen((const char *) e->encoding);
125
/* Really need a table-based lookup here */
126
if ((lgth == 6) && at_EBCDIC(e->encoding))
127
return new EBCDIC_Lexer(e, l);
128
else EncodingError("EBCDIC family", e->encoding, lgth);
132
/* Constructors of the lexer class family.  Each one forwards e and l
   unchanged to its base-class constructor. */

/* Generic lexer for code unit base C; base is Lexer_Interface. */
template <CodeUnit_Base C>
133
Lexer<C>::Lexer(Entity_Info * e, LexicalStreamSet *l) : Lexer_Interface::Lexer_Interface(e, l) {
/* ASCII-family 8-bit lexers; base is Lexer<ASCII>. */
136
UTF_8_Lexer::UTF_8_Lexer(Entity_Info * e, LexicalStreamSet *l) : Lexer<ASCII>::Lexer(e, l) {
139
ASCII_7_Lexer::ASCII_7_Lexer(Entity_Info * e, LexicalStreamSet *l) : Lexer<ASCII>::Lexer(e, l) {
142
EASCII_8_Lexer::EASCII_8_Lexer(Entity_Info * e, LexicalStreamSet *l) : Lexer<ASCII>::Lexer(e, l) {
/* Common base of the 16-bit lexers; itself based on Lexer<ASCII>. */
145
U16_Lexer::U16_Lexer(Entity_Info * e, LexicalStreamSet *l) : Lexer<ASCII>::Lexer(e, l) {
148
UTF_16_Lexer::UTF_16_Lexer(Entity_Info * e, LexicalStreamSet *l) : U16_Lexer::U16_Lexer(e, l) {
151
UCS_2_Lexer::UCS_2_Lexer(Entity_Info * e, LexicalStreamSet *l) : U16_Lexer::U16_Lexer(e, l) {
/* 32-bit lexer; base is Lexer<ASCII>. */
154
UTF_32_Lexer::UTF_32_Lexer(Entity_Info * e, LexicalStreamSet *l) : Lexer<ASCII>::Lexer(e, l) {
/* EBCDIC-family lexer; base is Lexer<EBCDIC>. */
157
EBCDIC_Lexer::EBCDIC_Lexer(Entity_Info * e, LexicalStreamSet *l) : Lexer<EBCDIC>::Lexer(e, l) {
160
/* Primary template, declared only; it is specialised below for ASCII and
   EBCDIC.  Given the eight bit blocks bit[0..7] of one block of code
   units, a specialisation sets WS to the whitespace positions and Control
   to the control-character positions. */
template <CodeUnit_Base C>
161
static inline void WS_Control_Blocks(BitBlock bit[], BitBlock& WS, BitBlock& Control);
164
/* ASCII-family specialisation.
   bit[0] is treated as the high-order bit of each code unit, as in the
   UTF-8 prefix/suffix tests elsewhere in this file.  Under that reading:
     Control = positions with bit[0..2] all zero, i.e. 0x00-0x1F.  This
               includes CR, LF and HT; callers remove whitespace with
               simd_andc(Control, WS).
     WS      = CR (0x0D), LF (0x0A), HT (0x09) or SP (0x20). */
static inline void WS_Control_Blocks<ASCII>(BitBlock bit[], BitBlock& WS, BitBlock& Control) {
165
BitBlock temp1 = simd_or(bit[0], bit[1]);
166
BitBlock temp2 = simd_or(temp1, bit[2]);
167
#ifdef TEMPLATED_SIMD_LIB
168
Control = simd_andc(simd<1>::constant<1>(), temp2);
170
#ifndef TEMPLATED_SIMD_LIB
171
Control = simd_andc(simd_const_1(1), temp2);
/* temp4 is set wherever any of bit[0..3] is set; CR, LF and HT all
   require it to be clear (high nibble 0000). */
173
BitBlock temp3 = simd_or(bit[2], bit[3]);
174
BitBlock temp4 = simd_or(temp1, temp3);
/* CR: bit[4..7] = 1101. */
175
BitBlock temp5 = simd_and(bit[4], bit[5]);
176
BitBlock temp6 = simd_andc(bit[7], bit[6]);
177
BitBlock temp7 = simd_and(temp5, temp6);
178
BitBlock CR = simd_andc(temp7, temp4);
/* LF: bit[4..7] = 1010. */
179
BitBlock temp8 = simd_andc(bit[4], bit[5]);
180
BitBlock temp9 = simd_andc(bit[6], bit[7]);
181
BitBlock temp10 = simd_and(temp8, temp9);
182
BitBlock LF = simd_andc(temp10, temp4);
/* HT: bit[4..7] = 1001. */
183
BitBlock temp11 = simd_and(temp8, temp6);
184
BitBlock HT = simd_andc(temp11, temp4);
/* SP: bit[0..3] = 0010 and bit[4..7] = 0000. */
185
BitBlock temp12 = simd_andc(bit[2], bit[3]);
186
BitBlock temp13 = simd_andc(temp12, temp1);
187
BitBlock temp14 = simd_or(bit[4], bit[5]);
188
BitBlock temp15 = simd_or(bit[6], bit[7]);
189
BitBlock temp16 = simd_or(temp14, temp15);
190
BitBlock SP = simd_andc(temp13, temp16);
191
WS = simd_or(simd_or(CR, LF), simd_or(HT, SP));
195
/* EBCDIC-family specialisation.
   Control is the complement of temp49, which is built by successively
   removing individual code-unit classes (temp11, temp18, temp23, ...) from
   temp5.  The style matches the generated code elsewhere in this file;
   presumably it should not be edited by hand -- confirm.
   With bit[0] as the high-order bit, the whitespace terms select the code
   units CR = 0x0D, LF = 0x25, HT = 0x05 and SP = 0x40. */
static inline void WS_Control_Blocks<EBCDIC>(BitBlock bit[], BitBlock& WS, BitBlock& Control) {
196
BitBlock temp1 = simd_or(bit[0], bit[1]);
197
BitBlock temp2 = simd_or(bit[2], bit[3]);
198
BitBlock temp3 = simd_or(temp1, temp2);
199
BitBlock temp4 = simd_or(bit[4], bit[5]);
200
BitBlock temp5 = simd_or(temp3, temp4);
201
BitBlock temp6 = simd_and(bit[2], bit[3]);
202
BitBlock temp7 = simd_andc(temp6, temp1);
203
BitBlock temp8 = simd_andc(bit[5], bit[4]);
204
BitBlock temp9 = simd_and(bit[6], bit[7]);
205
BitBlock temp10 = simd_and(temp8, temp9);
206
BitBlock temp11 = simd_and(temp7, temp10);
207
BitBlock temp12 = simd_andc(temp5, temp11);
208
BitBlock temp13 = simd_andc(bit[2], bit[3]);
209
BitBlock temp14 = simd_andc(temp13, temp1);
210
BitBlock temp15 = simd_and(bit[4], bit[5]);
211
BitBlock temp16 = simd_and(temp14, temp15);
212
BitBlock temp17 = simd_andc(bit[6], bit[7]);
213
BitBlock temp18 = simd_andc(temp16, temp17);
214
BitBlock temp19 = simd_andc(temp12, temp18);
215
BitBlock temp20 = simd_andc(bit[3], bit[2]);
216
BitBlock temp21 = simd_andc(temp20, temp1);
217
BitBlock temp22 = simd_and(temp8, temp17);
218
BitBlock temp23 = simd_and(temp21, temp22);
219
BitBlock temp24 = simd_andc(temp19, temp23);
220
BitBlock temp25 = simd_or(temp1, bit[2]);
221
BitBlock temp26 = simd_or(bit[5], temp9);
222
BitBlock temp27 = simd_and(bit[4], temp26);
223
#ifdef TEMPLATED_SIMD_LIB
224
BitBlock temp28 = simd_andc(simd<1>::constant<1>(), temp4);
226
#ifndef TEMPLATED_SIMD_LIB
227
BitBlock temp28 = simd_andc(simd_const_1(1), temp4);
229
BitBlock temp29 = simd_if(bit[3], temp27, temp28);
230
BitBlock temp30 = simd_andc(temp29, temp25);
231
BitBlock temp31 = simd_andc(temp24, temp30);
232
BitBlock temp32 = simd_andc(temp15, bit[6]);
233
BitBlock temp33 = simd_and(temp7, temp32);
234
BitBlock temp34 = simd_andc(temp31, temp33);
235
BitBlock temp35 = simd_andc(temp17, temp4);
236
BitBlock temp36 = simd_and(temp7, temp35);
237
BitBlock temp37 = simd_andc(temp34, temp36);
238
BitBlock temp38 = simd_and(temp8, bit[6]);
239
BitBlock temp39 = simd_and(temp14, temp38);
240
BitBlock temp40 = simd_andc(temp37, temp39);
241
BitBlock temp41 = simd_andc(bit[4], bit[5]);
242
BitBlock temp42 = simd_andc(temp41, bit[6]);
243
BitBlock temp43 = simd_and(temp21, temp42);
244
BitBlock temp44 = simd_andc(temp40, temp43);
245
BitBlock temp45 = simd_and(temp15, temp9);
246
BitBlock temp46 = simd_and(temp7, temp45);
247
BitBlock temp47 = simd_andc(temp44, temp46);
248
BitBlock temp48 = simd_and(temp21, temp15);
249
BitBlock temp49 = simd_andc(temp47, temp48);
250
#ifdef TEMPLATED_SIMD_LIB
251
Control = simd_andc(simd<1>::constant<1>(), temp49);
253
#ifndef TEMPLATED_SIMD_LIB
254
Control = simd_andc(simd_const_1(1), temp49);
/* Whitespace: CR and HT need bit[0..3] clear (temp3), LF needs bit[0..3]
   = 0010 (temp14), SP needs bit[0..3] = 0100 with bit[4..7] clear. */
256
BitBlock temp50 = simd_andc(bit[7], bit[6]);
257
BitBlock temp51 = simd_and(temp15, temp50);
258
BitBlock CR = simd_andc(temp51, temp3);
259
BitBlock temp52 = simd_and(temp8, temp50);
260
BitBlock LF = simd_and(temp14, temp52);
261
BitBlock HT = simd_andc(temp52, temp3);
262
BitBlock temp53 = simd_andc(bit[1], bit[0]);
263
BitBlock temp54 = simd_andc(temp53, temp2);
264
BitBlock temp55 = simd_or(bit[6], bit[7]);
265
BitBlock temp56 = simd_or(temp4, temp55);
266
BitBlock SP = simd_andc(temp54, temp56);
267
WS = simd_or(simd_or(CR, LF), simd_or(HT, SP));
272
/* XML 1.0 whitespace/control pass.  For every buffer block, compute WS and
   Control with WS_Control_Blocks<C>, then
     - store the complement of WS as the NonWS item stream, and
     - store Control-and-not-WS in validation_stream, so only control
       characters that are not whitespace are marked. */
template <CodeUnit_Base C>
273
void Lexer<C>::Do_XML_10_WS_Control() {
274
BitBlock Control, WS;
275
for (int i = 0; i < buffer_blocks; i++) {
276
WS_Control_Blocks<C>(x8basis[i].bit,
279
parsing_engine_data->item_stream[NonWS][i] = simd_not(WS);
280
validation_stream[i] = simd_andc(Control, WS);
286
/* Primary template, declared only; specialised below for ASCII and EBCDIC.
   A specialisation reads the eight bit blocks bit[0..7] of one block of
   code units and fills LexItem[] with one BitBlock per lexical item. */
template <CodeUnit_Base C>
287
static inline void ComputeLexicalItemBlocks(BitBlock bit[], BitBlock LexItem[]);
289
/* Given the bit[] array of one BitBlock each for the 8 bits of
290
an ASCII-family character representation, compute the parallel
291
lexical item streams needed for XML parsing.
293
WARNING: the following is generated code by charset_compiler.py.
299
/* ASCII-family specialisation.
   bit[0] is treated as the high-order bit of each code unit, as in the
   UTF-8 tests elsewhere in this file.  Under that reading the streams
   written here mark:
     MarkupStart   '<' (0x3C) or '&' (0x26), plus CD_End_check positions
                   unless OMIT_CD_End_check_In_Markup_Scan is defined
     Hyphen        '-' (0x2D)
     QMark         '?' (0x3F)
     Quote         '"' (0x22), '\'' (0x27), '<' or '&'
     NameFollow    a generated character class; Do_MarkupStreams later adds
                   whitespace positions to it
     NonDigit      complement of '0'-'9'              (DIGIT_AND_HEX_ITEMS)
     NonHex        complement of '0'-'9','A'-'F','a'-'f' (DIGIT_AND_HEX_ITEMS)
     AmpHashSlash  '&', '#' (0x23) or '/' (0x2F)      (MARKUP_SORTING)
     CD_End_check  see the comment near the end of the function.
   RAngle ('>') and RBracket (']') are locals used only for CD_End_check. */
static inline void ComputeLexicalItemBlocks<ASCII>(BitBlock bit[], BitBlock LexItem[]) {
300
BitBlock temp1 = simd_or(bit[0], bit[1]);
301
BitBlock temp2 = simd_and(bit[2], bit[3]);
302
BitBlock temp3 = simd_andc(temp2, temp1);
303
BitBlock temp4 = simd_and(bit[4], bit[5]);
304
BitBlock temp5 = simd_or(bit[6], bit[7]);
305
BitBlock temp6 = simd_andc(temp4, temp5);
306
BitBlock temp7 = simd_and(temp3, temp6);
307
BitBlock temp8 = simd_andc(bit[2], bit[3]);
308
BitBlock temp9 = simd_andc(temp8, temp1);
309
BitBlock temp10 = simd_andc(bit[5], bit[4]);
310
BitBlock temp11 = simd_andc(bit[6], bit[7]);
311
BitBlock temp12 = simd_and(temp10, temp11);
312
BitBlock temp13 = simd_and(temp9, temp12);
313
LexItem[MarkupStart] = simd_or(temp7, temp13);
314
BitBlock temp14 = simd_and(temp4, temp11);
315
BitBlock RAngle = simd_and(temp3, temp14);
316
BitBlock temp15 = simd_andc(bit[1], bit[0]);
317
BitBlock temp16 = simd_andc(bit[3], bit[2]);
318
BitBlock temp17 = simd_and(temp15, temp16);
319
BitBlock temp18 = simd_andc(bit[7], bit[6]);
320
BitBlock temp19 = simd_and(temp4, temp18);
321
BitBlock RBracket = simd_and(temp17, temp19);
322
LexItem[Hyphen] = simd_and(temp9, temp19);
323
BitBlock temp20 = simd_and(bit[6], bit[7]);
324
BitBlock temp21 = simd_and(temp4, temp20);
325
LexItem[QMark] = simd_and(temp3, temp21);
326
BitBlock temp22 = simd_or(bit[4], bit[5]);
327
BitBlock temp23 = simd_andc(temp11, temp22);
328
BitBlock temp24 = simd_and(temp10, temp20);
329
BitBlock temp25 = simd_or(temp23, temp24);
330
BitBlock temp26 = simd_and(temp9, temp25);
331
BitBlock temp27 = simd_or(temp26, temp7);
332
LexItem[Quote] = simd_or(temp27, temp13);
333
BitBlock temp28 = simd_and(bit[5], temp5);
334
BitBlock temp29 = simd_and(bit[4], temp28);
335
BitBlock temp30 = simd_andc(temp29, temp21);
336
BitBlock temp31 = simd_andc(temp9, temp30);
337
BitBlock temp32 = simd_and(temp3, bit[4]);
338
BitBlock temp33 = simd_or(bit[5], temp20);
339
BitBlock temp34 = simd_and(temp32, temp33);
340
BitBlock temp35 = simd_or(temp31, temp34);
341
BitBlock temp36 = simd_and(temp17, bit[4]);
342
#ifdef TEMPLATED_SIMD_LIB
343
BitBlock temp37 = simd_andc(simd<1>::constant<1>(), temp20);
345
#ifndef TEMPLATED_SIMD_LIB
346
BitBlock temp37 = simd_andc(simd_const_1(1), temp20);
348
BitBlock temp38 = simd_if(bit[5], temp37, temp20);
349
BitBlock temp39 = simd_and(temp36, temp38);
350
BitBlock temp40 = simd_or(temp35, temp39);
351
BitBlock temp41 = simd_and(temp15, temp2);
352
BitBlock temp42 = simd_and(temp41, bit[4]);
353
BitBlock temp43 = simd_and(temp42, temp38);
354
LexItem[NameFollow] = simd_or(temp40, temp43);
355
#ifdef DIGIT_AND_HEX_ITEMS
356
BitBlock temp44 = simd_or(bit[5], bit[6]);
357
BitBlock temp45 = simd_and(bit[4], temp44);
358
BitBlock Digit = simd_andc(temp3, temp45);
359
BitBlock temp46 = simd_or(bit[2], bit[3]);
360
BitBlock temp47 = simd_andc(temp15, temp46);
361
BitBlock temp48 = simd_andc(temp47, bit[4]);
362
BitBlock temp49 = simd_if(bit[5], temp37, temp5);
363
BitBlock temp50 = simd_and(temp48, temp49);
364
BitBlock temp51 = simd_or(Digit, temp50);
365
BitBlock temp52 = simd_and(temp15, temp8);
366
BitBlock temp53 = simd_andc(temp52, bit[4]);
367
BitBlock temp54 = simd_and(temp53, temp49);
368
BitBlock Hex = simd_or(temp51, temp54);
369
LexItem[NonDigit] = simd_not(Digit);
370
LexItem[NonHex] = simd_not(Hex);
372
#ifdef MARKUP_SORTING
373
BitBlock temp55 = simd_andc(temp20, temp22);
374
BitBlock temp56 = simd_or(temp12, temp55);
375
BitBlock temp57 = simd_or(temp56, temp21);
376
LexItem[AmpHashSlash] = simd_and(temp9, temp57);
379
/* Mark potential occurrences of ']]>' These are all actual
380
occurrences of ]]> as well as occurrences of ]] or ] at
381
the block end. Shifting the RBracket and RAngle streams in
382
negated forms ensures that a potential CD_End is not ruled
383
out at the block boundary. */
384
LexItem[CD_End_check] = simd_andc(RBracket,
385
simd_or(sisd_sbli(simd_not(RBracket), 1),
386
sisd_sbli(simd_not(RAngle), 2)));
387
#ifndef OMIT_CD_End_check_In_Markup_Scan
388
LexItem[MarkupStart] = simd_or(LexItem[MarkupStart], LexItem[CD_End_check]);
393
/* EBCDIC-family specialisation.
   Fills LexItem[] with one BitBlock per lexical item for one block of
   EBCDIC code units, mirroring the ASCII specialisation.  With bit[0] as
   the high-order bit, the simple items select these code units:
     MarkupStart   0x4C or 0x50 (presumably '<' and '&' in the targeted
                   code page), plus CD_End_check positions unless
                   OMIT_CD_End_check_In_Markup_Scan is defined
     Hyphen        0x60
     QMark         0x6F
     Quote         0x7D, 0x7F, 0x4C or 0x50
     NameFollow    a generated character class
     NonDigit / NonHex   complements of the digit / hex-digit classes
                         (DIGIT_AND_HEX_ITEMS)
     CD_End_check  see the comment near the end of the function.
   RAngle (0x6E) and RBracket (0xBB) are locals.
   Fix: QMark used to be computed only into a local, leaving
   LexItem[QMark] unwritten although Do_MarkupStreams copies every item
   stream; it is now stored, as in the ASCII specialisation.
   NOTE(review): no LexItem[AmpHashSlash] computation is visible here for
   MARKUP_SORTING builds -- confirm whether EBCDIC needs one. */
static inline void ComputeLexicalItemBlocks<EBCDIC>(BitBlock bit[], BitBlock LexItem[]) {
394
BitBlock temp1 = simd_andc(bit[1], bit[0]);
395
BitBlock temp2 = simd_or(bit[2], bit[3]);
396
BitBlock temp3 = simd_andc(temp1, temp2);
397
BitBlock temp4 = simd_and(bit[4], bit[5]);
398
BitBlock temp5 = simd_or(bit[6], bit[7]);
399
BitBlock temp6 = simd_andc(temp4, temp5);
400
BitBlock temp7 = simd_and(temp3, temp6);
401
BitBlock temp8 = simd_andc(bit[3], bit[2]);
402
BitBlock temp9 = simd_and(temp1, temp8);
403
BitBlock temp10 = simd_or(bit[4], bit[5]);
404
BitBlock temp11 = simd_or(temp10, temp5);
405
BitBlock temp12 = simd_andc(temp9, temp11);
406
LexItem[MarkupStart] = simd_or(temp7, temp12);
407
BitBlock temp13 = simd_andc(bit[2], bit[3]);
408
BitBlock temp14 = simd_and(temp1, temp13);
409
BitBlock temp15 = simd_andc(bit[6], bit[7]);
410
BitBlock temp16 = simd_and(temp4, temp15);
411
BitBlock RAngle = simd_and(temp14, temp16);
412
BitBlock temp17 = simd_andc(bit[0], bit[1]);
413
BitBlock temp18 = simd_and(bit[2], bit[3]);
414
BitBlock temp19 = simd_and(temp17, temp18);
415
BitBlock temp20 = simd_andc(bit[4], bit[5]);
416
BitBlock temp21 = simd_and(bit[6], bit[7]);
417
BitBlock temp22 = simd_and(temp20, temp21);
418
BitBlock RBracket = simd_and(temp19, temp22);
419
LexItem[Hyphen] = simd_andc(temp14, temp11);
420
BitBlock temp23 = simd_and(temp4, temp21);
421
BitBlock QMark = simd_and(temp14, temp23);
/* Publish the QMark stream; the local is still used for NameFollow below. */
LexItem[QMark] = QMark;
422
BitBlock temp24 = simd_and(temp1, temp18);
423
BitBlock temp25 = simd_and(temp4, bit[7]);
424
BitBlock temp26 = simd_and(temp24, temp25);
425
BitBlock temp27 = simd_or(temp26, temp7);
426
LexItem[Quote] = simd_or(temp27, temp12);
427
BitBlock temp28 = simd_andc(temp3, temp11);
428
BitBlock temp29 = simd_and(temp20, temp15);
429
BitBlock temp30 = simd_and(temp9, temp29);
430
BitBlock temp31 = simd_or(temp28, temp30);
431
BitBlock temp32 = simd_and(temp24, temp23);
432
BitBlock temp33 = simd_or(temp31, temp32);
433
BitBlock temp34 = simd_and(temp24, temp22);
434
BitBlock temp35 = simd_or(temp33, temp34);
435
BitBlock temp36 = simd_and(temp9, temp22);
436
BitBlock temp37 = simd_or(temp35, temp36);
437
BitBlock temp38 = simd_and(temp14, temp6);
438
BitBlock temp39 = simd_or(temp37, temp38);
439
BitBlock temp40 = simd_or(temp39, temp12);
440
BitBlock temp41 = simd_andc(bit[7], bit[6]);
441
BitBlock temp42 = simd_and(temp4, temp41);
442
BitBlock temp43 = simd_and(temp24, temp42);
443
BitBlock temp44 = simd_or(temp40, temp43);
444
BitBlock temp45 = simd_and(temp3, temp42);
445
BitBlock temp46 = simd_or(temp44, temp45);
446
BitBlock temp47 = simd_and(temp9, temp42);
447
BitBlock temp48 = simd_or(temp46, temp47);
448
BitBlock temp49 = simd_and(temp9, temp6);
449
BitBlock temp50 = simd_or(temp48, temp49);
450
BitBlock temp51 = simd_and(temp3, temp16);
451
BitBlock temp52 = simd_or(temp50, temp51);
452
BitBlock temp53 = simd_and(temp14, temp22);
453
BitBlock temp54 = simd_or(temp52, temp53);
454
BitBlock temp55 = simd_andc(temp41, temp10);
455
BitBlock temp56 = simd_and(temp14, temp55);
456
BitBlock temp57 = simd_or(temp54, temp56);
457
BitBlock temp58 = simd_and(temp9, temp16);
458
BitBlock temp59 = simd_or(temp57, temp58);
459
BitBlock temp60 = simd_or(temp59, temp7);
460
BitBlock temp61 = simd_and(temp24, temp16);
461
BitBlock temp62 = simd_or(temp60, temp61);
462
BitBlock temp63 = simd_or(temp62, RAngle);
463
BitBlock temp64 = simd_or(temp63, QMark);
464
BitBlock temp65 = simd_and(temp19, temp29);
465
BitBlock temp66 = simd_or(temp64, temp65);
466
BitBlock temp67 = simd_and(bit[0], bit[1]);
467
BitBlock temp68 = simd_and(temp67, temp13);
468
BitBlock temp69 = simd_andc(temp68, temp11);
469
BitBlock temp70 = simd_or(temp66, temp69);
470
BitBlock temp71 = simd_or(temp70, RBracket);
471
BitBlock temp72 = simd_andc(temp19, temp11);
472
BitBlock temp73 = simd_or(temp71, temp72);
473
BitBlock temp74 = simd_andc(temp67, temp2);
474
BitBlock temp75 = simd_andc(temp74, temp11);
475
BitBlock temp76 = simd_or(temp73, temp75);
476
BitBlock temp77 = simd_and(temp3, temp23);
477
BitBlock temp78 = simd_or(temp76, temp77);
478
BitBlock temp79 = simd_and(temp67, temp8);
479
BitBlock temp80 = simd_andc(temp79, temp11);
480
BitBlock temp81 = simd_or(temp78, temp80);
481
BitBlock temp82 = simd_and(temp17, temp13);
482
BitBlock temp83 = simd_and(temp82, temp55);
483
LexItem[NameFollow] = simd_or(temp81, temp83);
484
#ifdef DIGIT_AND_HEX_ITEMS
485
BitBlock temp84 = simd_and(temp67, temp18);
486
BitBlock temp85 = simd_andc(temp11, temp55);
487
BitBlock temp86 = simd_andc(temp15, temp10);
488
BitBlock temp87 = simd_andc(temp85, temp86);
489
BitBlock temp88 = simd_andc(temp21, temp10);
490
BitBlock temp89 = simd_andc(temp87, temp88);
491
BitBlock temp90 = simd_andc(bit[5], bit[4]);
492
BitBlock temp91 = simd_andc(temp90, temp5);
493
BitBlock temp92 = simd_andc(temp89, temp91);
494
BitBlock temp93 = simd_and(temp90, temp41);
495
BitBlock temp94 = simd_andc(temp92, temp93);
496
BitBlock temp95 = simd_and(temp90, temp15);
497
BitBlock temp96 = simd_andc(temp94, temp95);
498
BitBlock temp97 = simd_and(temp90, temp21);
499
BitBlock temp98 = simd_andc(temp96, temp97);
500
BitBlock temp99 = simd_andc(temp20, temp5);
501
BitBlock temp100 = simd_andc(temp98, temp99);
502
BitBlock temp101 = simd_and(temp20, temp41);
503
BitBlock temp102 = simd_andc(temp100, temp101);
504
BitBlock Digit = simd_andc(temp84, temp102);
505
BitBlock temp103 = simd_and(temp74, temp55);
506
BitBlock temp104 = simd_or(Digit, temp103);
507
BitBlock temp105 = simd_and(temp74, temp86);
508
BitBlock temp106 = simd_or(temp104, temp105);
509
BitBlock temp107 = simd_and(temp74, temp88);
510
BitBlock temp108 = simd_or(temp106, temp107);
511
BitBlock temp109 = simd_and(temp74, temp91);
512
BitBlock temp110 = simd_or(temp108, temp109);
513
BitBlock temp111 = simd_and(temp74, temp93);
514
BitBlock temp112 = simd_or(temp110, temp111);
515
BitBlock temp113 = simd_and(temp74, temp95);
516
BitBlock temp114 = simd_or(temp112, temp113);
517
BitBlock temp115 = simd_andc(temp17, temp2);
518
BitBlock temp116 = simd_and(temp115, temp55);
519
BitBlock temp117 = simd_or(temp114, temp116);
520
BitBlock temp118 = simd_and(temp115, temp86);
521
BitBlock temp119 = simd_or(temp117, temp118);
522
BitBlock temp120 = simd_and(temp115, temp88);
523
BitBlock temp121 = simd_or(temp119, temp120);
524
BitBlock temp122 = simd_and(temp115, temp91);
525
BitBlock temp123 = simd_or(temp121, temp122);
526
BitBlock temp124 = simd_and(temp115, temp93);
527
BitBlock temp125 = simd_or(temp123, temp124);
528
BitBlock temp126 = simd_and(temp115, temp95);
529
BitBlock Hex = simd_or(temp125, temp126);
531
LexItem[NonDigit] = simd_not(Digit);
532
LexItem[NonHex] = simd_not(Hex);
536
/* Mark potential occurrences of ']]>' These are all actual
537
occurrences of ]]> as well as occurrences of ]] or ] at
538
the block end. Shifting the RBracket and RAngle streams in
539
negated forms ensures that a potential CD_End is not ruled
540
out at the block boundary. */
541
LexItem[CD_End_check] = simd_andc(RBracket,
542
simd_or(sisd_sbli(simd_not(RBracket), 1),
543
sisd_sbli(simd_not(RAngle), 2)));
544
#ifndef OMIT_CD_End_check_In_Markup_Scan
545
LexItem[MarkupStart] = simd_or(LexItem[MarkupStart], LexItem[CD_End_check]);
550
/* A temporary structure for internal use in ComputeLexicalItemStreams.
   It holds one BitBlock per lexical item for a single block;
   Lexer<C>::Do_MarkupStreams keeps an array of these (lx_blk) and passes
   each element's LexicalItems to ComputeLexicalItemBlocks<C>. */
552
BitBlock LexicalItems[LexicalItemCount];
558
/* Build the lexical item streams for the current buffer in three passes:
     1. compute the per-block items into the local array lx_blk,
     2. copy them item by item (MarkupStart up to LexicalItemCount) into
        parsing_engine_data->item_stream,
     3. OR the whitespace positions (complement of NonWS) into NameFollow.
   NOTE(review): lx_blk has BUFFER_BLOCKS elements but the loops run to
   buffer_blocks -- this assumes buffer_blocks <= BUFFER_BLOCKS; confirm. */
template <CodeUnit_Base C>
559
void Lexer<C>::Do_MarkupStreams() {
560
LexicalItemBlock lx_blk[BUFFER_BLOCKS];
561
for (int i = 0; i < buffer_blocks; i++) {
562
ComputeLexicalItemBlocks<C>(x8basis[i].bit, lx_blk[i].LexicalItems);
564
/* NonWS stream already completed by WS_Control method. */
565
for (int j = MarkupStart; j < LexicalItemCount; j++) {
566
for (int i = 0; i < buffer_blocks; i++) {
567
parsing_engine_data->item_stream[j][i] = lx_blk[i].LexicalItems[j];
/* Whitespace also terminates a name, so add it to NameFollow. */
570
for (int i = 0; i < buffer_blocks; i++) {
571
parsing_engine_data->item_stream[NameFollow][i] =
572
simd_or(parsing_engine_data->item_stream[NameFollow][i],
573
simd_not(parsing_engine_data->item_stream[NonWS][i]));
578
/* Validate UTF-8 block by block and record the error positions of block i
   in validation_stream[i].
   bit[0] is the high-order bit of each byte: bit[0]&bit[1] marks prefix
   bytes and bit[0]&~bit[1] marks suffix bytes.  Requirements raised by a
   prefix near the end of a block are carried to the next iteration in the
   *_pending variables and picked up there as the matching *_scope values. */
void UTF_8_Lexer::Do_CharsetValidation() {
579
BitBlock u8prefix, u8suffix, u8prefix2, u8prefix3or4, u8prefix3, u8prefix4;
581
/* UTF-8 sequences may cross block boundaries. If a
582
prefix is found near the end of a block that requires
583
one or more suffixes in the next block, then
584
prefix_pending is set to mark the positions.
585
However, at the beginning of the buffer, no suffixes
586
are expected, so this value is initialized to zeroes. */
587
#ifdef TEMPLATED_SIMD_LIB
588
BitBlock prefix_pending = simd<1>::constant<0>();
589
/* If a suffix is pending, then it may involve one of
590
the special case prefixes E0, ED, F0, F4, or the
591
EF prefix or EF_BF combination for FFFF/FFFE detection.*/
592
BitBlock E0ED_pending = simd<1>::constant<0>();
593
BitBlock F0F4_pending = simd<1>::constant<0>();
594
BitBlock bit5_pending = simd<1>::constant<0>();
595
BitBlock EF_pending = simd<1>::constant<0>();
596
BitBlock EF_BF_pending = simd<1>::constant<0>();
598
#ifndef TEMPLATED_SIMD_LIB
599
BitBlock prefix_pending = simd_const_1(0);
600
/* If a suffix is pending, then it may involve one of
601
the special case prefixes E0, ED, F0, F4, or the
602
EF prefix or EF_BF combination for FFFF/FFFE detection.*/
603
BitBlock E0ED_pending = simd_const_1(0);
604
BitBlock F0F4_pending = simd_const_1(0);
605
BitBlock bit5_pending = simd_const_1(0);
606
BitBlock EF_pending = simd_const_1(0);
607
BitBlock EF_BF_pending = simd_const_1(0);
609
/* Temporary variables used within the block. */
610
BitBlock suffix_required_scope;
611
BitBlock prefix_E0ED, E0ED_scope, bit5_scope, E0ED_constraint;
612
BitBlock prefix_F5FF, prefix_F0F4, F0F4_scope, F0F4_constraint;
613
BitBlock X111x, B111x, prefix_EF, BF, EF_BF, EF_scope, EF_BF_scope;
615
for (int i = 0; i < buffer_blocks; i++) {
616
#ifdef TEMPLATED_SIMD_LIB
617
validation_stream[i] = simd<1>::constant<0>();
619
#ifndef TEMPLATED_SIMD_LIB
620
validation_stream[i] = simd_const_1(0);
622
/* If there is no pending suffix and no bit 0, then there
623
are no possible validation issues for this block. */
624
if (!bitblock_has_bit(simd_or(prefix_pending, x8basis[i].bit[0])))
626
/* Compute classifications of UTF-8 bytes. */
627
u8prefix = simd_and(x8basis[i].bit[0], x8basis[i].bit[1]);
628
u8suffix = simd_andc(x8basis[i].bit[0], x8basis[i].bit[1]);
629
u8prefix3or4 = simd_and(u8prefix, x8basis[i].bit[2]);
630
u8prefix2 = simd_andc(u8prefix, x8basis[i].bit[2]);
631
u8prefix3 = simd_andc(u8prefix3or4, x8basis[i].bit[3]);
632
u8prefix4 = simd_and(u8prefix3or4, x8basis[i].bit[3]);
634
/* Initiate validation for two-byte sequences. */
635
error_mask = simd_andc(u8prefix2,
636
simd_or(simd_or(x8basis[i].bit[3], x8basis[i].bit[4]),
637
simd_or(x8basis[i].bit[5], x8basis[i].bit[6])));
638
suffix_required_scope = simd_or(prefix_pending, sisd_sfli(u8prefix, 1));
/* Consume the state carried in from the previous block. */
640
prefix_pending = sisd_sbli(u8prefix, BLOCKSIZE - 1);
641
E0ED_scope = E0ED_pending;
642
F0F4_scope = F0F4_pending;
643
bit5_scope = bit5_pending;
644
EF_scope = EF_pending;
645
EF_BF_scope = EF_BF_pending;
647
/* Default values of pending variables for next iteration. */
648
#ifdef TEMPLATED_SIMD_LIB
649
E0ED_pending = simd<1>::constant<0>();
650
F0F4_pending = simd<1>::constant<0>();
651
bit5_pending = simd<1>::constant<0>();
652
EF_pending = simd<1>::constant<0>();
653
EF_BF_pending = simd<1>::constant<0>();
655
#ifndef TEMPLATED_SIMD_LIB
656
E0ED_pending = simd_const_1(0);
657
F0F4_pending = simd_const_1(0);
658
bit5_pending = simd_const_1(0);
659
EF_pending = simd_const_1(0);
660
EF_BF_pending = simd_const_1(0);
663
X111x = simd_and(simd_and(x8basis[i].bit[4], x8basis[i].bit[5]), x8basis[i].bit[6]);
664
B111x = simd_and(simd_and(u8suffix, simd_and(x8basis[i].bit[2], x8basis[i].bit[3])),
666
BF = simd_and(B111x, x8basis[i].bit[7]);
667
EF_BF = simd_and(EF_scope, BF);
669
if (bitblock_has_bit(u8prefix3or4)) {
670
/* Extend validation for errors in three-byte sequences. */
671
suffix_required_scope = simd_or(suffix_required_scope,
672
sisd_sfli(u8prefix3or4, 2));
673
bit5_scope = simd_or(bit5_scope, sisd_sfli(x8basis[i].bit[5], 1));
674
prefix_E0ED = simd_andc(u8prefix3,
675
simd_or(simd_or(x8basis[i].bit[6],
676
simd_xor(x8basis[i].bit[4], x8basis[i].bit[7])),
677
simd_xor(x8basis[i].bit[4], x8basis[i].bit[5])));
678
E0ED_scope = simd_or(E0ED_scope, sisd_sfli(prefix_E0ED, 1));
679
prefix_EF = simd_and(u8prefix3, simd_and(X111x, x8basis[i].bit[7]));
680
EF_scope = simd_or(EF_scope, sisd_sfli(prefix_EF, 1));
681
EF_BF = simd_and(EF_scope, BF);
683
/* Values for next iteration. */
684
prefix_pending = simd_or(prefix_pending,
685
sisd_sbli(u8prefix3or4, BLOCKSIZE - 2));
686
bit5_pending = sisd_sbli(x8basis[i].bit[5], BLOCKSIZE - 1);
687
E0ED_pending = sisd_sbli(prefix_E0ED, BLOCKSIZE - 1);
688
EF_pending = sisd_sbli(prefix_EF, BLOCKSIZE - 1);
689
EF_BF_pending = sisd_sbli(EF_BF, BLOCKSIZE - 2);
690
if (bitblock_has_bit(u8prefix4)) {
691
/* Extend validation for errors in four-byte sequences. */
692
suffix_required_scope = simd_or(suffix_required_scope,
693
sisd_sfli(u8prefix4, 3));
694
prefix_pending = simd_or(prefix_pending,
695
sisd_sbli(u8prefix4, BLOCKSIZE - 3));
696
prefix_F5FF = simd_and(u8prefix4,
697
simd_or(x8basis[i].bit[4],
698
simd_and(x8basis[i].bit[5],
699
simd_or(x8basis[i].bit[6], x8basis[i].bit[7]))));
700
error_mask = simd_or(error_mask, prefix_F5FF);
701
prefix_F0F4 = simd_andc(u8prefix4,
702
simd_or(x8basis[i].bit[4],
703
simd_or(x8basis[i].bit[6], x8basis[i].bit[7])));
704
F0F4_scope = simd_or(F0F4_scope, sisd_sfli(prefix_F0F4, 1));
705
F0F4_pending = sisd_sbli(prefix_F0F4, BLOCKSIZE - 1);
/* Check the first suffix after an E0/ED or F0/F4 prefix: an error is
   flagged where the scope is set but the constraint bit is clear. */
708
E0ED_constraint = simd_xor(bit5_scope, x8basis[i].bit[2]);
709
error_mask = simd_or(error_mask, simd_andc(E0ED_scope, E0ED_constraint));
710
F0F4_constraint = simd_xor(bit5_scope,
711
simd_or(x8basis[i].bit[2], x8basis[i].bit[3]));
712
error_mask = simd_or(error_mask, simd_andc(F0F4_scope, F0F4_constraint));
713
/* Complete validation by checking for prefix-suffix mismatches. */
714
error_mask = simd_or(error_mask, simd_xor(suffix_required_scope, u8suffix));
/* Flag a third byte of the form 1011111x following EF BF (FFFE/FFFF). */
716
EF_BF_scope = simd_or(EF_BF_scope, sisd_sfli(EF_BF, 1));
717
error_mask = simd_or(error_mask, simd_and(EF_BF_scope, B111x));
718
validation_stream[i] = error_mask;
719
#ifdef DEBUG_UTF8_VALIDATION
720
// if (bitblock_has_bit(error_mask)) {
721
printf("-%i----------------------\n", i);
722
print_bit_block("x8basis[i].bit[0]", x8basis[i].bit[0]);
723
print_bit_block("x8basis[i].bit[1]", x8basis[i].bit[1]);
724
print_bit_block("x8basis[i].bit[2]", x8basis[i].bit[2]);
725
print_bit_block("x8basis[i].bit[3]", x8basis[i].bit[3]);
726
print_bit_block("u8prefix2", u8prefix2);
727
print_bit_block("u8prefix3", u8prefix3);
728
print_bit_block("u8prefix4", u8prefix4);
729
print_bit_block("suffix_required_scope", suffix_required_scope);
730
print_bit_block("prefix_pending", prefix_pending);
731
print_bit_block("E0ED_pending", E0ED_pending);
732
print_bit_block("F0F4_pending", F0F4_pending);
733
print_bit_block("bit5_pending", bit5_pending);
734
print_bit_block("error_mask", error_mask);
742
/* 7-bit ASCII validation: the validation stream of each block is simply
   bit[0], so every code unit with bit[0] set is marked as an error. */
void ASCII_7_Lexer::Do_CharsetValidation() {
743
for (int blk = 0; blk < buffer_blocks; blk++) {
744
validation_stream[blk] = x8basis[blk].bit[0];
749
/* 8-bit extended ASCII validation: no code unit is rejected, so the
   validation stream of every buffer block is cleared to zero. */
void EASCII_8_Lexer::Do_CharsetValidation() {
750
/* Nothing required for most charsets - but perhaps should have tables. */
751
for (int i = 0; i < buffer_blocks; i++) {
752
#ifdef TEMPLATED_SIMD_LIB
753
validation_stream[i] = simd<1>::constant<0>();
755
#ifndef TEMPLATED_SIMD_LIB
756
validation_stream[i] = simd_const_1(0);
762
/* Character-set validation entry points for the 16- and 32-bit lexers.
   NOTE(review): these look like placeholders with no validation logic
   (e.g. no surrogate-pair or code point range checks) -- confirm whether
   validation is performed elsewhere for these encodings. */
void UTF_16_Lexer::Do_CharsetValidation() {
766
void UCS_2_Lexer::Do_CharsetValidation() {
770
void UTF_32_Lexer::Do_CharsetValidation() {
774
/* EBCDIC validation: no code unit is rejected, so the validation stream
   of every buffer block is cleared to zero. */
void EBCDIC_Lexer::Do_CharsetValidation() {
775
/* Nothing required for most cases - but perhaps should have tables. */
776
for (int i = 0; i < buffer_blocks; i++) {
777
#ifdef TEMPLATED_SIMD_LIB
778
validation_stream[i] = simd<1>::constant<0>();
780
#ifndef TEMPLATED_SIMD_LIB
781
validation_stream[i] = simd_const_1(0);
788
/* Stub out XML 1.1 routines initially. */
790
/* XML 1.1 whitespace/control pass for UTF-8: not implemented yet.  Prints
   a notice on every call and falls back to the XML 1.0 pass. */
void UTF_8_Lexer::Do_XML_11_WS_Control() {
791
printf("UTF_8_Lexer::Do_XML_11_WS_Control not yet implemented; using XML 1.0 rules.\n");
792
Do_XML_10_WS_Control();
796
/* XML 1.1 variant of the whitespace/control computation for 7-bit ASCII.
   bit[0] is treated as the high-order bit, as in the rest of this file.
   Under that reading:
     Control = complement of temp11, where temp11 marks code units with
               bit[0] clear, bit[1] or bit[2] set, and not 0x7F -- i.e.
               0x20-0x7E.  Control therefore covers 0x00-0x1F, 0x7F and
               every code unit with bit[0] set.
     WS      = CR (0x0D), LF (0x0A), HT (0x09) or SP (0x20), exactly as in
               the XML 1.0 version. */
static inline void ASCII_7_WS_Control_Blocks_11(BitBlock bit[], BitBlock& WS, BitBlock& Control) {
797
BitBlock temp1 = simd_or(bit[0], bit[1]);
798
BitBlock temp2 = simd_or(temp1, bit[2]);
799
BitBlock temp3 = simd_andc(bit[1], bit[0]);
800
BitBlock temp4 = simd_and(bit[2], bit[3]);
801
BitBlock temp5 = simd_and(temp3, temp4);
802
BitBlock temp6 = simd_and(bit[4], bit[5]);
803
BitBlock temp7 = simd_and(bit[6], bit[7]);
804
BitBlock temp8 = simd_and(temp6, temp7);
/* temp9: code unit 0x7F (bits 0111 1111). */
805
BitBlock temp9 = simd_and(temp5, temp8);
806
BitBlock temp10 = simd_andc(temp2, temp9);
807
BitBlock temp11 = simd_andc(temp10, bit[0]);
808
#ifdef TEMPLATED_SIMD_LIB
809
Control = simd_andc(simd<1>::constant<1>(), temp11);
811
#ifndef TEMPLATED_SIMD_LIB
812
Control = simd_andc(simd_const_1(1), temp11);
/* temp13 is set wherever any of bit[0..3] is set; CR, LF and HT all
   require it to be clear. */
814
BitBlock temp12 = simd_or(bit[2], bit[3]);
815
BitBlock temp13 = simd_or(temp1, temp12);
816
BitBlock temp14 = simd_andc(bit[7], bit[6]);
817
BitBlock temp15 = simd_and(temp6, temp14);
818
BitBlock CR = simd_andc(temp15, temp13);
819
BitBlock temp16 = simd_andc(bit[4], bit[5]);
820
BitBlock temp17 = simd_andc(bit[6], bit[7]);
821
BitBlock temp18 = simd_and(temp16, temp17);
822
BitBlock LF = simd_andc(temp18, temp13);
823
BitBlock temp19 = simd_and(temp16, temp14);
824
BitBlock HT = simd_andc(temp19, temp13);
825
BitBlock temp20 = simd_andc(bit[2], bit[3]);
826
BitBlock temp21 = simd_andc(temp20, temp1);
827
BitBlock temp22 = simd_or(bit[4], bit[5]);
828
BitBlock temp23 = simd_or(bit[6], bit[7]);
829
BitBlock temp24 = simd_or(temp22, temp23);
830
BitBlock SP = simd_andc(temp21, temp24);
831
WS = simd_or(simd_or(CR, LF), simd_or(HT, SP));
835
void ASCII_7_Lexer::Do_XML_11_WS_Control() {
836
BitBlock WS, Control;
837
for (int i = 0; i < buffer_blocks; i++) {
838
ASCII_7_WS_Control_Blocks_11(x8basis[i].bit, WS, Control);
839
parsing_engine_data->item_stream[NonWS][i] = simd_not(WS);
840
validation_stream[i] = simd_andc(Control, WS);
844
/* Computes the XML 1.1 whitespace and control bit streams for one block of
   8-bit extended ASCII data.
     bit     - the eight basis bit streams of the block, bit[0]..bit[7].
     WS      - out: set where the unit matches CR, LF, HT, SP or NEL.
     Control - out: set everywhere EXCEPT where at least one of bit[0..2] is
               set, the unit is not 0111 1111, and the unit does not start
               with 100.
   Bit patterns below list bit[0]..bit[7] left to right; bit[0] is presumably
   the most significant bit of each unit (consistent with CR = 0000 1101). */
static inline void EASCII_8_WS_Control_Blocks_11(BitBlock bit[], BitBlock& WS, BitBlock& Control) {
  /* Terms shared by the control and whitespace computations. */
  BitBlock b0_or_b1 = simd_or(bit[0], bit[1]);
  BitBlock b4_and_b5 = simd_and(bit[4], bit[5]);
  BitBlock lead_10 = simd_andc(bit[0], bit[1]);

  /* Control stream. */
  BitBlock b0_b1_or_b2 = simd_or(b0_or_b1, bit[2]);
  BitBlock lead_01 = simd_andc(bit[1], bit[0]);
  BitBlock b2_and_b3 = simd_and(bit[2], bit[3]);
  BitBlock high_0111 = simd_and(lead_01, b2_and_b3);
  BitBlock b6_and_b7 = simd_and(bit[6], bit[7]);
  BitBlock low_1111 = simd_and(b4_and_b5, b6_and_b7);
  BitBlock is_0111_1111 = simd_and(high_0111, low_1111);
  BitBlock upper_set_not_7f = simd_andc(b0_b1_or_b2, is_0111_1111);
  BitBlock lead_100 = simd_andc(lead_10, bit[2]);
  BitBlock non_control = simd_andc(upper_set_not_7f, lead_100);
#ifdef TEMPLATED_SIMD_LIB
  Control = simd_andc(simd<1>::constant<1>(), non_control);
#else
  Control = simd_andc(simd_const_1(1), non_control);
#endif

  /* Whitespace stream: CR, LF and HT all require bit[0..3] to be clear. */
  BitBlock b2_or_b3 = simd_or(bit[2], bit[3]);
  BitBlock high_nonzero = simd_or(b0_or_b1, b2_or_b3);
  BitBlock b67_is_01 = simd_andc(bit[7], bit[6]);

  /* CR: 0000 1101 */
  BitBlock low_1101 = simd_and(b4_and_b5, b67_is_01);
  BitBlock cr = simd_andc(low_1101, high_nonzero);

  /* LF: 0000 1010 */
  BitBlock b45_is_10 = simd_andc(bit[4], bit[5]);
  BitBlock b67_is_10 = simd_andc(bit[6], bit[7]);
  BitBlock low_1010 = simd_and(b45_is_10, b67_is_10);
  BitBlock lf = simd_andc(low_1010, high_nonzero);

  /* HT: 0000 1001 */
  BitBlock low_1001 = simd_and(b45_is_10, b67_is_01);
  BitBlock ht = simd_andc(low_1001, high_nonzero);

  /* SP: 0010 0000 */
  BitBlock b23_is_10 = simd_andc(bit[2], bit[3]);
  BitBlock high_0010 = simd_andc(b23_is_10, b0_or_b1);
  BitBlock b4_or_b5 = simd_or(bit[4], bit[5]);
  BitBlock b6_or_b7 = simd_or(bit[6], bit[7]);
  BitBlock low_nonzero = simd_or(b4_or_b5, b6_or_b7);
  BitBlock sp = simd_andc(high_0010, low_nonzero);

  /* NEL: 1000 0101 */
  BitBlock high_1000 = simd_andc(lead_10, b2_or_b3);
  BitBlock b45_is_01 = simd_andc(bit[5], bit[4]);
  BitBlock low_0101 = simd_and(b45_is_01, b67_is_01);
  BitBlock nel = simd_and(high_1000, low_0101);

  WS = simd_or(simd_or(simd_or(cr, lf), simd_or(ht, sp)), nel);
}
888
void EASCII_8_Lexer::Do_XML_11_WS_Control() {
889
BitBlock WS, Control;
890
for (int i = 0; i < buffer_blocks; i++) {
891
EASCII_8_WS_Control_Blocks_11(x8basis[i].bit, WS, Control);
892
parsing_engine_data->item_stream[NonWS][i] = simd_not(WS);
893
validation_stream[i] = simd_andc(Control, WS);
898
void U16_Lexer::Do_XML_11_WS_Control() {
899
printf("U16_Lexer::Do_XML_11_WS_Control not yet implemented; using XML 1.0 rules.\n");
900
Do_XML_10_WS_Control();
904
void UTF_32_Lexer::Do_XML_11_WS_Control() {
905
printf("UTF_32_Lexer::Do_XML_11_WS_Control not yet implemented; using XML 1.0 rules.\n");
906
Do_XML_10_WS_Control();
909
/* Computes the XML 1.1 whitespace and control bit streams for one block of
   EBCDIC data.
     bit     - the eight basis bit streams of the block, bit[0]..bit[7].
     WS      - out: set where the unit matches CR, LF, HT, SP or NEL.
     Control - out: set where bit[0] and bit[1] are both clear, or where all
               eight bits are set.
   Bit patterns below list bit[0]..bit[7] left to right; bit[0] is presumably
   the most significant bit of each unit (consistent with CR = 0000 1101). */
static inline void EBCDIC_WS_Control_Blocks_11(BitBlock bit[], BitBlock& WS, BitBlock& Control) {
  /* Terms shared by the control and whitespace computations. */
  BitBlock b0_or_b1 = simd_or(bit[0], bit[1]);
  BitBlock b4_and_b5 = simd_and(bit[4], bit[5]);

  /* Control stream. */
  BitBlock b0_and_b1 = simd_and(bit[0], bit[1]);
  BitBlock b2_and_b3 = simd_and(bit[2], bit[3]);
  BitBlock high_1111 = simd_and(b0_and_b1, b2_and_b3);
  BitBlock b6_and_b7 = simd_and(bit[6], bit[7]);
  BitBlock low_1111 = simd_and(b4_and_b5, b6_and_b7);
  BitBlock all_ones_unit = simd_and(high_1111, low_1111);
  BitBlock non_control = simd_andc(b0_or_b1, all_ones_unit);
#ifdef TEMPLATED_SIMD_LIB
  Control = simd_andc(simd<1>::constant<1>(), non_control);
#else
  Control = simd_andc(simd_const_1(1), non_control);
#endif

  /* Whitespace stream. */
  BitBlock b2_or_b3 = simd_or(bit[2], bit[3]);
  BitBlock high_nonzero = simd_or(b0_or_b1, b2_or_b3);
  BitBlock b67_is_01 = simd_andc(bit[7], bit[6]);

  /* CR: 0000 1101 */
  BitBlock low_1101 = simd_and(b4_and_b5, b67_is_01);
  BitBlock cr = simd_andc(low_1101, high_nonzero);

  /* LF: 0010 0101 */
  BitBlock b23_is_10 = simd_andc(bit[2], bit[3]);
  BitBlock high_0010 = simd_andc(b23_is_10, b0_or_b1);
  BitBlock b45_is_01 = simd_andc(bit[5], bit[4]);
  BitBlock low_0101 = simd_and(b45_is_01, b67_is_01);
  BitBlock lf = simd_and(high_0010, low_0101);

  /* HT: 0000 0101 */
  BitBlock ht = simd_andc(low_0101, high_nonzero);

  /* SP: 0100 0000 */
  BitBlock lead_01 = simd_andc(bit[1], bit[0]);
  BitBlock high_0100 = simd_andc(lead_01, b2_or_b3);
  BitBlock b4_or_b5 = simd_or(bit[4], bit[5]);
  BitBlock b6_or_b7 = simd_or(bit[6], bit[7]);
  BitBlock low_nonzero = simd_or(b4_or_b5, b6_or_b7);
  BitBlock sp = simd_andc(high_0100, low_nonzero);

  /* NEL: 0001 0101 */
  BitBlock b23_is_01 = simd_andc(bit[3], bit[2]);
  BitBlock high_0001 = simd_andc(b23_is_01, b0_or_b1);
  BitBlock nel = simd_and(high_0001, low_0101);

  WS = simd_or(simd_or(simd_or(cr, lf), simd_or(ht, sp)), nel);
}
948
void EBCDIC_Lexer::Do_XML_11_WS_Control() {
949
BitBlock WS, Control;
950
for (int i = 0; i < buffer_blocks; i++) {
951
EBCDIC_WS_Control_Blocks_11(x8basis[i].bit, WS, Control);
952
parsing_engine_data->item_stream[NonWS][i] = simd_not(WS);
953
validation_stream[i] = simd_andc(Control, WS);
959
/* Runs the per-buffer lexical analysis passes: charset validation, then
   XML 1.0 or 1.1 whitespace/control analysis, reporting the first error
   position found by each pass, and finally adjusts the last block of every
   lexical item stream when the buffer is not full.
     basis            - NOTE(review): not referenced in the lines visible
                        here; confirm where it is consumed.
     base_pos         - stored in lexer_base_pos and added to err_pos when an
                        error is reported.
     start_pos        - position from which the charset validation scan starts.
     buffer_limit_pos - number of code units in the buffer; sets buffer_units
                        and, rounded up to whole blocks, buffer_blocks. */
void Lexer_Interface::AnalyzeBuffer(BitBlockBasis * basis, int base_pos, int start_pos, int buffer_limit_pos) {
961
printf("Entered AnalyzeBuffer, buffer_limit_pos = %i\n", buffer_limit_pos);
963
#if defined(PAPI) and defined(CODE_CLOCKING) and (CODE_CLOCKING == BITLEX_ALL)
964
code_clocker->start_interval();
967
lexer_base_pos = base_pos; /* for error reporting. */
969
/* Round the unit count up to a whole number of blocks. */
buffer_blocks = (buffer_limit_pos + BLOCKSIZE - 1)/BLOCKSIZE;
970
buffer_units = buffer_limit_pos;
971
#if defined(PAPI) and defined(CODE_CLOCKING) and (CODE_CLOCKING == CHARSET_VALIDATION)
972
code_clocker->start_interval();
975
/* Pass 1: the encoding-specific routine fills validation_stream. */
Do_CharsetValidation();
976
/* Ignore error bits before start_pos which only arise
977
due to UTF8 pending scope streams at buffer boundaries.*/
978
err_pos = bitstream_scan(validation_stream, start_pos);
979
/* Detect validation errors up to the end of file plus one more
980
position in case there is an incomplete code unit at EOF. */
981
if ((err_pos <= buffer_units) && (err_pos < BUFFER_SIZE)) {
982
// printf("start_pos =%i\n, err_pos = %i\n", start_pos, err_pos);
983
// print_bit_block("validation_stream[0]", validation_stream[0]);
985
// print_bit_block("validation_stream[err_pos/128]", validation_stream[err_pos/128]);
987
CharSetValidationError((const char *) entity_Info->encoding, lexer_base_pos + err_pos);
989
#if defined(PAPI) and defined(CODE_CLOCKING) and (CODE_CLOCKING == CHARSET_VALIDATION)
990
code_clocker->end_interval(buffer_units);
992
#if defined(PAPI) and defined(CODE_CLOCKING) and (CODE_CLOCKING == WS_CONTROL)
993
code_clocker->start_interval();
996
/* Pass 2: whitespace/control analysis, chosen by the entity's XML version. */
if (entity_Info->version == XML_1_1) Do_XML_11_WS_Control();
997
else Do_XML_10_WS_Control();
999
printf("Do_WS_Control() complete.\n");
1001
/* Scan validation_stream again; a hit before buffer_units is reported
   as an XML character error. */
err_pos = bitstream_scan0(validation_stream);
1002
if (err_pos < buffer_units) XMLCharacterError(lexer_base_pos + err_pos);
1003
#if defined(PAPI) and defined(CODE_CLOCKING) and (CODE_CLOCKING == WS_CONTROL)
1004
code_clocker->end_interval(buffer_units);
1006
/* NOTE(review): no markup-stream call is visible between the two
   MARKUP_STREAMS clocking sections below - confirm against the full file. */
#if defined(PAPI) and defined(CODE_CLOCKING) and (CODE_CLOCKING == MARKUP_STREAMS)
1007
code_clocker->start_interval();
1010
#if defined(PAPI) and defined(CODE_CLOCKING) and (CODE_CLOCKING == MARKUP_STREAMS)
1011
code_clocker->end_interval(buffer_units);
1014
printf("Do_MarkupStreams() complete.\n");
1017
/* Partial buffer: build a mask from the all-ones constant shifted by
   buffer_units % BLOCKSIZE and combine it with the block at index
   buffer_units/BLOCKSIZE of every lexical item stream.
   NOTE(review): presumably this sets sentinel bits at and beyond the end
   of the data; the second simd_or operand is not visible here - confirm. */
if (buffer_units < BUFFER_SIZE) {
1018
#ifdef TEMPLATED_SIMD_LIB
1019
BitBlock final_block_mask =
1020
sisd_sfl(simd<1>::constant<1>(), sisd_from_int(buffer_units % BLOCKSIZE));
1022
#ifndef TEMPLATED_SIMD_LIB
1023
BitBlock final_block_mask =
1024
sisd_sfl(simd_const_1(1), sisd_from_int(buffer_units % BLOCKSIZE));
1026
int lastblk = buffer_units/BLOCKSIZE;
1027
for (int j = minLexicalItem; j < LexicalItemCount; j++) {
1028
parsing_engine_data->item_stream[j][lastblk] =
1029
simd_or(parsing_engine_data->item_stream[j][lastblk],
1033
#if defined(PAPI) and defined(CODE_CLOCKING) and (CODE_CLOCKING == BITLEX_ALL)
1034
code_clocker->end_interval(buffer_units);