1
/* bitlex - Lexical Item Stream Module.
2
Copyright (c) 2007, 2008, Robert D. Cameron.
3
Licensed to the public under the Open Software License 3.0.
4
Licensed to International Characters, Inc., under the Academic
10
#include "../lib/lib_simd.h"
11
#include "xml_error.h"
15
#include "../codeclocker/clocker/code_clocker_session.h"
16
/* File-level Code_Clocker handles.  Within this file, WS_Control_clocker,
   MarkupStreams_clocker and char_validation_clocker are assigned in
   Lexer<ASCII>::LexerFactory via register_Code_Clocker; no assignment to
   transpose_clocker is visible here (presumably set elsewhere - confirm). */
Code_Clocker * transpose_clocker;
17
Code_Clocker * WS_Control_clocker;
18
Code_Clocker * MarkupStreams_clocker;
19
Code_Clocker * char_validation_clocker;
24
/* Set up the state shared by all lexers.
     e - entity information (not referenced by the statements shown here).
     l - lexical item stream set; stored in parsing_engine_data.
   Allocates x8basis and validation_stream with simd_new; both are released
   with simd_delete in the destructor. */
Lexer_Interface::Lexer_Interface(Entity_Info * e, LexicalStreamSet *l) {
26
parsing_engine_data = l;
28
/* Bit-basis buffer: BUFFER_SIZE/PACKSIZE units requested from simd_new. */
x8basis = (BitBlockBasis *) simd_new(BUFFER_SIZE/PACKSIZE);
29
/* Validation stream: BUFFER_BLOCKS blocks plus SENTINEL_BLOCKS extra. */
validation_stream = (BitBlock *) simd_new(BUFFER_BLOCKS+SENTINEL_BLOCKS);
30
#ifdef TEMPLATED_SIMD_LIB
31
/* Initialize the first block past the buffer with the constant-1 pattern
   (templated SIMD API); presumably a sentinel so scans of the validation
   stream stop at the buffer end - confirm against the scanner. */
validation_stream[BUFFER_BLOCKS] = simd<1>::constant<1>();
33
#ifndef TEMPLATED_SIMD_LIB
34
/* Same initialization through the non-templated SIMD API. */
validation_stream[BUFFER_BLOCKS] = simd_const_1(1);
39
/* Release the two SIMD buffers obtained with simd_new in the constructor. */
Lexer_Interface::~Lexer_Interface() {
40
simd_delete((SIMD_type *) validation_stream);
41
simd_delete((SIMD_type *) x8basis);
46
/* Factory for ASCII-family entities: choose and allocate the concrete lexer
   matching the entity's encoding information.
     e - entity information; the fields read here are has_encoding_decl,
         code_unit_size, BOM_units, byte_order and encoding.
     l - lexical item stream set passed through to the lexer constructor.
   Returns a newly allocated lexer (created with new).  Unsupported
   combinations are reported through NoEncodingError / EncodingError; no
   return statement follows those calls, so they are presumably expected not
   to return - confirm. */
Lexer_Interface * Lexer<ASCII>::LexerFactory(Entity_Info * e, LexicalStreamSet *l) {
49
/* Register the code clockers for the lexer phases. */
WS_Control_clocker = register_Code_Clocker("WS", "WhiteSpace/Control\n");
50
MarkupStreams_clocker = register_Code_Clocker("bitlex", "Markup streams\n");
51
char_validation_clocker = register_Code_Clocker("charcheck", "Character validation\n");
54
/* No encoding declaration: decide from the code unit size and BOM alone. */
if (!(e->has_encoding_decl)) {
55
// Must be UTF-8 or UTF-16; UTF-16 requires a ByteOrderMark.
56
if (e->code_unit_size == SingleByte) return new UTF_8_Lexer(e, l);
57
/* NOTE: the two trailing else branches rely on dangling-else binding: the
   first pairs with the BOM test, the second with the DoubleByte test. */
else if ((e->code_unit_size == DoubleByte))
58
if (e->BOM_units == 1) return new UTF_16_Lexer(e, l);
59
else NoEncodingError("UTF-16 implied but no byte order found.");
60
else NoEncodingError("UTF-32 without an encoding declaration.\n");
63
/* An encoding name is declared: match it by length plus an at_* prefix
   test, grouped by code unit size. */
int lgth = strlen((const char *) e->encoding);
64
CodeUnit_ByteOrder order = e->byte_order;
65
switch (e->code_unit_size) {
67
/* 8-bit names: UTF-8, ASCII, Latin1. */
if ((lgth == 5) && at_UTF_8(e->encoding))
68
return new UTF_8_Lexer(e, l);
69
else if ((lgth == 5) && at_ASCII(e->encoding))
70
return new ASCII_7_Lexer(e, l);
71
else if ((lgth == 6) && at_Latin1(e->encoding))
72
return new EASCII_8_Lexer(e, l);
73
/* Really need a table-based lookup here */
74
else EncodingError("8-bit", e->encoding, lgth);
76
/* 16-bit names.  With one BOM unit the unsuffixed names are accepted;
   otherwise the BE or LE suffixed name must agree with byte_order. */
if (e->BOM_units == 1)
77
if ((lgth == 6) && at_UTF_16(e->encoding))
78
return new UTF_16_Lexer(e, l);
79
else if ((lgth == 5) && at_UCS_2(e->encoding))
80
return new UCS_2_Lexer(e, l);
81
else EncodingError("16-bit", e->encoding, lgth);
82
else if (order == BigEndian)
83
if ((lgth == 8) && at_UTF_16BE(e->encoding))
84
return new UTF_16_Lexer(e, l);
85
else if ((lgth == 7) && at_UCS_2BE(e->encoding))
86
return new UCS_2_Lexer(e, l);
87
else EncodingError("16BE", e->encoding, lgth);
88
else /*if (order == LittleEndian)*/
89
if ((lgth == 8) && at_UTF_16LE(e->encoding))
90
return new UTF_16_Lexer(e, l);
91
else if ((lgth == 7) && at_UCS_2LE(e->encoding))
92
return new UCS_2_Lexer(e, l);
93
else EncodingError("16LE", e->encoding, lgth);
95
/* 32-bit names.  UTF-32 and UCS-4 names both yield a UTF_32_Lexer.  Unlike
   the 16-bit group, a byte order that is neither BigEndian nor LittleEndian
   is reported as an error. */
if (e->BOM_units == 1)
96
if ((lgth == 6) && at_UTF_32(e->encoding))
97
return new UTF_32_Lexer(e, l);
98
else if ((lgth == 5) && at_UCS_4(e->encoding))
99
return new UTF_32_Lexer(e, l);
100
else EncodingError("32-bit", e->encoding, lgth);
101
else if (order == BigEndian)
102
if ((lgth == 8) && at_UTF_32BE(e->encoding))
103
return new UTF_32_Lexer(e, l);
104
else if ((lgth == 7) && at_UCS_4BE(e->encoding))
105
return new UTF_32_Lexer(e, l);
106
else EncodingError("32BE", e->encoding, lgth);
107
else if (order == LittleEndian)
108
if ((lgth == 8) && at_UTF_32LE(e->encoding))
109
return new UTF_32_Lexer(e, l);
110
else if ((lgth == 7) && at_UCS_4LE(e->encoding))
111
return new UTF_32_Lexer(e, l);
112
else EncodingError("32LE", e->encoding, lgth);
113
else EncodingError("32-bit", e->encoding, lgth);
119
/* Factory for EBCDIC-family entities.
     e - entity information; has_encoding_decl and encoding are read.
     l - lexical item stream set passed through to the lexer constructor.
   Returns a newly allocated EBCDIC_Lexer when the declared encoding name has
   length 6 and passes at_EBCDIC; every other case is reported through
   NoEncodingError / EncodingError. */
Lexer_Interface * Lexer<EBCDIC>::LexerFactory(Entity_Info * e, LexicalStreamSet *l) {
120
if (!(e->has_encoding_decl)) {
121
// An EBCDIC-family entity cannot be handled without an encoding declaration.
122
NoEncodingError("EBCDIC-family inferred, but no encoding declaration present.\n");
125
int lgth = strlen((const char *) e->encoding);
126
/* Really need a table-based lookup here */
127
if ((lgth == 6) && at_EBCDIC(e->encoding))
128
return new EBCDIC_Lexer(e, l);
129
else EncodingError("EBCDIC family", e->encoding, lgth);
133
/* Constructors for the lexer class family.  Each one only forwards (e, l)
   to its base-class constructor in the initializer list; no constructor body
   statements are visible here. */

/* Generic lexer, parameterized by code unit base; forwards to Lexer_Interface. */
template <CodeUnit_Base C>
134
Lexer<C>::Lexer(Entity_Info * e, LexicalStreamSet *l) : Lexer_Interface::Lexer_Interface(e, l) {
137
/* The 8-bit ASCII-family lexers build on Lexer<ASCII>. */
UTF_8_Lexer::UTF_8_Lexer(Entity_Info * e, LexicalStreamSet *l) : Lexer<ASCII>::Lexer(e, l) {
140
ASCII_7_Lexer::ASCII_7_Lexer(Entity_Info * e, LexicalStreamSet *l) : Lexer<ASCII>::Lexer(e, l) {
143
EASCII_8_Lexer::EASCII_8_Lexer(Entity_Info * e, LexicalStreamSet *l) : Lexer<ASCII>::Lexer(e, l) {
146
/* U16_Lexer is the common base of the two 16-bit lexers below. */
U16_Lexer::U16_Lexer(Entity_Info * e, LexicalStreamSet *l) : Lexer<ASCII>::Lexer(e, l) {
149
UTF_16_Lexer::UTF_16_Lexer(Entity_Info * e, LexicalStreamSet *l) : U16_Lexer::U16_Lexer(e, l) {
152
UCS_2_Lexer::UCS_2_Lexer(Entity_Info * e, LexicalStreamSet *l) : U16_Lexer::U16_Lexer(e, l) {
155
UTF_32_Lexer::UTF_32_Lexer(Entity_Info * e, LexicalStreamSet *l) : Lexer<ASCII>::Lexer(e, l) {
158
/* The EBCDIC lexer builds on Lexer<EBCDIC>. */
EBCDIC_Lexer::EBCDIC_Lexer(Entity_Info * e, LexicalStreamSet *l) : Lexer<EBCDIC>::Lexer(e, l) {
161
/* Primary template declaration: per-block computation of the whitespace (WS)
   and control-character (Control) bit streams from the bit[] basis streams.
   Bodies are provided for the ASCII and EBCDIC code unit bases. */
template <CodeUnit_Base C>
162
static inline void WS_Control_Blocks(BitBlock bit[], BitBlock& WS, BitBlock& Control);
165
/* ASCII-family whitespace/control classification for one block.
     bit     - basis bit streams, indices 0..7 (bit[0] appears to be the most
               significant bit of each code unit - confirm).
     WS      - out: positions of CR, LF, HT or SP.
     Control - out: positions where bit[0], bit[1] and bit[2] are all zero
               (code units below 0x20 under the bit order assumed above;
               this includes CR, LF and HT). */
static inline void WS_Control_Blocks<ASCII>(BitBlock bit[], BitBlock& WS, BitBlock& Control) {
166
BitBlock temp1 = simd_or(bit[0], bit[1]);
167
BitBlock temp2 = simd_or(temp1, bit[2]);
168
#ifdef TEMPLATED_SIMD_LIB
169
/* Control = complement of temp2 (templated SIMD API). */
Control = simd_andc(simd<1>::constant<1>(), temp2);
171
#ifndef TEMPLATED_SIMD_LIB
172
/* Control = complement of temp2 (non-templated SIMD API). */
Control = simd_andc(simd_const_1(1), temp2);
174
/* temp4 marks positions with any of bits 0..3 set. */
BitBlock temp3 = simd_or(bit[2], bit[3]);
175
BitBlock temp4 = simd_or(temp1, temp3);
176
/* CR: bits 4..7 = 1101 with bits 0..3 clear. */
BitBlock temp5 = simd_and(bit[4], bit[5]);
177
BitBlock temp6 = simd_andc(bit[7], bit[6]);
178
BitBlock temp7 = simd_and(temp5, temp6);
179
BitBlock CR = simd_andc(temp7, temp4);
180
/* LF: bits 4..7 = 1010 with bits 0..3 clear. */
BitBlock temp8 = simd_andc(bit[4], bit[5]);
181
BitBlock temp9 = simd_andc(bit[6], bit[7]);
182
BitBlock temp10 = simd_and(temp8, temp9);
183
BitBlock LF = simd_andc(temp10, temp4);
184
/* HT: bits 4..7 = 1001 with bits 0..3 clear. */
BitBlock temp11 = simd_and(temp8, temp6);
185
BitBlock HT = simd_andc(temp11, temp4);
186
/* SP: bits 0..3 = 0010 with bits 4..7 clear. */
BitBlock temp12 = simd_andc(bit[2], bit[3]);
187
BitBlock temp13 = simd_andc(temp12, temp1);
188
BitBlock temp14 = simd_or(bit[4], bit[5]);
189
BitBlock temp15 = simd_or(bit[6], bit[7]);
190
BitBlock temp16 = simd_or(temp14, temp15);
191
BitBlock SP = simd_andc(temp13, temp16);
192
WS = simd_or(simd_or(CR, LF), simd_or(HT, SP));
196
/* EBCDIC-family whitespace/control classification for one block.
     bit     - basis bit streams, indices 0..7.
     WS      - out: union of the CR, LF, HT and SP masks computed below.
     Control - out: complement of temp49, the accumulated mask built by the
               chain temp12 .. temp49.
   NOTE(review): the tempN chain has the shape of machine-generated boolean
   logic (compare the charset_compiler.py warning elsewhere in this file);
   prefer regenerating over hand-editing - confirm. */
static inline void WS_Control_Blocks<EBCDIC>(BitBlock bit[], BitBlock& WS, BitBlock& Control) {
197
BitBlock temp1 = simd_or(bit[0], bit[1]);
198
BitBlock temp2 = simd_or(bit[2], bit[3]);
199
BitBlock temp3 = simd_or(temp1, temp2);
200
BitBlock temp4 = simd_or(bit[4], bit[5]);
201
BitBlock temp5 = simd_or(temp3, temp4);
202
BitBlock temp6 = simd_and(bit[2], bit[3]);
203
BitBlock temp7 = simd_andc(temp6, temp1);
204
BitBlock temp8 = simd_andc(bit[5], bit[4]);
205
BitBlock temp9 = simd_and(bit[6], bit[7]);
206
BitBlock temp10 = simd_and(temp8, temp9);
207
BitBlock temp11 = simd_and(temp7, temp10);
208
/* From here on, each step removes one more character class from the
   running mask (temp12, temp19, temp24, ... temp49). */
BitBlock temp12 = simd_andc(temp5, temp11);
209
BitBlock temp13 = simd_andc(bit[2], bit[3]);
210
BitBlock temp14 = simd_andc(temp13, temp1);
211
BitBlock temp15 = simd_and(bit[4], bit[5]);
212
BitBlock temp16 = simd_and(temp14, temp15);
213
BitBlock temp17 = simd_andc(bit[6], bit[7]);
214
BitBlock temp18 = simd_andc(temp16, temp17);
215
BitBlock temp19 = simd_andc(temp12, temp18);
216
BitBlock temp20 = simd_andc(bit[3], bit[2]);
217
BitBlock temp21 = simd_andc(temp20, temp1);
218
BitBlock temp22 = simd_and(temp8, temp17);
219
BitBlock temp23 = simd_and(temp21, temp22);
220
BitBlock temp24 = simd_andc(temp19, temp23);
221
BitBlock temp25 = simd_or(temp1, bit[2]);
222
BitBlock temp26 = simd_or(bit[5], temp9);
223
BitBlock temp27 = simd_and(bit[4], temp26);
224
#ifdef TEMPLATED_SIMD_LIB
225
/* temp28 = complement of temp4 (templated SIMD API). */
BitBlock temp28 = simd_andc(simd<1>::constant<1>(), temp4);
227
#ifndef TEMPLATED_SIMD_LIB
228
/* temp28 = complement of temp4 (non-templated SIMD API). */
BitBlock temp28 = simd_andc(simd_const_1(1), temp4);
230
BitBlock temp29 = simd_if(bit[3], temp27, temp28);
231
BitBlock temp30 = simd_andc(temp29, temp25);
232
BitBlock temp31 = simd_andc(temp24, temp30);
233
BitBlock temp32 = simd_andc(temp15, bit[6]);
234
BitBlock temp33 = simd_and(temp7, temp32);
235
BitBlock temp34 = simd_andc(temp31, temp33);
236
BitBlock temp35 = simd_andc(temp17, temp4);
237
BitBlock temp36 = simd_and(temp7, temp35);
238
BitBlock temp37 = simd_andc(temp34, temp36);
239
BitBlock temp38 = simd_and(temp8, bit[6]);
240
BitBlock temp39 = simd_and(temp14, temp38);
241
BitBlock temp40 = simd_andc(temp37, temp39);
242
BitBlock temp41 = simd_andc(bit[4], bit[5]);
243
BitBlock temp42 = simd_andc(temp41, bit[6]);
244
BitBlock temp43 = simd_and(temp21, temp42);
245
BitBlock temp44 = simd_andc(temp40, temp43);
246
BitBlock temp45 = simd_and(temp15, temp9);
247
BitBlock temp46 = simd_and(temp7, temp45);
248
BitBlock temp47 = simd_andc(temp44, temp46);
249
BitBlock temp48 = simd_and(temp21, temp15);
250
BitBlock temp49 = simd_andc(temp47, temp48);
251
#ifdef TEMPLATED_SIMD_LIB
252
/* Control = complement of temp49 (templated SIMD API). */
Control = simd_andc(simd<1>::constant<1>(), temp49);
254
#ifndef TEMPLATED_SIMD_LIB
255
/* Control = complement of temp49 (non-templated SIMD API). */
Control = simd_andc(simd_const_1(1), temp49);
257
/* Whitespace characters: CR, LF, HT and SP in their EBCDIC encodings. */
BitBlock temp50 = simd_andc(bit[7], bit[6]);
258
BitBlock temp51 = simd_and(temp15, temp50);
259
BitBlock CR = simd_andc(temp51, temp3);
260
BitBlock temp52 = simd_and(temp8, temp50);
261
BitBlock LF = simd_and(temp14, temp52);
262
BitBlock HT = simd_andc(temp52, temp3);
263
BitBlock temp53 = simd_andc(bit[1], bit[0]);
264
BitBlock temp54 = simd_andc(temp53, temp2);
265
BitBlock temp55 = simd_or(bit[6], bit[7]);
266
BitBlock temp56 = simd_or(temp4, temp55);
267
BitBlock SP = simd_andc(temp54, temp56);
268
WS = simd_or(simd_or(CR, LF), simd_or(HT, SP));
273
/* XML 1.0 whitespace/control pass over the buffer.  For each of the
   buffer_blocks blocks, WS_Control_Blocks<C> is applied to the block's bit
   basis; the results are stored as
     item_stream[NonWS][i] - complement of the whitespace mask, and
     validation_stream[i]  - control positions that are not whitespace. */
template <CodeUnit_Base C>
274
void Lexer<C>::Do_XML_10_WS_Control() {
275
BitBlock Control, WS;
276
for (int i = 0; i < buffer_blocks; i++) {
277
WS_Control_Blocks<C>(x8basis[i].bit,
280
parsing_engine_data->item_stream[NonWS][i] = simd_not(WS);
281
/* Control characters other than whitespace are flagged for validation. */
validation_stream[i] = simd_andc(Control, WS);
287
/* Primary template declaration: compute the lexical item bit streams for one
   block, writing them into LexItem[] (indexed by lexical item kind).  Bodies
   are provided for the ASCII and EBCDIC code unit bases. */
template <CodeUnit_Base C>
288
static inline void ComputeLexicalItemBlocks(BitBlock bit[], BitBlock LexItem[]);
290
/* Given the bit[] array of one BitBlock each for the 8 bits of
291
an ASCII-family character representation, compute the parallel
292
lexical item streams needed for XML parsing.
294
WARNING: the following is generated code by charset_compiler.py.
300
/* ASCII-family lexical item streams for one block.
     bit     - basis bit streams, indices 0..7 (bit[0] appears to be the most
               significant bit of each code unit - confirm).
     LexItem - out: one BitBlock per lexical item kind.  Written here:
               MarkupStart, Hyphen, QMark, Quote, NameFollow, CD_End_check,
               plus NonDigit/NonHex under DIGIT_AND_HEX_ITEMS and
               AmpHashSlash under MARKUP_SORTING.
   The hexadecimal values in the comments below assume the bit order noted
   above. */
static inline void ComputeLexicalItemBlocks<ASCII>(BitBlock bit[], BitBlock LexItem[]) {
301
/* temp3: high nibble 0011 (0x3_); temp7: 0x3C, the left angle bracket. */
BitBlock temp1 = simd_or(bit[0], bit[1]);
302
BitBlock temp2 = simd_and(bit[2], bit[3]);
303
BitBlock temp3 = simd_andc(temp2, temp1);
304
BitBlock temp4 = simd_and(bit[4], bit[5]);
305
BitBlock temp5 = simd_or(bit[6], bit[7]);
306
BitBlock temp6 = simd_andc(temp4, temp5);
307
BitBlock temp7 = simd_and(temp3, temp6);
308
/* temp9: high nibble 0010 (0x2_); temp13: 0x26, the ampersand. */
BitBlock temp8 = simd_andc(bit[2], bit[3]);
309
BitBlock temp9 = simd_andc(temp8, temp1);
310
BitBlock temp10 = simd_andc(bit[5], bit[4]);
311
BitBlock temp11 = simd_andc(bit[6], bit[7]);
312
BitBlock temp12 = simd_and(temp10, temp11);
313
BitBlock temp13 = simd_and(temp9, temp12);
314
LexItem[MarkupStart] = simd_or(temp7, temp13);
315
/* RAngle: 0x3E. */
BitBlock temp14 = simd_and(temp4, temp11);
316
BitBlock RAngle = simd_and(temp3, temp14);
317
/* RBracket: 0x5D; Hyphen: 0x2D. */
BitBlock temp15 = simd_andc(bit[1], bit[0]);
318
BitBlock temp16 = simd_andc(bit[3], bit[2]);
319
BitBlock temp17 = simd_and(temp15, temp16);
320
BitBlock temp18 = simd_andc(bit[7], bit[6]);
321
BitBlock temp19 = simd_and(temp4, temp18);
322
BitBlock RBracket = simd_and(temp17, temp19);
323
LexItem[Hyphen] = simd_and(temp9, temp19);
324
/* QMark: 0x3F. */
BitBlock temp20 = simd_and(bit[6], bit[7]);
325
BitBlock temp21 = simd_and(temp4, temp20);
326
LexItem[QMark] = simd_and(temp3, temp21);
327
/* Quote: 0x22 or 0x27 (temp26), together with the two MarkupStart
   characters (temp7 and temp13). */
BitBlock temp22 = simd_or(bit[4], bit[5]);
328
BitBlock temp23 = simd_andc(temp11, temp22);
329
BitBlock temp24 = simd_and(temp10, temp20);
330
BitBlock temp25 = simd_or(temp23, temp24);
331
BitBlock temp26 = simd_and(temp9, temp25);
332
BitBlock temp27 = simd_or(temp26, temp7);
333
LexItem[Quote] = simd_or(temp27, temp13);
334
/* NameFollow: union of four character groups (temp31, temp34, temp39,
   temp43) drawn from the 0x2_, 0x3_, 0x5_ and 0x7_ rows respectively. */
BitBlock temp28 = simd_and(bit[5], temp5);
335
BitBlock temp29 = simd_and(bit[4], temp28);
336
BitBlock temp30 = simd_andc(temp29, temp21);
337
BitBlock temp31 = simd_andc(temp9, temp30);
338
BitBlock temp32 = simd_and(temp3, bit[4]);
339
BitBlock temp33 = simd_or(bit[5], temp20);
340
BitBlock temp34 = simd_and(temp32, temp33);
341
BitBlock temp35 = simd_or(temp31, temp34);
342
BitBlock temp36 = simd_and(temp17, bit[4]);
343
#ifdef TEMPLATED_SIMD_LIB
344
/* temp37 = complement of temp20 (templated SIMD API). */
BitBlock temp37 = simd_andc(simd<1>::constant<1>(), temp20);
346
#ifndef TEMPLATED_SIMD_LIB
347
/* temp37 = complement of temp20 (non-templated SIMD API). */
BitBlock temp37 = simd_andc(simd_const_1(1), temp20);
349
BitBlock temp38 = simd_if(bit[5], temp37, temp20);
350
BitBlock temp39 = simd_and(temp36, temp38);
351
BitBlock temp40 = simd_or(temp35, temp39);
352
BitBlock temp41 = simd_and(temp15, temp2);
353
BitBlock temp42 = simd_and(temp41, bit[4]);
354
BitBlock temp43 = simd_and(temp42, temp38);
355
LexItem[NameFollow] = simd_or(temp40, temp43);
356
#ifdef DIGIT_AND_HEX_ITEMS
357
/* Digit: 0x30..0x39.  Hex adds 0x41..0x46 (temp50) and 0x61..0x66
   (temp54).  The streams are stored negated as NonDigit and NonHex. */
BitBlock temp44 = simd_or(bit[5], bit[6]);
358
BitBlock temp45 = simd_and(bit[4], temp44);
359
BitBlock Digit = simd_andc(temp3, temp45);
360
BitBlock temp46 = simd_or(bit[2], bit[3]);
361
BitBlock temp47 = simd_andc(temp15, temp46);
362
BitBlock temp48 = simd_andc(temp47, bit[4]);
363
BitBlock temp49 = simd_if(bit[5], temp37, temp5);
364
BitBlock temp50 = simd_and(temp48, temp49);
365
BitBlock temp51 = simd_or(Digit, temp50);
366
BitBlock temp52 = simd_and(temp15, temp8);
367
BitBlock temp53 = simd_andc(temp52, bit[4]);
368
BitBlock temp54 = simd_and(temp53, temp49);
369
BitBlock Hex = simd_or(temp51, temp54);
370
LexItem[NonDigit] = simd_not(Digit);
371
LexItem[NonHex] = simd_not(Hex);
373
#ifdef MARKUP_SORTING
374
/* AmpHashSlash: 0x26, 0x23 or 0x2F. */
BitBlock temp55 = simd_andc(temp20, temp22);
375
BitBlock temp56 = simd_or(temp12, temp55);
376
BitBlock temp57 = simd_or(temp56, temp21);
377
LexItem[AmpHashSlash] = simd_and(temp9, temp57);
380
/* Mark potential occurrences of ']]>' These are all actual
381
occurrences of ]]> as well as occurrences of ]] or ] at
382
the block end. Shifting the RBracket and RAngle streams in
383
negated forms ensures that a potential CD_End is not ruled
384
out at the block boundary. */
385
LexItem[CD_End_check] = simd_andc(RBracket,
386
simd_or(sisd_sbli(simd_not(RBracket), 1),
387
sisd_sbli(simd_not(RAngle), 2)));
388
#ifndef OMIT_CD_End_check_In_Markup_Scan
389
/* Fold the CD_End candidates into MarkupStart so one scan finds both. */
LexItem[MarkupStart] = simd_or(LexItem[MarkupStart], LexItem[CD_End_check]);
394
/* EBCDIC-family lexical item streams for one block.
     bit     - basis bit streams, indices 0..7.
     LexItem - out: one BitBlock per lexical item kind.  Written here:
               MarkupStart, Hyphen, Quote, NameFollow, CD_End_check, plus
               NonDigit/NonHex under DIGIT_AND_HEX_ITEMS.
   Differences from the ASCII version visible in this code: QMark is only a
   local and LexItem[QMark] is not assigned, and no AmpHashSlash stream is
   computed.  NOTE(review): confirm whether those omissions are intended.
   NOTE(review): the tempN chain has the shape of machine-generated boolean
   logic (compare the charset_compiler.py warning elsewhere in this file);
   prefer regenerating over hand-editing - confirm. */
static inline void ComputeLexicalItemBlocks<EBCDIC>(BitBlock bit[], BitBlock LexItem[]) {
395
BitBlock temp1 = simd_andc(bit[1], bit[0]);
396
BitBlock temp2 = simd_or(bit[2], bit[3]);
397
BitBlock temp3 = simd_andc(temp1, temp2);
398
BitBlock temp4 = simd_and(bit[4], bit[5]);
399
BitBlock temp5 = simd_or(bit[6], bit[7]);
400
BitBlock temp6 = simd_andc(temp4, temp5);
401
BitBlock temp7 = simd_and(temp3, temp6);
402
BitBlock temp8 = simd_andc(bit[3], bit[2]);
403
BitBlock temp9 = simd_and(temp1, temp8);
404
BitBlock temp10 = simd_or(bit[4], bit[5]);
405
BitBlock temp11 = simd_or(temp10, temp5);
406
BitBlock temp12 = simd_andc(temp9, temp11);
407
/* MarkupStart is the union of two single-character masks (temp7, temp12). */
LexItem[MarkupStart] = simd_or(temp7, temp12);
408
BitBlock temp13 = simd_andc(bit[2], bit[3]);
409
BitBlock temp14 = simd_and(temp1, temp13);
410
BitBlock temp15 = simd_andc(bit[6], bit[7]);
411
BitBlock temp16 = simd_and(temp4, temp15);
412
BitBlock RAngle = simd_and(temp14, temp16);
413
BitBlock temp17 = simd_andc(bit[0], bit[1]);
414
BitBlock temp18 = simd_and(bit[2], bit[3]);
415
BitBlock temp19 = simd_and(temp17, temp18);
416
BitBlock temp20 = simd_andc(bit[4], bit[5]);
417
BitBlock temp21 = simd_and(bit[6], bit[7]);
418
BitBlock temp22 = simd_and(temp20, temp21);
419
BitBlock RBracket = simd_and(temp19, temp22);
420
LexItem[Hyphen] = simd_andc(temp14, temp11);
421
BitBlock temp23 = simd_and(temp4, temp21);
422
BitBlock QMark = simd_and(temp14, temp23);
423
BitBlock temp24 = simd_and(temp1, temp18);
424
BitBlock temp25 = simd_and(temp4, bit[7]);
425
BitBlock temp26 = simd_and(temp24, temp25);
426
BitBlock temp27 = simd_or(temp26, temp7);
427
/* Quote combines temp26 with the two MarkupStart masks. */
LexItem[Quote] = simd_or(temp27, temp12);
428
/* NameFollow: built up as a running union (temp31 .. temp81) of
   single-character masks, finished by the assignment below. */
BitBlock temp28 = simd_andc(temp3, temp11);
429
BitBlock temp29 = simd_and(temp20, temp15);
430
BitBlock temp30 = simd_and(temp9, temp29);
431
BitBlock temp31 = simd_or(temp28, temp30);
432
BitBlock temp32 = simd_and(temp24, temp23);
433
BitBlock temp33 = simd_or(temp31, temp32);
434
BitBlock temp34 = simd_and(temp24, temp22);
435
BitBlock temp35 = simd_or(temp33, temp34);
436
BitBlock temp36 = simd_and(temp9, temp22);
437
BitBlock temp37 = simd_or(temp35, temp36);
438
BitBlock temp38 = simd_and(temp14, temp6);
439
BitBlock temp39 = simd_or(temp37, temp38);
440
BitBlock temp40 = simd_or(temp39, temp12);
441
BitBlock temp41 = simd_andc(bit[7], bit[6]);
442
BitBlock temp42 = simd_and(temp4, temp41);
443
BitBlock temp43 = simd_and(temp24, temp42);
444
BitBlock temp44 = simd_or(temp40, temp43);
445
BitBlock temp45 = simd_and(temp3, temp42);
446
BitBlock temp46 = simd_or(temp44, temp45);
447
BitBlock temp47 = simd_and(temp9, temp42);
448
BitBlock temp48 = simd_or(temp46, temp47);
449
BitBlock temp49 = simd_and(temp9, temp6);
450
BitBlock temp50 = simd_or(temp48, temp49);
451
BitBlock temp51 = simd_and(temp3, temp16);
452
BitBlock temp52 = simd_or(temp50, temp51);
453
BitBlock temp53 = simd_and(temp14, temp22);
454
BitBlock temp54 = simd_or(temp52, temp53);
455
BitBlock temp55 = simd_andc(temp41, temp10);
456
BitBlock temp56 = simd_and(temp14, temp55);
457
BitBlock temp57 = simd_or(temp54, temp56);
458
BitBlock temp58 = simd_and(temp9, temp16);
459
BitBlock temp59 = simd_or(temp57, temp58);
460
BitBlock temp60 = simd_or(temp59, temp7);
461
BitBlock temp61 = simd_and(temp24, temp16);
462
BitBlock temp62 = simd_or(temp60, temp61);
463
BitBlock temp63 = simd_or(temp62, RAngle);
464
BitBlock temp64 = simd_or(temp63, QMark);
465
BitBlock temp65 = simd_and(temp19, temp29);
466
BitBlock temp66 = simd_or(temp64, temp65);
467
BitBlock temp67 = simd_and(bit[0], bit[1]);
468
BitBlock temp68 = simd_and(temp67, temp13);
469
BitBlock temp69 = simd_andc(temp68, temp11);
470
BitBlock temp70 = simd_or(temp66, temp69);
471
BitBlock temp71 = simd_or(temp70, RBracket);
472
BitBlock temp72 = simd_andc(temp19, temp11);
473
BitBlock temp73 = simd_or(temp71, temp72);
474
BitBlock temp74 = simd_andc(temp67, temp2);
475
BitBlock temp75 = simd_andc(temp74, temp11);
476
BitBlock temp76 = simd_or(temp73, temp75);
477
BitBlock temp77 = simd_and(temp3, temp23);
478
BitBlock temp78 = simd_or(temp76, temp77);
479
BitBlock temp79 = simd_and(temp67, temp8);
480
BitBlock temp80 = simd_andc(temp79, temp11);
481
BitBlock temp81 = simd_or(temp78, temp80);
482
BitBlock temp82 = simd_and(temp17, temp13);
483
BitBlock temp83 = simd_and(temp82, temp55);
484
LexItem[NameFollow] = simd_or(temp81, temp83);
485
#ifdef DIGIT_AND_HEX_ITEMS
486
/* Digit: positions in the row where bits 0..3 are all set (temp84), minus
   the low-nibble values left in temp102.  Hex extends Digit with letter
   masks from two further rows (temp74 and temp115). */
BitBlock temp84 = simd_and(temp67, temp18);
487
BitBlock temp85 = simd_andc(temp11, temp55);
488
BitBlock temp86 = simd_andc(temp15, temp10);
489
BitBlock temp87 = simd_andc(temp85, temp86);
490
BitBlock temp88 = simd_andc(temp21, temp10);
491
BitBlock temp89 = simd_andc(temp87, temp88);
492
BitBlock temp90 = simd_andc(bit[5], bit[4]);
493
BitBlock temp91 = simd_andc(temp90, temp5);
494
BitBlock temp92 = simd_andc(temp89, temp91);
495
BitBlock temp93 = simd_and(temp90, temp41);
496
BitBlock temp94 = simd_andc(temp92, temp93);
497
BitBlock temp95 = simd_and(temp90, temp15);
498
BitBlock temp96 = simd_andc(temp94, temp95);
499
BitBlock temp97 = simd_and(temp90, temp21);
500
BitBlock temp98 = simd_andc(temp96, temp97);
501
BitBlock temp99 = simd_andc(temp20, temp5);
502
BitBlock temp100 = simd_andc(temp98, temp99);
503
BitBlock temp101 = simd_and(temp20, temp41);
504
BitBlock temp102 = simd_andc(temp100, temp101);
505
BitBlock Digit = simd_andc(temp84, temp102);
506
BitBlock temp103 = simd_and(temp74, temp55);
507
BitBlock temp104 = simd_or(Digit, temp103);
508
BitBlock temp105 = simd_and(temp74, temp86);
509
BitBlock temp106 = simd_or(temp104, temp105);
510
BitBlock temp107 = simd_and(temp74, temp88);
511
BitBlock temp108 = simd_or(temp106, temp107);
512
BitBlock temp109 = simd_and(temp74, temp91);
513
BitBlock temp110 = simd_or(temp108, temp109);
514
BitBlock temp111 = simd_and(temp74, temp93);
515
BitBlock temp112 = simd_or(temp110, temp111);
516
BitBlock temp113 = simd_and(temp74, temp95);
517
BitBlock temp114 = simd_or(temp112, temp113);
518
BitBlock temp115 = simd_andc(temp17, temp2);
519
BitBlock temp116 = simd_and(temp115, temp55);
520
BitBlock temp117 = simd_or(temp114, temp116);
521
BitBlock temp118 = simd_and(temp115, temp86);
522
BitBlock temp119 = simd_or(temp117, temp118);
523
BitBlock temp120 = simd_and(temp115, temp88);
524
BitBlock temp121 = simd_or(temp119, temp120);
525
BitBlock temp122 = simd_and(temp115, temp91);
526
BitBlock temp123 = simd_or(temp121, temp122);
527
BitBlock temp124 = simd_and(temp115, temp93);
528
BitBlock temp125 = simd_or(temp123, temp124);
529
BitBlock temp126 = simd_and(temp115, temp95);
530
BitBlock Hex = simd_or(temp125, temp126);
532
/* The streams are stored negated as NonDigit and NonHex. */
LexItem[NonDigit] = simd_not(Digit);
533
LexItem[NonHex] = simd_not(Hex);
537
/* Mark potential occurrences of ']]>' These are all actual
538
occurrences of ]]> as well as occurrences of ]] or ] at
539
the block end. Shifting the RBracket and RAngle streams in
540
negated forms ensures that a potential CD_End is not ruled
541
out at the block boundary. */
542
LexItem[CD_End_check] = simd_andc(RBracket,
543
simd_or(sisd_sbli(simd_not(RBracket), 1),
544
sisd_sbli(simd_not(RAngle), 2)));
545
#ifndef OMIT_CD_End_check_In_Markup_Scan
546
/* Fold the CD_End candidates into MarkupStart so one scan finds both. */
LexItem[MarkupStart] = simd_or(LexItem[MarkupStart], LexItem[CD_End_check]);
551
/* A temporary structure for internal use when computing lexical item
   streams: Lexer<C>::Do_MarkupStreams keeps one per buffer block (lx_blk)
   and has ComputeLexicalItemBlocks fill it in.  It holds one BitBlock per
   lexical item kind. */
553
BitBlock LexicalItems[LexicalItemCount];
559
/* Compute the lexical item streams for the whole buffer.
   Step 1: run ComputeLexicalItemBlocks<C> on every block into the local
           array lx_blk (BUFFER_BLOCKS entries, block-major layout).
   Step 2: copy the results into parsing_engine_data->item_stream, which is
           indexed [item][block], for items MarkupStart up to
           LexicalItemCount.
   Step 3: add whitespace positions (complement of NonWS) to NameFollow.
   NOTE(review): lx_blk is a local array of BUFFER_BLOCKS entries; confirm
   that its size is acceptable for the stack on the target platforms. */
template <CodeUnit_Base C>
560
void Lexer<C>::Do_MarkupStreams() {
561
LexicalItemBlock lx_blk[BUFFER_BLOCKS];
562
for (int i = 0; i < buffer_blocks; i++) {
563
ComputeLexicalItemBlocks<C>(x8basis[i].bit, lx_blk[i].LexicalItems);
565
/* NonWS stream already completed by WS_Control method. */
566
for (int j = MarkupStart; j < LexicalItemCount; j++) {
567
for (int i = 0; i < buffer_blocks; i++) {
568
parsing_engine_data->item_stream[j][i] = lx_blk[i].LexicalItems[j];
571
/* Whitespace also counts as a name follower. */
for (int i = 0; i < buffer_blocks; i++) {
572
parsing_engine_data->item_stream[NameFollow][i] =
573
simd_or(parsing_engine_data->item_stream[NameFollow][i],
574
simd_not(parsing_engine_data->item_stream[NonWS][i]));
579
/* Validate UTF-8 structure over the buffer, one block at a time, and store
   the resulting error positions in validation_stream[i].
   Checks performed by the code below (byte values assume bit[0] is the most
   significant bit of each byte - confirm):
     - two-byte prefixes with bits 3..6 all clear (C0, C1) are errors;
     - four-byte prefixes F5..FF are errors;
     - the first suffix after E0/ED and after F0/F4 must satisfy the
       E0ED_constraint / F0F4_constraint tests;
     - suffix bytes must appear exactly where a prefix requires them;
     - the sequences EF BF BE and EF BF BF (FFFE / FFFF) are errors.
   State that crosses a block boundary is carried in the *_pending variables.
   NOTE(review): error_mask is not declared in the lines shown here -
   presumably a local BitBlock; confirm. */
void UTF_8_Lexer::Do_CharsetValidation() {
580
BitBlock u8prefix, u8suffix, u8prefix2, u8prefix3or4, u8prefix3, u8prefix4;
582
/* UTF-8 sequences may cross block boundaries. If a
583
prefix is found near the end of a block that requires
584
one or more suffixes in the next block, then
585
prefix_pending is set to mark the positions.
586
However, at the beginning of the buffer, no suffixes
587
are expected, so this value is initialized to zeroes. */
588
#ifdef TEMPLATED_SIMD_LIB
589
BitBlock prefix_pending = simd<1>::constant<0>();
590
/* If a suffix is pending, then it may involve one of
591
the special case prefixes E0, ED, F0, F4, or the
592
EF prefix or EF_BF combination for FFFF/FFFE detection.*/
593
BitBlock E0ED_pending = simd<1>::constant<0>();
594
BitBlock F0F4_pending = simd<1>::constant<0>();
595
BitBlock bit5_pending = simd<1>::constant<0>();
596
BitBlock EF_pending = simd<1>::constant<0>();
597
BitBlock EF_BF_pending = simd<1>::constant<0>();
599
#ifndef TEMPLATED_SIMD_LIB
600
BitBlock prefix_pending = simd_const_1(0);
601
/* If a suffix is pending, then it may involve one of
602
the special case prefixes E0, ED, F0, F4, or the
603
EF prefix or EF_BF combination for FFFF/FFFE detection.*/
604
BitBlock E0ED_pending = simd_const_1(0);
605
BitBlock F0F4_pending = simd_const_1(0);
606
BitBlock bit5_pending = simd_const_1(0);
607
BitBlock EF_pending = simd_const_1(0);
608
BitBlock EF_BF_pending = simd_const_1(0);
610
/* Temporary variables used within the block. */
611
BitBlock suffix_required_scope;
612
BitBlock prefix_E0ED, E0ED_scope, bit5_scope, E0ED_constraint;
613
BitBlock prefix_F5FF, prefix_F0F4, F0F4_scope, F0F4_constraint;
614
BitBlock X111x, B111x, prefix_EF, BF, EF_BF, EF_scope, EF_BF_scope;
616
for (int i = 0; i < buffer_blocks; i++) {
617
#ifdef TEMPLATED_SIMD_LIB
618
/* Start from "no errors" for this block. */
validation_stream[i] = simd<1>::constant<0>();
620
#ifndef TEMPLATED_SIMD_LIB
621
validation_stream[i] = simd_const_1(0);
623
/* If there is no pending suffix and no bit 0, then there
624
are no possible validation issues for this block. */
625
if (!bitblock_has_bit(simd_or(prefix_pending, x8basis[i].bit[0])))
627
/* Compute classifications of UTF-8 bytes. */
628
u8prefix = simd_and(x8basis[i].bit[0], x8basis[i].bit[1]);
629
u8suffix = simd_andc(x8basis[i].bit[0], x8basis[i].bit[1]);
630
u8prefix3or4 = simd_and(u8prefix, x8basis[i].bit[2]);
631
u8prefix2 = simd_andc(u8prefix, x8basis[i].bit[2]);
632
u8prefix3 = simd_andc(u8prefix3or4, x8basis[i].bit[3]);
633
u8prefix4 = simd_and(u8prefix3or4, x8basis[i].bit[3]);
635
/* Initiate validation for two-byte sequences. */
636
error_mask = simd_andc(u8prefix2,
637
simd_or(simd_or(x8basis[i].bit[3], x8basis[i].bit[4]),
638
simd_or(x8basis[i].bit[5], x8basis[i].bit[6])));
639
/* A suffix is required one position after every prefix, plus wherever the
   previous block left one pending. */
suffix_required_scope = simd_or(prefix_pending, sisd_sfli(u8prefix, 1));
641
/* Carry a prefix in the last position of this block into the next one;
   the scopes for this block start from the values carried in. */
prefix_pending = sisd_sbli(u8prefix, BLOCKSIZE - 1);
642
E0ED_scope = E0ED_pending;
643
F0F4_scope = F0F4_pending;
644
bit5_scope = bit5_pending;
645
EF_scope = EF_pending;
646
EF_BF_scope = EF_BF_pending;
648
/* Default values of pending variables for next iteration. */
649
#ifdef TEMPLATED_SIMD_LIB
650
E0ED_pending = simd<1>::constant<0>();
651
F0F4_pending = simd<1>::constant<0>();
652
bit5_pending = simd<1>::constant<0>();
653
EF_pending = simd<1>::constant<0>();
654
EF_BF_pending = simd<1>::constant<0>();
656
#ifndef TEMPLATED_SIMD_LIB
657
E0ED_pending = simd_const_1(0);
658
F0F4_pending = simd_const_1(0);
659
bit5_pending = simd_const_1(0);
660
EF_pending = simd_const_1(0);
661
EF_BF_pending = simd_const_1(0);
664
/* X111x: bits 4..6 all set.  BF: suffix byte BF; EF_BF: a BF that directly
   follows an EF prefix carried in from the previous block. */
X111x = simd_and(simd_and(x8basis[i].bit[4], x8basis[i].bit[5]), x8basis[i].bit[6]);
665
B111x = simd_and(simd_and(u8suffix, simd_and(x8basis[i].bit[2], x8basis[i].bit[3])),
667
BF = simd_and(B111x, x8basis[i].bit[7]);
668
EF_BF = simd_and(EF_scope, BF);
670
if (bitblock_has_bit(u8prefix3or4)) {
671
/* Extend validation for errors in three-byte sequences. */
672
suffix_required_scope = simd_or(suffix_required_scope,
673
sisd_sfli(u8prefix3or4, 2));
674
bit5_scope = simd_or(bit5_scope, sisd_sfli(x8basis[i].bit[5], 1));
675
/* prefix_E0ED: three-byte prefix whose low four bits are 0000 or 1101. */
prefix_E0ED = simd_andc(u8prefix3,
676
simd_or(simd_or(x8basis[i].bit[6],
677
simd_xor(x8basis[i].bit[4], x8basis[i].bit[7])),
678
simd_xor(x8basis[i].bit[4], x8basis[i].bit[5])));
679
E0ED_scope = simd_or(E0ED_scope, sisd_sfli(prefix_E0ED, 1));
680
/* prefix_EF: three-byte prefix whose low four bits are 1111. */
prefix_EF = simd_and(u8prefix3, simd_and(X111x, x8basis[i].bit[7]));
681
EF_scope = simd_or(EF_scope, sisd_sfli(prefix_EF, 1));
682
/* Recompute EF_BF now that EF_scope includes this block's EF prefixes. */
EF_BF = simd_and(EF_scope, BF);
684
/* Values for next iteration. */
685
prefix_pending = simd_or(prefix_pending,
686
sisd_sbli(u8prefix3or4, BLOCKSIZE - 2));
687
bit5_pending = sisd_sbli(x8basis[i].bit[5], BLOCKSIZE - 1);
688
E0ED_pending = sisd_sbli(prefix_E0ED, BLOCKSIZE - 1);
689
EF_pending = sisd_sbli(prefix_EF, BLOCKSIZE - 1);
690
EF_BF_pending = sisd_sbli(EF_BF, BLOCKSIZE - 2);
691
if (bitblock_has_bit(u8prefix4)) {
692
/* Extend validation for errors in four-byte sequences. */
693
suffix_required_scope = simd_or(suffix_required_scope,
694
sisd_sfli(u8prefix4, 3));
695
prefix_pending = simd_or(prefix_pending,
696
sisd_sbli(u8prefix4, BLOCKSIZE - 3));
697
/* prefix_F5FF: four-byte prefixes F5..FF, always an error. */
prefix_F5FF = simd_and(u8prefix4,
698
simd_or(x8basis[i].bit[4],
699
simd_and(x8basis[i].bit[5],
700
simd_or(x8basis[i].bit[6], x8basis[i].bit[7]))));
701
error_mask = simd_or(error_mask, prefix_F5FF);
702
/* prefix_F0F4: four-byte prefixes F0 and F4, whose first suffix is
   range-restricted. */
prefix_F0F4 = simd_andc(u8prefix4,
703
simd_or(x8basis[i].bit[4],
704
simd_or(x8basis[i].bit[6], x8basis[i].bit[7])));
705
F0F4_scope = simd_or(F0F4_scope, sisd_sfli(prefix_F0F4, 1));
706
F0F4_pending = sisd_sbli(prefix_F0F4, BLOCKSIZE - 1);
709
/* Restricted first suffixes: bit5_scope carries bit 5 of the preceding
   prefix, which tells E0 from ED and F0 from F4; an error is flagged where
   the constraint bit is clear inside the corresponding scope. */
E0ED_constraint = simd_xor(bit5_scope, x8basis[i].bit[2]);
710
error_mask = simd_or(error_mask, simd_andc(E0ED_scope, E0ED_constraint));
711
F0F4_constraint = simd_xor(bit5_scope,
712
simd_or(x8basis[i].bit[2], x8basis[i].bit[3]));
713
error_mask = simd_or(error_mask, simd_andc(F0F4_scope, F0F4_constraint));
714
/* Complete validation by checking for prefix-suffix mismatches. */
715
error_mask = simd_or(error_mask, simd_xor(suffix_required_scope, u8suffix));
717
/* FFFE / FFFF: a B111x byte (BE or BF) right after an EF BF pair. */
EF_BF_scope = simd_or(EF_BF_scope, sisd_sfli(EF_BF, 1));
718
error_mask = simd_or(error_mask, simd_and(EF_BF_scope, B111x));
719
validation_stream[i] = error_mask;
720
#ifdef DEBUG_UTF8_VALIDATION
721
// if (bitblock_has_bit(error_mask)) {
722
printf("-%i----------------------\n", i);
723
print_bit_block("x8basis[i].bit[0]", x8basis[i].bit[0]);
724
print_bit_block("x8basis[i].bit[1]", x8basis[i].bit[1]);
725
print_bit_block("x8basis[i].bit[2]", x8basis[i].bit[2]);
726
print_bit_block("x8basis[i].bit[3]", x8basis[i].bit[3]);
727
print_bit_block("u8prefix2", u8prefix2);
728
print_bit_block("u8prefix3", u8prefix3);
729
print_bit_block("u8prefix4", u8prefix4);
730
print_bit_block("suffix_required_scope", suffix_required_scope);
731
print_bit_block("prefix_pending", prefix_pending);
732
print_bit_block("E0ED_pending", E0ED_pending);
733
print_bit_block("F0F4_pending", F0F4_pending);
734
print_bit_block("bit5_pending", bit5_pending);
735
print_bit_block("error_mask", error_mask);
743
/* 7-bit ASCII validation: every position with bit[0] set is recorded in the
   validation stream (bit[0] is presumably the high bit, i.e. a byte outside
   the 7-bit range - confirm). */
void ASCII_7_Lexer::Do_CharsetValidation() {
744
for (int blk = 0; blk < buffer_blocks; blk++) {
745
validation_stream[blk] = x8basis[blk].bit[0];
750
/* 8-bit extended ASCII validation: no byte is rejected, so the validation
   stream is simply cleared for every block. */
void EASCII_8_Lexer::Do_CharsetValidation() {
751
/* Nothing required for most charsets - but perhaps should have tables. */
752
for (int i = 0; i < buffer_blocks; i++) {
753
#ifdef TEMPLATED_SIMD_LIB
754
validation_stream[i] = simd<1>::constant<0>();
756
#ifndef TEMPLATED_SIMD_LIB
757
validation_stream[i] = simd_const_1(0);
763
/* Character set validation hooks for the UTF-16, UCS-2 and UTF-32 lexers.
   NOTE(review): no body statements are visible for any of these three
   routines; presumably they are placeholders and validation_stream is left
   unchanged by them - confirm. */
void UTF_16_Lexer::Do_CharsetValidation() {
767
void UCS_2_Lexer::Do_CharsetValidation() {
771
void UTF_32_Lexer::Do_CharsetValidation() {
775
/* EBCDIC validation: no byte is rejected, so the validation stream is simply
   cleared for every block. */
void EBCDIC_Lexer::Do_CharsetValidation() {
776
/* Nothing required for most cases - but perhaps should have tables. */
777
for (int i = 0; i < buffer_blocks; i++) {
778
#ifdef TEMPLATED_SIMD_LIB
779
validation_stream[i] = simd<1>::constant<0>();
781
#ifndef TEMPLATED_SIMD_LIB
782
validation_stream[i] = simd_const_1(0);
789
/* Stub out XML 1.1 routines initially. */
791
/* XML 1.1 whitespace/control pass for UTF-8: not implemented yet.  Prints a
   notice with printf (standard output) and then applies the XML 1.0 rules
   through Do_XML_10_WS_Control. */
void UTF_8_Lexer::Do_XML_11_WS_Control() {
792
printf("UTF_8_Lexer::Do_XML_11_WS_Control not yet implemented; using XML 1.0 rules.\n");
793
Do_XML_10_WS_Control();
797
/* XML 1.1 whitespace/control classification for 7-bit ASCII, one block.
     bit     - basis bit streams, indices 0..7 (bit[0] appears to be the most
               significant bit of each code unit - confirm).
     WS      - out: positions of CR, LF, HT or SP (same masks as XML 1.0).
     Control - out: complement of temp11.  Under the bit order assumed above
               this covers code units below 0x20, the value 0x7F, and every
               code unit with bit[0] set. */
static inline void ASCII_7_WS_Control_Blocks_11(BitBlock bit[], BitBlock& WS, BitBlock& Control) {
798
BitBlock temp1 = simd_or(bit[0], bit[1]);
799
BitBlock temp2 = simd_or(temp1, bit[2]);
800
/* temp9: bits 0..7 = 01111111 (0x7F). */
BitBlock temp3 = simd_andc(bit[1], bit[0]);
801
BitBlock temp4 = simd_and(bit[2], bit[3]);
802
BitBlock temp5 = simd_and(temp3, temp4);
803
BitBlock temp6 = simd_and(bit[4], bit[5]);
804
BitBlock temp7 = simd_and(bit[6], bit[7]);
805
BitBlock temp8 = simd_and(temp6, temp7);
806
BitBlock temp9 = simd_and(temp5, temp8);
807
/* temp11: any of bits 0..2 set, excluding 0x7F and anything with bit 0. */
BitBlock temp10 = simd_andc(temp2, temp9);
808
BitBlock temp11 = simd_andc(temp10, bit[0]);
809
#ifdef TEMPLATED_SIMD_LIB
810
/* Control = complement of temp11 (templated SIMD API). */
Control = simd_andc(simd<1>::constant<1>(), temp11);
812
#ifndef TEMPLATED_SIMD_LIB
813
/* Control = complement of temp11 (non-templated SIMD API). */
Control = simd_andc(simd_const_1(1), temp11);
815
/* temp13 marks positions with any of bits 0..3 set. */
BitBlock temp12 = simd_or(bit[2], bit[3]);
816
BitBlock temp13 = simd_or(temp1, temp12);
817
/* CR: bits 4..7 = 1101 with bits 0..3 clear. */
BitBlock temp14 = simd_andc(bit[7], bit[6]);
818
BitBlock temp15 = simd_and(temp6, temp14);
819
BitBlock CR = simd_andc(temp15, temp13);
820
/* LF: bits 4..7 = 1010 with bits 0..3 clear. */
BitBlock temp16 = simd_andc(bit[4], bit[5]);
821
BitBlock temp17 = simd_andc(bit[6], bit[7]);
822
BitBlock temp18 = simd_and(temp16, temp17);
823
BitBlock LF = simd_andc(temp18, temp13);
824
/* HT: bits 4..7 = 1001 with bits 0..3 clear. */
BitBlock temp19 = simd_and(temp16, temp14);
825
BitBlock HT = simd_andc(temp19, temp13);
826
/* SP: bits 0..3 = 0010 with bits 4..7 clear. */
BitBlock temp20 = simd_andc(bit[2], bit[3]);
827
BitBlock temp21 = simd_andc(temp20, temp1);
828
BitBlock temp22 = simd_or(bit[4], bit[5]);
829
BitBlock temp23 = simd_or(bit[6], bit[7]);
830
BitBlock temp24 = simd_or(temp22, temp23);
831
BitBlock SP = simd_andc(temp21, temp24);
832
WS = simd_or(simd_or(CR, LF), simd_or(HT, SP));
836
void ASCII_7_Lexer::Do_XML_11_WS_Control() {
837
BitBlock WS, Control;
838
for (int i = 0; i < buffer_blocks; i++) {
839
ASCII_7_WS_Control_Blocks_11(x8basis[i].bit, WS, Control);
840
parsing_engine_data->item_stream[NonWS][i] = simd_not(WS);
841
validation_stream[i] = simd_andc(Control, WS);
845
/* Computes the XML 1.1 whitespace and control-character streams for one
   block of 8-bit extended-ASCII data.

   bit      the eight basis bit streams of the block (bit[0]..bit[7]).
   WS       out: positions of CR, LF, HT, SP and NEL.
   Control  out: complement of the "legal" stream, where legal means at
            least one of bit[0..2] is set, the unit is not 0111 1111, and
            the unit is not of the form 100x xxxx.

   The bit patterns quoted below read bit[0] as the most significant bit
   of each code unit, which is what makes the CR/LF/HT/SP/NEL names line
   up with 0x0D/0x0A/0x09/0x20/0x85. */
static inline void EASCII_8_WS_Control_Blocks_11(BitBlock bit[], BitBlock& WS, BitBlock& Control) {
#ifdef TEMPLATED_SIMD_LIB
    BitBlock all_ones = simd<1>::constant<1>();
#else
    BitBlock all_ones = simd_const_1(1);
#endif

    /* Terms shared by the Control and WS computations. */
    BitBlock hi2_any = simd_or(bit[0], bit[1]);
    BitBlock lo_11xx = simd_and(bit[4], bit[5]);
    BitBlock hi_10 = simd_andc(bit[0], bit[1]);

    /* Control: 0x00..0x1F, 0x7F and 0x80..0x9F. */
    BitBlock hi_0111 = simd_and(simd_andc(bit[1], bit[0]), simd_and(bit[2], bit[3]));
    BitBlock DEL = simd_and(hi_0111, simd_and(lo_11xx, simd_and(bit[6], bit[7])));
    BitBlock not_c0_or_del = simd_andc(simd_or(hi2_any, bit[2]), DEL);
    BitBlock c1_range = simd_andc(hi_10, bit[2]);                   /* 100x xxxx */
    BitBlock legal = simd_andc(not_c0_or_del, c1_range);
    Control = simd_andc(all_ones, legal);

    /* Whitespace: each character is a low-nibble pattern gated by the
       required high-nibble pattern. */
    BitBlock b2_or_b3 = simd_or(bit[2], bit[3]);
    BitBlock hi4_any = simd_or(hi2_any, b2_or_b3);
    BitBlock lo_xx01 = simd_andc(bit[7], bit[6]);
    BitBlock CR = simd_andc(simd_and(lo_11xx, lo_xx01), hi4_any);   /* 0000 1101 */

    BitBlock lo_10xx = simd_andc(bit[4], bit[5]);
    BitBlock lo_xx10 = simd_andc(bit[6], bit[7]);
    BitBlock LF = simd_andc(simd_and(lo_10xx, lo_xx10), hi4_any);   /* 0000 1010 */
    BitBlock HT = simd_andc(simd_and(lo_10xx, lo_xx01), hi4_any);   /* 0000 1001 */

    BitBlock hi_0010 = simd_andc(simd_andc(bit[2], bit[3]), hi2_any);
    BitBlock lo_any = simd_or(simd_or(bit[4], bit[5]), simd_or(bit[6], bit[7]));
    BitBlock SP = simd_andc(hi_0010, lo_any);                       /* 0010 0000 */

    BitBlock hi_1000 = simd_andc(hi_10, b2_or_b3);
    BitBlock lo_0101 = simd_and(simd_andc(bit[5], bit[4]), lo_xx01);
    BitBlock NEL = simd_and(hi_1000, lo_0101);                      /* 1000 0101 */

    WS = simd_or(simd_or(simd_or(CR, LF), simd_or(HT, SP)), NEL);
}
889
void EASCII_8_Lexer::Do_XML_11_WS_Control() {
890
BitBlock WS, Control;
891
for (int i = 0; i < buffer_blocks; i++) {
892
EASCII_8_WS_Control_Blocks_11(x8basis[i].bit, WS, Control);
893
parsing_engine_data->item_stream[NonWS][i] = simd_not(WS);
894
validation_stream[i] = simd_andc(Control, WS);
899
void U16_Lexer::Do_XML_11_WS_Control() {
900
printf("U16_Lexer::Do_XML_11_WS_Control not yet implemented; using XML 1.0 rules.\n");
901
Do_XML_10_WS_Control();
905
void UTF_32_Lexer::Do_XML_11_WS_Control() {
906
printf("UTF_32_Lexer::Do_XML_11_WS_Control not yet implemented; using XML 1.0 rules.\n");
907
Do_XML_10_WS_Control();
910
/* Computes the XML 1.1 whitespace and control-character streams for one
   block of EBCDIC data.

   bit      the eight basis bit streams of the block (bit[0]..bit[7]).
   WS       out: positions of CR, LF, HT, SP and NEL.
   Control  out: complement of the "legal" stream, where legal means
            bit[0] or bit[1] is set and the unit is not 1111 1111.

   The bit patterns quoted below read bit[0] as the most significant bit
   of each code unit; under that reading CR/LF/HT/SP/NEL are matched at
   0x0D/0x25/0x05/0x40/0x15. */
static inline void EBCDIC_WS_Control_Blocks_11(BitBlock bit[], BitBlock& WS, BitBlock& Control) {
#ifdef TEMPLATED_SIMD_LIB
    BitBlock all_ones = simd<1>::constant<1>();
#else
    BitBlock all_ones = simd_const_1(1);
#endif

    /* Terms shared by the Control and WS computations. */
    BitBlock hi2_any = simd_or(bit[0], bit[1]);
    BitBlock lo_11xx = simd_and(bit[4], bit[5]);

    /* Control: 0x00..0x3F and 0xFF. */
    BitBlock hi_1111 = simd_and(simd_and(bit[0], bit[1]), simd_and(bit[2], bit[3]));
    BitBlock byte_FF = simd_and(hi_1111, simd_and(lo_11xx, simd_and(bit[6], bit[7])));
    BitBlock legal = simd_andc(hi2_any, byte_FF);
    Control = simd_andc(all_ones, legal);

    /* Whitespace: each character is a low-nibble pattern combined with
       the required high-nibble pattern. */
    BitBlock b2_or_b3 = simd_or(bit[2], bit[3]);
    BitBlock hi4_any = simd_or(hi2_any, b2_or_b3);
    BitBlock lo_xx01 = simd_andc(bit[7], bit[6]);
    BitBlock CR = simd_andc(simd_and(lo_11xx, lo_xx01), hi4_any);   /* 0000 1101 */

    BitBlock hi_0010 = simd_andc(simd_andc(bit[2], bit[3]), hi2_any);
    BitBlock lo_0101 = simd_and(simd_andc(bit[5], bit[4]), lo_xx01);
    BitBlock LF = simd_and(hi_0010, lo_0101);                       /* 0010 0101 */
    BitBlock HT = simd_andc(lo_0101, hi4_any);                      /* 0000 0101 */

    BitBlock hi_0100 = simd_andc(simd_andc(bit[1], bit[0]), b2_or_b3);
    BitBlock lo_any = simd_or(simd_or(bit[4], bit[5]), simd_or(bit[6], bit[7]));
    BitBlock SP = simd_andc(hi_0100, lo_any);                       /* 0100 0000 */

    BitBlock hi_0001 = simd_andc(simd_andc(bit[3], bit[2]), hi2_any);
    BitBlock NEL = simd_and(hi_0001, lo_0101);                      /* 0001 0101 */

    WS = simd_or(simd_or(simd_or(CR, LF), simd_or(HT, SP)), NEL);
}
949
void EBCDIC_Lexer::Do_XML_11_WS_Control() {
950
BitBlock WS, Control;
951
for (int i = 0; i < buffer_blocks; i++) {
952
EBCDIC_WS_Control_Blocks_11(x8basis[i].bit, WS, Control);
953
parsing_engine_data->item_stream[NonWS][i] = simd_not(WS);
954
validation_stream[i] = simd_andc(Control, WS);
960
/* Runs the per-buffer analysis steps over the bit streams of one buffer.

   Visible steps, in order:
     1. Record base_pos (for error reporting) and derive buffer_blocks and
        buffer_units from buffer_limit_pos.
     2. Do_CharsetValidation(), then scan validation_stream from start_pos
        and report a CharSetValidationError for a hit within range.
     3. Do_XML_11_WS_Control() or Do_XML_10_WS_Control() depending on
        entity_Info->version, then scan validation_stream again and report
        an XMLCharacterError for a hit before buffer_units.
     4. For a buffer shorter than BUFFER_SIZE, OR a mask into block
        buffer_units/BLOCKSIZE of every lexical item stream.
   Several steps are optionally timed through code_clocker when PAPI and
   CODE_CLOCKING are defined.

   basis             NOTE(review): not referenced in the lines visible
                     here - confirm how it is consumed.
   base_pos          stored in lexer_base_pos; added to err_pos when an
                     error position is reported.
   start_pos         position from which the charset validation scan starts.
   buffer_limit_pos  number of code units in the buffer (becomes
                     buffer_units).

   NOTE(review): some lines of this function (conditional-compilation
   terminators, closing braces, the code between the MARKUP_STREAMS clock
   start/end, the second simd_or operand) are not visible in the reviewed
   excerpt; the comments below describe only what is shown. */
void Lexer_Interface::AnalyzeBuffer(BitBlockBasis * basis, int base_pos, int start_pos, int buffer_limit_pos) {
962
printf("Entered AnalyzeBuffer, buffer_limit_pos = %i\n", buffer_limit_pos);
964
#if defined(PAPI) and defined(CODE_CLOCKING) and (CODE_CLOCKING == BITLEX_ALL)
965
code_clocker->cc_start_interval();
968
lexer_base_pos = base_pos; /* for error reporting. */
970
/* Number of blocks needed to hold buffer_limit_pos units, rounded up. */
buffer_blocks = (buffer_limit_pos + BLOCKSIZE - 1)/BLOCKSIZE;
971
buffer_units = buffer_limit_pos;
972
#if defined(PAPI) and defined(CODE_CLOCKING) and (CODE_CLOCKING == CHARSET_VALIDATION)
973
code_clocker->cc_start_interval();
976
/* Encoding-specific pass that fills validation_stream. */
Do_CharsetValidation();
977
/* Ignore error bits before start_pos which only arise
978
due to UTF8 pending scope streams at buffer boundaries.*/
979
/* presumably yields the position of the first marked bit at or after
   start_pos - TODO confirm against bitstream_scan's definition. */
err_pos = bitstream_scan(validation_stream, start_pos);
980
/* Detect validation errors up to the end of file plus one more
981
position in case there is an incomplete code unit at EOF. */
982
if ((err_pos <= buffer_units) && (err_pos < BUFFER_SIZE)) {
983
// printf("start_pos =%i\n, err_pos = %i\n", start_pos, err_pos);
984
// print_bit_block("validation_stream[0]", validation_stream[0]);
986
// print_bit_block("validation_stream[err_pos/128]", validation_stream[err_pos/128]);
988
/* Reported position is relative to the entity: buffer base + offset. */
CharSetValidationError((char *) entity_Info->encoding, lexer_base_pos + err_pos);
990
#if defined(PAPI) and defined(CODE_CLOCKING) and (CODE_CLOCKING == CHARSET_VALIDATION)
991
code_clocker->cc_end_interval(buffer_units);
993
#if defined(PAPI) and defined(CODE_CLOCKING) and (CODE_CLOCKING == WS_CONTROL)
994
code_clocker->cc_start_interval();
997
/* Whitespace/control rules differ between XML 1.1 and XML 1.0. */
if (entity_Info->version == XML_1_1) Do_XML_11_WS_Control();
998
else Do_XML_10_WS_Control();
1000
printf("Do_WS_Control() complete.\n");
1002
/* validation_stream is rescanned after the WS/control pass; unlike the
   charset check above, the limit here is strict (err_pos < buffer_units). */
err_pos = bitstream_scan0(validation_stream);
1003
if (err_pos < buffer_units) XMLCharacterError(lexer_base_pos + err_pos);
1004
#if defined(PAPI) and defined(CODE_CLOCKING) and (CODE_CLOCKING == WS_CONTROL)
1005
code_clocker->cc_end_interval(buffer_units);
1007
#if defined(PAPI) and defined(CODE_CLOCKING) and (CODE_CLOCKING == MARKUP_STREAMS)
1008
code_clocker->cc_start_interval();
1011
#if defined(PAPI) and defined(CODE_CLOCKING) and (CODE_CLOCKING == MARKUP_STREAMS)
1012
code_clocker->cc_end_interval(buffer_units);
1015
printf("Do_MarkupStreams() complete.\n");
1018
/* Partial buffer: patch the block that contains position buffer_units. */
if (buffer_units < BUFFER_SIZE) {
1019
/* All-ones block shifted by (buffer_units % BLOCKSIZE); presumably leaves
   ones only at positions from the end of data onward - TODO confirm
   sisd_sfl's shift direction. */
#ifdef TEMPLATED_SIMD_LIB
1020
BitBlock final_block_mask =
1021
sisd_sfl(simd<1>::constant<1>(), sisd_from_int(buffer_units % BLOCKSIZE));
1023
#ifndef TEMPLATED_SIMD_LIB
1024
BitBlock final_block_mask =
1025
sisd_sfl(simd_const_1(1), sisd_from_int(buffer_units % BLOCKSIZE));
1027
int lastblk = buffer_units/BLOCKSIZE;
1028
/* Every lexical item stream gets the same update in block lastblk. */
for (int j = minLexicalItem; j < LexicalItemCount; j++) {
1029
parsing_engine_data->item_stream[j][lastblk] =
1030
/* NOTE(review): the second operand of this simd_or is not visible here;
   presumably final_block_mask - confirm. */
simd_or(parsing_engine_data->item_stream[j][lastblk],
1034
#if defined(PAPI) and defined(CODE_CLOCKING) and (CODE_CLOCKING == BITLEX_ALL)
1035
code_clocker->cc_end_interval(buffer_units);