/xmlbench/trunk

To get this branch, use:
bzr branch http://darksoft.org/webbzr/xmlbench/trunk
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
/*  contentmodel.h - Content Models from !ELEMENT declarations.
    Copyright (c) 2008, Robert D. Cameron and Dan Lin.
    Licensed to the public under the Open Software License 3.0.
    Licensed to International Characters, Inc., under the Academic
    Free License 3.0.
*/

#ifndef CONTENTMODEL_H_
#define CONTENTMODEL_H_

#include <iostream>
#include <iterator>
#include <string>
#include <vector>

/* NOTE(review): a using-directive at header scope is inherited by every file
   that includes this header; the unqualified `vector` used further down
   relies on it. */
using namespace std;

/* Map from int keys to int values.  In this header it is the type of the
   element set of mixed content and of the first/follow/transition maps of
   regular-expression content models.
   NOTE(review): hash_map is not declared by <iostream>, <string> or
   <iterator>; presumably an earlier include supplies it (together with a
   using-directive for its namespace) -- confirm. */
typedef hash_map<int, int> symbol_set_t;

/*  
  Content Models describe the expected structure of content between
  the start and end tags of an XML element.   The following data
  declarations represent content model information derived from 
  the XML DTD.   Content models from XML Schemas or other specifications
  are not described here.

  The content models are declared using !ELEMENT declarations.
  http://www.w3.org/TR/xml/#NT-elementdecl
  [45] elementdecl ::= '<!ELEMENT' S  Name  S  contentspec  S? '>'
  [46] contentspec ::= 'EMPTY' | 'ANY' | Mixed | children

*/

/* Tag identifying the kind of a content model.  The enumerators follow the
   order of the alternatives in the [46] contentspec production. */
enum ContentModel_t {
	cm_Empty,   /* 'EMPTY' */
	cm_Any,     /* 'ANY' */
	cm_Mixed,   /* Mixed */
	cm_RegExp   /* children (regular-expression content) */
};

/* Common base class of the content model classes (CM_Any, CM_Empty,
   CM_Mixed, CM_RegExp). */
class ContentModel {
public:
	/* Kind tag of this model.
	   NOTE(review): presumably set by the constructor of each derived
	   class -- confirm in the implementation file. */
	ContentModel_t cm_type;
};

/* Content model with no members of its own beyond the ContentModel base.
   NOTE(review): presumably represents an 'ANY' contentspec -- confirm. */
class CM_Any : public ContentModel {
public:
	/* Constructor; defined outside this header. */
	CM_Any();
};

/* Content model with no members of its own beyond the ContentModel base.
   NOTE(review): presumably represents an 'EMPTY' contentspec -- confirm. */
class CM_Empty : public ContentModel {
public:
	/* Constructor; defined outside this header. */
	CM_Empty();
};

/* Content model that carries a symbol set in addition to the ContentModel
   base.
   NOTE(review): presumably represents a Mixed contentspec -- confirm. */
class CM_Mixed : public ContentModel {
public:
	/* NOTE(review): presumably the element symbols permitted inside the
	   mixed content -- confirm against the code that fills it. */
	symbol_set_t elements;

	/* Constructor; defined outside this header. */
	CM_Mixed();
};

/*
  Class Content_RE is used for content models described using regular
  expressions in the DTD.

  [47] children ::=  (choice | seq) ('?' | '*' | '+')?
  [48] cp ::= (Name | choice | seq) ('?' | '*' | '+')?
  [49] choice ::= '(' S? cp ( S? '|' S? cp )+ S? ')'
  [50] seq ::= '(' S? cp ( S? ',' S? cp )* S? ')'

*/

/* Abstract base class for the nodes of a regular-expression content model.
   Every node kind (sequence, choice, star, plus, optional, name) derives
   from it and implements the three pure virtual operations below. */
class Content_RE {
public:
	/* Nodes are held and passed around as Content_RE pointers (see the
	   subCMs / subCM / content_re members elsewhere in this header), so
	   destruction through a base pointer must dispatch to the derived
	   class.  Without a virtual destructor that delete is undefined
	   behaviour. */
	virtual ~Content_RE() {}

	/* NOTE(review): presumably true when this expression can match empty
	   content -- confirm against the implementation. */
	bool matches_empty;
	/* Symbols that may occur first for this content_RE; built by
	   Set_First_Map(). */
	symbol_set_t first_map;
	/* Symbols that may follow this content_RE; see Set_Follow_Map(). */
	symbol_set_t follow_map;

	/* Set_IDs recursively sets sequential numeric identifiers for each Name
	   in a content model, starting from base_ID.
	   NOTE(review): the return value is presumably the next unused
	   identifier -- confirm. */
	virtual int Set_IDs(int base_ID) = 0;
	/* Build the map of the symbols that may occur first for this content_RE. */
	virtual void Set_First_Map() = 0;
	/* Build the map of the symbols that may follow this content_RE.
	   transition_map is supplied by the caller; how it is used is defined
	   by the implementations. */
	virtual void Set_Follow_Map(symbol_set_t * transition_map) = 0;
};

/* Models constructed according to the [50] seq production.  
   subCMs are the models for each cp in '(' S? cp ( S? ',' S? cp )* S? ')' */
class CRE_Seq : public Content_RE {
public:
	/* One sub-model per cp of the sequence. */
	vector<Content_RE *> subCMs;

	/* Constructor; defined outside this header. */
	CRE_Seq();

	/* Implementations of the Content_RE interface. */
	virtual int Set_IDs(int base_ID);
	virtual void Set_First_Map();
	virtual void Set_Follow_Map(symbol_set_t * transition_map);

	/* Additional operation that is not part of the Content_RE interface;
	   its behaviour is defined in the implementation file. */
	void Compile();
};

/* Models constructed according to the [49] choice production.
   subCMs are the models for each cp in '(' S? cp ( S? '|' S? cp )+ S? ')' */
class CRE_Choice : public Content_RE {
public:
	/* One sub-model per cp (alternative) of the choice. */
	vector<Content_RE *> subCMs;

	/* Constructor; defined outside this header. */
	CRE_Choice();

	/* Implementations of the Content_RE interface. */
	virtual int Set_IDs(int base_ID);
	virtual void Set_First_Map();
	virtual void Set_Follow_Map(symbol_set_t * transition_map);

	/* Additional operation that is not part of the Content_RE interface;
	   its behaviour is defined in the implementation file. */
	void Compile();
};

/* Models constructed when '*' (Star), '+' (Plus) or '?' (Opt) is used.
   subCM is the model for the Name, choice or seq in
   (Name | choice | seq) ('?' | '*' | '+')   */
/* Node for a content particle followed by '*'. */
class CRE_Star : public Content_RE {
public:
	/* The model the '*' operator is applied to. */
	Content_RE * subCM;

	/* NOTE(review): s is presumably stored as subCM -- confirm in the
	   implementation file. */
	CRE_Star(Content_RE * s);

	/* Implementations of the Content_RE interface. */
	virtual int Set_IDs(int base_ID);
	virtual void Set_First_Map();
	virtual void Set_Follow_Map(symbol_set_t * transition_map);
};

/* Node for a content particle followed by '+'. */
class CRE_Plus : public Content_RE {
public:
	/* The model the '+' operator is applied to. */
	Content_RE * subCM;

	/* NOTE(review): s is presumably stored as subCM -- confirm in the
	   implementation file. */
	CRE_Plus(Content_RE * s);

	/* Implementations of the Content_RE interface. */
	virtual int Set_IDs(int base_ID);
	virtual void Set_First_Map();
	virtual void Set_Follow_Map(symbol_set_t * transition_map);
};

/* Node for a content particle followed by '?'. */
class CRE_Opt : public Content_RE {
public:
	/* The model the '?' operator is applied to. */
	Content_RE * subCM;

	/* NOTE(review): s is presumably stored as subCM -- confirm in the
	   implementation file. */
	CRE_Opt(Content_RE * s);

	/* Implementations of the Content_RE interface. */
	virtual int Set_IDs(int base_ID);
	virtual void Set_First_Map();
	virtual void Set_Follow_Map(symbol_set_t * transition_map);
};

/* Leaf node of a regular-expression content model.
   NOTE(review): presumably one node per Name occurrence in the model --
   confirm. */
class CRE_Name : public Content_RE {
public:
	/* NOTE(review): presumably the identifier of the named element, taken
	   from the constructor argument -- confirm. */
	int elemID;
	/* NOTE(review): presumably the sequential identifier assigned by
	   Set_IDs() -- confirm. */
	int stateID;

	/* Constructor; defined outside this header. */
	CRE_Name(int id);

	/* Implementations of the Content_RE interface. */
	virtual int Set_IDs(int base_ID);
	virtual void Set_First_Map();
	virtual void Set_Follow_Map(symbol_set_t * transition_map);
};

/* Content model that refers to a Content_RE expression and a symbol_set_t
   transition map.
   NOTE(review): ownership of the two pointers is not visible in this
   header -- confirm who allocates and frees them. */
class CM_RegExp : public ContentModel {
public:
	/* NOTE(review): presumably the map handed to
	   Content_RE::Set_Follow_Map() -- confirm. */
	symbol_set_t * transition_map;
	/* Root of the regular-expression tree for this model. */
	Content_RE * content_re;

	/* Constructor; defined outside this header. */
	CM_RegExp();
};

#endif /*CONTENTMODEL_H_*/