1
/* contentmodel.h - Content Models from !ELEMENT declarations.
2
Copyright (c) 2008, Robert D. Cameron and Dan Lin.
3
Licensed to the public under the Open Software License 3.0.
4
Licensed to International Characters, Inc., under the Academic
8
#ifndef CONTENTMODEL_H_
9
#define CONTENTMODEL_H_
17
/* symbol_set_t is a hash_map from int to int.  In this header it is the type
   of elements, first_map and follow_map, and the pointee type of every
   transition_map.
   NOTE(review): what the int keys and int values stand for is not visible in
   this part of the file - confirm against the implementation. */
typedef hash_map<int, int> symbol_set_t;
20
Content Models describe the expected structure of content between
21
the start and end tags of an XML element. The following data
22
declarations represent content model information derived from
23
the XML DTD. Content models from XML Schemas or other specifications
24
are not described here.
26
The content models are declared using !ELEMENT declarations.
27
http://www.w3.org/TR/xml/#NT-elementdecl
28
[45] elementdecl ::= '<!ELEMENT' S Name S contentspec S? '>'
29
[46] contentspec ::= 'EMPTY' | 'ANY' | Mixed | children
34
cm_Empty, cm_Any, cm_Mixed, cm_RegExp};
38
/* Records which kind of content model this object is.
   NOTE(review): presumably one of the enumerators cm_Empty, cm_Any, cm_Mixed
   or cm_RegExp - confirm against the full ContentModel_t declaration. */
ContentModel_t cm_type;
41
/* Content model class derived from ContentModel.
   NOTE(review): judging by its name it corresponds to the 'ANY' alternative
   of contentspec [46]; the class body is not visible in this part of the file. */
class CM_Any: public ContentModel {
46
/* Content model class derived from ContentModel.
   NOTE(review): judging by its name it corresponds to the 'EMPTY' alternative
   of contentspec [46]; the class body is not visible in this part of the file. */
class CM_Empty: public ContentModel {
51
/* Content model class derived from ContentModel.
   NOTE(review): judging by its name it corresponds to the Mixed alternative
   of contentspec [46]. */
class CM_Mixed: public ContentModel {
54
/* NOTE(review): presumably the element names permitted in the mixed content,
   held as a symbol_set_t - confirm against the implementation. */
symbol_set_t elements;
58
Class Content_RE is used for content models described using regular
59
expressions in the DTD.
61
[47] children ::= (choice | seq) ('?' | '*' | '+')?
62
[48] cp ::= (Name | choice | seq) ('?' | '*' | '+')?
63
[49] choice ::= '(' S? cp ( S? '|' S? cp )+ S? ')'
64
[50] seq ::= '(' S? cp ( S? ',' S? cp )* S? ')'
71
/* NOTE(review): presumably the symbols that may occur first for this
   Content_RE, as built by Set_First_Map - confirm in the implementation. */
symbol_set_t first_map;
72
/* NOTE(review): presumably the symbols that may follow this Content_RE,
   as built by Set_Follow_Map - confirm in the implementation. */
symbol_set_t follow_map;
73
/* Set_IDs recursively sets sequential numeric identifiers for each Name in a
   content model.
   Pure virtual: every Content_RE subclass visible in this header declares
   its own Set_IDs(int).
   NOTE(review): base_ID is presumably the first identifier to hand out and
   the returned int the next unused one - confirm in the implementation. */
74
virtual int Set_IDs(int base_ID) = 0;
75
/* Build the map of the symbols that may occur first for this Content_RE.
   Pure virtual; takes no arguments and returns nothing.
   NOTE(review): the result is presumably stored in first_map - confirm in
   the implementation. */
76
virtual void Set_First_Map() = 0;
77
/* Build the map of the symbols that may follow this Content_RE.
   Pure virtual; transition_map points to symbol_set_t storage supplied by
   the caller.
   NOTE(review): whether transition_map addresses one map or an array of
   maps, and how it is updated, is not visible here - confirm in the
   implementation. */
78
virtual void Set_Follow_Map(symbol_set_t * transition_map) = 0;
81
/* Models constructed according to the [50] seq production.
82
subCMs are the models for each cp in '(' S? cp ( S? ',' S? cp )* S? ')' */
83
class CRE_Seq : public Content_RE {
86
/* Pointers to the models of the cp items of the sequence.
   NOTE(review): ownership of the pointed-to models is not visible here. */
vector<Content_RE *> subCMs;
87
/* Declaration only; same signature as the pure virtual Set_IDs(int). */
int Set_IDs(int base_ID);
90
/* Declaration only; same signature as the pure virtual Set_Follow_Map. */
void Set_Follow_Map(symbol_set_t * transition_map);
93
/* Models constructed according to the [49] choice production.
94
subCMs are the models for each cp in '(' S? cp ( S? '|' S? cp )+ S? ')' */
95
class CRE_Choice : public Content_RE {
98
/* Pointers to the models of the alternative cp items of the choice.
   NOTE(review): ownership of the pointed-to models is not visible here. */
vector<Content_RE *> subCMs;
99
/* Declaration only; same signature as the pure virtual Set_IDs(int). */
int Set_IDs(int base_ID);
100
/* Declaration only; same signature as the pure virtual Set_First_Map. */
void Set_First_Map();
102
/* Declaration only; same signature as the pure virtual Set_Follow_Map. */
void Set_Follow_Map(symbol_set_t * transition_map);
105
/* Models constructed when '*' (Star), '+' (Plus) or '?' (Opt) is used.
106
subCM is the model for the Name, choice or seq in
107
(Name | choice | seq) ('?' | '*' | '+') */
108
/* CRE_Star: the Content_RE subclass for the '*' (Star) case. */
class CRE_Star : public Content_RE {
110
/* Constructs the model from a Content_RE pointer.
   NOTE(review): s is presumably the sub-model kept as subCM (that member is
   not visible here).  As declared, this single-argument constructor is not
   explicit, so it also acts as an implicit conversion from Content_RE *. */
CRE_Star(Content_RE * s);
112
/* Declaration only; same signature as the pure virtual Set_IDs(int). */
int Set_IDs(int base_ID);
113
/* Declaration only; same signature as the pure virtual Set_First_Map. */
void Set_First_Map();
114
/* Declaration only; same signature as the pure virtual Set_Follow_Map. */
void Set_Follow_Map(symbol_set_t * transition_map);
117
/* CRE_Plus: the Content_RE subclass for the '+' (Plus) case of
   (Name | choice | seq) ('?' | '*' | '+'). */
class CRE_Plus : public Content_RE {
119
/* Constructs the model from a Content_RE pointer.
   NOTE(review): s is presumably the sub-model the '+' applies to; the member
   that stores it is not visible here. */
CRE_Plus(Content_RE * s);
121
/* Declaration only; same signature as the pure virtual Set_IDs(int). */
int Set_IDs(int base_ID);
122
/* Declaration only; same signature as the pure virtual Set_First_Map. */
void Set_First_Map();
123
/* Declaration only; same signature as the pure virtual Set_Follow_Map. */
void Set_Follow_Map(symbol_set_t * transition_map);
126
/* CRE_Opt: the Content_RE subclass for the '?' (Opt) case of
   (Name | choice | seq) ('?' | '*' | '+'). */
class CRE_Opt : public Content_RE {
128
/* Constructs the model from a Content_RE pointer.
   NOTE(review): s is presumably the sub-model the '?' applies to; the member
   that stores it is not visible here. */
CRE_Opt(Content_RE * s);
130
/* Declaration only; same signature as the pure virtual Set_IDs(int). */
int Set_IDs(int base_ID);
131
/* Declaration only; same signature as the pure virtual Set_First_Map. */
void Set_First_Map();
132
/* Declaration only; same signature as the pure virtual Set_Follow_Map. */
void Set_Follow_Map(symbol_set_t * transition_map);
135
/* CRE_Name: a Content_RE subclass.
   NOTE(review): judging by its name it models a single Name occurring in the
   cp production [48]; its data members and constructor are not visible in
   this part of the file. */
class CRE_Name : public Content_RE {
140
/* Declaration only; same signature as the pure virtual Set_IDs(int). */
int Set_IDs(int base_ID);
141
/* Declaration only; same signature as the pure virtual Set_First_Map. */
void Set_First_Map();
142
/* Declaration only; same signature as the pure virtual Set_Follow_Map. */
void Set_Follow_Map(symbol_set_t * transition_map);
145
/* CM_RegExp: a ContentModel subclass that holds a pointer to a Content_RE
   (the class used for content models described by regular expressions in
   the DTD) together with a pointer to symbol_set_t transition data. */
class CM_RegExp: public ContentModel {
148
/* NOTE(review): presumably the storage handed to Set_Follow_Map; whether it
   is one map or an array, and who allocates or frees it, is not visible here. */
symbol_set_t * transition_map;
149
/* The regular-expression model of this content model.
   NOTE(review): ownership of the pointed-to object is not visible here. */
Content_RE * content_re;
152
#endif /*CONTENTMODEL_H_*/