/xmlbench/trunk

To get this branch, use:
bzr branch http://darksoft.org/webbzr/xmlbench/trunk
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
/*  xmlmodel.h - XML Model Processor
    Copyright (c) 2007, 2008 Robert D. Cameron
    Licensed to the public under the Open Software License 3.0.
    Licensed to International Characters, Inc., under the Academic
    Free License 3.0.

    The XML Model Processor gathers information that guides
    interpretation of an XML document as it is processed.
    This information arises from a variety of sources,
    including:
      (a) the document prolog, including
          (a1) the encoding signature,
          (a2) the XML declaration (or text declaration for
               external entities), and
          (a3) the Document Type Definition (internal and
               external subsets).
      FUTURE:
      (b) XML Schema documents (and/or Relax NG, Schematron)
      (c) XPath sets specifying information to retrieve.
*/

#ifndef XMLMODEL_H
#define XMLMODEL_H

//  Encoding signature, XML declaration processing included in xmldecl.h
#include "xmldecl.h"

#include <vector>
#include <iostream>
#include <string>
#include <ext/hash_map>

using namespace __gnu_cxx;
using namespace std;

#include "contentmodel.h"
#include "symtab.h"


/* Attribute Modeling */

/* Attribute types that an ATTLIST declaration may give an attribute.
   Enumerator values are implicit: CDATA_att is 0 and each following
   enumerator is one greater than the previous. */
enum ATT_type {
	CDATA_att,
	ID_att,
	IDREF_att,
	IDREFS_att,
	ENTITY_att,
	ENTITIES_att,
	NMTOKEN_att,
	NMTOKENS_att,
	NOTATION_att,
	enumeration_att
};

/* Kinds of attribute default that an ATTLIST declaration may specify.
   Enumerator values are implicit, starting at 0 for REQUIRED_att. */
enum ATT_default_kind {
	REQUIRED_att,
	IMPLIED_att,
	FIXED_att,
	DEFAULT_att
};


class ATT_info {
public:
	int globalATT_id;
	ATT_type attType;
	hash_map<int, int > enumValues; /* For NOTATION_att or enumeration_att.*/
	ATT_default_kind defaultKind;
	unsigned char * defaultValue;
	int defaultValueLgth;
};


/* Information recorded for one general entity.

   All members are public and are filled in by the code that builds the
   model.  This header does not show who allocates or releases the
   strings the pointers refer to. */
class GEntity_info {
public:
	/* Give every new record a defined state.  Without this constructor a
	   default-initialized GEntity_info carried indeterminate pointers,
	   flags and id, so a field the builder never assigned could not be
	   tested safely.  All pointers start null, flags false, id 0. */
	GEntity_info()
		: globalGEntity_id(0),
		  is_external(false),
		  ReplacementText(0),
		  systemLiteral(0),
		  pubidLiteral(0),
		  NDataName(0),
		  is_simple(false) {}

	int globalGEntity_id;         /* Identifier of this entity; presumably an index
	                                 into the model's general-entity numbering --
	                                 confirm against the model builder. */
	bool is_external;             /* True when the entity is declared as external. */
	const char * ReplacementText; /* Replacement text; null until set. */
	char * systemLiteral;         /* System literal; null until set. */
	char * pubidLiteral;          /* Public identifier literal; null until set. */
	char * NDataName;             /* Presumably the notation name from an NDATA
	                                 clause -- confirm. */
	bool is_simple;               /* NOTE(review): the meaning of "simple" is not
	                                 defined in this header. */
};

/* Information recorded for one parameter entity.

   All members are public and are filled in by the code that builds the
   model.  This header does not show who allocates or releases the
   strings the pointers refer to. */
class PEntity_info {
public:
	/* Give every new record a defined state.  Without this constructor a
	   default-initialized PEntity_info carried indeterminate pointers,
	   flag and id.  All pointers start null, the flag false, the id 0. */
	PEntity_info()
		: globalPEntity_id(0),
		  is_external(false),
		  ReplacementText(0),
		  systemLiteral(0),
		  pubidLiteral(0) {}

	int globalPEntity_id;   /* Identifier of this entity; presumably an index into
	                           the model's parameter-entity numbering -- confirm
	                           against the model builder. */
	bool is_external;       /* True when the entity is declared as external. */
	char * ReplacementText; /* Replacement text; null until set. */
	char * systemLiteral;   /* System literal; null until set. */
	char * pubidLiteral;    /* Public identifier literal; null until set. */
};

/* The complete Attribute model for a given element is a vector of ATT_info
   specifications for particular attribute names. */
//typedef vector<ATT_info> ElementAttributeModel;


/* Information recorded for one notation: its system and public
   identifier literals.  This header does not show who allocates or
   releases the strings the pointers refer to. */
class Notation_info {
public:
	/* Start with both literals null.  Without this constructor a
	   default-initialized Notation_info carried indeterminate pointers,
	   so an identifier that was never supplied could not be detected. */
	Notation_info()
		: systemLiteral(0),
		  pubidLiteral(0) {}

	char * systemLiteral; /* System literal; null until set. */
	char * pubidLiteral;  /* Public identifier literal; null until set. */
};


/* Aggregate of everything the model processor has gathered about a
   document: the external DTD reference, the symbol table, and the
   tables built from ATTLIST, ELEMENT, NOTATION and ENTITY declarations.

   NOTE(review): a destructor is declared but no copy constructor or
   copy assignment.  If the destructor (defined outside this header)
   releases any of the pointers held here, copying a Model_Info would
   leave two objects referring to the same storage -- confirm, and
   consider disabling copying. */
class Model_Info {
public:
	/* ---- Construction and destruction (defined outside this header) ---- */
	Model_Info();
	~Model_Info();

	/* ---- Operations (defined outside this header) ---- */

	/* Going by the names, each takes a name ID and returns the matching
	   global element / attribute-name index, adding an entry when none
	   exists -- confirm against the implementation. */
	int getOrInsertGlobalElement(int elem_nameID);
	int getOrInsertGlobalAttName(int att_nameID);

	/* Takes an entity name and its replacement text; presumably records
	   a general entity with that replacement text -- confirm against
	   the implementation. */
	void SimpleEntity(const char *entity_Name, const char *replText);

	/* ---- External DTD reference ---- */
	bool has_external_DTD;
	char *external_DTD_systemLiteral;
	char *external_DTD_pubidLiteral;

	/* ---- Name storage ---- */
	Symbol_Table *symbol_table;

	/* ---- Information computed from ATTLIST, ELEMENT, NOTATION and
	        ENTITY declarations ---- */

	/* int-to-int lookup tables, one per kind of declared name.  Presumably
	   they map a symbol-table name ID to the corresponding global index --
	   confirm against getOrInsertGlobalElement/getOrInsertGlobalAttName. */
	hash_map<int, int> GlobalAttributeTable;
	hash_map<int, int> GlobalElementTable;
	hash_map<int, int> GlobalNotationTable;
	hash_map<int, int> GlobalGEntityTable;
	hash_map<int, int> GlobalPEntityTable;

	/* Counters, one per kind of declared name. */
	int globalElementCount;
	int globalAttributeCount;
	int globalNotationCount;
	int globalGEntityCount;
	int globalPEntityCount;

	/* One attribute model (a vector of ATT_info pointers) per element. */
	vector<vector<ATT_info *> > ElementAttributeData;

	/* Content model for the document root: a single occurrence of the
	   element named in the DOCTYPE declaration. */
	CM_RegExp *rootModel;

	/* Content models keyed by int (this replaced an earlier
	   vector-based layout). */
	hash_map<int, ContentModel *> ContentModelData;

	/* Per-entity and per-notation records. */
	vector<GEntity_info *> GEntityData;
	vector<PEntity_info *> PEntityData;
	vector<Notation_info *> NotationData;
};

#endif /*XMLMODEL_H*/