File indexing completed on 2025-08-03 08:19:38
0001
0002
0003
0004
0005
0006
0007
0008
0009 #include <GTL/gml_scanner.h>
0010
0011 #include <cstdlib>
0012 #include <cctype>
0013 #include <cassert>
0014 #include <cstring>
0015
0016 #ifdef __GTL_MSVCC
0017 # ifdef _DEBUG
0018 # ifndef SEARCH_MEMORY_LEAKS_ENABLED
0019 # error SEARCH NOT ENABLED
0020 # endif
0021 # define new DEBUG_NEW
0022 # undef THIS_FILE
0023 static char THIS_FILE[] = __FILE__;
0024 # endif
0025 #endif
0026
0027 __GTL_BEGIN_NAMESPACE
0028
0029
0030
0031
0032
/*
 * Entity names of the ISO 8859-1 characters 160..255, in code order:
 * GML_table[i] is the "&name;" sequence for character code 160 + i.
 * GML_search_ISO() relies on this ordering.
 */
const char* GML_table[] = {
    "&nbsp;",     /* 160 */
    "&iexcl;",
    "&cent;",
    "&pound;",
    "&curren;",
    "&yen;",
    "&brvbar;",
    "&sect;",
    "&uml;",
    "&copy;",
    "&ordf;",     /* 170 */
    "&laquo;",
    "&not;",
    "&shy;",
    "&reg;",
    "&macr;",
    "&deg;",
    "&plusmn;",
    "&sup2;",
    "&sup3;",
    "&acute;",    /* 180 */
    "&micro;",
    "&para;",
    "&middot;",
    "&cedil;",
    "&sup1;",
    "&ordm;",
    "&raquo;",
    "&frac14;",
    "&frac12;",
    "&frac34;",   /* 190 */
    "&iquest;",
    "&Agrave;",
    "&Aacute;",
    "&Acirc;",
    "&Atilde;",
    "&Auml;",
    "&Aring;",
    "&AElig;",
    "&Ccedil;",
    "&Egrave;",   /* 200 */
    "&Eacute;",
    "&Ecirc;",
    "&Euml;",
    "&Igrave;",
    "&Iacute;",
    "&Icirc;",
    "&Iuml;",
    "&ETH;",
    "&Ntilde;",
    "&Ograve;",   /* 210 */
    "&Oacute;",
    "&Ocirc;",
    "&Otilde;",
    "&Ouml;",
    "&times;",
    "&Oslash;",
    "&Ugrave;",
    "&Uacute;",
    "&Ucirc;",
    "&Uuml;",     /* 220 */
    "&Yacute;",
    "&THORN;",
    "&szlig;",
    "&agrave;",
    "&aacute;",
    "&acirc;",
    "&atilde;",
    "&auml;",
    "&aring;",
    "&aelig;",    /* 230 */
    "&ccedil;",
    "&egrave;",
    "&eacute;",
    "&ecirc;",
    "&euml;",
    "&igrave;",
    "&iacute;",
    "&icirc;",
    "&iuml;",
    "&eth;",      /* 240 */
    "&ntilde;",
    "&ograve;",
    "&oacute;",
    "&ocirc;",
    "&otilde;",
    "&ouml;",
    "&divide;",
    "&oslash;",
    "&ugrave;",
    "&uacute;",   /* 250 */
    "&ucirc;",
    "&uuml;",
    "&yacute;",
    "&thorn;",
    "&yuml;"      /* 255 */
};


/* Line the scanner is currently reading; GML_init() sets it to 1. */
unsigned int GML_line;

/* Column the scanner is currently reading; GML_init() sets it to 1. */
unsigned int GML_column;


/*
 * Returns non-zero when the first `len` characters of `str` spell exactly
 * `name`. `str` does not have to be NUL-terminated; at most `len`
 * characters of it are read.
 */
static int GML_entity_equals (const char* str, int len, const char* name) {
    return strlen (name) == (size_t) len
        && strncmp (str, name, (size_t) len) == 0;
}


/*
 * Translates an entity sequence such as "&amp;" or "&eacute;" into its
 * character code.
 *
 * str  start of the sequence, including the leading '&' and the
 *      trailing ';'; need not be NUL-terminated
 * len  number of characters of `str` that belong to the sequence
 *
 * Returns 34, 38, 60 or 62 for the markup entities quot, amp, lt and gt,
 * 160..255 for the names listed in GML_table, and '&' when the sequence
 * is unknown, empty or `str` is NULL. Names are case sensitive and must
 * match in full; a mere prefix of a known name is not accepted.
 */
int GML_search_ISO (char* str, int len) {

    const int table_size = (int) (sizeof GML_table / sizeof GML_table[0]);
    int i;

    /* strncmp with a zero length compares equal to everything. */
    if (str == NULL || len <= 0) {
        return '&';
    }

    /* Markup characters that are not part of the 160..255 table. */
    if (GML_entity_equals (str, len, "&quot;")) {
        return 34;
    } else if (GML_entity_equals (str, len, "&amp;")) {
        return 38;
    } else if (GML_entity_equals (str, len, "&lt;")) {
        return 60;
    } else if (GML_entity_equals (str, len, "&gt;")) {
        return 62;
    }

    for (i = 0; i < table_size; i++) {
        if (GML_entity_equals (str, len, GML_table[i])) {
            return i + 160;
        }
    }

    return '&';
}
0165
0166
0167 void GML_init () {
0168
0169 GML_line = 1;
0170 GML_column = 1;
0171 }
0172
0173
0174
0175 struct GML_token GML_scanner (FILE* source) {
0176
0177 int cur_max_size = INITIAL_SIZE;
0178 static char buffer[INITIAL_SIZE];
0179 char* tmp = buffer;
0180 char* ret = tmp;
0181 struct GML_token token;
0182 int is_float = 0;
0183 int count = 0;
0184 int next;
0185 char ISO_buffer[8];
0186 int ISO_count;
0187
0188 assert (source != NULL);
0189
0190
0191
0192
0193
0194 do {
0195 next = fgetc (source);
0196 GML_column++;
0197
0198 if (next == '\n') {
0199 GML_line++;
0200 GML_column = 1;
0201 } else if (next == EOF) {
0202 token.kind = GML_END;
0203 return token;
0204 }
0205 } while (isspace (next));
0206
0207 if (isdigit (next) || next == '.' || next == '+' || next == '-') {
0208
0209
0210
0211
0212
0213 do {
0214 if (count == INITIAL_SIZE - 1) {
0215 token.value.err.err_num = GML_TOO_MANY_DIGITS;
0216 token.value.err.line = GML_line;
0217 token.value.err.column = GML_column + count;
0218 token.kind = GML_ERROR;
0219 return token;
0220 }
0221
0222 if (next == '.' || next == 'E') {
0223 is_float = 1;
0224 }
0225
0226 buffer[count] = next;
0227 count++;
0228 next = fgetc (source);
0229
0230 } while (!isspace(next) && next != ']');
0231
0232 if (next == ']') {
0233 ungetc (next, source);
0234 }
0235
0236 buffer[count] = 0;
0237
0238 if (next == '\n') {
0239 GML_line++;
0240 GML_column = 1;
0241 } else {
0242 GML_column += count;
0243 }
0244
0245 if (is_float) {
0246 token.value.floating = atof (tmp);
0247 token.kind = GML_DOUBLE;
0248 } else {
0249 token.value.integer = atol (tmp);
0250 token.kind = GML_INT;
0251 }
0252
0253 return token;
0254
0255 } else if (isalpha (next) || next == '_') {
0256
0257
0258
0259
0260
0261 do {
0262 if (count == cur_max_size - 1) {
0263 *tmp = 0;
0264 tmp = (char*) malloc(2 * cur_max_size * sizeof (char));
0265 strcpy (tmp, ret);
0266
0267 if (cur_max_size > INITIAL_SIZE) {
0268 free (ret);
0269 }
0270
0271 ret = tmp;
0272 tmp += count;
0273 cur_max_size *= 2;
0274 }
0275
0276 if (!isalnum (next) && next != '_') {
0277 token.value.err.err_num = GML_UNEXPECTED;
0278 token.value.err.line = GML_line;
0279 token.value.err.column = GML_column + count;
0280 token.kind = GML_ERROR;
0281
0282 if (cur_max_size > INITIAL_SIZE) {
0283 free (ret);
0284 }
0285
0286 return token;
0287 }
0288
0289 *tmp++ = next;
0290 count++;
0291 next = fgetc (source);
0292 } while (!isspace (next) && next != EOF);
0293
0294 if (next == '\n') {
0295 GML_line++;
0296 GML_column = 1;
0297 } else {
0298 GML_column += count;
0299 }
0300
0301 *tmp = 0;
0302 token.kind = GML_KEY;
0303 token.value.str = (char*) malloc((count+1) * sizeof (char));
0304 strcpy (token.value.str, ret);
0305
0306 if (cur_max_size > INITIAL_SIZE) {
0307 free (ret);
0308 }
0309
0310 return token;
0311
0312 } else {
0313
0314
0315
0316
0317 switch (next) {
0318 case '#':
0319 do {
0320 next = fgetc (source);
0321 } while (next != '\n' && next != EOF);
0322
0323 GML_line++;
0324 GML_column = 1;
0325 return GML_scanner (source);
0326
0327 case '[':
0328 token.kind = GML_L_BRACKET;
0329 return token;
0330
0331 case ']':
0332 token.kind = GML_R_BRACKET;
0333 return token;
0334
0335 case '"':
0336 next = fgetc (source);
0337 GML_column++;
0338
0339 while (next != '"') {
0340
0341 if (count >= cur_max_size - 8) {
0342 *tmp = 0;
0343 tmp = (char*) malloc (2 * cur_max_size * sizeof(char));
0344 strcpy (tmp, ret);
0345
0346 if (cur_max_size > INITIAL_SIZE) {
0347 free (ret);
0348 }
0349
0350 ret = tmp;
0351 tmp += count;
0352 cur_max_size *= 2;
0353 }
0354
0355 if (next == '&') {
0356 ISO_count = 0;
0357
0358 while (next != ';') {
0359 if (next == '"' || next == EOF) {
0360 ungetc (next, source);
0361 ISO_count = 0;
0362 break;
0363 }
0364
0365 if (ISO_count < 8) {
0366 ISO_buffer[ISO_count] = next;
0367 ISO_count++;
0368 }
0369
0370 next = fgetc (source);
0371 }
0372
0373 if (ISO_count == 8) {
0374 ISO_count = 0;
0375 }
0376
0377 if (ISO_count) {
0378 ISO_buffer[ISO_count] = ';';
0379 ISO_count++;
0380 next = GML_search_ISO (ISO_buffer, ISO_count);
0381 ISO_count = 0;
0382 } else {
0383 next = '&';
0384 }
0385 }
0386
0387 *tmp++ = next;
0388 count++;
0389 GML_column++;
0390
0391 next = fgetc (source);
0392
0393 if (next == EOF) {
0394 token.value.err.err_num = GML_PREMATURE_EOF;
0395 token.value.err.line = GML_line;
0396 token.value.err.column = GML_column + count;
0397 token.kind = GML_ERROR;
0398
0399 if (cur_max_size > INITIAL_SIZE) {
0400 free (ret);
0401 }
0402
0403 return token;
0404 }
0405
0406 if (next == '\n') {
0407 GML_line++;
0408 GML_column = 1;
0409 }
0410 }
0411
0412 *tmp = 0;
0413 token.kind = GML_STRING;
0414 token.value.str = (char*) malloc((count+1) * sizeof (char));
0415 strcpy (token.value.str, ret);
0416
0417 if (cur_max_size > INITIAL_SIZE) {
0418 free (ret);
0419 }
0420
0421 return token;
0422
0423 default:
0424 token.value.err.err_num = GML_UNEXPECTED;
0425 token.value.err.line = GML_line;
0426 token.value.err.column = GML_column;
0427 token.kind = GML_ERROR;
0428 return token;
0429 }
0430 }
0431 }
0432
0433 __GTL_END_NAMESPACE
0434
0435
0436
0437