00001
00002
00003
00004
00005
00006
00007
00008
00009
00010
00011
00012
00013
00014
00015
00016
00017
00018
00019
00020
00021
00022
00023
00044 #include <stdlib.h>
00045 #include <string.h>
00046
00047 #include "wbxml.h"
00048
00049
00050
/*
 * Parser tuning constants.
 *
 * The three *_MALLOC_BLOCK values are handed to wbxml_buffer_create() as its
 * last argument (presumably the allocation granularity of the buffer -
 * confirm against the buffer API).
 */

/** Last argument of wbxml_buffer_create() for the buffer holding the WBXML document */
#define WBXML_PARSER_MALLOC_BLOCK 5000

/** Last argument of wbxml_buffer_create() for the String Table buffer */
#define WBXML_PARSER_STRING_TABLE_MALLOC_BLOCK 200

/** Last argument of wbxml_buffer_create() for attribute / PI value buffers */
#define WBXML_PARSER_ATTR_VALUE_MALLOC_BLOCK 100

/**
 * When non-zero, a tag token that is not found in the language table is
 * reported as a literal tag named WBXML_PARSER_UNKNOWN_STRING instead of
 * failing with WBXML_ERROR_UNKNOWN_TAG (see parse_tag()).
 */
#define WBXML_PARSER_BEST_EFFORT 1

/** Name substituted for tokens that cannot be resolved in best-effort mode */
#define WBXML_PARSER_UNKNOWN_STRING ((WB_UTINY *)"unknown")

/** Presumably the largest entity code accepted by parse_entcode() - TODO confirm, use is outside this view */
#define WBXML_PARSER_MAX_ENTITY_CODE 999999
00063
00064
/**
 * @brief Code space a token belongs to
 * @note Used by parse_switch_page() to decide which current code page
 *       (tag or attribute) a SWITCH_PAGE token updates.
 */
typedef enum WBXMLTokenType_e {
    WBXML_TAG_TOKEN,  /**< Token from the tag code space */
    WBXML_ATTR_TOKEN  /**< Token from the attribute code space */
} WBXMLTokenType;
00072
00073
00080 struct WBXMLParser_s {
00081 void *user_data;
00082 WBXMLContentHandler *content_hdl;
00083 WBXMLBuffer *wbxml;
00084 WBXMLBuffer *strstbl;
00085 const WBXMLLangEntry *langTable;
00086 const WBXMLLangEntry *mainTable;
00087 const WBXMLTagEntry *current_tag;
00089 WBXMLLanguage lang_forced;
00090 WB_ULONG public_id;
00091 WB_LONG public_id_index;
00092 WB_LONG charset;
00093 WB_ULONG pos;
00094 WBXMLVersion version;
00095 WB_UTINY tagCodePage;
00096 WB_UTINY attrCodePage;
00097 };
00098
00099
00100
00101
00102
00103
00104
00105
00106 static void wbxml_parser_reinit(WBXMLParser *parser);
00107
00108
00109 static WB_BOOL is_token(WBXMLParser *parser, WB_UTINY token);
00110 static WB_BOOL is_literal(WBXMLParser *parser);
00111 static WB_BOOL is_attr_value(WBXMLParser *parser);
00112 static WB_BOOL is_string(WBXMLParser *parser);
00113 static WB_BOOL is_extension(WBXMLParser *parser);
00114 static WB_BOOL check_public_id(WBXMLParser *parser);
00115
00116
00117 static WBXMLError parse_version(WBXMLParser *parser);
00118 static WBXMLError parse_publicid(WBXMLParser *parser);
00119 static WBXMLError parse_charset(WBXMLParser *parser);
00120 static WBXMLError parse_strtbl(WBXMLParser *parser);
00121 static WBXMLError parse_body(WBXMLParser *parser);
00122
00123 static WBXMLError parse_pi(WBXMLParser *parser);
00124 static WBXMLError parse_element(WBXMLParser *parser);
00125 static void free_attrs_table(WBXMLAttribute **attrs);
00126
00127 static WBXMLError parse_switch_page(WBXMLParser *parser, WBXMLTokenType code_space);
00128 static WBXMLError parse_stag(WBXMLParser *parser, WB_UTINY *tag, WBXMLTag **element);
00129 static WBXMLError parse_tag(WBXMLParser *parser, WB_UTINY *tag, WBXMLTag **element);
00130 static WBXMLError parse_attribute(WBXMLParser *parser, WBXMLAttribute **attr);
00131 static WBXMLError parse_content(WBXMLParser *parser, WB_UTINY **content, WB_LONG *len, WB_BOOL *static_content);
00132
00133 static WBXMLError parse_string(WBXMLParser *parser, WB_UTINY **str, WB_LONG *len);
00134 static WBXMLError parse_extension(WBXMLParser *parser, WBXMLTokenType code_space, WB_UTINY **ext, WB_LONG *len);
00135 static WBXMLError parse_entity(WBXMLParser *parser, WB_UTINY **entity, WB_LONG *len);
00136 static WBXMLError parse_opaque(WBXMLParser *parser, WB_UTINY **data, WB_LONG *len);
00137
00138 static WBXMLError parse_literal(WBXMLParser *parser, WB_UTINY *tag, WB_UTINY **result);
00139
00140 static WBXMLError parse_attr_start(WBXMLParser *parser, WBXMLAttributeName **name, WB_UTINY **value);
00141 static WBXMLError parse_attr_value(WBXMLParser *parser, WB_UTINY **value, WB_LONG *len, WB_BOOL *static_value);
00142
00143 static WBXMLError parse_termstr(WBXMLParser *parser, WB_UTINY **str, WB_LONG *len);
00144 static WBXMLError parse_inline(WBXMLParser *parser, WB_UTINY **str, WB_LONG *len);
00145 static WBXMLError parse_tableref(WBXMLParser *parser, WB_UTINY **str, WB_LONG *len);
00146 static WBXMLError parse_entcode(WBXMLParser *parser, WB_ULONG *result);
00147
00148 static WBXMLError get_strtbl_reference(WBXMLParser *parser, WB_ULONG index, WB_UTINY **str, WB_LONG *len);
00149
00150
00151 static WBXMLError parse_uint8(WBXMLParser *parser, WB_UTINY *result);
00152 static WBXMLError parse_mb_uint32(WBXMLParser *parser, WB_ULONG *result);
00153
00154
00155 static WBXMLError decode_datetime(WBXMLBuffer *buff);
00156
00157 static WBXMLError decode_opaque_content(WBXMLParser *parser, WB_UTINY **data, WB_LONG *len, WB_BOOL *static_content);
00158
00159 #if defined( WBXML_SUPPORT_WV )
00160 static WBXMLError decode_wv_content(WBXMLParser *parser, WB_UTINY **data, WB_LONG *len, WB_BOOL *static_content);
00161 static WBXMLError decode_wv_integer(WB_UTINY **data, WB_LONG *len);
00162 static WBXMLError decode_wv_datetime(WB_UTINY **data, WB_LONG *len);
00163 #endif
00164
00165 #if defined( WBXML_SUPPORT_DRMREL )
00166 static WBXMLError decode_drmrel_keyvalue(WB_UTINY **data, WB_LONG *len);
00167 #endif
00168
00169
00170 #define CHECK_ERROR if (ret != WBXML_OK) return ret;
00171
00172
00173
00174
00175
00176
00177 WBXML_DECLARE(WBXMLParser *) wbxml_parser_create(void)
00178 {
00179 WBXMLParser *parser = NULL;
00180
00181 parser = (WBXMLParser *) wbxml_malloc(sizeof(WBXMLParser));
00182 if (parser == NULL) {
00183 return NULL;
00184 }
00185
00186 parser->wbxml = NULL;
00187 parser->user_data = NULL;
00188 parser->content_hdl = NULL;
00189 parser->strstbl = NULL;
00190 parser->langTable = NULL;
00191
00192
00193 parser->mainTable = wbxml_tables_get_main();
00194
00195 parser->current_tag = NULL;
00196
00197 parser->lang_forced = WBXML_LANG_UNKNOWN;
00198 parser->public_id = WBXML_PUBLIC_ID_UNKNOWN;
00199 parser->public_id_index = -1;
00200 parser->charset = -1;
00201 parser->version = WBXML_VERSION_UNKNOWN;
00202
00203 parser->pos = 0;
00204 parser->tagCodePage = 0;
00205 parser->attrCodePage = 0;
00206
00207 return parser;
00208 }
00209
00210
00211 WBXML_DECLARE(void) wbxml_parser_destroy(WBXMLParser *parser)
00212 {
00213 if (parser == NULL)
00214 return;
00215
00216 wbxml_buffer_destroy(parser->wbxml);
00217 wbxml_buffer_destroy(parser->strstbl);
00218
00219 wbxml_free(parser);
00220 }
00221
00222
00223 WBXML_DECLARE(WBXMLError) wbxml_parser_parse(WBXMLParser *parser, WB_UTINY *wbxml, WB_ULONG wbxml_len)
00224 {
00225 WBXMLError ret = WBXML_OK;
00226
00227 if (parser == NULL)
00228 return WBXML_ERROR_NULL_PARSER;
00229
00230 if ((wbxml == NULL) || (wbxml_len <= 0))
00231 return WBXML_ERROR_EMPTY_WBXML;
00232
00233
00234 wbxml_parser_reinit(parser);
00235
00236 parser->wbxml = wbxml_buffer_create(wbxml, wbxml_len, WBXML_PARSER_MALLOC_BLOCK);
00237 if (parser->wbxml == NULL)
00238 return WBXML_ERROR_NOT_ENOUGH_MEMORY;
00239
00240
00241 ret = parse_version(parser);
00242 CHECK_ERROR
00243
00244 if (parser->version > WBXML_VERSION_13)
00245 WBXML_WARNING((WBXML_PARSER, "This library only supports WBXML " WBXML_VERSION_TEXT_13));
00246
00247
00248 ret = parse_publicid(parser);
00249 CHECK_ERROR
00250
00251
00252 if (parser->lang_forced != WBXML_LANG_UNKNOWN)
00253 parser->public_id = wbxml_tables_get_wbxml_publicid(wbxml_tables_get_main(), parser->lang_forced);
00254
00255
00256 if (parser->version != WBXML_VERSION_10) {
00257 ret = parse_charset(parser);
00258 CHECK_ERROR
00259
00261 }
00262
00263
00264 ret = parse_strtbl(parser);
00265 CHECK_ERROR
00266
00267
00268 if (!check_public_id(parser)) {
00269 WBXML_ERROR((WBXML_PARSER, "PublicID not found"));
00270 return WBXML_ERROR_UNKNOWN_PUBLIC_ID;
00271 }
00272
00273
00274 if ((parser->content_hdl != NULL) && (parser->content_hdl->start_document_clb != NULL))
00275 parser->content_hdl->start_document_clb(parser->user_data, parser->charset, parser->langTable);
00276
00277
00278 ret = parse_body(parser);
00279 CHECK_ERROR
00280
00281
00282 if ((parser->content_hdl != NULL) && (parser->content_hdl->end_document_clb != NULL))
00283 parser->content_hdl->end_document_clb(parser->user_data);
00284
00285 return ret;
00286 }
00287
00288
00289 WBXML_DECLARE(void) wbxml_parser_set_user_data(WBXMLParser *parser, void *user_data)
00290 {
00291 if (parser != NULL)
00292 parser->user_data = user_data;
00293 }
00294
00295
00296 WBXML_DECLARE(void) wbxml_parser_set_content_handler(WBXMLParser *parser, WBXMLContentHandler *content_handler)
00297 {
00298 if (parser != NULL)
00299 parser->content_hdl = content_handler;
00300 }
00301
00302
00303 WBXML_DECLARE(void) wbxml_parser_set_main_table(WBXMLParser *parser, const WBXMLLangEntry *main_table)
00304 {
00305 if (parser != NULL)
00306 parser->mainTable = main_table;
00307 }
00308
00309
00310 WBXML_DECLARE(WB_BOOL) wbxml_parser_set_language(WBXMLParser *parser, WBXMLLanguage lang)
00311 {
00312 if (parser != NULL) {
00313 parser->lang_forced = lang;
00314 return TRUE;
00315 }
00316
00317 return FALSE;
00318 }
00319
00320
00321 WBXML_DECLARE(WB_ULONG) wbxml_parser_get_wbxml_public_id(WBXMLParser *parser)
00322 {
00323 if ((parser != NULL) && (parser->langTable != NULL) && (parser->langTable->publicID != NULL))
00324 return parser->langTable->publicID->wbxmlPublicID;
00325 else
00326 return WBXML_PUBLIC_ID_UNKNOWN;
00327 }
00328
00329
00330 WBXML_DECLARE(const WB_UTINY *) wbxml_parser_get_xml_public_id(WBXMLParser *parser)
00331 {
00332 if ((parser != NULL) && (parser->langTable != NULL) && (parser->langTable->publicID != NULL))
00333 return (const WB_UTINY *) parser->langTable->publicID->xmlPublicID;
00334 else
00335 return NULL;
00336 }
00337
00338
00339 WBXML_DECLARE(WBXMLVersion) wbxml_parser_get_wbxml_version(WBXMLParser *parser)
00340 {
00341 if (parser != NULL)
00342 return parser->version;
00343 else
00344 return WBXML_VERSION_UNKNOWN;
00345 }
00346
00347
00348 WBXML_DECLARE(WB_LONG) wbxml_parser_get_current_byte_index(WBXMLParser *parser)
00349 {
00350 if (parser != NULL)
00351 return parser->pos - 1;
00352 else
00353 return 0;
00354 }
00355
00356
00357
00358
00359
00360
00361
00362
00363
00364
00371 static void wbxml_parser_reinit(WBXMLParser *parser)
00372 {
00373 if (parser == NULL)
00374 return;
00375
00376 wbxml_buffer_destroy(parser->wbxml);
00377 parser->wbxml = NULL;
00378
00379 wbxml_buffer_destroy(parser->strstbl);
00380 parser->strstbl = NULL;
00381
00382 parser->langTable = NULL;
00383
00384 parser->current_tag = NULL;
00385
00386 parser->public_id = WBXML_PUBLIC_ID_UNKNOWN;
00387 parser->public_id_index = -1;
00388 parser->charset = -1;
00389 parser->version = WBXML_VERSION_UNKNOWN;
00390
00391 parser->pos = 0;
00392 parser->tagCodePage = 0;
00393 parser->attrCodePage = 0;
00394 }
00395
00396
00397
00398
00399
00400
00407 static WB_BOOL is_token(WBXMLParser *parser, WB_UTINY token)
00408 {
00409 WB_UTINY result;
00410
00411 if (!wbxml_buffer_get_char(parser->wbxml, parser->pos, &result))
00412 return FALSE;
00413
00414 return (result == token);
00415 }
00416
00417
00423 static WB_BOOL is_literal(WBXMLParser *parser)
00424 {
00425 WB_UTINY result;
00426
00427 if (!wbxml_buffer_get_char(parser->wbxml, parser->pos, &result))
00428 return FALSE;
00429
00430 return ((result == WBXML_LITERAL) || (result == WBXML_LITERAL_A) || (result == WBXML_LITERAL_C) || (result == WBXML_LITERAL_AC));
00431 }
00432
00433
00440 static WB_BOOL is_attr_value(WBXMLParser *parser)
00441 {
00442 WB_UTINY cur_byte, next_byte;
00443
00444
00445 if (!wbxml_buffer_get_char(parser->wbxml, parser->pos, &cur_byte))
00446 return FALSE;
00447
00448
00449 if (is_token(parser, WBXML_SWITCH_PAGE)) {
00450 if (!wbxml_buffer_get_char(parser->wbxml, parser->pos + 2, &next_byte))
00451 return FALSE;
00452
00453
00454 if ((next_byte & 0x80) == 0x80)
00455 return TRUE;
00456 }
00457
00458
00459 if (((cur_byte & 0x80) == 0x80) ||
00460 (is_string(parser)) ||
00461 (is_extension(parser)) ||
00462 (is_token(parser, WBXML_ENTITY)) ||
00463 (is_token(parser, WBXML_OPAQUE)))
00464 return TRUE;
00465
00466 return FALSE;
00467 }
00468
00469
00475 static WB_BOOL is_string(WBXMLParser *parser)
00476 {
00477 return (is_token(parser, WBXML_STR_I) || is_token(parser, WBXML_STR_T));
00478 }
00479
00480
00486 static WB_BOOL is_extension(WBXMLParser *parser)
00487 {
00488 WB_UTINY cur_byte;
00489
00490
00491 if (is_token(parser, WBXML_SWITCH_PAGE)) {
00492 if (!wbxml_buffer_get_char(parser->wbxml, parser->pos + 2, &cur_byte))
00493 return FALSE;
00494 }
00495 else {
00496 if (!wbxml_buffer_get_char(parser->wbxml, parser->pos, &cur_byte))
00497 return FALSE;
00498 }
00499
00500 return ((cur_byte == WBXML_EXT_I_0) || (cur_byte == WBXML_EXT_I_1) || (cur_byte == WBXML_EXT_I_2) ||
00501 (cur_byte == WBXML_EXT_T_0) || (cur_byte == WBXML_EXT_T_1) || (cur_byte == WBXML_EXT_T_2) ||
00502 (cur_byte == WBXML_EXT_0) || (cur_byte == WBXML_EXT_1) || (cur_byte == WBXML_EXT_2));
00503 }
00504
00505
00511 static WB_BOOL check_public_id(WBXMLParser *parser)
00512 {
00513 WB_LONG index = 0, len = 0;
00514 WB_UTINY *public_id = NULL;
00515
00516 WBXML_DEBUG((WBXML_PARSER, "\t Checking PublicID"));
00517
00518
00519 if ((parser->lang_forced == WBXML_LANG_UNKNOWN) &&
00520 (parser->public_id == WBXML_PUBLIC_ID_UNKNOWN) &&
00521 (parser->public_id_index == -1))
00522 {
00523 return FALSE;
00524 }
00525
00526
00527
00528
00529
00530
00531 if (parser->lang_forced != WBXML_LANG_UNKNOWN) {
00532
00533 while (parser->mainTable[index].langID != -1) {
00534 if (parser->mainTable[index].langID == parser->lang_forced) {
00535 parser->langTable = &(parser->mainTable[index]);
00536
00537 WBXML_DEBUG((WBXML_PARSER, "\t Language Forced - PublicID : '%s'", parser->mainTable[index].publicID->xmlPublicID));
00538
00539 return TRUE;
00540 }
00541
00542 index++;
00543 }
00544 }
00545
00546
00547
00548
00549
00550
00551
00552 if (parser->public_id != WBXML_PUBLIC_ID_UNKNOWN) {
00553 WBXML_DEBUG((WBXML_PARSER, "\t PublicID token: 0x%X", parser->public_id));
00554
00555
00556 while (parser->mainTable[index].publicID != NULL) {
00557 if (parser->mainTable[index].publicID->wbxmlPublicID == parser->public_id) {
00558 parser->langTable = &(parser->mainTable[index]);
00559
00560 WBXML_DEBUG((WBXML_PARSER, "\t PublicID : '%s'", parser->mainTable[index].publicID->xmlPublicID));
00561
00562 return TRUE;
00563 }
00564
00565 index++;
00566 }
00567 }
00568
00569
00570
00571
00572
00573 if (parser->public_id_index != -1) {
00574 WBXML_DEBUG((WBXML_PARSER, "\t PublicID is in String Table (index: 0x%X)", parser->public_id_index));
00575
00576 if (get_strtbl_reference(parser, (WB_ULONG) parser->public_id_index, &public_id, &len) != WBXML_OK) {
00577 WBXML_ERROR((WBXML_PARSER, "Bad publicID reference in string table"));
00578 return FALSE;
00579 }
00580
00581 WBXML_DEBUG((WBXML_PARSER, "\t PublicID : '%s'", public_id));
00582
00583
00584 while (parser->mainTable[index].publicID != NULL)
00585 {
00586 if ((parser->mainTable[index].publicID->xmlPublicID != NULL) &&
00587 (WBXML_STRCASECMP(parser->mainTable[index].publicID->xmlPublicID, public_id) == 0))
00588 {
00589 parser->langTable = &(parser->mainTable[index]);
00590
00591 return TRUE;
00592 }
00593
00594 index++;
00595 }
00596 }
00597
00598
00599 return FALSE;
00600 }
00601
00602
00603
00604
00605
00606
00607
00614 static WBXMLError parse_version(WBXMLParser *parser)
00615 {
00616 WBXMLError ret = WBXML_OK;
00617
00618 if ((ret = parse_uint8(parser, (WB_UTINY*) &parser->version)) != WBXML_OK)
00619 return ret;
00620
00621 WBXML_DEBUG((WBXML_PARSER, "(%d) Parsed version: '0x%X'", parser->pos - 1, (WB_TINY) parser->version));
00622
00623 return WBXML_OK;
00624 }
00625
00626
00634 static WBXMLError parse_publicid(WBXMLParser *parser)
00635 {
00636 WB_UTINY public_id;
00637
00638 if (!wbxml_buffer_get_char(parser->wbxml, parser->pos, &public_id))
00639 return WBXML_ERROR_END_OF_BUFFER;
00640
00641 WBXML_DEBUG((WBXML_PARSER, "(%d) Parsed publicid: '0x%X'", parser->pos, public_id));
00642
00643 if (public_id == 0x00) {
00644 parser->pos++;
00645
00646
00647 return parse_mb_uint32(parser, (WB_ULONG *)&parser->public_id_index);
00648 }
00649 else {
00650
00651 return parse_mb_uint32(parser, &parser->public_id);
00652 }
00653 }
00654
00655
00670 static WBXMLError parse_charset(WBXMLParser *parser)
00671 {
00672 WB_ULONG startpos = parser->pos;
00673 WBXMLError ret = parse_mb_uint32(parser, (WB_ULONG *)&parser->charset);
00674
00675 WBXML_DEBUG((WBXML_PARSER, "(%d) Parsed charset: '0x%X'", startpos, parser->charset));
00676
00677 return ret;
00678 }
00679
00680
00688 static WBXMLError parse_strtbl(WBXMLParser *parser)
00689 {
00690 WB_UTINY *data = NULL;
00691 WB_ULONG strtbl_len = 0;
00692 WBXMLError ret = WBXML_OK;
00693
00694 WBXML_DEBUG((WBXML_PARSER, "(%d) Parsing strtbl", parser->pos));
00695
00696
00697 ret = parse_mb_uint32(parser, &strtbl_len);
00698 if (ret != WBXML_OK)
00699 return WBXML_ERROR_END_OF_BUFFER;
00700
00701 if (strtbl_len > 0) {
00702
00703 if (parser->pos + strtbl_len > wbxml_buffer_len(parser->wbxml))
00704 return WBXML_ERROR_STRTBL_LENGTH;
00705
00706
00707 data = wbxml_buffer_get_cstr(parser->wbxml);
00708 parser->strstbl = wbxml_buffer_create(data + parser->pos, strtbl_len, WBXML_PARSER_STRING_TABLE_MALLOC_BLOCK);
00709 if (parser->strstbl == NULL)
00710 return WBXML_ERROR_NOT_ENOUGH_MEMORY;
00711
00712 parser->pos = parser->pos + strtbl_len;
00713 }
00714
00715 return WBXML_OK;
00716 }
00717
00718
00725 static WBXMLError parse_body(WBXMLParser *parser)
00726 {
00727 WBXMLError ret = WBXML_OK;
00728
00729 WBXML_DEBUG((WBXML_PARSER, "(%d) Parsing body", parser->pos));
00730
00731 while (is_token(parser, WBXML_PI)) {
00732 if ((ret = parse_pi(parser)) != WBXML_OK)
00733 return ret;
00734 }
00735
00736 if ((ret = parse_element(parser)) != WBXML_OK)
00737 return ret;
00738
00739 while (is_token(parser, WBXML_PI)) {
00740 if ((ret = parse_pi(parser)) != WBXML_OK)
00741 return ret;
00742 }
00743
00744 return WBXML_OK;
00745 }
00746
00747
00754 static WBXMLError parse_pi(WBXMLParser *parser)
00755 {
00756 WBXMLAttributeName *name = NULL;
00757 WB_UTINY *value = NULL;
00758 WBXMLBuffer *attr_value = NULL;
00759 WB_LONG len = 0;
00760 WBXMLError ret = WBXML_OK;
00761 WB_BOOL static_value;
00762
00763 WBXML_DEBUG((WBXML_PARSER, "(%d) Parsing PI", parser->pos));
00764
00765
00766 parser->pos++;
00767
00768
00769 ret = parse_attr_start(parser, &name, &value);
00770 if (ret != WBXML_OK)
00771 return ret;
00772
00773 if (value != NULL)
00774 attr_value = wbxml_buffer_create(value, WBXML_STRLEN(value), WBXML_PARSER_ATTR_VALUE_MALLOC_BLOCK);
00775 else
00776 attr_value = wbxml_buffer_create(NULL, 0, WBXML_PARSER_ATTR_VALUE_MALLOC_BLOCK);
00777
00778 if (attr_value == NULL) {
00779 wbxml_attribute_name_destroy(name);
00780 return WBXML_ERROR_NOT_ENOUGH_MEMORY;
00781 }
00782
00783 while (!is_token(parser, WBXML_END))
00784 {
00785 static_value = TRUE;
00786
00787
00788 ret = parse_attr_value(parser, &value, &len, &static_value);
00789 if (ret != WBXML_OK) {
00790 wbxml_attribute_name_destroy(name);
00791 wbxml_buffer_destroy(attr_value);
00792 return ret;
00793 }
00794
00795 if (!wbxml_buffer_append_data(attr_value, value, len)) {
00796 wbxml_attribute_name_destroy(name);
00797 wbxml_buffer_destroy(attr_value);
00798 return WBXML_ERROR_NOT_ENOUGH_MEMORY;
00799 }
00800
00801 if (!static_value) {
00802 wbxml_free(value);
00803 value = NULL;
00804 }
00805 }
00806
00807
00808 parser->pos++;
00809
00810
00811 if (wbxml_buffer_len(attr_value) > 0)
00812 wbxml_buffer_append_char(attr_value, '\0');
00813
00814
00815 if ((parser->content_hdl != NULL) && (parser->content_hdl->pi_clb != NULL))
00816 parser->content_hdl->pi_clb(parser->user_data, wbxml_attribute_name_get_xml_name(name), wbxml_buffer_get_cstr(attr_value));
00817
00818 wbxml_attribute_name_destroy(name);
00819 wbxml_buffer_destroy(attr_value);
00820
00821 return WBXML_OK;
00822 }
00823
00824
00831 static WBXMLError parse_element(WBXMLParser *parser)
00832 {
00833 WBXMLTag *element = NULL;
00834 WBXMLAttribute *attr = NULL;
00835 WBXMLAttribute **attrs = NULL;
00836 WB_UTINY *content = NULL;
00837
00838 WB_ULONG attrs_nb = 0;
00839 WB_LONG len;
00840 WBXMLError ret = WBXML_OK;
00841 WB_UTINY tag;
00842 WB_BOOL static_content, is_empty;
00843
00844 WBXML_DEBUG((WBXML_PARSER, "(%d) Parsing element", parser->pos));
00845
00846 if (is_token(parser, WBXML_SWITCH_PAGE)) {
00847 ret = parse_switch_page(parser, WBXML_TAG_TOKEN);
00848 if (ret != WBXML_OK)
00849 return ret;
00850 }
00851
00852
00853 ret = parse_stag(parser, &tag, &element);
00854 if (ret != WBXML_OK)
00855 return ret;
00856
00857
00858 if (element->type == WBXML_VALUE_TOKEN)
00859 parser->current_tag = element->u.token;
00860
00861
00862 if (tag & WBXML_TOKEN_WITH_ATTRS) {
00863
00864 do {
00865
00866 ret = parse_attribute(parser, &attr);
00867 if (ret != WBXML_OK) {
00868 wbxml_tag_destroy(element);
00869 free_attrs_table(attrs);
00870 return ret;
00871 }
00872
00873
00874 attrs_nb++;
00875
00876 attrs = wbxml_realloc(attrs, (attrs_nb + 1) * sizeof(*attrs));
00877 if (attrs == NULL) {
00878
00879 wbxml_tag_destroy(element);
00880 wbxml_attribute_destroy(attr);
00881 free_attrs_table(attrs);
00882 return WBXML_ERROR_NOT_ENOUGH_MEMORY;
00883 }
00884
00885 attrs[(attrs_nb - 1)] = attr;
00886 attrs[attrs_nb] = NULL;
00887 } while (!is_token(parser, WBXML_END));
00888
00889
00890 parser->pos++;
00891 }
00892
00893
00894 is_empty = !(tag & WBXML_TOKEN_WITH_CONTENT);
00895
00896
00897 if ((parser->content_hdl != NULL) && (parser->content_hdl->start_element_clb != NULL))
00898 parser->content_hdl->start_element_clb(parser->user_data, element, attrs, is_empty);
00899
00900
00901 free_attrs_table(attrs);
00902
00903
00904 if (!is_empty) {
00905
00906 while (!is_token(parser, WBXML_END))
00907 {
00908 static_content = TRUE;
00909
00910 ret = parse_content(parser, &content, &len, &static_content);
00911 if (ret != WBXML_OK) {
00912 wbxml_tag_destroy(element);
00913 return ret;
00914 }
00915
00916
00917 if ((content != NULL) && (parser->content_hdl != NULL) && (parser->content_hdl->characters_clb != NULL))
00918 parser->content_hdl->characters_clb(parser->user_data, content, 0, len);
00919
00920
00921 if (!static_content)
00922 wbxml_free(content);
00923
00924 content = NULL;
00925 }
00926
00927
00928 parser->pos++;
00929 }
00930
00931
00932 if ((parser->content_hdl != NULL) && (parser->content_hdl->end_element_clb != NULL))
00933 parser->content_hdl->end_element_clb(parser->user_data, element, is_empty);
00934
00935
00936 wbxml_tag_destroy(element);
00937
00938
00939 parser->current_tag = NULL;
00940
00941 return WBXML_OK;
00942 }
00943
00944
00949 static void free_attrs_table(WBXMLAttribute **attrs)
00950 {
00951 WB_LONG i = 0;
00952
00953 if (attrs != NULL) {
00954 while (attrs[i] != NULL) {
00955
00956 wbxml_attribute_destroy(attrs[i++]);
00957 }
00958 wbxml_free(attrs);
00959 }
00960 }
00961
00962
00970 static WBXMLError parse_switch_page(WBXMLParser *parser, WBXMLTokenType code_space)
00971 {
00972 WBXML_DEBUG((WBXML_PARSER, "(%d) Parsing switchPage", parser->pos));
00973
00974 if ((WB_UTINY) parser->version < (WB_UTINY) WBXML_VERSION_12)
00975 WBXML_WARNING((WBXML_PARSER, "No Switch Page mecanism possible in WBXML < %s", WBXML_VERSION_TEXT_12));
00976
00977
00978 parser->pos++;
00979
00980
00981 if (code_space == WBXML_TAG_TOKEN)
00982 return parse_uint8(parser, &parser->tagCodePage);
00983 else
00984 if (code_space == WBXML_ATTR_TOKEN)
00985 return parse_uint8(parser, &parser->attrCodePage);
00986 else
00987 return WBXML_ERROR_INTERNAL;
00988 }
00989
00990
00999 static WBXMLError parse_stag(WBXMLParser *parser, WB_UTINY *tag, WBXMLTag **element)
01000 {
01001 WB_UTINY *name = NULL;
01002 WBXMLError ret = WBXML_OK;
01003
01004 WBXML_DEBUG((WBXML_PARSER, "(%d) Parsing stag", parser->pos));
01005
01006 if (is_literal(parser)) {
01007 if ((ret = parse_literal(parser, tag, &name)) != WBXML_OK)
01008 return ret;
01009
01010
01011 if ((*element = wbxml_tag_create_literal(name)) == NULL)
01012 return WBXML_ERROR_NOT_ENOUGH_MEMORY;
01013
01014 return WBXML_OK;
01015 }
01016
01017 return parse_tag(parser, tag, element);
01018 }
01019
01020
01028 static WBXMLError parse_tag(WBXMLParser *parser, WB_UTINY *tag, WBXMLTag **element)
01029 {
01030 WB_ULONG index = 0;
01031 WB_UTINY token;
01032 WBXMLError ret = WBXML_OK;
01033
01034
01035 ret = parse_uint8(parser, tag);
01036 if (ret != WBXML_OK)
01037 return ret;
01038
01039
01040 token = *tag & WBXML_TOKEN_MASK;
01041
01042
01043 if (parser->langTable == NULL)
01044 return WBXML_ERROR_LANG_TABLE_UNDEFINED;
01045
01046 if (parser->langTable->tagTable == NULL)
01047 return WBXML_ERROR_TAG_TABLE_UNDEFINED;
01048
01049
01050 while ((parser->langTable->tagTable[index].xmlName != NULL) &&
01051 ((parser->langTable->tagTable[index].wbxmlToken != token) ||
01052 (parser->langTable->tagTable[index].wbxmlCodePage != parser->tagCodePage))) {
01053 index++;
01054 }
01055
01056
01057 if (parser->langTable->tagTable[index].xmlName == NULL) {
01058 #if WBXML_PARSER_BEST_EFFORT
01059
01060 if ((*element = wbxml_tag_create_literal(WBXML_PARSER_UNKNOWN_STRING)) == NULL)
01061 return WBXML_ERROR_NOT_ENOUGH_MEMORY;
01062
01063 return WBXML_OK;
01064 #else
01065 return WBXML_ERROR_UNKNOWN_TAG;
01066 #endif
01067 }
01068
01069 if ((*element = wbxml_tag_create(WBXML_VALUE_TOKEN)) == NULL)
01070 return WBXML_ERROR_NOT_ENOUGH_MEMORY;
01071
01072 (*element)->u.token = &(parser->langTable->tagTable[index]);
01073
01074 WBXML_DEBUG((WBXML_PARSER, "(%d) Token: 0x%X", parser->pos - 1, token));
01075
01076 return WBXML_OK;
01077 }
01078
01079
01088 static WBXMLError parse_attribute(WBXMLParser *parser, WBXMLAttribute **attr)
01089 {
01090 WBXMLAttributeName *attr_name = NULL;
01091 WBXMLBuffer *attr_value = NULL;
01092 WB_UTINY *value = NULL;
01093 WB_LONG len;
01094 WBXMLError ret = WBXML_OK;
01095 WB_BOOL static_value;
01096
01097 WBXML_DEBUG((WBXML_PARSER, "(%d) Parsing attribute", parser->pos));
01098
01099
01100 ret = parse_attr_start(parser, &attr_name, &value);
01101 if (ret != WBXML_OK)
01102 return ret;
01103
01104 if (value != NULL)
01105 attr_value = wbxml_buffer_create(value, WBXML_STRLEN(value), WBXML_PARSER_ATTR_VALUE_MALLOC_BLOCK);
01106 else
01107 attr_value = wbxml_buffer_create(NULL, 0, WBXML_PARSER_ATTR_VALUE_MALLOC_BLOCK);
01108
01109 if (attr_value == NULL) {
01110 wbxml_attribute_name_destroy(attr_name);
01111 return WBXML_ERROR_NOT_ENOUGH_MEMORY;
01112 }
01113
01114
01115 while (is_attr_value(parser))
01116 {
01117 static_value = TRUE;
01118
01119
01120 ret = parse_attr_value(parser, &value, &len, &static_value);
01121 if (ret != WBXML_OK) {
01122 wbxml_attribute_name_destroy(attr_name);
01123 wbxml_buffer_destroy(attr_value);
01124 return ret;
01125 }
01126
01127 if (len > 0) {
01128 if (!wbxml_buffer_append_data(attr_value, value, len)) {
01129 wbxml_attribute_name_destroy(attr_name);
01130 wbxml_buffer_destroy(attr_value);
01131 return WBXML_ERROR_NOT_ENOUGH_MEMORY;
01132 }
01133 }
01134
01135 if (!static_value) {
01136 wbxml_free(value);
01137 value = NULL;
01138 }
01139 }
01140
01141 if ((wbxml_buffer_len(attr_value) > 0) && (attr_name->type == WBXML_VALUE_TOKEN))
01142 {
01143
01144 switch (parser->langTable->langID) {
01145 #if defined( WBXML_SUPPORT_SI )
01146 case WBXML_LANG_SI10:
01147
01148 if ((attr_name->u.token->wbxmlCodePage == 0x00) &&
01149 ((attr_name->u.token->wbxmlToken == 0x0a) || (attr_name->u.token->wbxmlToken == 0x10)))
01150 {
01151 if ((ret = decode_datetime(attr_value)) != WBXML_OK) {
01152 wbxml_attribute_name_destroy(attr_name);
01153 wbxml_buffer_destroy(attr_value);
01154 return ret;
01155 }
01156 }
01157 break;
01158 #endif
01159
01160 #if defined( WBXML_SUPPORT_EMN )
01161 case WBXML_LANG_EMN10:
01162
01163 if ((attr_name->u.token->wbxmlCodePage == 0x00) && (attr_name->u.token->wbxmlToken == 0x05))
01164 {
01165 if ((ret = decode_datetime(attr_value)) != WBXML_OK) {
01166 wbxml_attribute_name_destroy(attr_name);
01167 wbxml_buffer_destroy(attr_value);
01168 return ret;
01169 }
01170 }
01171 break;
01172 #endif
01173
01174 default:
01175 break;
01176 }
01177 }
01178
01179
01180 if (wbxml_buffer_len(attr_value) > 0)
01181 wbxml_buffer_append_char(attr_value, '\0');
01182
01183 if ((*attr = wbxml_attribute_create()) == NULL) {
01184 wbxml_attribute_name_destroy(attr_name);
01185 wbxml_buffer_destroy(attr_value);
01186 return WBXML_ERROR_NOT_ENOUGH_MEMORY;
01187 }
01188
01189 (*attr)->name = attr_name;
01190 (*attr)->value = attr_value;
01191
01192 return WBXML_OK;
01193 }
01194
01195
01206 static WBXMLError parse_content(WBXMLParser *parser, WB_UTINY **content, WB_LONG *len, WB_BOOL *static_content)
01207 {
01208 WB_UTINY cur_byte;
01209 WBXMLError ret = WBXML_OK;
01210
01211
01212 if (!wbxml_buffer_get_char(parser->wbxml, parser->pos, &cur_byte))
01213 return WBXML_ERROR_END_OF_BUFFER;
01214
01215 WBXML_DEBUG((WBXML_PARSER, "(%d) Parsing content: '0x%X'", parser->pos, cur_byte));
01216
01217 *static_content = FALSE;
01218
01219
01220 if (is_extension(parser))
01221 return parse_extension(parser, WBXML_TAG_TOKEN, content, len);
01222
01223
01224 if (is_token(parser, WBXML_ENTITY))
01225 return parse_entity(parser, content, len);
01226
01227 *static_content = TRUE;
01228
01229
01230 if (is_string(parser))
01231 return parse_string(parser, content, len);
01232
01233
01234 if (is_token(parser, WBXML_OPAQUE)) {
01235 if ((ret = parse_opaque(parser, content, len)) != WBXML_OK)
01236 return ret;
01237
01238 return decode_opaque_content(parser, content, len, static_content);
01239 }
01240
01241
01242 if (is_token(parser, WBXML_PI))
01243 return parse_pi(parser);
01244
01246 return parse_element(parser);
01247 }
01248
01249
01258 static WBXMLError parse_string(WBXMLParser *parser, WB_UTINY **str, WB_LONG *len)
01259 {
01260 WBXML_DEBUG((WBXML_PARSER, "(%d) Parsing string", parser->pos));
01261
01262 if (is_token(parser, WBXML_STR_I))
01263 return parse_inline(parser, str, len);
01264
01265 if (is_token(parser, WBXML_STR_T))
01266 return parse_tableref(parser, str, len);
01267
01268 return WBXML_ERROR_STRING_EXPECTED;
01269 }
01270
01271
01286 static WBXMLError parse_extension(WBXMLParser *parser, WBXMLTokenType code_space, WB_UTINY **ext, WB_LONG *len)
01287 {
01288 WBXMLError ret = WBXML_OK;
01289 WB_UTINY token;
01290
01291 #if ( defined ( WBXML_SUPPORT_WML ) || defined ( WBXML_SUPPORT_WTA ) )
01292 WB_UTINY var_begin[3] = "$(",
01293 var_end[2] = ")",
01294 escape[8] = ":escape",
01295 unesc[7] = ":unesc",
01296 noesc[7] = ":noesc",
01297 *var = NULL;
01298
01299 WB_LONG var_len;
01300 WB_ULONG index;
01301 #endif
01302
01303 #if defined ( WBXML_SUPPORT_WV )
01304 WB_ULONG ext_value;
01305 WB_UTINY tab_index;
01306 #endif
01307
01308 WBXML_DEBUG((WBXML_PARSER, "(%d) Parsing extension", parser->pos));
01309
01310
01311 if (is_token(parser, WBXML_SWITCH_PAGE)) {
01312 ret = parse_switch_page(parser, code_space);
01313 if (ret != WBXML_OK)
01314 return ret;
01315 }
01316
01317
01318 ret = parse_uint8(parser, &token);
01319 if (ret != WBXML_OK)
01320 return ret;
01321
01322 *len = 0;
01323
01324 switch(parser->langTable->langID) {
01325 #if defined( WBXML_SUPPORT_WML )
01326 case WBXML_LANG_WML10:
01327 case WBXML_LANG_WML11:
01328 case WBXML_LANG_WML12:
01329 case WBXML_LANG_WML13:
01330 #endif
01331
01332 #if defined( WBXML_SUPPORT_WTA )
01333 case WBXML_LANG_WTAWML12:
01334 #endif
01335
01336 #if ( defined( WBXML_SUPPORT_WML ) || defined( WBXML_SUPPORT_WTA ) )
01337
01338
01339
01340
01341 switch(token) {
01342 case WBXML_EXT_0:
01343 case WBXML_EXT_1:
01344 case WBXML_EXT_2:
01345 WBXML_WARNING((WBXML_PARSER, "This extension token is reserved for futur use (ignoring)"));
01346 return WBXML_OK;
01347 case WBXML_EXT_I_0:
01348 case WBXML_EXT_I_1:
01349 case WBXML_EXT_I_2:
01350
01351 if ((ret = parse_termstr(parser, &var, &var_len)) != WBXML_OK) {
01352 WBXML_ERROR((WBXML_PARSER, "Bad Inline Extension"));
01353 return ret;
01354 }
01355 break;
01356 case WBXML_EXT_T_0:
01357 case WBXML_EXT_T_1:
01358 case WBXML_EXT_T_2:
01359
01360 if ((ret = parse_mb_uint32(parser, &index)) != WBXML_OK)
01361 return ret;
01362
01363 if ((ret = get_strtbl_reference(parser, index, &var, &var_len)) != WBXML_OK) {
01364 WBXML_ERROR((WBXML_PARSER, "Bad Extension reference in string table"));
01365 return ret;
01366 }
01367 break;
01368 default:
01369 return WBXML_ERROR_UNKNOWN_EXTENSION_TOKEN;
01370 }
01371
01372
01373 *ext = (WB_UTINY*) wbxml_malloc(WBXML_STRLEN(var_begin) + var_len + WBXML_STRLEN(escape) + WBXML_STRLEN(var_end) + 1);
01374 if (*ext == NULL)
01375 return WBXML_ERROR_NOT_ENOUGH_MEMORY;
01376
01377
01378 memcpy(*ext + *len, var_begin, WBXML_STRLEN(var_begin));
01379 *len += WBXML_STRLEN(var_begin);
01380
01381
01382 memcpy(*ext + *len, var, var_len);
01383 *len += WBXML_STRLEN(var);
01384
01385 switch(token) {
01386 case WBXML_EXT_I_0:
01387 case WBXML_EXT_T_0:
01388
01389 memcpy(*ext + *len, escape, WBXML_STRLEN(escape));
01390 *len += WBXML_STRLEN(escape);
01391 break;
01392 case WBXML_EXT_I_1:
01393 case WBXML_EXT_T_1:
01394
01395 memcpy(*ext + *len, unesc, WBXML_STRLEN(unesc));
01396 *len += WBXML_STRLEN(unesc);
01397 break;
01398 case WBXML_EXT_I_2:
01399 case WBXML_EXT_T_2:
01400
01401 memcpy(*ext + *len, noesc, WBXML_STRLEN(noesc));
01402 *len += WBXML_STRLEN(noesc);
01403 break;
01404 default:
01405 return WBXML_ERROR_UNKNOWN_EXTENSION_TOKEN;
01406 }
01407
01408
01409 memcpy(*ext + *len, var_end, WBXML_STRLEN(var_end));
01410 *len += WBXML_STRLEN(var_end);
01411 break;
01412 #endif
01413
01414 #if defined( WBXML_SUPPORT_WV )
01415 case WBXML_LANG_WV_CSP11:
01416 case WBXML_LANG_WV_CSP12:
01417 if (token != WBXML_EXT_T_0) {
01418 WBXML_ERROR((WBXML_PARSER, "Only EXT_T_0 extensions authorized with Wireless Village CSP"));
01419 return WBXML_OK;
01420 }
01421
01422
01423 if ((ret = parse_mb_uint32(parser, &ext_value)) != WBXML_OK)
01424 return ret;
01425
01426
01427 if (parser->langTable == NULL)
01428 return WBXML_ERROR_LANG_TABLE_UNDEFINED;
01429
01430 if (parser->langTable->extValueTable == NULL)
01431 return WBXML_ERROR_EXT_VALUE_TABLE_UNDEFINED;
01432
01433 tab_index = 0;
01434
01435 while ((parser->langTable->extValueTable[tab_index].xmlName != NULL) &&
01436 (parser->langTable->extValueTable[tab_index].wbxmlToken != ext_value)) {
01437 tab_index++;
01438 }
01439
01440 if (parser->langTable->extValueTable[tab_index].xmlName == NULL) {
01441 #if WBXML_PARSER_BEST_EFFORT
01442 *ext = (WB_UTINY *) wbxml_strdup((const WB_TINY*) WBXML_PARSER_UNKNOWN_STRING);
01443 *len = WBXML_STRLEN(WBXML_PARSER_UNKNOWN_STRING);
01444 return WBXML_OK;
01445 #else
01446 return WBXML_ERROR_UNKNOWN_EXTENSION_VALUE;
01447 #endif
01448 }
01449
01450 *ext = (WB_UTINY *) wbxml_strdup((const WB_TINY*) parser->langTable->extValueTable[tab_index].xmlName);
01451 *len = WBXML_STRLEN(parser->langTable->extValueTable[tab_index].xmlName);
01452 break;
01453 #endif
01454
01455 default:
01456 WBXML_ERROR((WBXML_PARSER, "Extension tokens not allowed with this Document !"));
01457 }
01458
01459 return WBXML_OK;
01460 }
01461
01462
01477 static WBXMLError parse_entity(WBXMLParser *parser, WB_UTINY **entity, WB_LONG *len)
01478 {
01479 WB_ULONG code;
01480 WBXMLError ret = WBXML_OK;
01481
01482 WBXML_DEBUG((WBXML_PARSER, "(%d) Parsing entity", parser->pos));
01483
01484
01485 parser->pos++;
01486
01487 if ((ret = parse_entcode(parser, &code)) != WBXML_OK)
01488 return ret;
01489
01490
01491 if (code > WBXML_PARSER_MAX_ENTITY_CODE)
01492 return WBXML_ERROR_ENTITY_CODE_OVERFLOW;
01493
01494
01495
01496
01497 *entity = (WB_UTINY*) wbxml_malloc(10 * sizeof(WB_UTINY));
01498 if ((*entity) == NULL)
01499 return WBXML_ERROR_NOT_ENOUGH_MEMORY;
01500
01501 *len = sprintf((WB_TINY *)(*entity), "&#%u;", code);
01502
01503 return WBXML_OK;
01504 }
01505
01506
01516 static WBXMLError parse_opaque(WBXMLParser *parser, WB_UTINY **data, WB_LONG *len)
01517 {
01518 WBXMLError ret = WBXML_OK;
01519
01520 WBXML_DEBUG((WBXML_PARSER, "(%d) Parsing opaque", parser->pos));
01521
01522 if (parser->version < WBXML_VERSION_11)
01523 WBXML_WARNING((WBXML_PARSER, "No 'opaque' support in WBXML < %s", WBXML_VERSION_TEXT_11));
01524
01525
01526 parser->pos++;
01527
01528 ret = parse_mb_uint32(parser, (WB_ULONG *)len);
01529 if (ret != WBXML_OK)
01530 return ret;
01531
01532 *data = wbxml_buffer_get_cstr(parser->wbxml) + parser->pos;
01533
01534
01535 if (parser->pos + (WB_ULONG) *len > wbxml_buffer_len(parser->wbxml))
01536 return WBXML_ERROR_BAD_OPAQUE_LENGTH;
01537
01538 parser->pos+= *len;
01539
01540 return ret;
01541 }
01542
01543
01553 static WBXMLError parse_literal(WBXMLParser *parser, WB_UTINY *mask, WB_UTINY **result)
01554 {
01555 WBXMLError ret = WBXML_OK;
01556 WB_UTINY token;
01557 WB_ULONG index;
01558
01559 WBXML_DEBUG((WBXML_PARSER, "(%d) Parsing literalTag", parser->pos));
01560
01561
01562 ret = parse_uint8(parser, &token);
01563 if (ret != WBXML_OK)
01564 return ret;
01565
01566
01567 ret = parse_mb_uint32(parser, &index);
01568 if (ret != WBXML_OK)
01569 return ret;
01570
01571
01572 if (index > wbxml_buffer_len(parser->strstbl))
01573 return WBXML_ERROR_BAD_LITERAL_INDEX;
01574
01577 *result = wbxml_buffer_get_cstr(parser->strstbl) + index;
01578
01579
01580 if ((index + WBXML_STRLEN(*result)) > wbxml_buffer_len(parser->strstbl))
01581 return WBXML_ERROR_LITERAL_NOT_NULL_TERMINATED_IN_STRING_TABLE;
01582
01583 switch(token) {
01584 case WBXML_LITERAL:
01585 *mask = WBXML_TOKEN_MASK;
01586 break;
01587 case WBXML_LITERAL_C:
01588 *mask = WBXML_TOKEN_WITH_CONTENT;
01589 break;
01590 case WBXML_LITERAL_A:
01591 *mask = WBXML_TOKEN_WITH_ATTRS;
01592 break;
01593 case WBXML_LITERAL_AC:
01594 *mask = (WBXML_TOKEN_WITH_CONTENT | WBXML_TOKEN_WITH_ATTRS);
01595 break;
01596 default:
01597 return WBXML_ERROR_INTERNAL;
01598 }
01599
01600 return WBXML_OK;
01601 }
01602
01603
01612 static WBXMLError parse_attr_start(WBXMLParser *parser, WBXMLAttributeName **name, WB_UTINY **value)
01613 {
01614 WB_UTINY *literal_str = NULL;
01615 WB_UTINY literal;
01616 WB_UTINY tag;
01617 WBXMLError ret = WBXML_OK;
01618 WB_ULONG index = 0;
01619
01620 WBXML_DEBUG((WBXML_PARSER, "(%d) Parsing attrStart", parser->pos));
01621
01622
01623
01624
01625
01626
01627 if (is_token(parser, WBXML_LITERAL)) {
01628 ret = parse_literal(parser, &literal, &literal_str);
01629 if (ret != WBXML_OK)
01630 return ret;
01631
01632 if ((*name = wbxml_attribute_name_create_literal(literal_str)) == NULL)
01633 return WBXML_ERROR_NOT_ENOUGH_MEMORY;
01634
01638 return WBXML_OK;
01639 }
01640
01641
01642
01643
01644
01645
01646
01647 if (is_token(parser, WBXML_SWITCH_PAGE)) {
01648 ret = parse_switch_page(parser, WBXML_ATTR_TOKEN);
01649 if (ret != WBXML_OK)
01650 return ret;
01651 }
01652
01653
01654 ret = parse_uint8(parser, &tag);
01655 if (ret != WBXML_OK)
01656 return ret;
01657
01658 WBXML_DEBUG((WBXML_PARSER, "\tToken: 0x%X", tag));
01659
01660
01661 if (parser->langTable == NULL)
01662 return WBXML_ERROR_LANG_TABLE_UNDEFINED;
01663
01664 if (parser->langTable->attrTable == NULL)
01665 return WBXML_ERROR_ATTR_TABLE_UNDEFINED;
01666
01667 while ((parser->langTable->attrTable[index].xmlName != NULL) &&
01668 ((parser->langTable->attrTable[index].wbxmlToken != tag) ||
01669 (parser->langTable->attrTable[index].wbxmlCodePage != parser->attrCodePage))) {
01670 index++;
01671 }
01672
01673 if (parser->langTable->attrTable[index].xmlName == NULL) {
01674 #if WBXML_PARSER_BEST_EFFORT
01675
01676 if ((*name = wbxml_attribute_name_create_literal(WBXML_PARSER_UNKNOWN_STRING)) == NULL)
01677 return WBXML_ERROR_NOT_ENOUGH_MEMORY;
01678
01679 return WBXML_OK;
01680 #else
01681 return WBXML_ERROR_UNKNOWN_ATTR;
01682 #endif
01683 }
01684
01685
01686 if ((*name = wbxml_attribute_name_create(WBXML_VALUE_TOKEN)) == NULL)
01687 return WBXML_ERROR_NOT_ENOUGH_MEMORY;
01688
01689 (*name)->u.token = &(parser->langTable->attrTable[index]);
01690
01691
01692 if (parser->langTable->attrTable[index].xmlValue != NULL)
01693 *value = (WB_UTINY *) parser->langTable->attrTable[index].xmlValue;
01694
01695 return WBXML_OK;
01696 }
01697
01698
01708 static WBXMLError parse_attr_value(WBXMLParser *parser, WB_UTINY **value, WB_LONG *len, WB_BOOL *static_value)
01709 {
01710 WB_ULONG index = 0;
01711 WB_UTINY tag;
01712 WBXMLError ret = WBXML_OK;
01713
01714 WBXML_DEBUG((WBXML_PARSER, "(%d) Parsing attrValue", parser->pos));
01715
01716 *static_value = FALSE;
01717
01718
01719 if (is_extension(parser))
01720 return parse_extension(parser, WBXML_ATTR_TOKEN, value, len);
01721
01722
01723 if (is_token(parser, WBXML_ENTITY))
01724 return parse_entity(parser, value, len);
01725
01726 *static_value = TRUE;
01727
01728
01729 if (is_string(parser))
01730 return parse_string(parser, value, len);
01731
01732
01733 if (is_token(parser, WBXML_OPAQUE)) {
01734 if (parser->version < WBXML_VERSION_12)
01735 WBXML_ERROR((WBXML_PARSER, "An Attribute value can't be 'opaque' in WBXML version < %s", WBXML_VERSION_TEXT_12));
01736
01737 return parse_opaque(parser, value, len);
01738 }
01739
01740
01741
01742
01743
01744
01745
01746 if (is_token(parser, WBXML_SWITCH_PAGE)) {
01747 ret = parse_switch_page(parser, WBXML_ATTR_TOKEN);
01748 if (ret != WBXML_OK)
01749 return ret;
01750 }
01751
01752
01753 ret = parse_uint8(parser, &tag);
01754 if (ret != WBXML_OK)
01755 return ret;
01756
01757
01758 if (parser->langTable == NULL)
01759 return WBXML_ERROR_LANG_TABLE_UNDEFINED;
01760
01761 if (parser->langTable->attrValueTable == NULL)
01762 return WBXML_ERROR_ATTR_VALUE_TABLE_UNDEFINED;
01763
01764 while ((parser->langTable->attrValueTable[index].xmlName != NULL) &&
01765 ((parser->langTable->attrValueTable[index].wbxmlToken != tag) ||
01766 (parser->langTable->attrValueTable[index].wbxmlCodePage != parser->tagCodePage))) {
01767 index++;
01768 }
01769
01770 if (parser->langTable->attrValueTable[index].xmlName == NULL)
01771 return WBXML_ERROR_UNKNOWN_ATTR_VALUE;
01772
01773 *value = (WB_UTINY *) parser->langTable->attrValueTable[index].xmlName;
01774 *len = WBXML_STRLEN(*value);
01775
01776 return WBXML_OK;
01777 }
01778
01779
01788 static WBXMLError parse_termstr(WBXMLParser *parser, WB_UTINY **str, WB_LONG *len)
01789 {
01790 WBXML_DEBUG((WBXML_PARSER, "(%d) Parsing termstr", parser->pos));
01791
01794 *str = wbxml_buffer_get_cstr(parser->wbxml) + parser->pos;
01795 *len = WBXML_STRLEN(*str);
01796
01797
01798 if (parser->pos + *len > wbxml_buffer_len(parser->wbxml))
01799 return WBXML_ERROR_NOT_NULL_TERMINATED_INLINE_STRING;
01800
01801
01802 parser->pos = parser->pos + *len + 1;
01803
01804 WBXML_DEBUG((WBXML_PARSER, "(%d) termstr: %s", parser->pos, *str));
01805
01806 return WBXML_OK;
01807 }
01808
01809
01818 static WBXMLError parse_inline(WBXMLParser *parser, WB_UTINY **str, WB_LONG *len)
01819 {
01820 WBXML_DEBUG((WBXML_PARSER, "(%d) Parsing inline", parser->pos));
01821
01822
01823 parser->pos++;
01824
01825 return parse_termstr(parser, str, len);
01826 }
01827
01828
01838 static WBXMLError parse_tableref(WBXMLParser *parser, WB_UTINY **str, WB_LONG *len)
01839 {
01840 WB_ULONG index;
01841 WBXMLError ret = WBXML_OK;
01842
01843 WBXML_DEBUG((WBXML_PARSER, "(%d) Parsing tableref", parser->pos));
01844
01845
01846 parser->pos++;
01847
01848
01849 if ((ret = parse_mb_uint32(parser, &index)) != WBXML_OK)
01850 return ret;
01851
01852 return get_strtbl_reference(parser, index, str, len);
01853 }
01854
01855
01863 static WBXMLError parse_entcode(WBXMLParser *parser, WB_ULONG *result)
01864 {
01865 WBXML_DEBUG((WBXML_PARSER, "(%d) Parsing entcode", parser->pos));
01866
01867 return parse_mb_uint32(parser, result);
01868 }
01869
01870
01879 static WBXMLError get_strtbl_reference(WBXMLParser *parser, WB_ULONG index, WB_UTINY **str, WB_LONG *len)
01880 {
01881
01883 if (parser->strstbl == NULL)
01884 return WBXML_ERROR_NULL_STRING_TABLE;
01885
01886 if (index >= wbxml_buffer_len(parser->strstbl))
01887 return WBXML_ERROR_INVALID_STRTBL_INDEX;
01888
01891 *str = wbxml_buffer_get_cstr(parser->strstbl) + index;
01892 *len = WBXML_STRLEN(*str);
01893
01894
01895 if (index + *len > wbxml_buffer_len(parser->strstbl))
01896 return WBXML_ERROR_BAD_NULL_TERMINATED_STRING_IN_STRING_TABLE;
01897
01898 WBXML_DEBUG((WBXML_PARSER, "(%d) String Table Reference: %s", parser->pos, *str));
01899
01900 return WBXML_OK;
01901 }
01902
01903
01904
01905
01906
01907
01915 static WBXMLError parse_uint8(WBXMLParser *parser, WB_UTINY *result)
01916 {
01917 if (parser == NULL)
01918 return WBXML_ERROR_NULL_PARSER;
01919
01920 if (result == NULL)
01921 return WBXML_ERROR_BAD_PARAMETER;
01922
01923 if (parser->pos == wbxml_buffer_len(parser->wbxml))
01924 return WBXML_ERROR_END_OF_BUFFER;
01925
01926 if (!wbxml_buffer_get_char(parser->wbxml, parser->pos, result))
01927 return WBXML_ERROR_END_OF_BUFFER;
01928
01929 parser->pos++;
01930
01931 return WBXML_OK;
01932 }
01933
01934
01942 static WBXMLError parse_mb_uint32(WBXMLParser *parser, WB_ULONG *result)
01943 {
01944 WB_ULONG uint = 0, byte_pos;
01945 WB_UTINY cur_byte;
01946
01947 if (parser == NULL)
01948 return WBXML_ERROR_NULL_PARSER;
01949
01950 if (result == NULL)
01951 return WBXML_ERROR_BAD_PARAMETER;
01952
01953
01954 for (byte_pos = 0; byte_pos < 5; byte_pos++) {
01955
01956 if (!wbxml_buffer_get_char(parser->wbxml, parser->pos, &cur_byte))
01957 return WBXML_ERROR_END_OF_BUFFER;
01958
01959
01960 parser->pos++;
01961
01962
01963 uint = (uint << 7) | ((WB_UTINY)cur_byte & 0x7F);
01964
01965
01966 if (!((WB_UTINY)cur_byte & 0x80)) {
01967 *result = uint;
01968 return WBXML_OK;
01969 }
01970 }
01971
01972 return WBXML_ERROR_UNVALID_MBUINT32;
01973 }
01974
01975
01976
01977
01978
01979
01980 #if ( defined( WBXML_SUPPORT_SI ) || defined( WBXML_SUPPORT_EMN ) )
01981
01982
01983
01984
01985
01994 static WBXMLError decode_datetime(WBXMLBuffer *buff)
01995 {
01996 WB_ULONG len = 0;
01997
01998
01999 if (!wbxml_buffer_binary_to_hex(buff, TRUE))
02000 return WBXML_ERROR_INTERNAL;
02001
02002
02003 len = wbxml_buffer_len(buff);
02004 if ((len < 8) || (len > 14) || (len == 9) || (len == 11) || (len == 13))
02005 return WBXML_ERROR_BAD_DATETIME;
02006
02007
02008
02009
02010 if (!wbxml_buffer_insert_cstr(buff, (WB_UTINY *)"-", 4))
02011 return WBXML_ERROR_NOT_ENOUGH_MEMORY;
02012
02013
02014 if (!wbxml_buffer_insert_cstr(buff, (WB_UTINY *)"-", 7))
02015 return WBXML_ERROR_NOT_ENOUGH_MEMORY;
02016
02017
02018 if (!wbxml_buffer_insert_cstr(buff, (WB_UTINY *)"T", 10))
02019 return WBXML_ERROR_NOT_ENOUGH_MEMORY;
02020
02021
02022
02023
02024 if (len > 10) {
02025 if (!wbxml_buffer_insert_cstr(buff, (WB_UTINY *)":", 13))
02026 return WBXML_ERROR_NOT_ENOUGH_MEMORY;
02027 }
02028
02029 if (len > 12) {
02030 if (!wbxml_buffer_insert_cstr(buff, (WB_UTINY *)":", 16))
02031 return WBXML_ERROR_NOT_ENOUGH_MEMORY;
02032 }
02033
02034
02035 switch (len) {
02036 case 8:
02037 if (!wbxml_buffer_append_cstr(buff, (WB_UTINY *)"00:00:00"))
02038 return WBXML_ERROR_NOT_ENOUGH_MEMORY;
02039 break;
02040 case 10:
02041 if (!wbxml_buffer_append_cstr(buff, (WB_UTINY *)":00:00"))
02042 return WBXML_ERROR_NOT_ENOUGH_MEMORY;
02043 break;
02044 case 12:
02045 if (!wbxml_buffer_append_cstr(buff, (WB_UTINY *)":00"))
02046 return WBXML_ERROR_NOT_ENOUGH_MEMORY;
02047 break;
02048 default:
02049
02050 break;
02051 }
02052
02053
02054 if (!wbxml_buffer_append_char(buff, 'Z'))
02055 return WBXML_ERROR_NOT_ENOUGH_MEMORY;
02056
02057 return WBXML_OK;
02058 }
02059
02060 #endif
02061
02062
02073 static WBXMLError decode_opaque_content(WBXMLParser *parser, WB_UTINY **data, WB_LONG *len, WB_BOOL *static_content)
02074 {
02075 WBXMLError ret = WBXML_OK;
02076
02077
02078
02079
02080 *static_content = TRUE;
02081
02082 switch (parser->langTable->langID) {
02083 #if defined( WBXML_SUPPORT_WV )
02084 case WBXML_LANG_WV_CSP11:
02085 case WBXML_LANG_WV_CSP12:
02086 return decode_wv_content(parser, data, len, static_content);
02087 break;
02088 #endif
02089
02090 #if defined( WBXML_SUPPORT_DRMREL )
02091 case WBXML_LANG_DRMREL10:
02092 if ((parser->current_tag->wbxmlCodePage == 0x00) &&
02093 (parser->current_tag->wbxmlToken == 0x0C))
02094 {
02095
02096 if ((ret = decode_drmrel_keyvalue(data, len)) != WBXML_OK)
02097 return ret;
02098
02099 *static_content = FALSE;
02100 return WBXML_OK;
02101 }
02102 break;
02103 #endif
02104
02105 default:
02106
02107 break;
02108 }
02109
02110 return WBXML_OK;
02111 }
02112
02113
02114 #if defined( WBXML_SUPPORT_WV )
02115
02116
02117
02118
02119
02129 static WBXMLError decode_wv_content(WBXMLParser *parser, WB_UTINY **data, WB_LONG *len, WB_BOOL *static_content)
02130 {
02131 WBXMLWVDataType data_type = WBXML_WV_DATA_TYPE_STRING;
02132 WBXMLError ret = WBXML_OK;
02133
02134
02135
02136
02137
02138
02139
02140
02141
02142
02143
02144
02145
02146
02147
02148
02149
02150
02151
02152
02153
02154
02155
02156
02157
02158
02159
02160
02161
02162
02163
02164
02165
02166
02167
02168
02169
02170
02171
02172 switch (parser->current_tag->wbxmlCodePage) {
02173 case 0x00:
02174
02175 switch (parser->current_tag->wbxmlToken) {
02176 case 0x0B:
02177 case 0x0F:
02178 case 0x1A:
02179 case 0x3C:
02180
02181 data_type = WBXML_WV_DATA_TYPE_INTEGER;
02182 break;
02183 case 0x11:
02184
02185 data_type = WBXML_WV_DATA_TYPE_DATE_AND_TIME;
02186 break;
02187 case 0x0D:
02188
02190
02191
02192
02193
02194
02195 data_type = WBXML_WV_DATA_TYPE_STRING;
02196 break;
02197 default:
02198
02199 data_type = WBXML_WV_DATA_TYPE_STRING;
02200 break;
02201 }
02202 break;
02203 case 0x01:
02204
02205 switch (parser->current_tag->wbxmlToken) {
02206 case 0x1C:
02207 case 0x25:
02208 case 0x26:
02209 case 0x27:
02210 case 0x28:
02211 case 0x32:
02212
02213 data_type = WBXML_WV_DATA_TYPE_INTEGER;
02214 break;
02215 default:
02216
02217 data_type = WBXML_WV_DATA_TYPE_STRING;
02218 break;
02219 }
02220 break;
02221 case 0x03:
02222
02223 switch (parser->current_tag->wbxmlToken) {
02224 case 0x05:
02225 case 0x06:
02226 case 0x0C:
02227 case 0x0D:
02228 case 0x0E:
02229 case 0x12:
02230 case 0x13:
02231
02232 data_type = WBXML_WV_DATA_TYPE_INTEGER;
02233 break;
02234 default:
02235
02236 data_type = WBXML_WV_DATA_TYPE_STRING;
02237 break;
02238 }
02239 break;
02240 case 0x06:
02241
02242 switch (parser->current_tag->wbxmlToken) {
02243 case 0x1A:
02244
02245 data_type = WBXML_WV_DATA_TYPE_DATE_AND_TIME;
02246 break;
02247 default:
02248
02249 data_type = WBXML_WV_DATA_TYPE_STRING;
02250 break;
02251 }
02252 break;
02253 case 0x09:
02254
02255 switch (parser->current_tag->wbxmlToken) {
02256 case 0x08:
02257 case 0x0A:
02258
02259 data_type = WBXML_WV_DATA_TYPE_INTEGER;
02260 break;
02261 default:
02262
02263 data_type = WBXML_WV_DATA_TYPE_STRING;
02264 break;
02265 }
02266 break;
02267 default:
02268 data_type = WBXML_WV_DATA_TYPE_STRING;
02269 break;
02270 }
02271
02272
02273
02274
02275
02276
02277 switch (data_type) {
02278 case WBXML_WV_DATA_TYPE_INTEGER:
02279
02280 if ((ret = decode_wv_integer(data, len)) != WBXML_OK)
02281 return ret;
02282
02283 *static_content = FALSE;
02284 return WBXML_OK;
02285 break;
02286 case WBXML_WV_DATA_TYPE_DATE_AND_TIME:
02287
02288 if ((ret = decode_wv_datetime(data, len)) != WBXML_OK)
02289 return ret;
02290
02291 *static_content = FALSE;
02292 return WBXML_OK;
02293 break;
02294 case WBXML_WV_DATA_TYPE_BINARY:
02296 break;
02297 default:
02298
02299 break;
02300 }
02301
02302 return WBXML_OK;
02303 }
02304
02318 static WBXMLError decode_wv_integer(WB_UTINY **data, WB_LONG *len)
02319 {
02320 WB_LONG i = 0;
02321 WB_ULONG the_int = 0;
02322 WB_UTINY ch = 0;
02323 WB_UTINY tmp[11];
02324
02325 if ((data == NULL) || (len == NULL) || (*len <= 0))
02326 return WBXML_ERROR_INTERNAL;
02327
02328 for (i = 0; i < *len; i++) {
02329 ch = (*data)[i];
02330 the_int = (the_int << 8) | (ch & 0xff);
02331 }
02332
02333 if (the_int > 0xffffffff)
02334 return WBXML_ERROR_WV_INTEGER_OVERFLOW;
02335
02336 sprintf((WB_TINY *)tmp, "%u", the_int);
02337
02338 if (((*data) = (WB_UTINY *) wbxml_malloc((WBXML_STRLEN(tmp) + 1) * sizeof(WB_UTINY))) == NULL)
02339 return WBXML_ERROR_NOT_ENOUGH_MEMORY;
02340
02341 *len = WBXML_STRLEN(tmp);
02342
02343 memcpy(*data, tmp, *len);
02344 (*data)[*len] = '\0';
02345
02346 return WBXML_OK;
02347 }
02348
02349
02378 static WBXMLError decode_wv_datetime(WB_UTINY **data, WB_LONG *len)
02379 {
02380 WB_UTINY *result = NULL;
02381 WB_TINY the_year[5], the_month[3], the_date[3],
02382 the_hour[3], the_minute[3], the_second[3];
02383 WB_ULONG the_value = 0;
02384
02387 if ((data == NULL) || (*data == NULL) || (len == NULL))
02388 return WBXML_ERROR_INTERNAL;
02389
02390 if (*len != 6)
02391 return WBXML_ERROR_WV_DATETIME_FORMAT;
02392
02393 if ((result = (WB_UTINY *) wbxml_malloc(17 * sizeof(WB_UTINY))) == NULL)
02394 return WBXML_ERROR_NOT_ENOUGH_MEMORY;
02395
02396
02397 the_value = (WB_ULONG) (((*data)[0] << 6) | (((*data)[1] >> 2) && 0x3F));
02398 sprintf(the_year, "%u", the_value);
02399
02400
02401 the_value = (WB_ULONG) ((((*data)[1] && 0x03) << 2) | (((*data)[2] >> 6) && 0x3F));
02402 sprintf(the_month, "%u", the_value);
02403
02404
02405 the_value = (WB_ULONG) (((*data)[2] >> 1) && 0x1F);
02406 sprintf(the_date, "%u", the_value);
02407
02408
02409 the_value = (WB_ULONG) ((((*data)[2] && 0x01) << 4) | (((*data)[3] >> 4) && 0x0F));
02410 sprintf(the_hour, "%u", the_value);
02411
02412
02413 the_value = (WB_ULONG) ((((*data)[3] && 0x0F) << 2) | (((*data)[4] >> 6) && 0x03));
02414 sprintf(the_minute, "%u", the_value);
02415
02416
02417 the_value = (WB_ULONG) ((*data)[4] && 0x3F);
02418 sprintf(the_second, "%u", the_value);
02419
02420
02421 if ((*data)[5] == 'Z')
02422 sprintf((WB_TINY *) result, "%s%s%sT%s%s%sZ", the_year, the_month, the_date, the_hour, the_minute, the_second);
02423 else
02424 sprintf((WB_TINY *) result, "%s%s%sT%s%s%s", the_year, the_month, the_date, the_hour, the_minute, the_second);
02425
02426 *data = result;
02427 *len = WBXML_STRLEN(result);
02428
02429 return WBXML_OK;
02430 }
02431
02432 #endif
02433
02434
02435 #if defined( WBXML_SUPPORT_DRMREL )
02436
02437
02438
02439
02440
02454 static WBXMLError decode_drmrel_keyvalue(WB_UTINY **data, WB_LONG *len)
02455 {
02456 WB_UTINY *result = NULL;
02457
02458 if ((data == NULL) || (*data == NULL) || (len == NULL))
02459 return WBXML_ERROR_INTERNAL;
02460
02461 if ((result = wbxml_base64_encode((const WB_UTINY *) *data, *len)) == NULL)
02462 return WBXML_ERROR_B64_ENC;
02463
02464 *data = result;
02465 *len = WBXML_STRLEN(result);
02466
02467 return WBXML_OK;
02468 }
02469
02470 #endif