diff --git a/c_gen/c_code_gen.py b/c_gen/c_code_gen.py index 09c4f2f9f..472222d8d 100644 --- a/c_gen/c_code_gen.py +++ b/c_gen/c_code_gen.py @@ -69,11 +69,11 @@ def member_returns_val(cls, m_name): Should get accessor return a value rather than void @param cls The class name @param m_name The member name - @return True if of_g config and the specific member allow a + @return True if of_g config and the specific member allow a return value. Otherwise False """ m_type = of_g.unified[cls]["union"][m_name]["m_type"] - return (config_check("get_returns") =="value" and + return (config_check("get_returns") =="value" and m_type in of_g.of_scalar_types) # TODO serialize match outside accessor? @@ -139,7 +139,7 @@ def identifiers_gen(out, filename): /** * For each identifier from an OpenFlow header file, a Loxi version * of the identifier is generated. For example, ofp_port_flood becomes - * OF_PORT_DEST_FLOOD. Loxi provides the following macros related to + * OF_PORT_DEST_FLOOD. Loxi provides the following macros related to * OpenFlow identifiers (using OF_IDENT_ as an example below): * OF_IDENT_BY_VERSION(version) Get the value for the specific version * OF_IDENT_SUPPORTED(version) Boolean: Is OF_IDENT defined for version @@ -248,7 +248,7 @@ def identifiers_gen(out, filename): idents.sort() out.write(""" /**************************************************************** - * Identifiers from %s + * Identifiers from %s *****************************************************************/ """ % group) for ident in idents: @@ -578,10 +578,10 @@ def gen_len_offset_macros(out): /* * Match structs in 1.2 come at the end of the fixed length part * of structures. They add 8 bytes to the minimal length of the - * message, but are also variable length. This means that the - * type/length offsets are 8 bytes back from the end of the fixed - * length part of the object. The right way to handle this is to - * expose the offset of the match member more explicitly. For now, + * message, but are also variable length. This means that the + * type/length offsets are 8 bytes back from the end of the fixed + * length part of the object. The right way to handle this is to + * expose the offset of the match member more explicitly. For now, * we make the calculation as described here. */ @@ -613,7 +613,7 @@ def gen_len_offset_macros(out): * @param obj An object with a match member * @param match_offset The wire offset of the match object. * - * See above; for 1.2, + * See above; for 1.2, * The match length is raw bytes but the actual space it takes * up is padded for alignment to 64-bits */ @@ -1187,7 +1187,7 @@ def gen_version_enum(out): */ #define OF_VERSION_ARRAY_MAX %d """ % (max + 1)) - + def gen_object_enum(out): """ Generate the enumerated type for object identification in LoxiGen @@ -1474,7 +1474,7 @@ def field_ver_get(cls, m_name): def v3_match_offset_get(cls): """ - Return the offset of an OF 1.2 match in an object if it has such; + Return the offset of an OF 1.2 match in an object if it has such; otherwise return -1 """ result = field_ver_get(cls, "match") @@ -1653,7 +1653,7 @@ def gen_flow_add_setup_function_declarations(out): * idle_timeout * hard_timeout * - * Note that the actions/instructions of a flow may be modified by a + * Note that the actions/instructions of a flow may be modified by a * subsequent flow modify message. To facilitate implementations, * the "effects" parameter is provided. If effects is NULL, the * actions/instructions are taken from the flow_add message. @@ -1817,7 +1817,7 @@ def gen_bind(out, cls, m_name, m_type): %(i_call)s; /* Derive offset and length of child in parent */ - OF_TRY(of_object_child_attach(parent, child, + OF_TRY(of_object_child_attach(parent, child, if ((rv = of_list_first((of_object_t *)list, (of_object_t *)obj)) < 0) { return rv; } @@ -1849,7 +1849,7 @@ def gen_list_next(out, cls, e_type): len_str = "obj->header.length" else: len_str = "obj->length" - + out.write(""" /** * Advance an iterator to the next element in a list @@ -1936,7 +1936,7 @@ def gen_list_accessors(out, cls): # ################################################################ - + def gen_accessor_declarations(out): """ Generate the declaration of each version independent accessor @@ -1987,7 +1987,7 @@ def gen_accessor_declarations(out): %(gparams)s); """ % dict(base_name=base_name, gparams=gparams, sparams=sparams, get_ret_type=get_ret_type, set_ret_type=set_ret_type)) - + if loxi_utils.class_is_list(cls): e_type = loxi_utils.list_to_entry_type(cls) out.write(""" @@ -2101,7 +2101,7 @@ def length_of(m_type, version): return of_g.base_length[(m_type[:-2], version)] print "Unknown length request", m_type, version sys.exit(1) - + def gen_get_accessor_body(out, cls, m_type, m_name): """ @@ -2231,7 +2231,7 @@ def gen_of_object_get(out, cls, m_name, m_type): sub_cls = m_type[:-2] out.write(""" /** - * Create a copy of %(m_name)s into a new variable of type %(m_type)s from + * Create a copy of %(m_name)s into a new variable of type %(m_type)s from * a %(cls)s instance. * * @param obj Pointer to the source of type %(cls)s_t @@ -2602,7 +2602,7 @@ def gen_init_fn_body(cls, out): * If bytes < 0, then the default fixed length is used for the object * * This is a "coerce" function that sets up the pointers for the - * accessors properly. + * accessors properly. * * If anything other than 0 is passed in for the buffer size, the underlying * wire buffer will have 'grow' called. @@ -2679,7 +2679,7 @@ def gen_wire_push_fn(cls, out): %(name)s)); } """ % dict(name = enum_name(cls))) - + for version in of_g.of_version_range: if type_maps.class_is_extension(cls, version): exp_name = type_maps.extension_to_experimenter_macro_name(cls) @@ -2697,7 +2697,7 @@ def gen_wire_push_fn(cls, out): } """ % dict(exp_name=exp_name, version=of_g.wire_ver_map[version], subtype=str(subtype))) - + else: # Not a message if loxi_utils.class_is_tlv16(cls): out.write(""" @@ -2714,7 +2714,7 @@ def gen_wire_push_fn(cls, out): /* Extended TLV obj; Call specific accessor */ of_extension_object_id_set(obj, %(enum)s); """ % dict(cls=cls, enum=enum_name(cls))) - + if loxi_utils.class_is_oxm(cls): out.write("""\ @@ -3175,7 +3175,7 @@ def gen_accessor_doc(out, name): out.write(""" /** - * Structure for %(cls)s object. Get/set + * Structure for %(cls)s object. Get/set * accessors available in all versions unless noted otherwise * """ % dict(cls=cls)) @@ -3310,7 +3310,7 @@ def gen_code_samples(out, name): """) def gen_jump_table_template(out=sys.stdout, all_unhandled=True, - cxn_type="ls_cxn_handle_t", + cxn_type="ls_cxn_handle_t", unhandled="unhandled_message"): """ Generate a template for a jump table. @@ -3340,11 +3340,11 @@ def gen_jump_table_template(out=sys.stdout, all_unhandled=True, if not all_unhandled: fn_name = "%s_handler" % cls[3:] out.write(" %s%s /* %s */\n" % (fn_name, comma, enum_name(cls))) - + out.write("};\n") def gen_message_switch_stmt_tmeplate(out=sys.stdout, all_unhandled=True, - cxn_type="ls_cxn_handle_t", + cxn_type="ls_cxn_handle_t", unhandled="unhandled_message"): out.write(""" /* @@ -3533,7 +3533,7 @@ def gen_setup_from_add_fns(out): of_flow_add_idle_timeout_get(flow_add, &val16); of_flow_removed_idle_timeout_set(obj, val16); - + if (obj->version >= OF_VERSION_1_2) { of_flow_add_hard_timeout_get(flow_add, &val16); of_flow_removed_hard_timeout_set(obj, val16); @@ -3550,13 +3550,13 @@ def gen_setup_from_add_fns(out): { switch (obj->version) { case OF_VERSION_1_0: - return flow_removed_setup_from_flow_add_common(obj, flow_add, + return flow_removed_setup_from_flow_add_common(obj, flow_add, 8, 8); break; case OF_VERSION_1_1: case OF_VERSION_1_2: case OF_VERSION_1_3: - return flow_removed_setup_from_flow_add_common(obj, flow_add, + return flow_removed_setup_from_flow_add_common(obj, flow_add, 48, 48); break; default: @@ -3625,7 +3625,7 @@ def gen_setup_from_add_fns(out): case OF_VERSION_1_1: case OF_VERSION_1_2: case OF_VERSION_1_3: - return flow_stats_entry_setup_from_flow_add_common(obj, flow_add, + return flow_stats_entry_setup_from_flow_add_common(obj, flow_add, effects, 48, 48); break; default: diff --git a/c_gen/c_dump_gen.py b/c_gen/c_dump_gen.py index 08abae0b6..aae2dc47a 100644 --- a/c_gen/c_dump_gen.py +++ b/c_gen/c_dump_gen.py @@ -49,7 +49,7 @@ def gen_obj_dump_h(out, name): * * AUTOMATICALLY GENERATED FILE. Edits will be lost on regen. * - * Header file for object dumping. + * Header file for object dumping. */ /** @@ -75,9 +75,9 @@ def gen_obj_dump_h(out, name): /** - * Dump any OF object. + * Dump any OF object. */ -int of_object_dump(loci_writer_f writer, void* cookie, of_object_t* obj); +int of_object_dump(loci_writer_f writer, void* cookie, of_object_t* obj); @@ -110,8 +110,8 @@ def gen_obj_dump_c(out, name): * * AUTOMATICALLY GENERATED FILE. Edits will be lost on regen. * - * Source file for object dumping. - * + * Source file for object dumping. + * */ #define DISABLE_WARN_UNUSED_RESULT @@ -122,9 +122,9 @@ def gen_obj_dump_c(out, name): static int unknown_dump(loci_writer_f writer, void* cookie, of_object_t *obj) { - return writer(cookie, "Unable to print object of type %d, version %d\\n", + return writer(cookie, "Unable to print object of type %d, version %d\\n", obj->object_id, obj->version); -} +} """) for version in of_g.of_version_range: @@ -188,7 +188,7 @@ def gen_obj_dump_c(out, name): out.write(""" %(cls)s_%(m_name)s_bind(obj, &%(v_name)s); out += %(sub_cls)s_%(ver_name)s_dump(writer, cookie, &%(v_name)s); -""" % dict(cls=cls, sub_cls=sub_cls, m_name=m_name, +""" % dict(cls=cls, sub_cls=sub_cls, m_name=m_name, v_name=var_name_map(m_type), ver_name=ver_name)) out.write(""" @@ -236,11 +236,11 @@ def gen_obj_dump_c(out, name): if j < len(of_g.all_class_order) - 1: # Avoid ultimate comma comma = "," - if (not loxi_utils.class_in_version(cls, version) or + if (not loxi_utils.class_in_version(cls, version) or cls in type_maps.inheritance_map): out.write(" unknown_dump%s\n" % comma); else: - out.write(" %s_%s_dump%s\n" % + out.write(" %s_%s_dump%s\n" % (cls, loxi_utils.version_to_name(version), comma)) out.write("};\n\n") diff --git a/c_gen/c_match.py b/c_gen/c_match.py index 8c27bb5d8..18686c474 100644 --- a/c_gen/c_match.py +++ b/c_gen/c_match.py @@ -600,9 +600,9 @@ def gen_unified_match_to_v3(out): of_oxm_%(key)s_masked_init(elt, src->version, -1, 1); of_list_oxm_append_bind(oxm_list, &oxm_entry); - of_oxm_%(key)s_masked_value_set(elt, + of_oxm_%(key)s_masked_value_set(elt, src->fields.%(key)s); - of_oxm_%(key)s_masked_value_mask_set(elt, + of_oxm_%(key)s_masked_value_mask_set(elt, src->masks.%(key)s); } else { /* Active, but not masked */ of_oxm_%(key)s_t *elt; @@ -900,7 +900,7 @@ def gen_deserialize(out): of_match_v%(version)d_t wire_match; of_match_v%(version)d_init(&wire_match, %(ver_name)s, -1, 1); - of_object_buffer_bind((of_object_t *)&wire_match, + of_object_buffer_bind((of_object_t *)&wire_match, octets->data, octets->bytes, NULL); OF_TRY(of_match_v%(version)d_to_match(&wire_match, match)); @@ -958,7 +958,7 @@ def gen_match_comp(out=sys.stdout): int idx; for (idx = 0; idx < OF_IPV6_BYTES; idx++) { - if ((v1->addr[idx] & mask->addr[idx]) != + if ((v1->addr[idx] & mask->addr[idx]) != (v2->addr[idx] & mask->addr[idx])) { return 0; } @@ -977,7 +977,7 @@ def gen_match_comp(out=sys.stdout): int idx; for (idx = 0; idx < OF_IPV6_BYTES; idx++) { - if (((v1->addr[idx] & m1->addr[idx]) & m2->addr[idx]) != + if (((v1->addr[idx] & m1->addr[idx]) & m2->addr[idx]) != ((v2->addr[idx] & m1->addr[idx]) & m2->addr[idx])) { return 0; } @@ -1020,12 +1020,12 @@ def gen_match_comp(out=sys.stdout): * Boolean test if two values agree when restricted to a mask */ static inline int -of_restricted_match_mac_addr(of_mac_addr_t *v1, of_mac_addr_t *v2, +of_restricted_match_mac_addr(of_mac_addr_t *v1, of_mac_addr_t *v2, of_mac_addr_t *mask) { int idx; for (idx = 0; idx < OF_MAC_ADDR_BYTES; idx++) { - if ((v1->addr[idx] & mask->addr[idx]) != + if ((v1->addr[idx] & mask->addr[idx]) != (v2->addr[idx] & mask->addr[idx])) { return 0; } @@ -1044,7 +1044,7 @@ def gen_match_comp(out=sys.stdout): int idx; for (idx = 0; idx < OF_MAC_ADDR_BYTES; idx++) { - if (((v1->addr[idx] & m1->addr[idx]) & m2->addr[idx]) != + if (((v1->addr[idx] & m1->addr[idx]) & m2->addr[idx]) != ((v2->addr[idx] & m1->addr[idx]) & m2->addr[idx])) { return 0; } @@ -1147,7 +1147,7 @@ def gen_match_comp(out=sys.stdout): %(q_m)s)) { return 0; } -""" % dict(match_type=match_type, comp=comp, q_f=q_f, e_f=e_f, +""" % dict(match_type=match_type, comp=comp, q_f=q_f, e_f=e_f, q_m=q_m, e_m=e_m, key=key)) out.write(""" @@ -1193,7 +1193,7 @@ def gen_match_comp(out=sys.stdout): f2 = "f2->%s" % key out.write(""" /* Check overlap for %(key)s */ - if (!%(check)s(%(f1)s, %(f2)s, + if (!%(check)s(%(f1)s, %(f2)s, %(m2)s, %(m1)s)) { return 0; /* This field differentiates; all done */ } diff --git a/c_gen/c_show_gen.py b/c_gen/c_show_gen.py index 14709abf0..0ec81b4dc 100644 --- a/c_gen/c_show_gen.py +++ b/c_gen/c_show_gen.py @@ -49,7 +49,7 @@ def gen_obj_show_h(out, name): * * AUTOMATICALLY GENERATED FILE. Edits will be lost on regen. * - * Header file for object showing. + * Header file for object showing. */ /** @@ -75,9 +75,9 @@ def gen_obj_show_h(out, name): /** - * Show any OF object. + * Show any OF object. */ -int of_object_show(loci_writer_f writer, void* cookie, of_object_t* obj); +int of_object_show(loci_writer_f writer, void* cookie, of_object_t* obj); @@ -110,8 +110,8 @@ def gen_obj_show_c(out, name): * * AUTOMATICALLY GENERATED FILE. Edits will be lost on regen. * - * Source file for object showing. - * + * Source file for object showing. + * */ #define DISABLE_WARN_UNUSED_RESULT @@ -122,9 +122,9 @@ def gen_obj_show_c(out, name): static int unknown_show(loci_writer_f writer, void* cookie, of_object_t *obj) { - return writer(cookie, "Unable to print object of type %d, version %d\\n", + return writer(cookie, "Unable to print object of type %d, version %d\\n", obj->object_id, obj->version); -} +} """) for version in of_g.of_version_range: @@ -158,7 +158,7 @@ def gen_obj_show_c(out, name): m_type = member["m_type"] m_name = member["name"] #emitter = "LOCI_SHOW_" + loxi_utils.type_to_short_name(m_type) - emitter = "LOCI_SHOW_" + loxi_utils.type_to_short_name(m_type) + "_" + m_name; + emitter = "LOCI_SHOW_" + loxi_utils.type_to_short_name(m_type) + "_" + m_name; if loxi_utils.skip_member_name(m_name): continue if (loxi_utils.type_is_scalar(m_type) or @@ -167,7 +167,7 @@ def gen_obj_show_c(out, name): %(cls)s_%(m_name)s_get(obj, &%(v_name)s); out += writer(cookie, "%(m_name)s="); out += %(emitter)s(writer, cookie, %(v_name)s); - out += writer(cookie, " "); + out += writer(cookie, " "); """ % dict(cls=cls, m_name=m_name, m_type=m_type, v_name=var_name_map(m_type), emitter=emitter)) elif loxi_utils.class_is_list(m_type): @@ -179,7 +179,7 @@ def gen_obj_show_c(out, name): %(u_type)s_ITER(&%(v_name)s, &elt, rv) { of_object_show(writer, cookie, (of_object_t *)&elt); } - out += writer(cookie, "} "); + out += writer(cookie, "} "); """ % dict(sub_cls=sub_cls, u_type=sub_cls.upper(), v_name=var_name_map(m_type), elt_type=elt_type, cls=cls, m_name=m_name, m_type=m_type)) else: @@ -187,7 +187,7 @@ def gen_obj_show_c(out, name): out.write(""" %(cls)s_%(m_name)s_bind(obj, &%(v_name)s); out += %(sub_cls)s_%(ver_name)s_show(writer, cookie, &%(v_name)s); -""" % dict(cls=cls, sub_cls=sub_cls, m_name=m_name, +""" % dict(cls=cls, sub_cls=sub_cls, m_name=m_name, v_name=var_name_map(m_type), ver_name=ver_name)) out.write(""" @@ -207,12 +207,12 @@ def gen_obj_show_c(out, name): for key, entry in match.of_match_members.items(): m_type = entry["m_type"] #emitter = "LOCI_SHOW_" + loxi_utils.type_to_short_name(m_type) - emitter = "LOCI_SHOW_" + loxi_utils.type_to_short_name(m_type) + "_" + key; + emitter = "LOCI_SHOW_" + loxi_utils.type_to_short_name(m_type) + "_" + key; out.write(""" if (OF_MATCH_MASK_%(ku)s_ACTIVE_TEST(match)) { - out += writer(cookie, "%(key)s active="); + out += writer(cookie, "%(key)s active="); out += %(emitter)s(writer, cookie, match->fields.%(key)s); - out += writer(cookie, "/"); + out += writer(cookie, "/"); out += %(emitter)s(writer, cookie, match->masks.%(key)s); out += writer(cookie, " "); } @@ -234,11 +234,11 @@ def gen_obj_show_c(out, name): if j < len(of_g.all_class_order) - 1: # Avoid ultimate comma comma = "," - if (not loxi_utils.class_in_version(cls, version) or + if (not loxi_utils.class_in_version(cls, version) or cls in type_maps.inheritance_map): out.write(" unknown_show%s\n" % comma); else: - out.write(" %s_%s_show%s\n" % + out.write(" %s_%s_show%s\n" % (cls, loxi_utils.version_to_name(version), comma)) out.write("};\n\n") diff --git a/c_gen/c_test_gen.py b/c_gen/c_test_gen.py index 97cc78d66..3609f98bb 100644 --- a/c_gen/c_test_gen.py +++ b/c_gen/c_test_gen.py @@ -83,11 +83,11 @@ def var_name_map(m_type): of_fm_cmd_t="fm_cmd", of_wc_bmap_t="wc_bmap", of_match_bmap_t = "match_bmap", - of_port_name_t="port_name", + of_port_name_t="port_name", of_table_name_t="table_name", of_desc_str_t="desc_str", - of_serial_num_t="ser_num", - of_mac_addr_t="mac_addr", + of_serial_num_t="ser_num", + of_mac_addr_t="mac_addr", of_ipv6_t="ipv6", # Non-scalars; more TBD of_octets_t="octets", @@ -104,7 +104,7 @@ def var_name_map(m_type): "of_port_no_t", "of_fm_cmd_t", "of_wc_bmap_t", "of_match_bmap_t"] string_types = [ "of_port_name_t", "of_table_name_t", - "of_desc_str_t", "of_serial_num_t", "of_mac_addr_t", + "of_desc_str_t", "of_serial_num_t", "of_mac_addr_t", "of_ipv6_t"] scalar_types = integer_types[:] @@ -240,7 +240,7 @@ def gen_fill_string(out): for key, entry in match.of_match_members.items(): out.write(""" - if (!(of_match_incompat[version] & + if (!(of_match_incompat[version] & OF_OXM_BIT(OF_OXM_INDEX_%(ku)s))) { OF_MATCH_MASK_%(ku)s_EXACT_SET(match); VAR_%(u_type)s_INIT(match->fields.%(key)s, value); @@ -362,7 +362,7 @@ def gen_common_test_header(out, name): * Declarations for list population and check primitives */ """) - + for version in of_g.of_version_range: for cls in of_g.ordered_list_objects: if cls in type_maps.inheritance_map: @@ -411,7 +411,7 @@ def gen_common_test(out, name): int exit_on_error = 1; /** - * Global error state: 0 is okay, 1 is error + * Global error state: 0 is okay, 1 is error */ int global_error = 0; @@ -501,7 +501,7 @@ def gen_message_scalar_test(out, name): out.write(" RUN_TEST(%s_scalar);\n" % test_name) out.write(" return TEST_PASS;\n}\n"); - + def message_scalar_test(out, version, cls): """ Generate one test case for the given version and class @@ -523,7 +523,7 @@ def message_scalar_test(out, version, cls): TEST_ASSERT(obj->length == %(length)d); TEST_ASSERT(obj->parent == NULL); TEST_ASSERT(obj->object_id == %(u_cls)s); -""" % dict(cls=cls, u_cls=cls.upper(), +""" % dict(cls=cls, u_cls=cls.upper(), v_name=v_name, length=length, version=version)) if not type_maps.class_is_virtual(cls): out.write(""" @@ -539,7 +539,7 @@ def message_scalar_test(out, version, cls): /* Check values just set */ TEST_ASSERT(%(cls)s_%(v_name)s_check_scalars(obj, 1) != 0); -""" % dict(cls=cls, u_cls=cls.upper(), +""" % dict(cls=cls, u_cls=cls.upper(), v_name=v_name, length=length, version=version)) out.write(""" @@ -566,7 +566,7 @@ def scalar_member_types_get(cls, version): for member in members: m_type = member["m_type"] m_name = member["name"] - if (not loxi_utils.type_is_scalar(m_type) or + if (not loxi_utils.type_is_scalar(m_type) or ignore_member(cls, version, m_name, m_type)): continue if not m_type in member_types: @@ -580,8 +580,8 @@ def scalar_funs_instance(out, cls, version, members, member_types): """ out.write(""" /** - * Populate the scalar values in obj of type %(cls)s, - * version %(v_name)s + * Populate the scalar values in obj of type %(cls)s, + * version %(v_name)s * @param obj Pointer to an object to populate * @param value The seed value to use in populating the object * @returns The value after increments for this object's values @@ -608,11 +608,11 @@ def scalar_funs_instance(out, cls, version, members, member_types): return value; } """) - + out.write(""" /** - * Check scalar values in obj of type %(cls)s, - * version %(v_name)s + * Check scalar values in obj of type %(cls)s, + * version %(v_name)s * @param obj Pointer to an object to check * @param value Starting value for checking * @returns The value after increments for this object's values @@ -644,7 +644,7 @@ def scalar_funs_instance(out, cls, version, members, member_types): def gen_scalar_set_check_funs(out): """ - For each object class with scalar members, generate functions that + For each object class with scalar members, generate functions that set and check their values """ for version in of_g.of_version_range: @@ -658,7 +658,7 @@ def setup_instance(out, cls, subcls, instance, v_name, inst_len, version): base_type = loxi_utils.list_to_entry_type(cls) setup_template = """ %(subcls)s_init(%(inst)s, %(v_name)s, -1, 1); - %(cls)s_append_bind(list, + %(cls)s_append_bind(list, (%(base_type)s_t *)%(inst)s); value = %(subcls)s_%(v_name)s_populate( %(inst)s, value); @@ -670,8 +670,8 @@ def setup_instance(out, cls, subcls, instance, v_name, inst_len, version): """ % subcls) for i in range(2): out.write(setup_template % - dict(inst=instance, subcls=subcls, v_name=v_name, - base_type=base_type, cls=cls, inst_len=inst_len, + dict(inst=instance, subcls=subcls, v_name=v_name, + base_type=base_type, cls=cls, inst_len=inst_len, version=version)) def check_instance(out, cls, subcls, instance, v_name, inst_len, version, last): @@ -695,7 +695,7 @@ def check_instance(out, cls, subcls, instance, v_name, inst_len, version, last): """ out.write("\n /* Check two instances of type %s */" % instance) - out.write(check_template % + out.write(check_template % dict(elt_name=loxi_utils.enum_name(subcls), inst_len=inst_len, inst=instance, subcls=subcls, v_name=loxi_utils.version_to_name(version))) @@ -703,7 +703,7 @@ def check_instance(out, cls, subcls, instance, v_name, inst_len, version, last): TEST_OK(%(cls)s_next(list, &elt)); """ % dict(cls=cls)) - out.write(check_template % + out.write(check_template % dict(elt_name=loxi_utils.enum_name(subcls), inst_len=inst_len, inst=instance, subcls=subcls, v_name=loxi_utils.version_to_name(version))) @@ -735,7 +735,7 @@ def setup_list_fn(out, version, cls): %(base_type)s_t elt; int cur_len = 0; """ % dict(cls=cls, base_type=base_type)) - + sub_classes = type_maps.sub_class_map(base_type, version) v_name = loxi_utils.version_to_name(version) @@ -770,7 +770,7 @@ def check_list_fn(out, version, cls): """ out.write(""" /** - * Check a list of type %(cls)s generated by + * Check a list of type %(cls)s generated by * list_setup_%(cls)s_%(v_name)s */ int @@ -782,7 +782,7 @@ def check_list_fn(out, version, cls): out.write(""" %(base_type)s_t elt; """ % dict(cls=cls, base_type=base_type)) - + sub_classes = type_maps.sub_class_map(base_type, version) v_name = loxi_utils.version_to_name(version) @@ -804,7 +804,7 @@ def check_list_fn(out, version, cls): inst_len = -1 else: inst_len = loxi_utils.base_type_to_length(base_type, version) - check_instance(out, cls, base_type, "elt_p", v_name, inst_len, + check_instance(out, cls, base_type, "elt_p", v_name, inst_len, version, True) else: count = 0 @@ -814,7 +814,7 @@ def check_list_fn(out, version, cls): inst_len = -1 else: inst_len = of_g.base_length[(subcls, version)] - check_instance(out, cls, subcls, instance, v_name, inst_len, + check_instance(out, cls, subcls, instance, v_name, inst_len, version, count==len(sub_classes)) out.write(""" @@ -856,7 +856,7 @@ def list_test(out, version, cls): value = list_setup_%(cls)s_%(v_name)s(list, value); TEST_ASSERT(value != 0); -""" % dict(cls=cls, base_type=base_type, v_name=loxi_utils.version_to_name(version), +""" % dict(cls=cls, base_type=base_type, v_name=loxi_utils.version_to_name(version), enum_cls=loxi_utils.enum_name(cls))) out.write(""" @@ -890,7 +890,7 @@ def gen_list_test(out, name): #include """) - + for version in of_g.of_version_range: v_name = loxi_utils.version_to_name(version) out.write(""" @@ -1034,9 +1034,9 @@ def gen_match_test(out, name): out.write(""" /* Serialize to version %(v_name)s */ TEST_ASSERT((value = of_match_populate(&match1, %(v_name)s, value)) > 0); - TEST_ASSERT(of_match_serialize(%(v_name)s, &match1, &octets) == + TEST_ASSERT(of_match_serialize(%(v_name)s, &match1, &octets) == OF_ERROR_NONE); - TEST_ASSERT(of_match_deserialize(%(v_name)s, &match2, &octets) == + TEST_ASSERT(of_match_deserialize(%(v_name)s, &match2, &octets) == OF_ERROR_NONE); TEST_ASSERT(memcmp(&match1, &match2, sizeof(match1)) == 0); FREE(octets.data); @@ -1132,7 +1132,7 @@ def gen_msg_test(out, name): out.write(" RUN_TEST(%s);\n" % test_name) out.write("\n return TEST_PASS;\n}\n"); - + def gen_list_setup_check(out, cls, version): """ @@ -1156,7 +1156,7 @@ def gen_list_setup_check(out, cls, version): %(base_type)s_t elt; int cur_len = 0; """ % dict(cls=cls, base_type=base_type)) - + sub_classes = type_maps.sub_class_map(base_type, version) v_name = loxi_utils.version_to_name(version) @@ -1188,7 +1188,7 @@ def gen_list_setup_check(out, cls, version): else: for instance, subcls in sub_classes: inst_len = of_g.base_length[(subcls, version)] - setup_instance(out, cls, subcls, instance, v_name, + setup_instance(out, cls, subcls, instance, v_name, inst_len, version) out.write(""" return value; @@ -1196,7 +1196,7 @@ def gen_list_setup_check(out, cls, version): """) out.write(""" /** - * Check a list of type %(cls)s generated by + * Check a list of type %(cls)s generated by * %(cls)s_%(v_name)s_populate * @param list Pointer to the list that was populated * @param value Starting value for checking @@ -1213,7 +1213,7 @@ def gen_list_setup_check(out, cls, version): int count = 0; int rv; """ % dict(cls=cls, base_type=base_type)) - + sub_classes = type_maps.sub_class_map(base_type, version) v_name = loxi_utils.version_to_name(version) @@ -1238,7 +1238,7 @@ def gen_list_setup_check(out, cls, version): inst_len = -1 else: inst_len = loxi_utils.base_type_to_length(base_type, version) - check_instance(out, cls, base_type, "elt_p", v_name, inst_len, + check_instance(out, cls, base_type, "elt_p", v_name, inst_len, version, True) else: count = 0 @@ -1248,7 +1248,7 @@ def gen_list_setup_check(out, cls, version): inst_len = -1 else: inst_len = of_g.base_length[(subcls, version)] - check_instance(out, cls, subcls, instance, v_name, inst_len, + check_instance(out, cls, subcls, instance, v_name, inst_len, version, count==len(sub_classes)) out.write(""" """ % dict(base_type=base_type)) @@ -1332,7 +1332,7 @@ def gen_class_setup_check(out, cls, version): /* Test bind */ %(cls)s_%(m_name)s_bind(obj, &sub_cls); } -""" % dict(var_name=var_name_map(m_type), cls=cls, +""" % dict(var_name=var_name_map(m_type), cls=cls, m_name=m_name, sub_cls=sub_cls, v_name=loxi_utils.version_to_name(version))) @@ -1355,7 +1355,7 @@ def gen_class_setup_check(out, cls, version): TEST_ASSERT(value != 0); %(cls)s_%(m_name)s_set( obj, &%(var_name)s); -""" % dict(cls=cls, var_name=var_name_map(m_type), +""" % dict(cls=cls, var_name=var_name_map(m_type), m_name=m_name, v_name=loxi_utils.version_to_name(version))) elif m_type == "of_octets_t": out.write("""\ @@ -1498,7 +1498,7 @@ def unified_accessor_test_case(out, cls, version): TEST_ASSERT(obj->length == %(length)d); TEST_ASSERT(obj->parent == NULL); TEST_ASSERT(obj->object_id == %(u_cls)s); -""" % dict(cls=cls, u_cls=cls.upper(), +""" % dict(cls=cls, u_cls=cls.upper(), v_name=v_name, length=length, version=version)) if (not type_maps.class_is_virtual(cls)) or loxi_utils.class_is_list(cls): out.write(""" @@ -1522,7 +1522,7 @@ def unified_accessor_test_case(out, cls, version): /* Check values just set */ TEST_ASSERT(%(cls)s_%(v_name)s_check( obj, 1) != 0); -""" % dict(cls=cls, u_cls=cls.upper(), +""" % dict(cls=cls, u_cls=cls.upper(), v_name=v_name, length=length, version=version)) out.write(""" @@ -1602,7 +1602,7 @@ def gen_dup_list(out, cls, version): elt_type = loxi_utils.list_to_entry_type(cls) out.write(""" /** - * Duplicate a list of type %(cls)s + * Duplicate a list of type %(cls)s * using accessor functions * @param src Pointer to object to be duplicated * @returns A new object of type %(cls)s. @@ -1643,7 +1643,7 @@ def gen_dup_inheritance(out, cls, version): ver_name = loxi_utils.version_to_name(version) out.write(""" /** - * Duplicate a super class object of type %(cls)s + * Duplicate a super class object of type %(cls)s * @param src Pointer to object to be duplicated * @returns A new object of type %(cls)s. * @@ -1680,7 +1680,7 @@ def gen_dup_cls(out, cls, version): out.write(""" /** - * Duplicate an object of type %(cls)s + * Duplicate an object of type %(cls)s * using accessor functions * @param src Pointer to object to be duplicated * @returns A new object of type %(cls)s. @@ -1741,7 +1741,7 @@ def gen_dup_cls(out, cls, version): } %(cls)s_%(m_name)s_set(dst, dst_%(v_name)s); %(sub_cls)s_delete(dst_%(v_name)s); -""" % dict(sub_cls=sub_cls, cls=cls, m_name=m_name, +""" % dict(sub_cls=sub_cls, cls=cls, m_name=m_name, v_name=var_name_map(m_type), ver_name=ver_name)) out.write(""" @@ -1902,7 +1902,7 @@ def gen_log_test(out): of_object_dump((loci_writer_f)fprintf, out, obj); of_object_delete(obj); """ % dict(cls=cls, version=of_g.of_version_wire2name[version])) - + out.write(""" fclose(out); return TEST_PASS; diff --git a/c_gen/c_type_maps.py b/c_gen/c_type_maps.py index d790c85ed..578bf0ad0 100644 --- a/c_gen/c_type_maps.py +++ b/c_gen/c_type_maps.py @@ -61,46 +61,46 @@ def gen_object_id_to_type(out): comma = "," if cls in type_maps.stats_reply_list: - out.write(" %d%s /* %s */\n" % + out.write(" %d%s /* %s */\n" % (type_maps.type_val[("of_stats_reply", version)], comma, cls)) elif cls in type_maps.stats_request_list: - out.write(" %d%s /* %s */\n" % + out.write(" %d%s /* %s */\n" % (type_maps.type_val[("of_stats_request", version)], comma, cls)) elif cls in type_maps.flow_mod_list: - out.write(" %d%s /* %s */\n" % + out.write(" %d%s /* %s */\n" % (type_maps.type_val[("of_flow_mod", version)], comma, cls)) elif (cls, version) in type_maps.type_val: - out.write(" %d%s /* %s */\n" % + out.write(" %d%s /* %s */\n" % (type_maps.type_val[(cls, version)], comma, cls)) elif type_maps.message_is_extension(cls, version): - out.write(" %d%s /* %s */\n" % + out.write(" %d%s /* %s */\n" % (type_maps.type_val[("of_experimenter", version)], comma, cls)) elif type_maps.action_is_extension(cls, version): - out.write(" %d%s /* %s */\n" % + out.write(" %d%s /* %s */\n" % (type_maps.type_val[("of_action_experimenter", version)], comma, cls)) elif type_maps.action_id_is_extension(cls, version): - out.write(" %d%s /* %s */\n" % + out.write(" %d%s /* %s */\n" % (type_maps.type_val[("of_action_id_experimenter", version)], comma, cls)) elif type_maps.instruction_is_extension(cls, version): - out.write(" %d%s /* %s */\n" % + out.write(" %d%s /* %s */\n" % (type_maps.type_val[("of_instruction_experimenter", version)], comma, cls)) elif type_maps.queue_prop_is_extension(cls, version): - out.write(" %d%s /* %s */\n" % + out.write(" %d%s /* %s */\n" % (type_maps.type_val[("of_queue_prop_experimenter", version)], comma, cls)) elif type_maps.table_feature_prop_is_extension(cls, version): - out.write(" %d%s /* %s */\n" % + out.write(" %d%s /* %s */\n" % (type_maps.type_val[("of_table_feature_prop_experimenter", version)], comma, cls)) @@ -142,7 +142,7 @@ def gen_object_id_to_extension_data(out): if type_maps.class_is_extension(cls, version): exp_name = type_maps.extension_to_experimenter_macro_name(cls) subtype = type_maps.extension_to_subtype(cls, version) - out.write(" {1, %s, %d}%s /* %s */\n" % + out.write(" {1, %s, %d}%s /* %s */\n" % (exp_name, subtype, comma, cls)) else: out.write(" {0, 0, 0}%s /* %s (non-extension) */\n" % @@ -219,7 +219,7 @@ def gen_type_to_object_id(out, type_str, prefix, template, out.write(""" }; -""" % dict(name=type_str, u_name=type_str.upper(), +""" % dict(name=type_str, u_name=type_str.upper(), max_val=max_val, c_name=prefix.lower())) def gen_type_maps(out): @@ -238,7 +238,7 @@ def gen_type_maps(out): "OF_ACTION_ID_%s", type_maps.action_id_types, max_type_value) gen_type_to_object_id(out, "instruction_type_to_id", "OF_INSTRUCTION", - "OF_INSTRUCTION_%s", type_maps.instruction_types, + "OF_INSTRUCTION_%s", type_maps.instruction_types, max_type_value) gen_type_to_object_id(out, "queue_prop_type_to_id", "OF_QUEUE_PROP", "OF_QUEUE_PROP_%s", type_maps.queue_prop_types, @@ -301,10 +301,10 @@ def gen_type_to_obj_map_functions(out): * @param version The version associated with the check * @return The %(name)s OF object type * @return OF_OBJECT_INVALID if type does not map to an object - * + * */ static inline of_object_id_t -of_%(name)s_to_object_id(int %(name)s, of_version_t version) +of_%(name)s_to_object_id(int %(name)s, of_version_t version) { if (!OF_VERSION_OKAY(version)) { return OF_OBJECT_INVALID; @@ -332,10 +332,10 @@ def gen_type_to_obj_map_functions(out): * @param version The version associated with the check * @return The %(name)s OF object type * @return OF_OBJECT_INVALID if type does not map to an object - * + * */ static inline of_object_id_t -of_%(name)s_to_object_id(int %(name)s, of_version_t version) +of_%(name)s_to_object_id(int %(name)s, of_version_t version) { if (!OF_VERSION_OKAY(version)) { return OF_OBJECT_INVALID; @@ -367,10 +367,10 @@ def gen_type_to_obj_map_functions(out): * @param version The version associated with the check * @return The %(name)s OF object type * @return OF_OBJECT_INVALID if type does not map to an object - * + * */ static inline of_object_id_t -of_%(name)s_to_object_id(int %(name)s, of_version_t version) +of_%(name)s_to_object_id(int %(name)s, of_version_t version) { if (!OF_VERSION_OKAY(version)) { return OF_OBJECT_INVALID; @@ -417,7 +417,7 @@ def gen_type_to_obj_map_functions(out): for version, experimenter_lists in type_maps.extension_message_subtype.items(): for exp, subtypes in experimenter_lists.items(): experimenter_function += """ - if ((experimenter_id == OF_EXPERIMENTER_ID_%(exp_name)s) && + if ((experimenter_id == OF_EXPERIMENTER_ID_%(exp_name)s) && (version == %(ver_name)s)) { """ % dict(exp_name=exp.upper(), ver_name=of_g.wire_ver_map[version]) for ext_msg, subtype in subtypes.items(): @@ -510,69 +510,69 @@ def gen_type_to_obj_map_functions(out): # Action types array gen ar_len = type_maps.type_array_len(type_maps.action_types, max_type_value) - out.write(map_with_experimenter_template % + out.write(map_with_experimenter_template % dict(name="action", u_name="ACTION", ar_len=ar_len)) # Action ID types array gen ar_len = type_maps.type_array_len(type_maps.action_id_types, max_type_value) - out.write(map_with_experimenter_template % + out.write(map_with_experimenter_template % dict(name="action_id", u_name="ACTION_ID", ar_len=ar_len)) # Instruction types array gen ar_len = type_maps.type_array_len(type_maps.instruction_types, max_type_value) - out.write(map_with_experimenter_template % + out.write(map_with_experimenter_template % dict(name="instruction", u_name="INSTRUCTION", ar_len=ar_len)) # Queue prop types array gen ar_len = type_maps.type_array_len(type_maps.queue_prop_types, max_type_value) - out.write(map_with_experimenter_template % + out.write(map_with_experimenter_template % dict(name="queue_prop", u_name="QUEUE_PROP", ar_len=ar_len)) # Table feature prop types array gen ar_len = type_maps.type_array_len(type_maps.table_feature_prop_types, max_type_value) - out.write(map_with_experimenter_template % + out.write(map_with_experimenter_template % dict(name="table_feature_prop", u_name="TABLE_FEATURE_PROP", ar_len=ar_len)) # Meter band types array gen ar_len = type_maps.type_array_len(type_maps.meter_band_types, max_type_value) - out.write(map_with_experimenter_template % + out.write(map_with_experimenter_template % dict(name="meter_band", u_name="METER_BAND", ar_len=ar_len)) # Hello elem types array gen ar_len = type_maps.type_array_len(type_maps.hello_elem_types, max_type_value) - out.write(map_template % + out.write(map_template % dict(name="hello_elem", u_name="HELLO_ELEM", ar_len=ar_len)) # Stats types array gen ar_len = type_maps.type_array_len(type_maps.stats_types, max_type_value) - out.write(stats_template % + out.write(stats_template % dict(name="stats_reply", u_name="STATS_REPLY", ar_len=ar_len)) - out.write(stats_template % - dict(name="stats_request", u_name="STATS_REQUEST", + out.write(stats_template % + dict(name="stats_request", u_name="STATS_REQUEST", ar_len=ar_len)) ar_len = type_maps.type_array_len(type_maps.flow_mod_types, max_type_value) - out.write(map_template % + out.write(map_template % dict(name="flow_mod", u_name="FLOW_MOD", ar_len=ar_len)) ar_len = type_maps.type_array_len(type_maps.oxm_types, max_type_value) out.write(""" /* NOTE: We could optimize the OXM and only generate OF 1.2 versions. */ """) - out.write(map_template % + out.write(map_template % dict(name="oxm", u_name="OXM", ar_len=ar_len)) out.write(experimenter_function) # Must follow stats reply/request ar_len = type_maps.type_array_len(type_maps.message_types, max_type_value) - out.write(msg_template % + out.write(msg_template % dict(name="message", u_name="MESSAGE", ar_len=ar_len)) def gen_obj_to_type_map_functions(out): @@ -784,7 +784,7 @@ def gen_type_maps_header(out): out.write(""" /** - * Generic experimenter type value. Applies to all except + * Generic experimenter type value. Applies to all except * top level message: Action, instruction, error, stats, queue_props, oxm */ #define OF_EXPERIMENTER_TYPE 0xffff @@ -926,7 +926,7 @@ def gen_type_data_header(out): out.write(""" /** * Special length calculation for %(cls)s->%(name)s. - * @param obj An object of type %(cls)s to check for + * @param obj An object of type %(cls)s to check for * length of %(name)s * @param bytes[out] Where to store the calculated length * @@ -937,7 +937,7 @@ def gen_type_data_header(out): /** * Special offset calculation for %(cls)s->%(name)s. - * @param obj An object of type %(cls)s to check for + * @param obj An object of type %(cls)s to check for * length of %(name)s * @param offset[out] Where to store the calculated length * @@ -966,7 +966,7 @@ def gen_type_data_header(out): # */ # extern int of_length_%(s_cls)s_get( # %(cls)s_t *obj, int *bytes); -# """ % dict(cls=cls, s_cls=s_cls)) +# """ % dict(cls=cls, s_cls=s_cls)) out.write(""" /**************************************************************** @@ -992,15 +992,15 @@ def gen_type_data_header(out): extern void of_action_wire_object_id_get(of_object_t *obj, of_object_id_t *id); extern void of_action_id_wire_object_id_get(of_object_t *obj, of_object_id_t *id); -extern void of_instruction_wire_object_id_get(of_object_t *obj, +extern void of_instruction_wire_object_id_get(of_object_t *obj, of_object_id_t *id); -extern void of_queue_prop_wire_object_id_get(of_object_t *obj, +extern void of_queue_prop_wire_object_id_get(of_object_t *obj, of_object_id_t *id); -extern void of_table_feature_prop_wire_object_id_get(of_object_t *obj, +extern void of_table_feature_prop_wire_object_id_get(of_object_t *obj, of_object_id_t *id); -extern void of_meter_band_wire_object_id_get(of_object_t *obj, +extern void of_meter_band_wire_object_id_get(of_object_t *obj, of_object_id_t *id); -extern void of_hello_elem_wire_object_id_get(of_object_t *obj, +extern void of_hello_elem_wire_object_id_get(of_object_t *obj, of_object_id_t *id); /** @fixme VERIFY LENGTH IS NUMBER OF BYTES OF ENTRY INCLUDING HDR */ @@ -1064,7 +1064,7 @@ def gen_length_array(out): }; """) - + ################################################################ ################################################################ diff --git a/c_gen/c_validator_gen.py b/c_gen/c_validator_gen.py index 3ab6acf70..f617175ef 100644 --- a/c_gen/c_validator_gen.py +++ b/c_gen/c_validator_gen.py @@ -210,9 +210,9 @@ def gen_validator(out, cls, version): """ % dict(m_name=m_name, m_offset=m_offset, cls=cls)) else: out.write(""" - + { int %(m_name)s_len = len - %(m_offset)s; - + """ % dict(m_name=m_name, m_offset=m_offset)) out.write(""" if (%(m_cls)s_%(ver_name)s_validate(buf + %(m_offset)s, %(m_name)s_len) < 0) { @@ -240,7 +240,7 @@ def gen_list_validator(out, cls, version): subclasses = type_maps.inheritance_map[e_cls] out.write("""\ while (len >= %(fixed_len)s) { - of_object_id_t e_id; + of_object_id_t e_id; uint16_t e_type, e_len; buf_u16_get(buf, &e_type); buf_u16_get(buf+2, &e_len); diff --git a/loxi_front_end/flags.py b/loxi_front_end/flags.py index 3c401f9be..47a7a7e8e 100644 --- a/loxi_front_end/flags.py +++ b/loxi_front_end/flags.py @@ -73,4 +73,4 @@ def ident_is_flag(ident): return True return False - + diff --git a/loxi_front_end/oxm.py b/loxi_front_end/oxm.py index d4cf27329..ba5c07215 100644 --- a/loxi_front_end/oxm.py +++ b/loxi_front_end/oxm.py @@ -207,11 +207,11 @@ def add_oxm_classes_1_2(classes, version): members.append(dict(name="value", m_type=oxm_types[oxm])) if oxm.find("_masked") > 0: members.append(dict(name="value_mask", m_type=oxm_types[oxm])) - + name = "of_oxm_" + oxm of_g.ordered_classes[version].append(name) classes[name] = members - + # /* Header for OXM experimenter match fields. */ # struct ofp_oxm_experimenter_header { # uint32_t oxm_header; /* oxm_class = OFPXMC_EXPERIMENTER */ @@ -221,8 +221,8 @@ def add_oxm_classes_1_2(classes, version): # enum ofp_vlan_id { -# OFPVID_PRESENT = 0x1000, -# OFPVID_NONE = 0x0000, +# OFPVID_PRESENT = 0x1000, +# OFPVID_NONE = 0x0000, # }; # #define OFP_VLAN_NONE OFPVID_NONE diff --git a/loxi_front_end/translation.py b/loxi_front_end/translation.py index c889a5f82..5f62a0a97 100644 --- a/loxi_front_end/translation.py +++ b/loxi_front_end/translation.py @@ -31,7 +31,7 @@ import re import sys - + def loxi_name(ident): """ Return the LOXI name of an openflow.h identifier diff --git a/loxi_front_end/type_maps.py b/loxi_front_end/type_maps.py index df002c462..23452d330 100644 --- a/loxi_front_end/type_maps.py +++ b/loxi_front_end/type_maps.py @@ -636,7 +636,7 @@ def class_is_virtual(cls): ## # These are the objects whose length is specified by an external # reference, specifically another data member in the class. -# +# #external_length_spec = { # ("of_packet_out", "actions", OF_VERSION_1_0) : "actions_len", # ("of_packet_out", "actions", OF_VERSION_1_1) : "actions_len", @@ -647,7 +647,7 @@ def class_is_virtual(cls): ################################################################ # -# type_val is the primary data structure that maps an +# type_val is the primary data structure that maps an # (class_name, version) pair to the wire data type value # ################################################################ @@ -697,7 +697,7 @@ def type_array_len(version_indexed, max_val): Given versioned information about a type, calculate how long the unified array should be. - @param version_indexed A dict indexed by version. Each value is a + @param version_indexed A dict indexed by version. Each value is a dict indexed by a name and whose value is an integer @param max_val Ignore values greater than this for length calcs """ @@ -923,7 +923,7 @@ def extension_to_experimenter_name(cls): This is brute force; we search all extension data for a match """ - + for ext_obj in extension_objects: for version, exp_list in ext_obj.items(): for exp_name, classes in exp_list.items(): @@ -998,7 +998,7 @@ def cls_is_ext_obj(cls, version, ext_obj): return True return False - + ################################################################ # These are extension message specific ################################################################ diff --git a/loxi_utils/loxi_utils.py b/loxi_utils/loxi_utils.py index 5508c94c4..9edf0ac6b 100644 --- a/loxi_utils/loxi_utils.py +++ b/loxi_utils/loxi_utils.py @@ -28,7 +28,7 @@ """ @brief Utilities involving LOXI naming conventions -Utility functions for OpenFlow class generation +Utility functions for OpenFlow class generation These may need to be sorted out into language specific functions """ @@ -256,7 +256,7 @@ def list_to_entry_type(cls): Return the entry type for a list """ slen = len("of_list_") - return "of_" + cls[slen:] + return "of_" + cls[slen:] def type_to_short_name(m_type): if m_type in of_g.of_base_types: @@ -323,11 +323,11 @@ def member_returns_val(cls, m_name): Should get accessor return a value rather than void @param cls The class name @param m_name The member name - @return True if of_g config and the specific member allow a + @return True if of_g config and the specific member allow a return value. Otherwise False """ m_type = of_g.unified[cls]["union"][m_name]["m_type"] - return (config_check("get_returns") =="value" and + return (config_check("get_returns") =="value" and m_type in of_g.of_scalar_types) def config_check(str, dictionary = of_g.code_gen_config): @@ -356,7 +356,7 @@ def type_to_cof_type(m_type): return of_g.of_base_types[m_type]["cof_type"] return m_type - + def member_is_scalar(cls, m_name): return of_g.unified[cls]["union"][m_name]["m_type"] in of_g.of_scalar_types diff --git a/loxigen.py b/loxigen.py index 84afe76d4..ee92d6b3a 100755 --- a/loxigen.py +++ b/loxigen.py @@ -48,7 +48,7 @@ These are taken from current versions of openflow.h but are modified a bit. See Overview for more information. -Class canonical form: A list of entries, each of which is a +Class canonical form: A list of entries, each of which is a pair "type, name;". The exception is when type is the keyword 'list' in which the syntax is "list(type) name;". @@ -60,8 +60,8 @@ @fixme Clean up the lang module architecture. It should provide a list of files that it wants to generate and maps to the filenames, -subdirectory names and generation functions. It should also be -defined as a class, probably with the constructor taking the +subdirectory names and generation functions. It should also be +defined as a class, probably with the constructor taking the language target. @fixme Clean up global data structures such as versions and of_g @@ -109,10 +109,10 @@ def config_sanity_check(): # For now, only "error" supported for get returns if config_check("copy_semantics") != "read": debug("Only 'read' is supported for copy_semantics"); - rv = False + rv = False if config_check("get_returns") != "error": debug("Only 'error' is supported for get-accessor return types\m"); - rv = False + rv = False if not config_check("use_fn_ptrs") and not config_check("gen_unified_fns"): debug("Must have gen_fn_ptrs and/or gen_unified_fns set in config") rv = False @@ -125,12 +125,12 @@ def config_sanity_check(): debug("Conflict: Cannot generate unified functions and lower case \ unified macros") rv = False - + return rv def add_class(wire_version, cls, members): """ - Process a class for the given version and update the unified + Process a class for the given version and update the unified list of classes as needed. @param wire_version The wire version for this class defn @@ -153,7 +153,7 @@ def add_class(wire_version, cls, members): if wver == wire_version: continue if not "use_version" in uc[wver]: if sig == loxi_utils.class_signature(uc[wver]["members"]): - log("Matched %s, ver %d to ver %d" % + log("Matched %s, ver %d to ver %d" % (cls, wire_version, wver)) # have a match with existing version uc[wire_version]["use_version"] = wver @@ -268,13 +268,13 @@ def calculate_offsets_and_lengths(ordered_classes, classes, wire_version): if name == "pad": log("Skipping pad for special offset for %s" % cls) else: - log("SPECIAL OFS: Member %s (prev %s), class %s ver %d" % + log("SPECIAL OFS: Member %s (prev %s), class %s ver %d" % (name, last_name, cls, wire_version)) if (((cls, name) in of_g.special_offsets) and (of_g.special_offsets[(cls, name)] != last_name)): debug("ERROR: special offset prev name changed") debug(" cls %s. name %s. version %d. was %s. now %s" % - cls, name, wire_version, + cls, name, wire_version, of_g.special_offsets[(cls, name)], last_name) sys.exit(1) of_g.special_offsets[(cls, name)] = last_name @@ -292,13 +292,13 @@ def calculate_offsets_and_lengths(ordered_classes, classes, wire_version): log("offset gen skipping octets: %s.%s " % (cls, name)) offset = -1 else: - offset, len_update = update_offset(cls, wire_version, name, + offset, len_update = update_offset(cls, wire_version, name, offset, m_type) if offset != -1: fixed_offset = offset else: fixed_offset += len_update - log("offset is -1 for %s.%s version %d " % + log("offset is -1 for %s.%s version %d " % (cls, name, wire_version)) last_offset = offset last_name = name @@ -370,7 +370,7 @@ def order_and_assign_object_ids(): This is done to promote a reasonable order of the objects, putting messages first followed by non-messages. No assumptions should be made about the order, nor about contiguous numbering. However, the - numbers should all be reasonably small allowing arrays indexed by + numbers should all be reasonably small allowing arrays indexed by these enum values to be defined. """ @@ -510,7 +510,7 @@ def unify_input(): global versions - # Add classes to unified in wire-format order so that it is easier + # Add classes to unified in wire-format order so that it is easier # to generate things later keys = versions.keys() keys.sort(reverse=True) @@ -536,7 +536,7 @@ def log_all_class_info(): loxi_utils.class_is_var_len(cls,v) and "not fixed" or "fixed")) if "use_version" in of_g.unified[cls][v]: - log("cls %s: v %d mapped to %d" % (str(cls), v, + log("cls %s: v %d mapped to %d" % (str(cls), v, of_g.unified[cls][v]["use_version"])) if "members" in of_g.unified[cls][v]: for member in of_g.unified[cls][v]["members"]: diff --git a/of_g.py b/of_g.py index 6c8144fae..046273c8b 100644 --- a/of_g.py +++ b/of_g.py @@ -69,7 +69,7 @@ def lang_normalize(lang): """ - Normalize the representation of the language + Normalize the representation of the language """ return lang.lower() @@ -115,7 +115,7 @@ def process_commandline(default_vals=options_default): parser.add_option("-i", "--install-dir", default=default_vals["install-dir"], help="Directory to install generated files to (default %s)" % default_vals["install-dir"]) - parser.add_option("-v", "--version-list", + parser.add_option("-v", "--version-list", default=default_vals["version-list"], help="Specify the versions to target as 1.0 1.1 etc") @@ -130,7 +130,7 @@ def process_commandline(default_vals=options_default): # The dictionary of config variables related to code # # @param gen_unified_fns Boolean; Generate top level function definitions for -# accessors which are independent of the version; the alternative is to only +# accessors which are independent of the version; the alternative is to only # use the function pointers in the class definitions. These functions support # better inlining optimizations. # @@ -139,7 +139,7 @@ def process_commandline(default_vals=options_default): # unified (use_) functions # # @param use_obj_id Use object IDs in struct defns CURRENTLY NOT SUPPORTED -# +# # @param return_base_types For 'get' accessors, return values when possible. # Otherwise all values are returned thru a call by variable parameter # @@ -152,7 +152,7 @@ def process_commandline(default_vals=options_default): # @param encode_typedefs Use object and member IDs (rather than names) # when generating the names used for accessor function typedefs # -# @param get_returns One of "error", "value", or "void"; +# @param get_returns One of "error", "value", or "void"; # CURRENTLY ONLY "error" IS SUPPORTED. "error" means # all get operations return an error code. "value" means return a base_type # value when possible or void if not. "void" means always return void @@ -200,8 +200,8 @@ def process_commandline(default_vals=options_default): # the value is the name of the type to use for that version # # This is the map between the external type (like of_port_no_t) -# which is used by customers of this code and the internal -# datatypes (like uint16_t) that appear on the wire for a +# which is used by customers of this code and the internal +# datatypes (like uint16_t) that appear on the wire for a # particular version. # of_mixed_types = dict( @@ -282,17 +282,17 @@ def process_commandline(default_vals=options_default): short_name="desc_str"), of_serial_num_t = dict(bytes=ofp_constants["OF_SERIAL_NUM_LEN"], short_name="ser_num"), - of_match_v1_t = dict(bytes=40, to_w="match_v1_hton", - from_w="match_v1_ntoh", + of_match_v1_t = dict(bytes=40, to_w="match_v1_hton", + from_w="match_v1_ntoh", short_name="match_v1"), - of_match_v2_t = dict(bytes=88, to_w="match_v2_hton", - from_w="match_v2_ntoh", + of_match_v2_t = dict(bytes=88, to_w="match_v2_hton", + from_w="match_v2_ntoh", short_name="match_v2"), - of_match_v3_t = dict(bytes=-1, to_w="match_v3_hton", - from_w="match_v3_ntoh", + of_match_v3_t = dict(bytes=-1, to_w="match_v3_hton", + from_w="match_v3_ntoh", short_name="match_v3"), -# of_match_v4_t = dict(bytes=-1, to_w="match_v4_hton", -# from_w="match_v4_ntoh", +# of_match_v4_t = dict(bytes=-1, to_w="match_v4_hton", +# from_w="match_v4_ntoh", # short_name="match_v4"), of_octets_t = dict(bytes=-1, short_name="octets") ) @@ -300,7 +300,7 @@ def process_commandline(default_vals=options_default): of_scalar_types = ["char", "uint8_t", "uint16_t", "uint32_t", "uint64_t", "of_port_no_t", "of_fm_cmd_t", "of_wc_bmap_t", "of_match_bmap_t", "of_port_name_t", "of_table_name_t", - "of_desc_str_t", "of_serial_num_t", "of_mac_addr_t", + "of_desc_str_t", "of_serial_num_t", "of_mac_addr_t", "of_ipv6_t"] base_object_members = """\ @@ -394,7 +394,7 @@ def process_commandline(default_vals=options_default): ## Map from class, wire_version to size of fixed part of class base_length = {} -## Boolean indication of variable length, per class, wire_version, +## Boolean indication of variable length, per class, wire_version, is_fixed_length = set() ## The global object ID counter @@ -467,7 +467,7 @@ def process_commandline(default_vals=options_default): # # Experimenters, vendors, extensions # -# Although the term "experimenter" is used for identifying +# Although the term "experimenter" is used for identifying # external extension definitions, we generally use the term # extension when refering to the messages or objects themselves. # diff --git a/pyparsing.py b/pyparsing.py index 9be97dc28..ed34209c0 100644 --- a/pyparsing.py +++ b/pyparsing.py @@ -1,3749 +1,3749 @@ -# module pyparsing.py -# -# Copyright (c) 2003-2011 Paul T. McGuire -# -# Permission is hereby granted, free of charge, to any person obtaining -# a copy of this software and associated documentation files (the -# "Software"), to deal in the Software without restriction, including -# without limitation the rights to use, copy, modify, merge, publish, -# distribute, sublicense, and/or sell copies of the Software, and to -# permit persons to whom the Software is furnished to do so, subject to -# the following conditions: -# -# The above copyright notice and this permission notice shall be -# included in all copies or substantial portions of the Software. -# -# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, -# EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF -# MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. -# IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY -# CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, -# TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE -# SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. -# -#from __future__ import generators - -__doc__ = \ -""" -pyparsing module - Classes and methods to define and execute parsing grammars - -The pyparsing module is an alternative approach to creating and executing simple grammars, -vs. the traditional lex/yacc approach, or the use of regular expressions. With pyparsing, you -don't need to learn a new syntax for defining grammars or matching expressions - the parsing module -provides a library of classes that you use to construct the grammar directly in Python. - -Here is a program to parse "Hello, World!" (or any greeting of the form C{", !"}):: - - from pyparsing import Word, alphas - - # define grammar of a greeting - greet = Word( alphas ) + "," + Word( alphas ) + "!" - - hello = "Hello, World!" - print hello, "->", greet.parseString( hello ) - -The program outputs the following:: - - Hello, World! -> ['Hello', ',', 'World', '!'] - -The Python representation of the grammar is quite readable, owing to the self-explanatory -class names, and the use of '+', '|' and '^' operators. - -The parsed results returned from C{parseString()} can be accessed as a nested list, a dictionary, or an -object with named attributes. - -The pyparsing module handles some of the problems that are typically vexing when writing text parsers: - - extra or missing whitespace (the above program will also handle "Hello,World!", "Hello , World !", etc.) - - quoted strings - - embedded comments -""" - -__version__ = "1.5.6" -__versionTime__ = "26 June 2011 10:53" -__author__ = "Paul McGuire " - -import string -from weakref import ref as wkref -import copy -import sys -import warnings -import re -import sre_constants -#~ sys.stderr.write( "testing pyparsing module, version %s, %s\n" % (__version__,__versionTime__ ) ) - -__all__ = [ -'And', 'CaselessKeyword', 'CaselessLiteral', 'CharsNotIn', 'Combine', 'Dict', 'Each', 'Empty', -'FollowedBy', 'Forward', 'GoToColumn', 'Group', 'Keyword', 'LineEnd', 'LineStart', 'Literal', -'MatchFirst', 'NoMatch', 'NotAny', 'OneOrMore', 'OnlyOnce', 'Optional', 'Or', -'ParseBaseException', 'ParseElementEnhance', 'ParseException', 'ParseExpression', 'ParseFatalException', -'ParseResults', 'ParseSyntaxException', 'ParserElement', 'QuotedString', 'RecursiveGrammarException', -'Regex', 'SkipTo', 'StringEnd', 'StringStart', 'Suppress', 'Token', 'TokenConverter', 'Upcase', -'White', 'Word', 'WordEnd', 'WordStart', 'ZeroOrMore', -'alphanums', 'alphas', 'alphas8bit', 'anyCloseTag', 'anyOpenTag', 'cStyleComment', 'col', -'commaSeparatedList', 'commonHTMLEntity', 'countedArray', 'cppStyleComment', 'dblQuotedString', -'dblSlashComment', 'delimitedList', 'dictOf', 'downcaseTokens', 'empty', 'getTokensEndLoc', 'hexnums', -'htmlComment', 'javaStyleComment', 'keepOriginalText', 'line', 'lineEnd', 'lineStart', 'lineno', -'makeHTMLTags', 'makeXMLTags', 'matchOnlyAtCol', 'matchPreviousExpr', 'matchPreviousLiteral', -'nestedExpr', 'nullDebugAction', 'nums', 'oneOf', 'opAssoc', 'operatorPrecedence', 'printables', -'punc8bit', 'pythonStyleComment', 'quotedString', 'removeQuotes', 'replaceHTMLEntity', -'replaceWith', 'restOfLine', 'sglQuotedString', 'srange', 'stringEnd', -'stringStart', 'traceParseAction', 'unicodeString', 'upcaseTokens', 'withAttribute', -'indentedBlock', 'originalTextFor', -] - -""" -Detect if we are running version 3.X and make appropriate changes -Robert A. Clark -""" -_PY3K = sys.version_info[0] > 2 -if _PY3K: - _MAX_INT = sys.maxsize - basestring = str - unichr = chr - _ustr = str - alphas = string.ascii_lowercase + string.ascii_uppercase -else: - _MAX_INT = sys.maxint - range = xrange - set = lambda s : dict( [(c,0) for c in s] ) - alphas = string.lowercase + string.uppercase - - def _ustr(obj): - """Drop-in replacement for str(obj) that tries to be Unicode friendly. It first tries - str(obj). If that fails with a UnicodeEncodeError, then it tries unicode(obj). It - then < returns the unicode object | encodes it with the default encoding | ... >. - """ - if isinstance(obj,unicode): - return obj - - try: - # If this works, then _ustr(obj) has the same behaviour as str(obj), so - # it won't break any existing code. - return str(obj) - - except UnicodeEncodeError: - # The Python docs (http://docs.python.org/ref/customization.html#l2h-182) - # state that "The return value must be a string object". However, does a - # unicode object (being a subclass of basestring) count as a "string - # object"? - # If so, then return a unicode object: - return unicode(obj) - # Else encode it... but how? There are many choices... :) - # Replace unprintables with escape codes? - #return unicode(obj).encode(sys.getdefaultencoding(), 'backslashreplace_errors') - # Replace unprintables with question marks? - #return unicode(obj).encode(sys.getdefaultencoding(), 'replace') - # ... - - alphas = string.lowercase + string.uppercase - -# build list of single arg builtins, tolerant of Python version, that can be used as parse actions -singleArgBuiltins = [] -import __builtin__ -for fname in "sum len enumerate sorted reversed list tuple set any all".split(): - try: - singleArgBuiltins.append(getattr(__builtin__,fname)) - except AttributeError: - continue - -def _xml_escape(data): - """Escape &, <, >, ", ', etc. in a string of data.""" - - # ampersand must be replaced first - from_symbols = '&><"\'' - to_symbols = ['&'+s+';' for s in "amp gt lt quot apos".split()] - for from_,to_ in zip(from_symbols, to_symbols): - data = data.replace(from_, to_) - return data - -class _Constants(object): - pass - -nums = string.digits -hexnums = nums + "ABCDEFabcdef" -alphanums = alphas + nums -_bslash = chr(92) -printables = "".join( [ c for c in string.printable if c not in string.whitespace ] ) - -class ParseBaseException(Exception): - """base exception class for all parsing runtime exceptions""" - # Performance tuning: we construct a *lot* of these, so keep this - # constructor as small and fast as possible - def __init__( self, pstr, loc=0, msg=None, elem=None ): - self.loc = loc - if msg is None: - self.msg = pstr - self.pstr = "" - else: - self.msg = msg - self.pstr = pstr - self.parserElement = elem - - def __getattr__( self, aname ): - """supported attributes by name are: - - lineno - returns the line number of the exception text - - col - returns the column number of the exception text - - line - returns the line containing the exception text - """ - if( aname == "lineno" ): - return lineno( self.loc, self.pstr ) - elif( aname in ("col", "column") ): - return col( self.loc, self.pstr ) - elif( aname == "line" ): - return line( self.loc, self.pstr ) - else: - raise AttributeError(aname) - - def __str__( self ): - return "%s (at char %d), (line:%d, col:%d)" % \ - ( self.msg, self.loc, self.lineno, self.column ) - def __repr__( self ): - return _ustr(self) - def markInputline( self, markerString = ">!<" ): - """Extracts the exception line from the input string, and marks - the location of the exception with a special symbol. - """ - line_str = self.line - line_column = self.column - 1 - if markerString: - line_str = "".join( [line_str[:line_column], - markerString, line_str[line_column:]]) - return line_str.strip() - def __dir__(self): - return "loc msg pstr parserElement lineno col line " \ - "markInputLine __str__ __repr__".split() - -class ParseException(ParseBaseException): - """exception thrown when parse expressions don't match class; - supported attributes by name are: - - lineno - returns the line number of the exception text - - col - returns the column number of the exception text - - line - returns the line containing the exception text - """ - pass - -class ParseFatalException(ParseBaseException): - """user-throwable exception thrown when inconsistent parse content - is found; stops all parsing immediately""" - pass - -class ParseSyntaxException(ParseFatalException): - """just like C{ParseFatalException}, but thrown internally when an - C{ErrorStop} ('-' operator) indicates that parsing is to stop immediately because - an unbacktrackable syntax error has been found""" - def __init__(self, pe): - super(ParseSyntaxException, self).__init__( - pe.pstr, pe.loc, pe.msg, pe.parserElement) - -#~ class ReparseException(ParseBaseException): - #~ """Experimental class - parse actions can raise this exception to cause - #~ pyparsing to reparse the input string: - #~ - with a modified input string, and/or - #~ - with a modified start location - #~ Set the values of the ReparseException in the constructor, and raise the - #~ exception in a parse action to cause pyparsing to use the new string/location. - #~ Setting the values as None causes no change to be made. - #~ """ - #~ def __init_( self, newstring, restartLoc ): - #~ self.newParseText = newstring - #~ self.reparseLoc = restartLoc - -class RecursiveGrammarException(Exception): - """exception thrown by C{validate()} if the grammar could be improperly recursive""" - def __init__( self, parseElementList ): - self.parseElementTrace = parseElementList - - def __str__( self ): - return "RecursiveGrammarException: %s" % self.parseElementTrace - -class _ParseResultsWithOffset(object): - def __init__(self,p1,p2): - self.tup = (p1,p2) - def __getitem__(self,i): - return self.tup[i] - def __repr__(self): - return repr(self.tup) - def setOffset(self,i): - self.tup = (self.tup[0],i) - -class ParseResults(object): - """Structured parse results, to provide multiple means of access to the parsed data: - - as a list (C{len(results)}) - - by list index (C{results[0], results[1]}, etc.) - - by attribute (C{results.}) - """ - #~ __slots__ = ( "__toklist", "__tokdict", "__doinit", "__name", "__parent", "__accumNames", "__weakref__" ) - def __new__(cls, toklist, name=None, asList=True, modal=True ): - if isinstance(toklist, cls): - return toklist - retobj = object.__new__(cls) - retobj.__doinit = True - return retobj - - # Performance tuning: we construct a *lot* of these, so keep this - # constructor as small and fast as possible - def __init__( self, toklist, name=None, asList=True, modal=True, isinstance=isinstance ): - if self.__doinit: - self.__doinit = False - self.__name = None - self.__parent = None - self.__accumNames = {} - if isinstance(toklist, list): - self.__toklist = toklist[:] - else: - self.__toklist = [toklist] - self.__tokdict = dict() - - if name is not None and name: - if not modal: - self.__accumNames[name] = 0 - if isinstance(name,int): - name = _ustr(name) # will always return a str, but use _ustr for consistency - self.__name = name - if not toklist in (None,'',[]): - if isinstance(toklist,basestring): - toklist = [ toklist ] - if asList: - if isinstance(toklist,ParseResults): - self[name] = _ParseResultsWithOffset(toklist.copy(),0) - else: - self[name] = _ParseResultsWithOffset(ParseResults(toklist[0]),0) - self[name].__name = name - else: - try: - self[name] = toklist[0] - except (KeyError,TypeError,IndexError): - self[name] = toklist - - def __getitem__( self, i ): - if isinstance( i, (int,slice) ): - return self.__toklist[i] - else: - if i not in self.__accumNames: - return self.__tokdict[i][-1][0] - else: - return ParseResults([ v[0] for v in self.__tokdict[i] ]) - - def __setitem__( self, k, v, isinstance=isinstance ): - if isinstance(v,_ParseResultsWithOffset): - self.__tokdict[k] = self.__tokdict.get(k,list()) + [v] - sub = v[0] - elif isinstance(k,int): - self.__toklist[k] = v - sub = v - else: - self.__tokdict[k] = self.__tokdict.get(k,list()) + [_ParseResultsWithOffset(v,0)] - sub = v - if isinstance(sub,ParseResults): - sub.__parent = wkref(self) - - def __delitem__( self, i ): - if isinstance(i,(int,slice)): - mylen = len( self.__toklist ) - del self.__toklist[i] - - # convert int to slice - if isinstance(i, int): - if i < 0: - i += mylen - i = slice(i, i+1) - # get removed indices - removed = list(range(*i.indices(mylen))) - removed.reverse() - # fixup indices in token dictionary - for name in self.__tokdict: - occurrences = self.__tokdict[name] - for j in removed: - for k, (value, position) in enumerate(occurrences): - occurrences[k] = _ParseResultsWithOffset(value, position - (position > j)) - else: - del self.__tokdict[i] - - def __contains__( self, k ): - return k in self.__tokdict - - def __len__( self ): return len( self.__toklist ) - def __bool__(self): return len( self.__toklist ) > 0 - __nonzero__ = __bool__ - def __iter__( self ): return iter( self.__toklist ) - def __reversed__( self ): return iter( self.__toklist[::-1] ) - def keys( self ): - """Returns all named result keys.""" - return self.__tokdict.keys() - - def pop( self, index=-1 ): - """Removes and returns item at specified index (default=last). - Will work with either numeric indices or dict-key indicies.""" - ret = self[index] - del self[index] - return ret - - def get(self, key, defaultValue=None): - """Returns named result matching the given key, or if there is no - such name, then returns the given C{defaultValue} or C{None} if no - C{defaultValue} is specified.""" - if key in self: - return self[key] - else: - return defaultValue - - def insert( self, index, insStr ): - """Inserts new element at location index in the list of parsed tokens.""" - self.__toklist.insert(index, insStr) - # fixup indices in token dictionary - for name in self.__tokdict: - occurrences = self.__tokdict[name] - for k, (value, position) in enumerate(occurrences): - occurrences[k] = _ParseResultsWithOffset(value, position + (position > index)) - - def items( self ): - """Returns all named result keys and values as a list of tuples.""" - return [(k,self[k]) for k in self.__tokdict] - - def values( self ): - """Returns all named result values.""" - return [ v[-1][0] for v in self.__tokdict.values() ] - - def __getattr__( self, name ): - if True: #name not in self.__slots__: - if name in self.__tokdict: - if name not in self.__accumNames: - return self.__tokdict[name][-1][0] - else: - return ParseResults([ v[0] for v in self.__tokdict[name] ]) - else: - return "" - return None - - def __add__( self, other ): - ret = self.copy() - ret += other - return ret - - def __iadd__( self, other ): - if other.__tokdict: - offset = len(self.__toklist) - addoffset = ( lambda a: (a<0 and offset) or (a+offset) ) - otheritems = other.__tokdict.items() - otherdictitems = [(k, _ParseResultsWithOffset(v[0],addoffset(v[1])) ) - for (k,vlist) in otheritems for v in vlist] - for k,v in otherdictitems: - self[k] = v - if isinstance(v[0],ParseResults): - v[0].__parent = wkref(self) - - self.__toklist += other.__toklist - self.__accumNames.update( other.__accumNames ) - return self - - def __radd__(self, other): - if isinstance(other,int) and other == 0: - return self.copy() - - def __repr__( self ): - return "(%s, %s)" % ( repr( self.__toklist ), repr( self.__tokdict ) ) - - def __str__( self ): - out = "[" - sep = "" - for i in self.__toklist: - if isinstance(i, ParseResults): - out += sep + _ustr(i) - else: - out += sep + repr(i) - sep = ", " - out += "]" - return out - - def _asStringList( self, sep='' ): - out = [] - for item in self.__toklist: - if out and sep: - out.append(sep) - if isinstance( item, ParseResults ): - out += item._asStringList() - else: - out.append( _ustr(item) ) - return out - - def asList( self ): - """Returns the parse results as a nested list of matching tokens, all converted to strings.""" - out = [] - for res in self.__toklist: - if isinstance(res,ParseResults): - out.append( res.asList() ) - else: - out.append( res ) - return out - - def asDict( self ): - """Returns the named parse results as dictionary.""" - return dict( self.items() ) - - def copy( self ): - """Returns a new copy of a C{ParseResults} object.""" - ret = ParseResults( self.__toklist ) - ret.__tokdict = self.__tokdict.copy() - ret.__parent = self.__parent - ret.__accumNames.update( self.__accumNames ) - ret.__name = self.__name - return ret - - def asXML( self, doctag=None, namedItemsOnly=False, indent="", formatted=True ): - """Returns the parse results as XML. Tags are created for tokens and lists that have defined results names.""" - nl = "\n" - out = [] - namedItems = dict( [ (v[1],k) for (k,vlist) in self.__tokdict.items() - for v in vlist ] ) - nextLevelIndent = indent + " " - - # collapse out indents if formatting is not desired - if not formatted: - indent = "" - nextLevelIndent = "" - nl = "" - - selfTag = None - if doctag is not None: - selfTag = doctag - else: - if self.__name: - selfTag = self.__name - - if not selfTag: - if namedItemsOnly: - return "" - else: - selfTag = "ITEM" - - out += [ nl, indent, "<", selfTag, ">" ] - - worklist = self.__toklist - for i,res in enumerate(worklist): - if isinstance(res,ParseResults): - if i in namedItems: - out += [ res.asXML(namedItems[i], - namedItemsOnly and doctag is None, - nextLevelIndent, - formatted)] - else: - out += [ res.asXML(None, - namedItemsOnly and doctag is None, - nextLevelIndent, - formatted)] - else: - # individual token, see if there is a name for it - resTag = None - if i in namedItems: - resTag = namedItems[i] - if not resTag: - if namedItemsOnly: - continue - else: - resTag = "ITEM" - xmlBodyText = _xml_escape(_ustr(res)) - out += [ nl, nextLevelIndent, "<", resTag, ">", - xmlBodyText, - "" ] - - out += [ nl, indent, "" ] - return "".join(out) - - def __lookup(self,sub): - for k,vlist in self.__tokdict.items(): - for v,loc in vlist: - if sub is v: - return k - return None - - def getName(self): - """Returns the results name for this token expression.""" - if self.__name: - return self.__name - elif self.__parent: - par = self.__parent() - if par: - return par.__lookup(self) - else: - return None - elif (len(self) == 1 and - len(self.__tokdict) == 1 and - self.__tokdict.values()[0][0][1] in (0,-1)): - return self.__tokdict.keys()[0] - else: - return None - - def dump(self,indent='',depth=0): - """Diagnostic method for listing out the contents of a C{ParseResults}. - Accepts an optional C{indent} argument so that this string can be embedded - in a nested display of other data.""" - out = [] - out.append( indent+_ustr(self.asList()) ) - keys = self.items() - keys.sort() - for k,v in keys: - if out: - out.append('\n') - out.append( "%s%s- %s: " % (indent,(' '*depth), k) ) - if isinstance(v,ParseResults): - if v.keys(): - out.append( v.dump(indent,depth+1) ) - else: - out.append(_ustr(v)) - else: - out.append(_ustr(v)) - return "".join(out) - - # add support for pickle protocol - def __getstate__(self): - return ( self.__toklist, - ( self.__tokdict.copy(), - self.__parent is not None and self.__parent() or None, - self.__accumNames, - self.__name ) ) - - def __setstate__(self,state): - self.__toklist = state[0] - (self.__tokdict, - par, - inAccumNames, - self.__name) = state[1] - self.__accumNames = {} - self.__accumNames.update(inAccumNames) - if par is not None: - self.__parent = wkref(par) - else: - self.__parent = None - - def __dir__(self): - return dir(super(ParseResults,self)) + self.keys() - -def col (loc,strg): - """Returns current column within a string, counting newlines as line separators. - The first column is number 1. - - Note: the default parsing behavior is to expand tabs in the input string - before starting the parsing process. See L{I{ParserElement.parseString}} for more information - on parsing strings containing s, and suggested methods to maintain a - consistent view of the parsed string, the parse location, and line and column - positions within the parsed string. - """ - return (loc} for more information - on parsing strings containing s, and suggested methods to maintain a - consistent view of the parsed string, the parse location, and line and column - positions within the parsed string. - """ - return strg.count("\n",0,loc) + 1 - -def line( loc, strg ): - """Returns the line of text containing loc within a string, counting newlines as line separators. - """ - lastCR = strg.rfind("\n", 0, loc) - nextCR = strg.find("\n", loc) - if nextCR >= 0: - return strg[lastCR+1:nextCR] - else: - return strg[lastCR+1:] - -def _defaultStartDebugAction( instring, loc, expr ): - print ("Match " + _ustr(expr) + " at loc " + _ustr(loc) + "(%d,%d)" % ( lineno(loc,instring), col(loc,instring) )) - -def _defaultSuccessDebugAction( instring, startloc, endloc, expr, toks ): - print ("Matched " + _ustr(expr) + " -> " + str(toks.asList())) - -def _defaultExceptionDebugAction( instring, loc, expr, exc ): - print ("Exception raised:" + _ustr(exc)) - -def nullDebugAction(*args): - """'Do-nothing' debug action, to suppress debugging output during parsing.""" - pass - -'decorator to trim function calls to match the arity of the target' -if not _PY3K: - def _trim_arity(func, maxargs=2): - limit = [0] - def wrapper(*args): - while 1: - try: - return func(*args[limit[0]:]) - except TypeError: - if limit[0] <= maxargs: - limit[0] += 1 - continue - raise - return wrapper -else: - def _trim_arity(func, maxargs=2): - limit = maxargs - def wrapper(*args): - #~ nonlocal limit - while 1: - try: - return func(*args[limit:]) - except TypeError: - if limit: - limit -= 1 - continue - raise - return wrapper - -class ParserElement(object): - """Abstract base level parser element class.""" - DEFAULT_WHITE_CHARS = " \n\t\r" - verbose_stacktrace = False - - def setDefaultWhitespaceChars( chars ): - """Overrides the default whitespace chars - """ - ParserElement.DEFAULT_WHITE_CHARS = chars - setDefaultWhitespaceChars = staticmethod(setDefaultWhitespaceChars) - - def __init__( self, savelist=False ): - self.parseAction = list() - self.failAction = None - #~ self.name = "" # don't define self.name, let subclasses try/except upcall - self.strRepr = None - self.resultsName = None - self.saveAsList = savelist - self.skipWhitespace = True - self.whiteChars = ParserElement.DEFAULT_WHITE_CHARS - self.copyDefaultWhiteChars = True - self.mayReturnEmpty = False # used when checking for left-recursion - self.keepTabs = False - self.ignoreExprs = list() - self.debug = False - self.streamlined = False - self.mayIndexError = True # used to optimize exception handling for subclasses that don't advance parse index - self.errmsg = "" - self.modalResults = True # used to mark results names as modal (report only last) or cumulative (list all) - self.debugActions = ( None, None, None ) #custom debug actions - self.re = None - self.callPreparse = True # used to avoid redundant calls to preParse - self.callDuringTry = False - - def copy( self ): - """Make a copy of this C{ParserElement}. Useful for defining different parse actions - for the same parsing pattern, using copies of the original parse element.""" - cpy = copy.copy( self ) - cpy.parseAction = self.parseAction[:] - cpy.ignoreExprs = self.ignoreExprs[:] - if self.copyDefaultWhiteChars: - cpy.whiteChars = ParserElement.DEFAULT_WHITE_CHARS - return cpy - - def setName( self, name ): - """Define name for this expression, for use in debugging.""" - self.name = name - self.errmsg = "Expected " + self.name - if hasattr(self,"exception"): - self.exception.msg = self.errmsg - return self - - def setResultsName( self, name, listAllMatches=False ): - """Define name for referencing matching tokens as a nested attribute - of the returned parse results. - NOTE: this returns a *copy* of the original C{ParserElement} object; - this is so that the client can define a basic element, such as an - integer, and reference it in multiple places with different names. - - You can also set results names using the abbreviated syntax, - C{expr("name")} in place of C{expr.setResultsName("name")} - - see L{I{__call__}<__call__>}. - """ - newself = self.copy() - if name.endswith("*"): - name = name[:-1] - listAllMatches=True - newself.resultsName = name - newself.modalResults = not listAllMatches - return newself - - def setBreak(self,breakFlag = True): - """Method to invoke the Python pdb debugger when this element is - about to be parsed. Set C{breakFlag} to True to enable, False to - disable. - """ - if breakFlag: - _parseMethod = self._parse - def breaker(instring, loc, doActions=True, callPreParse=True): - import pdb - pdb.set_trace() - return _parseMethod( instring, loc, doActions, callPreParse ) - breaker._originalParseMethod = _parseMethod - self._parse = breaker - else: - if hasattr(self._parse,"_originalParseMethod"): - self._parse = self._parse._originalParseMethod - return self - - def setParseAction( self, *fns, **kwargs ): - """Define action to perform when successfully matching parse element definition. - Parse action fn is a callable method with 0-3 arguments, called as C{fn(s,loc,toks)}, - C{fn(loc,toks)}, C{fn(toks)}, or just C{fn()}, where: - - s = the original string being parsed (see note below) - - loc = the location of the matching substring - - toks = a list of the matched tokens, packaged as a ParseResults object - If the functions in fns modify the tokens, they can return them as the return - value from fn, and the modified list of tokens will replace the original. - Otherwise, fn does not need to return any value. - - Note: the default parsing behavior is to expand tabs in the input string - before starting the parsing process. See L{I{parseString}} for more information - on parsing strings containing s, and suggested methods to maintain a - consistent view of the parsed string, the parse location, and line and column - positions within the parsed string. - """ - self.parseAction = list(map(_trim_arity, list(fns))) - self.callDuringTry = ("callDuringTry" in kwargs and kwargs["callDuringTry"]) - return self - - def addParseAction( self, *fns, **kwargs ): - """Add parse action to expression's list of parse actions. See L{I{setParseAction}}.""" - self.parseAction += list(map(_trim_arity, list(fns))) - self.callDuringTry = self.callDuringTry or ("callDuringTry" in kwargs and kwargs["callDuringTry"]) - return self - - def setFailAction( self, fn ): - """Define action to perform if parsing fails at this expression. - Fail acton fn is a callable function that takes the arguments - C{fn(s,loc,expr,err)} where: - - s = string being parsed - - loc = location where expression match was attempted and failed - - expr = the parse expression that failed - - err = the exception thrown - The function returns no value. It may throw C{ParseFatalException} - if it is desired to stop parsing immediately.""" - self.failAction = fn - return self - - def _skipIgnorables( self, instring, loc ): - exprsFound = True - while exprsFound: - exprsFound = False - for e in self.ignoreExprs: - try: - while 1: - loc,dummy = e._parse( instring, loc ) - exprsFound = True - except ParseException: - pass - return loc - - def preParse( self, instring, loc ): - if self.ignoreExprs: - loc = self._skipIgnorables( instring, loc ) - - if self.skipWhitespace: - wt = self.whiteChars - instrlen = len(instring) - while loc < instrlen and instring[loc] in wt: - loc += 1 - - return loc - - def parseImpl( self, instring, loc, doActions=True ): - return loc, [] - - def postParse( self, instring, loc, tokenlist ): - return tokenlist - - #~ @profile - def _parseNoCache( self, instring, loc, doActions=True, callPreParse=True ): - debugging = ( self.debug ) #and doActions ) - - if debugging or self.failAction: - #~ print ("Match",self,"at loc",loc,"(%d,%d)" % ( lineno(loc,instring), col(loc,instring) )) - if (self.debugActions[0] ): - self.debugActions[0]( instring, loc, self ) - if callPreParse and self.callPreparse: - preloc = self.preParse( instring, loc ) - else: - preloc = loc - tokensStart = preloc - try: - try: - loc,tokens = self.parseImpl( instring, preloc, doActions ) - except IndexError: - raise ParseException( instring, len(instring), self.errmsg, self ) - except ParseBaseException: - #~ print ("Exception raised:", err) - err = None - if self.debugActions[2]: - err = sys.exc_info()[1] - self.debugActions[2]( instring, tokensStart, self, err ) - if self.failAction: - if err is None: - err = sys.exc_info()[1] - self.failAction( instring, tokensStart, self, err ) - raise - else: - if callPreParse and self.callPreparse: - preloc = self.preParse( instring, loc ) - else: - preloc = loc - tokensStart = preloc - if self.mayIndexError or loc >= len(instring): - try: - loc,tokens = self.parseImpl( instring, preloc, doActions ) - except IndexError: - raise ParseException( instring, len(instring), self.errmsg, self ) - else: - loc,tokens = self.parseImpl( instring, preloc, doActions ) - - tokens = self.postParse( instring, loc, tokens ) - - retTokens = ParseResults( tokens, self.resultsName, asList=self.saveAsList, modal=self.modalResults ) - if self.parseAction and (doActions or self.callDuringTry): - if debugging: - try: - for fn in self.parseAction: - tokens = fn( instring, tokensStart, retTokens ) - if tokens is not None: - retTokens = ParseResults( tokens, - self.resultsName, - asList=self.saveAsList and isinstance(tokens,(ParseResults,list)), - modal=self.modalResults ) - except ParseBaseException: - #~ print "Exception raised in user parse action:", err - if (self.debugActions[2] ): - err = sys.exc_info()[1] - self.debugActions[2]( instring, tokensStart, self, err ) - raise - else: - for fn in self.parseAction: - tokens = fn( instring, tokensStart, retTokens ) - if tokens is not None: - retTokens = ParseResults( tokens, - self.resultsName, - asList=self.saveAsList and isinstance(tokens,(ParseResults,list)), - modal=self.modalResults ) - - if debugging: - #~ print ("Matched",self,"->",retTokens.asList()) - if (self.debugActions[1] ): - self.debugActions[1]( instring, tokensStart, loc, self, retTokens ) - - return loc, retTokens - - def tryParse( self, instring, loc ): - try: - return self._parse( instring, loc, doActions=False )[0] - except ParseFatalException: - raise ParseException( instring, loc, self.errmsg, self) - - # this method gets repeatedly called during backtracking with the same arguments - - # we can cache these arguments and save ourselves the trouble of re-parsing the contained expression - def _parseCache( self, instring, loc, doActions=True, callPreParse=True ): - lookup = (self,instring,loc,callPreParse,doActions) - if lookup in ParserElement._exprArgCache: - value = ParserElement._exprArgCache[ lookup ] - if isinstance(value, Exception): - raise value - return (value[0],value[1].copy()) - else: - try: - value = self._parseNoCache( instring, loc, doActions, callPreParse ) - ParserElement._exprArgCache[ lookup ] = (value[0],value[1].copy()) - return value - except ParseBaseException: - pe = sys.exc_info()[1] - ParserElement._exprArgCache[ lookup ] = pe - raise - - _parse = _parseNoCache - - # argument cache for optimizing repeated calls when backtracking through recursive expressions - _exprArgCache = {} - def resetCache(): - ParserElement._exprArgCache.clear() - resetCache = staticmethod(resetCache) - - _packratEnabled = False - def enablePackrat(): - """Enables "packrat" parsing, which adds memoizing to the parsing logic. - Repeated parse attempts at the same string location (which happens - often in many complex grammars) can immediately return a cached value, - instead of re-executing parsing/validating code. Memoizing is done of - both valid results and parsing exceptions. - - This speedup may break existing programs that use parse actions that - have side-effects. For this reason, packrat parsing is disabled when - you first import pyparsing. To activate the packrat feature, your - program must call the class method C{ParserElement.enablePackrat()}. If - your program uses C{psyco} to "compile as you go", you must call - C{enablePackrat} before calling C{psyco.full()}. If you do not do this, - Python will crash. For best results, call C{enablePackrat()} immediately - after importing pyparsing. - """ - if not ParserElement._packratEnabled: - ParserElement._packratEnabled = True - ParserElement._parse = ParserElement._parseCache - enablePackrat = staticmethod(enablePackrat) - - def parseString( self, instring, parseAll=False ): - """Execute the parse expression with the given string. - This is the main interface to the client code, once the complete - expression has been built. - - If you want the grammar to require that the entire input string be - successfully parsed, then set C{parseAll} to True (equivalent to ending - the grammar with C{StringEnd()}). - - Note: C{parseString} implicitly calls C{expandtabs()} on the input string, - in order to report proper column numbers in parse actions. - If the input string contains tabs and - the grammar uses parse actions that use the C{loc} argument to index into the - string being parsed, you can ensure you have a consistent view of the input - string by: - - calling C{parseWithTabs} on your grammar before calling C{parseString} - (see L{I{parseWithTabs}}) - - define your parse action using the full C{(s,loc,toks)} signature, and - reference the input string using the parse action's C{s} argument - - explictly expand the tabs in your input string before calling - C{parseString} - """ - ParserElement.resetCache() - if not self.streamlined: - self.streamline() - #~ self.saveAsList = True - for e in self.ignoreExprs: - e.streamline() - if not self.keepTabs: - instring = instring.expandtabs() - try: - loc, tokens = self._parse( instring, 0 ) - if parseAll: - loc = self.preParse( instring, loc ) - se = Empty() + StringEnd() - se._parse( instring, loc ) - except ParseBaseException: - if ParserElement.verbose_stacktrace: - raise - else: - # catch and re-raise exception from here, clears out pyparsing internal stack trace - exc = sys.exc_info()[1] - raise exc - else: - return tokens - - def scanString( self, instring, maxMatches=_MAX_INT, overlap=False ): - """Scan the input string for expression matches. Each match will return the - matching tokens, start location, and end location. May be called with optional - C{maxMatches} argument, to clip scanning after 'n' matches are found. If - C{overlap} is specified, then overlapping matches will be reported. - - Note that the start and end locations are reported relative to the string - being parsed. See L{I{parseString}} for more information on parsing - strings with embedded tabs.""" - if not self.streamlined: - self.streamline() - for e in self.ignoreExprs: - e.streamline() - - if not self.keepTabs: - instring = _ustr(instring).expandtabs() - instrlen = len(instring) - loc = 0 - preparseFn = self.preParse - parseFn = self._parse - ParserElement.resetCache() - matches = 0 - try: - while loc <= instrlen and matches < maxMatches: - try: - preloc = preparseFn( instring, loc ) - nextLoc,tokens = parseFn( instring, preloc, callPreParse=False ) - except ParseException: - loc = preloc+1 - else: - if nextLoc > loc: - matches += 1 - yield tokens, preloc, nextLoc - if overlap: - nextloc = preparseFn( instring, loc ) - if nextloc > loc: - loc = nextLoc - else: - loc += 1 - else: - loc = nextLoc - else: - loc = preloc+1 - except ParseBaseException: - if ParserElement.verbose_stacktrace: - raise - else: - # catch and re-raise exception from here, clears out pyparsing internal stack trace - exc = sys.exc_info()[1] - raise exc - - def transformString( self, instring ): - """Extension to C{scanString}, to modify matching text with modified tokens that may - be returned from a parse action. To use C{transformString}, define a grammar and - attach a parse action to it that modifies the returned token list. - Invoking C{transformString()} on a target string will then scan for matches, - and replace the matched text patterns according to the logic in the parse - action. C{transformString()} returns the resulting transformed string.""" - out = [] - lastE = 0 - # force preservation of s, to minimize unwanted transformation of string, and to - # keep string locs straight between transformString and scanString - self.keepTabs = True - try: - for t,s,e in self.scanString( instring ): - out.append( instring[lastE:s] ) - if t: - if isinstance(t,ParseResults): - out += t.asList() - elif isinstance(t,list): - out += t - else: - out.append(t) - lastE = e - out.append(instring[lastE:]) - out = [o for o in out if o] - return "".join(map(_ustr,_flatten(out))) - except ParseBaseException: - if ParserElement.verbose_stacktrace: - raise - else: - # catch and re-raise exception from here, clears out pyparsing internal stack trace - exc = sys.exc_info()[1] - raise exc - - def searchString( self, instring, maxMatches=_MAX_INT ): - """Another extension to C{scanString}, simplifying the access to the tokens found - to match the given parse expression. May be called with optional - C{maxMatches} argument, to clip searching after 'n' matches are found. - """ - try: - return ParseResults([ t for t,s,e in self.scanString( instring, maxMatches ) ]) - except ParseBaseException: - if ParserElement.verbose_stacktrace: - raise - else: - # catch and re-raise exception from here, clears out pyparsing internal stack trace - exc = sys.exc_info()[1] - raise exc - - def __add__(self, other ): - """Implementation of + operator - returns And""" - if isinstance( other, basestring ): - other = Literal( other ) - if not isinstance( other, ParserElement ): - warnings.warn("Cannot combine element of type %s with ParserElement" % type(other), - SyntaxWarning, stacklevel=2) - return None - return And( [ self, other ] ) - - def __radd__(self, other ): - """Implementation of + operator when left operand is not a C{ParserElement}""" - if isinstance( other, basestring ): - other = Literal( other ) - if not isinstance( other, ParserElement ): - warnings.warn("Cannot combine element of type %s with ParserElement" % type(other), - SyntaxWarning, stacklevel=2) - return None - return other + self - - def __sub__(self, other): - """Implementation of - operator, returns C{And} with error stop""" - if isinstance( other, basestring ): - other = Literal( other ) - if not isinstance( other, ParserElement ): - warnings.warn("Cannot combine element of type %s with ParserElement" % type(other), - SyntaxWarning, stacklevel=2) - return None - return And( [ self, And._ErrorStop(), other ] ) - - def __rsub__(self, other ): - """Implementation of - operator when left operand is not a C{ParserElement}""" - if isinstance( other, basestring ): - other = Literal( other ) - if not isinstance( other, ParserElement ): - warnings.warn("Cannot combine element of type %s with ParserElement" % type(other), - SyntaxWarning, stacklevel=2) - return None - return other - self - - def __mul__(self,other): - """Implementation of * operator, allows use of C{expr * 3} in place of - C{expr + expr + expr}. Expressions may also me multiplied by a 2-integer - tuple, similar to C{{min,max}} multipliers in regular expressions. Tuples - may also include C{None} as in: - - C{expr*(n,None)} or C{expr*(n,)} is equivalent - to C{expr*n + ZeroOrMore(expr)} - (read as "at least n instances of C{expr}") - - C{expr*(None,n)} is equivalent to C{expr*(0,n)} - (read as "0 to n instances of C{expr}") - - C{expr*(None,None)} is equivalent to C{ZeroOrMore(expr)} - - C{expr*(1,None)} is equivalent to C{OneOrMore(expr)} - - Note that C{expr*(None,n)} does not raise an exception if - more than n exprs exist in the input stream; that is, - C{expr*(None,n)} does not enforce a maximum number of expr - occurrences. If this behavior is desired, then write - C{expr*(None,n) + ~expr} - - """ - if isinstance(other,int): - minElements, optElements = other,0 - elif isinstance(other,tuple): - other = (other + (None, None))[:2] - if other[0] is None: - other = (0, other[1]) - if isinstance(other[0],int) and other[1] is None: - if other[0] == 0: - return ZeroOrMore(self) - if other[0] == 1: - return OneOrMore(self) - else: - return self*other[0] + ZeroOrMore(self) - elif isinstance(other[0],int) and isinstance(other[1],int): - minElements, optElements = other - optElements -= minElements - else: - raise TypeError("cannot multiply 'ParserElement' and ('%s','%s') objects", type(other[0]),type(other[1])) - else: - raise TypeError("cannot multiply 'ParserElement' and '%s' objects", type(other)) - - if minElements < 0: - raise ValueError("cannot multiply ParserElement by negative value") - if optElements < 0: - raise ValueError("second tuple value must be greater or equal to first tuple value") - if minElements == optElements == 0: - raise ValueError("cannot multiply ParserElement by 0 or (0,0)") - - if (optElements): - def makeOptionalList(n): - if n>1: - return Optional(self + makeOptionalList(n-1)) - else: - return Optional(self) - if minElements: - if minElements == 1: - ret = self + makeOptionalList(optElements) - else: - ret = And([self]*minElements) + makeOptionalList(optElements) - else: - ret = makeOptionalList(optElements) - else: - if minElements == 1: - ret = self - else: - ret = And([self]*minElements) - return ret - - def __rmul__(self, other): - return self.__mul__(other) - - def __or__(self, other ): - """Implementation of | operator - returns C{MatchFirst}""" - if isinstance( other, basestring ): - other = Literal( other ) - if not isinstance( other, ParserElement ): - warnings.warn("Cannot combine element of type %s with ParserElement" % type(other), - SyntaxWarning, stacklevel=2) - return None - return MatchFirst( [ self, other ] ) - - def __ror__(self, other ): - """Implementation of | operator when left operand is not a C{ParserElement}""" - if isinstance( other, basestring ): - other = Literal( other ) - if not isinstance( other, ParserElement ): - warnings.warn("Cannot combine element of type %s with ParserElement" % type(other), - SyntaxWarning, stacklevel=2) - return None - return other | self - - def __xor__(self, other ): - """Implementation of ^ operator - returns C{Or}""" - if isinstance( other, basestring ): - other = Literal( other ) - if not isinstance( other, ParserElement ): - warnings.warn("Cannot combine element of type %s with ParserElement" % type(other), - SyntaxWarning, stacklevel=2) - return None - return Or( [ self, other ] ) - - def __rxor__(self, other ): - """Implementation of ^ operator when left operand is not a C{ParserElement}""" - if isinstance( other, basestring ): - other = Literal( other ) - if not isinstance( other, ParserElement ): - warnings.warn("Cannot combine element of type %s with ParserElement" % type(other), - SyntaxWarning, stacklevel=2) - return None - return other ^ self - - def __and__(self, other ): - """Implementation of & operator - returns C{Each}""" - if isinstance( other, basestring ): - other = Literal( other ) - if not isinstance( other, ParserElement ): - warnings.warn("Cannot combine element of type %s with ParserElement" % type(other), - SyntaxWarning, stacklevel=2) - return None - return Each( [ self, other ] ) - - def __rand__(self, other ): - """Implementation of & operator when left operand is not a C{ParserElement}""" - if isinstance( other, basestring ): - other = Literal( other ) - if not isinstance( other, ParserElement ): - warnings.warn("Cannot combine element of type %s with ParserElement" % type(other), - SyntaxWarning, stacklevel=2) - return None - return other & self - - def __invert__( self ): - """Implementation of ~ operator - returns C{NotAny}""" - return NotAny( self ) - - def __call__(self, name): - """Shortcut for C{setResultsName}, with C{listAllMatches=default}:: - userdata = Word(alphas).setResultsName("name") + Word(nums+"-").setResultsName("socsecno") - could be written as:: - userdata = Word(alphas)("name") + Word(nums+"-")("socsecno") - - If C{name} is given with a trailing C{'*'} character, then C{listAllMatches} will be - passed as C{True}. - """ - return self.setResultsName(name) - - def suppress( self ): - """Suppresses the output of this C{ParserElement}; useful to keep punctuation from - cluttering up returned output. - """ - return Suppress( self ) - - def leaveWhitespace( self ): - """Disables the skipping of whitespace before matching the characters in the - C{ParserElement}'s defined pattern. This is normally only used internally by - the pyparsing module, but may be needed in some whitespace-sensitive grammars. - """ - self.skipWhitespace = False - return self - - def setWhitespaceChars( self, chars ): - """Overrides the default whitespace chars - """ - self.skipWhitespace = True - self.whiteChars = chars - self.copyDefaultWhiteChars = False - return self - - def parseWithTabs( self ): - """Overrides default behavior to expand C{}s to spaces before parsing the input string. - Must be called before C{parseString} when the input grammar contains elements that - match C{} characters.""" - self.keepTabs = True - return self - - def ignore( self, other ): - """Define expression to be ignored (e.g., comments) while doing pattern - matching; may be called repeatedly, to define multiple comment or other - ignorable patterns. - """ - if isinstance( other, Suppress ): - if other not in self.ignoreExprs: - self.ignoreExprs.append( other.copy() ) - else: - self.ignoreExprs.append( Suppress( other.copy() ) ) - return self - - def setDebugActions( self, startAction, successAction, exceptionAction ): - """Enable display of debugging messages while doing pattern matching.""" - self.debugActions = (startAction or _defaultStartDebugAction, - successAction or _defaultSuccessDebugAction, - exceptionAction or _defaultExceptionDebugAction) - self.debug = True - return self - - def setDebug( self, flag=True ): - """Enable display of debugging messages while doing pattern matching. - Set C{flag} to True to enable, False to disable.""" - if flag: - self.setDebugActions( _defaultStartDebugAction, _defaultSuccessDebugAction, _defaultExceptionDebugAction ) - else: - self.debug = False - return self - - def __str__( self ): - return self.name - - def __repr__( self ): - return _ustr(self) - - def streamline( self ): - self.streamlined = True - self.strRepr = None - return self - - def checkRecursion( self, parseElementList ): - pass - - def validate( self, validateTrace=[] ): - """Check defined expressions for valid structure, check for infinite recursive definitions.""" - self.checkRecursion( [] ) - - def parseFile( self, file_or_filename, parseAll=False ): - """Execute the parse expression on the given file or filename. - If a filename is specified (instead of a file object), - the entire file is opened, read, and closed before parsing. - """ - try: - file_contents = file_or_filename.read() - except AttributeError: - f = open(file_or_filename, "rb") - file_contents = f.read() - f.close() - try: - return self.parseString(file_contents, parseAll) - except ParseBaseException: - # catch and re-raise exception from here, clears out pyparsing internal stack trace - exc = sys.exc_info()[1] - raise exc - - def getException(self): - return ParseException("",0,self.errmsg,self) - - def __getattr__(self,aname): - if aname == "myException": - self.myException = ret = self.getException(); - return ret; - else: - raise AttributeError("no such attribute " + aname) - - def __eq__(self,other): - if isinstance(other, ParserElement): - return self is other or self.__dict__ == other.__dict__ - elif isinstance(other, basestring): - try: - self.parseString(_ustr(other), parseAll=True) - return True - except ParseBaseException: - return False - else: - return super(ParserElement,self)==other - - def __ne__(self,other): - return not (self == other) - - def __hash__(self): - return hash(id(self)) - - def __req__(self,other): - return self == other - - def __rne__(self,other): - return not (self == other) - - -class Token(ParserElement): - """Abstract C{ParserElement} subclass, for defining atomic matching patterns.""" - def __init__( self ): - super(Token,self).__init__( savelist=False ) - - def setName(self, name): - s = super(Token,self).setName(name) - self.errmsg = "Expected " + self.name - return s - - -class Empty(Token): - """An empty token, will always match.""" - def __init__( self ): - super(Empty,self).__init__() - self.name = "Empty" - self.mayReturnEmpty = True - self.mayIndexError = False - - -class NoMatch(Token): - """A token that will never match.""" - def __init__( self ): - super(NoMatch,self).__init__() - self.name = "NoMatch" - self.mayReturnEmpty = True - self.mayIndexError = False - self.errmsg = "Unmatchable token" - - def parseImpl( self, instring, loc, doActions=True ): - exc = self.myException - exc.loc = loc - exc.pstr = instring - raise exc - - -class Literal(Token): - """Token to exactly match a specified string.""" - def __init__( self, matchString ): - super(Literal,self).__init__() - self.match = matchString - self.matchLen = len(matchString) - try: - self.firstMatchChar = matchString[0] - except IndexError: - warnings.warn("null string passed to Literal; use Empty() instead", - SyntaxWarning, stacklevel=2) - self.__class__ = Empty - self.name = '"%s"' % _ustr(self.match) - self.errmsg = "Expected " + self.name - self.mayReturnEmpty = False - self.mayIndexError = False - - # Performance tuning: this routine gets called a *lot* - # if this is a single character match string and the first character matches, - # short-circuit as quickly as possible, and avoid calling startswith - #~ @profile - def parseImpl( self, instring, loc, doActions=True ): - if (instring[loc] == self.firstMatchChar and - (self.matchLen==1 or instring.startswith(self.match,loc)) ): - return loc+self.matchLen, self.match - #~ raise ParseException( instring, loc, self.errmsg ) - exc = self.myException - exc.loc = loc - exc.pstr = instring - raise exc -_L = Literal - -class Keyword(Token): - """Token to exactly match a specified string as a keyword, that is, it must be - immediately followed by a non-keyword character. Compare with C{Literal}:: - Literal("if") will match the leading C{'if'} in C{'ifAndOnlyIf'}. - Keyword("if") will not; it will only match the leading C{'if'} in C{'if x=1'}, or C{'if(y==2)'} - Accepts two optional constructor arguments in addition to the keyword string: - C{identChars} is a string of characters that would be valid identifier characters, - defaulting to all alphanumerics + "_" and "$"; C{caseless} allows case-insensitive - matching, default is C{False}. - """ - DEFAULT_KEYWORD_CHARS = alphanums+"_$" - - def __init__( self, matchString, identChars=DEFAULT_KEYWORD_CHARS, caseless=False ): - super(Keyword,self).__init__() - self.match = matchString - self.matchLen = len(matchString) - try: - self.firstMatchChar = matchString[0] - except IndexError: - warnings.warn("null string passed to Keyword; use Empty() instead", - SyntaxWarning, stacklevel=2) - self.name = '"%s"' % self.match - self.errmsg = "Expected " + self.name - self.mayReturnEmpty = False - self.mayIndexError = False - self.caseless = caseless - if caseless: - self.caselessmatch = matchString.upper() - identChars = identChars.upper() - self.identChars = set(identChars) - - def parseImpl( self, instring, loc, doActions=True ): - if self.caseless: - if ( (instring[ loc:loc+self.matchLen ].upper() == self.caselessmatch) and - (loc >= len(instring)-self.matchLen or instring[loc+self.matchLen].upper() not in self.identChars) and - (loc == 0 or instring[loc-1].upper() not in self.identChars) ): - return loc+self.matchLen, self.match - else: - if (instring[loc] == self.firstMatchChar and - (self.matchLen==1 or instring.startswith(self.match,loc)) and - (loc >= len(instring)-self.matchLen or instring[loc+self.matchLen] not in self.identChars) and - (loc == 0 or instring[loc-1] not in self.identChars) ): - return loc+self.matchLen, self.match - #~ raise ParseException( instring, loc, self.errmsg ) - exc = self.myException - exc.loc = loc - exc.pstr = instring - raise exc - - def copy(self): - c = super(Keyword,self).copy() - c.identChars = Keyword.DEFAULT_KEYWORD_CHARS - return c - - def setDefaultKeywordChars( chars ): - """Overrides the default Keyword chars - """ - Keyword.DEFAULT_KEYWORD_CHARS = chars - setDefaultKeywordChars = staticmethod(setDefaultKeywordChars) - -class CaselessLiteral(Literal): - """Token to match a specified string, ignoring case of letters. - Note: the matched results will always be in the case of the given - match string, NOT the case of the input text. - """ - def __init__( self, matchString ): - super(CaselessLiteral,self).__init__( matchString.upper() ) - # Preserve the defining literal. - self.returnString = matchString - self.name = "'%s'" % self.returnString - self.errmsg = "Expected " + self.name - - def parseImpl( self, instring, loc, doActions=True ): - if instring[ loc:loc+self.matchLen ].upper() == self.match: - return loc+self.matchLen, self.returnString - #~ raise ParseException( instring, loc, self.errmsg ) - exc = self.myException - exc.loc = loc - exc.pstr = instring - raise exc - -class CaselessKeyword(Keyword): - def __init__( self, matchString, identChars=Keyword.DEFAULT_KEYWORD_CHARS ): - super(CaselessKeyword,self).__init__( matchString, identChars, caseless=True ) - - def parseImpl( self, instring, loc, doActions=True ): - if ( (instring[ loc:loc+self.matchLen ].upper() == self.caselessmatch) and - (loc >= len(instring)-self.matchLen or instring[loc+self.matchLen].upper() not in self.identChars) ): - return loc+self.matchLen, self.match - #~ raise ParseException( instring, loc, self.errmsg ) - exc = self.myException - exc.loc = loc - exc.pstr = instring - raise exc - -class Word(Token): - """Token for matching words composed of allowed character sets. - Defined with string containing all allowed initial characters, - an optional string containing allowed body characters (if omitted, - defaults to the initial character set), and an optional minimum, - maximum, and/or exact length. The default value for C{min} is 1 (a - minimum value < 1 is not valid); the default values for C{max} and C{exact} - are 0, meaning no maximum or exact length restriction. An optional - C{exclude} parameter can list characters that might be found in - the input C{bodyChars} string; useful to define a word of all printables - except for one or two characters, for instance. - """ - def __init__( self, initChars, bodyChars=None, min=1, max=0, exact=0, asKeyword=False, excludeChars=None ): - super(Word,self).__init__() - if excludeChars: - initChars = ''.join([c for c in initChars if c not in excludeChars]) - if bodyChars: - bodyChars = ''.join([c for c in bodyChars if c not in excludeChars]) - self.initCharsOrig = initChars - self.initChars = set(initChars) - if bodyChars : - self.bodyCharsOrig = bodyChars - self.bodyChars = set(bodyChars) - else: - self.bodyCharsOrig = initChars - self.bodyChars = set(initChars) - - self.maxSpecified = max > 0 - - if min < 1: - raise ValueError("cannot specify a minimum length < 1; use Optional(Word()) if zero-length word is permitted") - - self.minLen = min - - if max > 0: - self.maxLen = max - else: - self.maxLen = _MAX_INT - - if exact > 0: - self.maxLen = exact - self.minLen = exact - - self.name = _ustr(self) - self.errmsg = "Expected " + self.name - self.mayIndexError = False - self.asKeyword = asKeyword - - if ' ' not in self.initCharsOrig+self.bodyCharsOrig and (min==1 and max==0 and exact==0): - if self.bodyCharsOrig == self.initCharsOrig: - self.reString = "[%s]+" % _escapeRegexRangeChars(self.initCharsOrig) - elif len(self.bodyCharsOrig) == 1: - self.reString = "%s[%s]*" % \ - (re.escape(self.initCharsOrig), - _escapeRegexRangeChars(self.bodyCharsOrig),) - else: - self.reString = "[%s][%s]*" % \ - (_escapeRegexRangeChars(self.initCharsOrig), - _escapeRegexRangeChars(self.bodyCharsOrig),) - if self.asKeyword: - self.reString = r"\b"+self.reString+r"\b" - try: - self.re = re.compile( self.reString ) - except: - self.re = None - - def parseImpl( self, instring, loc, doActions=True ): - if self.re: - result = self.re.match(instring,loc) - if not result: - exc = self.myException - exc.loc = loc - exc.pstr = instring - raise exc - - loc = result.end() - return loc, result.group() - - if not(instring[ loc ] in self.initChars): - #~ raise ParseException( instring, loc, self.errmsg ) - exc = self.myException - exc.loc = loc - exc.pstr = instring - raise exc - start = loc - loc += 1 - instrlen = len(instring) - bodychars = self.bodyChars - maxloc = start + self.maxLen - maxloc = min( maxloc, instrlen ) - while loc < maxloc and instring[loc] in bodychars: - loc += 1 - - throwException = False - if loc - start < self.minLen: - throwException = True - if self.maxSpecified and loc < instrlen and instring[loc] in bodychars: - throwException = True - if self.asKeyword: - if (start>0 and instring[start-1] in bodychars) or (loc4: - return s[:4]+"..." - else: - return s - - if ( self.initCharsOrig != self.bodyCharsOrig ): - self.strRepr = "W:(%s,%s)" % ( charsAsStr(self.initCharsOrig), charsAsStr(self.bodyCharsOrig) ) - else: - self.strRepr = "W:(%s)" % charsAsStr(self.initCharsOrig) - - return self.strRepr - - -class Regex(Token): - """Token for matching strings that match a given regular expression. - Defined with string specifying the regular expression in a form recognized by the inbuilt Python re module. - """ - compiledREtype = type(re.compile("[A-Z]")) - def __init__( self, pattern, flags=0): - """The parameters C{pattern} and C{flags} are passed to the C{re.compile()} function as-is. See the Python C{re} module for an explanation of the acceptable patterns and flags.""" - super(Regex,self).__init__() - - if isinstance(pattern, basestring): - if len(pattern) == 0: - warnings.warn("null string passed to Regex; use Empty() instead", - SyntaxWarning, stacklevel=2) - - self.pattern = pattern - self.flags = flags - - try: - self.re = re.compile(self.pattern, self.flags) - self.reString = self.pattern - except sre_constants.error: - warnings.warn("invalid pattern (%s) passed to Regex" % pattern, - SyntaxWarning, stacklevel=2) - raise - - elif isinstance(pattern, Regex.compiledREtype): - self.re = pattern - self.pattern = \ - self.reString = str(pattern) - self.flags = flags - - else: - raise ValueError("Regex may only be constructed with a string or a compiled RE object") - - self.name = _ustr(self) - self.errmsg = "Expected " + self.name - self.mayIndexError = False - self.mayReturnEmpty = True - - def parseImpl( self, instring, loc, doActions=True ): - result = self.re.match(instring,loc) - if not result: - exc = self.myException - exc.loc = loc - exc.pstr = instring - raise exc - - loc = result.end() - d = result.groupdict() - ret = ParseResults(result.group()) - if d: - for k in d: - ret[k] = d[k] - return loc,ret - - def __str__( self ): - try: - return super(Regex,self).__str__() - except: - pass - - if self.strRepr is None: - self.strRepr = "Re:(%s)" % repr(self.pattern) - - return self.strRepr - - -class QuotedString(Token): - """Token for matching strings that are delimited by quoting characters. - """ - def __init__( self, quoteChar, escChar=None, escQuote=None, multiline=False, unquoteResults=True, endQuoteChar=None): - """ - Defined with the following parameters: - - quoteChar - string of one or more characters defining the quote delimiting string - - escChar - character to escape quotes, typically backslash (default=None) - - escQuote - special quote sequence to escape an embedded quote string (such as SQL's "" to escape an embedded ") (default=None) - - multiline - boolean indicating whether quotes can span multiple lines (default=False) - - unquoteResults - boolean indicating whether the matched text should be unquoted (default=True) - - endQuoteChar - string of one or more characters defining the end of the quote delimited string (default=None => same as quoteChar) - """ - super(QuotedString,self).__init__() - - # remove white space from quote chars - wont work anyway - quoteChar = quoteChar.strip() - if len(quoteChar) == 0: - warnings.warn("quoteChar cannot be the empty string",SyntaxWarning,stacklevel=2) - raise SyntaxError() - - if endQuoteChar is None: - endQuoteChar = quoteChar - else: - endQuoteChar = endQuoteChar.strip() - if len(endQuoteChar) == 0: - warnings.warn("endQuoteChar cannot be the empty string",SyntaxWarning,stacklevel=2) - raise SyntaxError() - - self.quoteChar = quoteChar - self.quoteCharLen = len(quoteChar) - self.firstQuoteChar = quoteChar[0] - self.endQuoteChar = endQuoteChar - self.endQuoteCharLen = len(endQuoteChar) - self.escChar = escChar - self.escQuote = escQuote - self.unquoteResults = unquoteResults - - if multiline: - self.flags = re.MULTILINE | re.DOTALL - self.pattern = r'%s(?:[^%s%s]' % \ - ( re.escape(self.quoteChar), - _escapeRegexRangeChars(self.endQuoteChar[0]), - (escChar is not None and _escapeRegexRangeChars(escChar) or '') ) - else: - self.flags = 0 - self.pattern = r'%s(?:[^%s\n\r%s]' % \ - ( re.escape(self.quoteChar), - _escapeRegexRangeChars(self.endQuoteChar[0]), - (escChar is not None and _escapeRegexRangeChars(escChar) or '') ) - if len(self.endQuoteChar) > 1: - self.pattern += ( - '|(?:' + ')|(?:'.join(["%s[^%s]" % (re.escape(self.endQuoteChar[:i]), - _escapeRegexRangeChars(self.endQuoteChar[i])) - for i in range(len(self.endQuoteChar)-1,0,-1)]) + ')' - ) - if escQuote: - self.pattern += (r'|(?:%s)' % re.escape(escQuote)) - if escChar: - self.pattern += (r'|(?:%s.)' % re.escape(escChar)) - charset = ''.join(set(self.quoteChar[0]+self.endQuoteChar[0])).replace('^',r'\^').replace('-',r'\-') - self.escCharReplacePattern = re.escape(self.escChar)+("([%s])" % charset) - self.pattern += (r')*%s' % re.escape(self.endQuoteChar)) - - try: - self.re = re.compile(self.pattern, self.flags) - self.reString = self.pattern - except sre_constants.error: - warnings.warn("invalid pattern (%s) passed to Regex" % self.pattern, - SyntaxWarning, stacklevel=2) - raise - - self.name = _ustr(self) - self.errmsg = "Expected " + self.name - self.mayIndexError = False - self.mayReturnEmpty = True - - def parseImpl( self, instring, loc, doActions=True ): - result = instring[loc] == self.firstQuoteChar and self.re.match(instring,loc) or None - if not result: - exc = self.myException - exc.loc = loc - exc.pstr = instring - raise exc - - loc = result.end() - ret = result.group() - - if self.unquoteResults: - - # strip off quotes - ret = ret[self.quoteCharLen:-self.endQuoteCharLen] - - if isinstance(ret,basestring): - # replace escaped characters - if self.escChar: - ret = re.sub(self.escCharReplacePattern,"\g<1>",ret) - - # replace escaped quotes - if self.escQuote: - ret = ret.replace(self.escQuote, self.endQuoteChar) - - return loc, ret - - def __str__( self ): - try: - return super(QuotedString,self).__str__() - except: - pass - - if self.strRepr is None: - self.strRepr = "quoted string, starting with %s ending with %s" % (self.quoteChar, self.endQuoteChar) - - return self.strRepr - - -class CharsNotIn(Token): - """Token for matching words composed of characters *not* in a given set. - Defined with string containing all disallowed characters, and an optional - minimum, maximum, and/or exact length. The default value for C{min} is 1 (a - minimum value < 1 is not valid); the default values for C{max} and C{exact} - are 0, meaning no maximum or exact length restriction. - """ - def __init__( self, notChars, min=1, max=0, exact=0 ): - super(CharsNotIn,self).__init__() - self.skipWhitespace = False - self.notChars = notChars - - if min < 1: - raise ValueError("cannot specify a minimum length < 1; use Optional(CharsNotIn()) if zero-length char group is permitted") - - self.minLen = min - - if max > 0: - self.maxLen = max - else: - self.maxLen = _MAX_INT - - if exact > 0: - self.maxLen = exact - self.minLen = exact - - self.name = _ustr(self) - self.errmsg = "Expected " + self.name - self.mayReturnEmpty = ( self.minLen == 0 ) - self.mayIndexError = False - - def parseImpl( self, instring, loc, doActions=True ): - if instring[loc] in self.notChars: - #~ raise ParseException( instring, loc, self.errmsg ) - exc = self.myException - exc.loc = loc - exc.pstr = instring - raise exc - - start = loc - loc += 1 - notchars = self.notChars - maxlen = min( start+self.maxLen, len(instring) ) - while loc < maxlen and \ - (instring[loc] not in notchars): - loc += 1 - - if loc - start < self.minLen: - #~ raise ParseException( instring, loc, self.errmsg ) - exc = self.myException - exc.loc = loc - exc.pstr = instring - raise exc - - return loc, instring[start:loc] - - def __str__( self ): - try: - return super(CharsNotIn, self).__str__() - except: - pass - - if self.strRepr is None: - if len(self.notChars) > 4: - self.strRepr = "!W:(%s...)" % self.notChars[:4] - else: - self.strRepr = "!W:(%s)" % self.notChars - - return self.strRepr - -class White(Token): - """Special matching class for matching whitespace. Normally, whitespace is ignored - by pyparsing grammars. This class is included when some whitespace structures - are significant. Define with a string containing the whitespace characters to be - matched; default is C{" \\t\\r\\n"}. Also takes optional C{min}, C{max}, and C{exact} arguments, - as defined for the C{Word} class.""" - whiteStrs = { - " " : "", - "\t": "", - "\n": "", - "\r": "", - "\f": "", - } - def __init__(self, ws=" \t\r\n", min=1, max=0, exact=0): - super(White,self).__init__() - self.matchWhite = ws - self.setWhitespaceChars( "".join([c for c in self.whiteChars if c not in self.matchWhite]) ) - #~ self.leaveWhitespace() - self.name = ("".join([White.whiteStrs[c] for c in self.matchWhite])) - self.mayReturnEmpty = True - self.errmsg = "Expected " + self.name - - self.minLen = min - - if max > 0: - self.maxLen = max - else: - self.maxLen = _MAX_INT - - if exact > 0: - self.maxLen = exact - self.minLen = exact - - def parseImpl( self, instring, loc, doActions=True ): - if not(instring[ loc ] in self.matchWhite): - #~ raise ParseException( instring, loc, self.errmsg ) - exc = self.myException - exc.loc = loc - exc.pstr = instring - raise exc - start = loc - loc += 1 - maxloc = start + self.maxLen - maxloc = min( maxloc, len(instring) ) - while loc < maxloc and instring[loc] in self.matchWhite: - loc += 1 - - if loc - start < self.minLen: - #~ raise ParseException( instring, loc, self.errmsg ) - exc = self.myException - exc.loc = loc - exc.pstr = instring - raise exc - - return loc, instring[start:loc] - - -class _PositionToken(Token): - def __init__( self ): - super(_PositionToken,self).__init__() - self.name=self.__class__.__name__ - self.mayReturnEmpty = True - self.mayIndexError = False - -class GoToColumn(_PositionToken): - """Token to advance to a specific column of input text; useful for tabular report scraping.""" - def __init__( self, colno ): - super(GoToColumn,self).__init__() - self.col = colno - - def preParse( self, instring, loc ): - if col(loc,instring) != self.col: - instrlen = len(instring) - if self.ignoreExprs: - loc = self._skipIgnorables( instring, loc ) - while loc < instrlen and instring[loc].isspace() and col( loc, instring ) != self.col : - loc += 1 - return loc - - def parseImpl( self, instring, loc, doActions=True ): - thiscol = col( loc, instring ) - if thiscol > self.col: - raise ParseException( instring, loc, "Text not in expected column", self ) - newloc = loc + self.col - thiscol - ret = instring[ loc: newloc ] - return newloc, ret - -class LineStart(_PositionToken): - """Matches if current position is at the beginning of a line within the parse string""" - def __init__( self ): - super(LineStart,self).__init__() - self.setWhitespaceChars( ParserElement.DEFAULT_WHITE_CHARS.replace("\n","") ) - self.errmsg = "Expected start of line" - - def preParse( self, instring, loc ): - preloc = super(LineStart,self).preParse(instring,loc) - if instring[preloc] == "\n": - loc += 1 - return loc - - def parseImpl( self, instring, loc, doActions=True ): - if not( loc==0 or - (loc == self.preParse( instring, 0 )) or - (instring[loc-1] == "\n") ): #col(loc, instring) != 1: - #~ raise ParseException( instring, loc, "Expected start of line" ) - exc = self.myException - exc.loc = loc - exc.pstr = instring - raise exc - return loc, [] - -class LineEnd(_PositionToken): - """Matches if current position is at the end of a line within the parse string""" - def __init__( self ): - super(LineEnd,self).__init__() - self.setWhitespaceChars( ParserElement.DEFAULT_WHITE_CHARS.replace("\n","") ) - self.errmsg = "Expected end of line" - - def parseImpl( self, instring, loc, doActions=True ): - if loc len(instring): - return loc, [] - else: - exc = self.myException - exc.loc = loc - exc.pstr = instring - raise exc - -class WordStart(_PositionToken): - """Matches if the current position is at the beginning of a Word, and - is not preceded by any character in a given set of C{wordChars} - (default=C{printables}). To emulate the C{\b} behavior of regular expressions, - use C{WordStart(alphanums)}. C{WordStart} will also match at the beginning of - the string being parsed, or at the beginning of a line. - """ - def __init__(self, wordChars = printables): - super(WordStart,self).__init__() - self.wordChars = set(wordChars) - self.errmsg = "Not at the start of a word" - - def parseImpl(self, instring, loc, doActions=True ): - if loc != 0: - if (instring[loc-1] in self.wordChars or - instring[loc] not in self.wordChars): - exc = self.myException - exc.loc = loc - exc.pstr = instring - raise exc - return loc, [] - -class WordEnd(_PositionToken): - """Matches if the current position is at the end of a Word, and - is not followed by any character in a given set of C{wordChars} - (default=C{printables}). To emulate the C{\b} behavior of regular expressions, - use C{WordEnd(alphanums)}. C{WordEnd} will also match at the end of - the string being parsed, or at the end of a line. - """ - def __init__(self, wordChars = printables): - super(WordEnd,self).__init__() - self.wordChars = set(wordChars) - self.skipWhitespace = False - self.errmsg = "Not at the end of a word" - - def parseImpl(self, instring, loc, doActions=True ): - instrlen = len(instring) - if instrlen>0 and loc maxExcLoc: - maxException = err - maxExcLoc = err.loc - except IndexError: - if len(instring) > maxExcLoc: - maxException = ParseException(instring,len(instring),e.errmsg,self) - maxExcLoc = len(instring) - else: - if loc2 > maxMatchLoc: - maxMatchLoc = loc2 - maxMatchExp = e - - if maxMatchLoc < 0: - if maxException is not None: - raise maxException - else: - raise ParseException(instring, loc, "no defined alternatives to match", self) - - return maxMatchExp._parse( instring, loc, doActions ) - - def __ixor__(self, other ): - if isinstance( other, basestring ): - other = Literal( other ) - return self.append( other ) #Or( [ self, other ] ) - - def __str__( self ): - if hasattr(self,"name"): - return self.name - - if self.strRepr is None: - self.strRepr = "{" + " ^ ".join( [ _ustr(e) for e in self.exprs ] ) + "}" - - return self.strRepr - - def checkRecursion( self, parseElementList ): - subRecCheckList = parseElementList[:] + [ self ] - for e in self.exprs: - e.checkRecursion( subRecCheckList ) - - -class MatchFirst(ParseExpression): - """Requires that at least one C{ParseExpression} is found. - If two expressions match, the first one listed is the one that will match. - May be constructed using the C{'|'} operator. - """ - def __init__( self, exprs, savelist = False ): - super(MatchFirst,self).__init__(exprs, savelist) - if exprs: - self.mayReturnEmpty = False - for e in self.exprs: - if e.mayReturnEmpty: - self.mayReturnEmpty = True - break - else: - self.mayReturnEmpty = True - - def parseImpl( self, instring, loc, doActions=True ): - maxExcLoc = -1 - maxException = None - for e in self.exprs: - try: - ret = e._parse( instring, loc, doActions ) - return ret - except ParseException, err: - if err.loc > maxExcLoc: - maxException = err - maxExcLoc = err.loc - except IndexError: - if len(instring) > maxExcLoc: - maxException = ParseException(instring,len(instring),e.errmsg,self) - maxExcLoc = len(instring) - - # only got here if no expression matched, raise exception for match that made it the furthest - else: - if maxException is not None: - raise maxException - else: - raise ParseException(instring, loc, "no defined alternatives to match", self) - - def __ior__(self, other ): - if isinstance( other, basestring ): - other = Literal( other ) - return self.append( other ) #MatchFirst( [ self, other ] ) - - def __str__( self ): - if hasattr(self,"name"): - return self.name - - if self.strRepr is None: - self.strRepr = "{" + " | ".join( [ _ustr(e) for e in self.exprs ] ) + "}" - - return self.strRepr - - def checkRecursion( self, parseElementList ): - subRecCheckList = parseElementList[:] + [ self ] - for e in self.exprs: - e.checkRecursion( subRecCheckList ) - - -class Each(ParseExpression): - """Requires all given C{ParseExpression}s to be found, but in any order. - Expressions may be separated by whitespace. - May be constructed using the C{'&'} operator. - """ - def __init__( self, exprs, savelist = True ): - super(Each,self).__init__(exprs, savelist) - self.mayReturnEmpty = True - for e in self.exprs: - if not e.mayReturnEmpty: - self.mayReturnEmpty = False - break - self.skipWhitespace = True - self.initExprGroups = True - - def parseImpl( self, instring, loc, doActions=True ): - if self.initExprGroups: - opt1 = [ e.expr for e in self.exprs if isinstance(e,Optional) ] - opt2 = [ e for e in self.exprs if e.mayReturnEmpty and e not in opt1 ] - self.optionals = opt1 + opt2 - self.multioptionals = [ e.expr for e in self.exprs if isinstance(e,ZeroOrMore) ] - self.multirequired = [ e.expr for e in self.exprs if isinstance(e,OneOrMore) ] - self.required = [ e for e in self.exprs if not isinstance(e,(Optional,ZeroOrMore,OneOrMore)) ] - self.required += self.multirequired - self.initExprGroups = False - tmpLoc = loc - tmpReqd = self.required[:] - tmpOpt = self.optionals[:] - matchOrder = [] - - keepMatching = True - while keepMatching: - tmpExprs = tmpReqd + tmpOpt + self.multioptionals + self.multirequired - failed = [] - for e in tmpExprs: - try: - tmpLoc = e.tryParse( instring, tmpLoc ) - except ParseException: - failed.append(e) - else: - matchOrder.append(e) - if e in tmpReqd: - tmpReqd.remove(e) - elif e in tmpOpt: - tmpOpt.remove(e) - if len(failed) == len(tmpExprs): - keepMatching = False - - if tmpReqd: - missing = ", ".join( [ _ustr(e) for e in tmpReqd ] ) - raise ParseException(instring,loc,"Missing one or more required elements (%s)" % missing ) - - # add any unmatched Optionals, in case they have default values defined - matchOrder += [e for e in self.exprs if isinstance(e,Optional) and e.expr in tmpOpt] - - resultlist = [] - for e in matchOrder: - loc,results = e._parse(instring,loc,doActions) - resultlist.append(results) - - finalResults = ParseResults([]) - for r in resultlist: - dups = {} - for k in r.keys(): - if k in finalResults.keys(): - tmp = ParseResults(finalResults[k]) - tmp += ParseResults(r[k]) - dups[k] = tmp - finalResults += ParseResults(r) - for k,v in dups.items(): - finalResults[k] = v - return loc, finalResults - - def __str__( self ): - if hasattr(self,"name"): - return self.name - - if self.strRepr is None: - self.strRepr = "{" + " & ".join( [ _ustr(e) for e in self.exprs ] ) + "}" - - return self.strRepr - - def checkRecursion( self, parseElementList ): - subRecCheckList = parseElementList[:] + [ self ] - for e in self.exprs: - e.checkRecursion( subRecCheckList ) - - -class ParseElementEnhance(ParserElement): - """Abstract subclass of C{ParserElement}, for combining and post-processing parsed tokens.""" - def __init__( self, expr, savelist=False ): - super(ParseElementEnhance,self).__init__(savelist) - if isinstance( expr, basestring ): - expr = Literal(expr) - self.expr = expr - self.strRepr = None - if expr is not None: - self.mayIndexError = expr.mayIndexError - self.mayReturnEmpty = expr.mayReturnEmpty - self.setWhitespaceChars( expr.whiteChars ) - self.skipWhitespace = expr.skipWhitespace - self.saveAsList = expr.saveAsList - self.callPreparse = expr.callPreparse - self.ignoreExprs.extend(expr.ignoreExprs) - - def parseImpl( self, instring, loc, doActions=True ): - if self.expr is not None: - return self.expr._parse( instring, loc, doActions, callPreParse=False ) - else: - raise ParseException("",loc,self.errmsg,self) - - def leaveWhitespace( self ): - self.skipWhitespace = False - self.expr = self.expr.copy() - if self.expr is not None: - self.expr.leaveWhitespace() - return self - - def ignore( self, other ): - if isinstance( other, Suppress ): - if other not in self.ignoreExprs: - super( ParseElementEnhance, self).ignore( other ) - if self.expr is not None: - self.expr.ignore( self.ignoreExprs[-1] ) - else: - super( ParseElementEnhance, self).ignore( other ) - if self.expr is not None: - self.expr.ignore( self.ignoreExprs[-1] ) - return self - - def streamline( self ): - super(ParseElementEnhance,self).streamline() - if self.expr is not None: - self.expr.streamline() - return self - - def checkRecursion( self, parseElementList ): - if self in parseElementList: - raise RecursiveGrammarException( parseElementList+[self] ) - subRecCheckList = parseElementList[:] + [ self ] - if self.expr is not None: - self.expr.checkRecursion( subRecCheckList ) - - def validate( self, validateTrace=[] ): - tmp = validateTrace[:]+[self] - if self.expr is not None: - self.expr.validate(tmp) - self.checkRecursion( [] ) - - def __str__( self ): - try: - return super(ParseElementEnhance,self).__str__() - except: - pass - - if self.strRepr is None and self.expr is not None: - self.strRepr = "%s:(%s)" % ( self.__class__.__name__, _ustr(self.expr) ) - return self.strRepr - - -class FollowedBy(ParseElementEnhance): - """Lookahead matching of the given parse expression. C{FollowedBy} - does *not* advance the parsing position within the input string, it only - verifies that the specified parse expression matches at the current - position. C{FollowedBy} always returns a null token list.""" - def __init__( self, expr ): - super(FollowedBy,self).__init__(expr) - self.mayReturnEmpty = True - - def parseImpl( self, instring, loc, doActions=True ): - self.expr.tryParse( instring, loc ) - return loc, [] - - -class NotAny(ParseElementEnhance): - """Lookahead to disallow matching with the given parse expression. C{NotAny} - does *not* advance the parsing position within the input string, it only - verifies that the specified parse expression does *not* match at the current - position. Also, C{NotAny} does *not* skip over leading whitespace. C{NotAny} - always returns a null token list. May be constructed using the '~' operator.""" - def __init__( self, expr ): - super(NotAny,self).__init__(expr) - #~ self.leaveWhitespace() - self.skipWhitespace = False # do NOT use self.leaveWhitespace(), don't want to propagate to exprs - self.mayReturnEmpty = True - self.errmsg = "Found unwanted token, "+_ustr(self.expr) - - def parseImpl( self, instring, loc, doActions=True ): - try: - self.expr.tryParse( instring, loc ) - except (ParseException,IndexError): - pass - else: - #~ raise ParseException(instring, loc, self.errmsg ) - exc = self.myException - exc.loc = loc - exc.pstr = instring - raise exc - return loc, [] - - def __str__( self ): - if hasattr(self,"name"): - return self.name - - if self.strRepr is None: - self.strRepr = "~{" + _ustr(self.expr) + "}" - - return self.strRepr - - -class ZeroOrMore(ParseElementEnhance): - """Optional repetition of zero or more of the given expression.""" - def __init__( self, expr ): - super(ZeroOrMore,self).__init__(expr) - self.mayReturnEmpty = True - - def parseImpl( self, instring, loc, doActions=True ): - tokens = [] - try: - loc, tokens = self.expr._parse( instring, loc, doActions, callPreParse=False ) - hasIgnoreExprs = ( len(self.ignoreExprs) > 0 ) - while 1: - if hasIgnoreExprs: - preloc = self._skipIgnorables( instring, loc ) - else: - preloc = loc - loc, tmptokens = self.expr._parse( instring, preloc, doActions ) - if tmptokens or tmptokens.keys(): - tokens += tmptokens - except (ParseException,IndexError): - pass - - return loc, tokens - - def __str__( self ): - if hasattr(self,"name"): - return self.name - - if self.strRepr is None: - self.strRepr = "[" + _ustr(self.expr) + "]..." - - return self.strRepr - - def setResultsName( self, name, listAllMatches=False ): - ret = super(ZeroOrMore,self).setResultsName(name,listAllMatches) - ret.saveAsList = True - return ret - - -class OneOrMore(ParseElementEnhance): - """Repetition of one or more of the given expression.""" - def parseImpl( self, instring, loc, doActions=True ): - # must be at least one - loc, tokens = self.expr._parse( instring, loc, doActions, callPreParse=False ) - try: - hasIgnoreExprs = ( len(self.ignoreExprs) > 0 ) - while 1: - if hasIgnoreExprs: - preloc = self._skipIgnorables( instring, loc ) - else: - preloc = loc - loc, tmptokens = self.expr._parse( instring, preloc, doActions ) - if tmptokens or tmptokens.keys(): - tokens += tmptokens - except (ParseException,IndexError): - pass - - return loc, tokens - - def __str__( self ): - if hasattr(self,"name"): - return self.name - - if self.strRepr is None: - self.strRepr = "{" + _ustr(self.expr) + "}..." - - return self.strRepr - - def setResultsName( self, name, listAllMatches=False ): - ret = super(OneOrMore,self).setResultsName(name,listAllMatches) - ret.saveAsList = True - return ret - -class _NullToken(object): - def __bool__(self): - return False - __nonzero__ = __bool__ - def __str__(self): - return "" - -_optionalNotMatched = _NullToken() -class Optional(ParseElementEnhance): - """Optional matching of the given expression. - A default return string can also be specified, if the optional expression - is not found. - """ - def __init__( self, exprs, default=_optionalNotMatched ): - super(Optional,self).__init__( exprs, savelist=False ) - self.defaultValue = default - self.mayReturnEmpty = True - - def parseImpl( self, instring, loc, doActions=True ): - try: - loc, tokens = self.expr._parse( instring, loc, doActions, callPreParse=False ) - except (ParseException,IndexError): - if self.defaultValue is not _optionalNotMatched: - if self.expr.resultsName: - tokens = ParseResults([ self.defaultValue ]) - tokens[self.expr.resultsName] = self.defaultValue - else: - tokens = [ self.defaultValue ] - else: - tokens = [] - return loc, tokens - - def __str__( self ): - if hasattr(self,"name"): - return self.name - - if self.strRepr is None: - self.strRepr = "[" + _ustr(self.expr) + "]" - - return self.strRepr - - -class SkipTo(ParseElementEnhance): - """Token for skipping over all undefined text until the matched expression is found. - If C{include} is set to true, the matched expression is also parsed (the skipped text - and matched expression are returned as a 2-element list). The C{ignore} - argument is used to define grammars (typically quoted strings and comments) that - might contain false matches. - """ - def __init__( self, other, include=False, ignore=None, failOn=None ): - super( SkipTo, self ).__init__( other ) - self.ignoreExpr = ignore - self.mayReturnEmpty = True - self.mayIndexError = False - self.includeMatch = include - self.asList = False - if failOn is not None and isinstance(failOn, basestring): - self.failOn = Literal(failOn) - else: - self.failOn = failOn - self.errmsg = "No match found for "+_ustr(self.expr) - - def parseImpl( self, instring, loc, doActions=True ): - startLoc = loc - instrlen = len(instring) - expr = self.expr - failParse = False - while loc <= instrlen: - try: - if self.failOn: - try: - self.failOn.tryParse(instring, loc) - except ParseBaseException: - pass - else: - failParse = True - raise ParseException(instring, loc, "Found expression " + str(self.failOn)) - failParse = False - if self.ignoreExpr is not None: - while 1: - try: - loc = self.ignoreExpr.tryParse(instring,loc) - # print "found ignoreExpr, advance to", loc - except ParseBaseException: - break - expr._parse( instring, loc, doActions=False, callPreParse=False ) - skipText = instring[startLoc:loc] - if self.includeMatch: - loc,mat = expr._parse(instring,loc,doActions,callPreParse=False) - if mat: - skipRes = ParseResults( skipText ) - skipRes += mat - return loc, [ skipRes ] - else: - return loc, [ skipText ] - else: - return loc, [ skipText ] - except (ParseException,IndexError): - if failParse: - raise - else: - loc += 1 - exc = self.myException - exc.loc = loc - exc.pstr = instring - raise exc - -class Forward(ParseElementEnhance): - """Forward declaration of an expression to be defined later - - used for recursive grammars, such as algebraic infix notation. - When the expression is known, it is assigned to the C{Forward} variable using the '<<' operator. - - Note: take care when assigning to C{Forward} not to overlook precedence of operators. - Specifically, '|' has a lower precedence than '<<', so that:: - fwdExpr << a | b | c - will actually be evaluated as:: - (fwdExpr << a) | b | c - thereby leaving b and c out as parseable alternatives. It is recommended that you - explicitly group the values inserted into the C{Forward}:: - fwdExpr << (a | b | c) - """ - def __init__( self, other=None ): - super(Forward,self).__init__( other, savelist=False ) - - def __lshift__( self, other ): - if isinstance( other, basestring ): - other = Literal(other) - self.expr = other - self.mayReturnEmpty = other.mayReturnEmpty - self.strRepr = None - self.mayIndexError = self.expr.mayIndexError - self.mayReturnEmpty = self.expr.mayReturnEmpty - self.setWhitespaceChars( self.expr.whiteChars ) - self.skipWhitespace = self.expr.skipWhitespace - self.saveAsList = self.expr.saveAsList - self.ignoreExprs.extend(self.expr.ignoreExprs) - return None - - def leaveWhitespace( self ): - self.skipWhitespace = False - return self - - def streamline( self ): - if not self.streamlined: - self.streamlined = True - if self.expr is not None: - self.expr.streamline() - return self - - def validate( self, validateTrace=[] ): - if self not in validateTrace: - tmp = validateTrace[:]+[self] - if self.expr is not None: - self.expr.validate(tmp) - self.checkRecursion([]) - - def __str__( self ): - if hasattr(self,"name"): - return self.name - - self._revertClass = self.__class__ - self.__class__ = _ForwardNoRecurse - try: - if self.expr is not None: - retString = _ustr(self.expr) - else: - retString = "None" - finally: - self.__class__ = self._revertClass - return self.__class__.__name__ + ": " + retString - - def copy(self): - if self.expr is not None: - return super(Forward,self).copy() - else: - ret = Forward() - ret << self - return ret - -class _ForwardNoRecurse(Forward): - def __str__( self ): - return "..." - -class TokenConverter(ParseElementEnhance): - """Abstract subclass of C{ParseExpression}, for converting parsed results.""" - def __init__( self, expr, savelist=False ): - super(TokenConverter,self).__init__( expr )#, savelist ) - self.saveAsList = False - -class Upcase(TokenConverter): - """Converter to upper case all matching tokens.""" - def __init__(self, *args): - super(Upcase,self).__init__(*args) - warnings.warn("Upcase class is deprecated, use upcaseTokens parse action instead", - DeprecationWarning,stacklevel=2) - - def postParse( self, instring, loc, tokenlist ): - return list(map( string.upper, tokenlist )) - - -class Combine(TokenConverter): - """Converter to concatenate all matching tokens to a single string. - By default, the matching patterns must also be contiguous in the input string; - this can be disabled by specifying C{'adjacent=False'} in the constructor. - """ - def __init__( self, expr, joinString="", adjacent=True ): - super(Combine,self).__init__( expr ) - # suppress whitespace-stripping in contained parse expressions, but re-enable it on the Combine itself - if adjacent: - self.leaveWhitespace() - self.adjacent = adjacent - self.skipWhitespace = True - self.joinString = joinString - self.callPreparse = True - - def ignore( self, other ): - if self.adjacent: - ParserElement.ignore(self, other) - else: - super( Combine, self).ignore( other ) - return self - - def postParse( self, instring, loc, tokenlist ): - retToks = tokenlist.copy() - del retToks[:] - retToks += ParseResults([ "".join(tokenlist._asStringList(self.joinString)) ], modal=self.modalResults) - - if self.resultsName and len(retToks.keys())>0: - return [ retToks ] - else: - return retToks - -class Group(TokenConverter): - """Converter to return the matched tokens as a list - useful for returning tokens of C{ZeroOrMore} and C{OneOrMore} expressions.""" - def __init__( self, expr ): - super(Group,self).__init__( expr ) - self.saveAsList = True - - def postParse( self, instring, loc, tokenlist ): - return [ tokenlist ] - -class Dict(TokenConverter): - """Converter to return a repetitive expression as a list, but also as a dictionary. - Each element can also be referenced using the first token in the expression as its key. - Useful for tabular report scraping when the first column can be used as a item key. - """ - def __init__( self, exprs ): - super(Dict,self).__init__( exprs ) - self.saveAsList = True - - def postParse( self, instring, loc, tokenlist ): - for i,tok in enumerate(tokenlist): - if len(tok) == 0: - continue - ikey = tok[0] - if isinstance(ikey,int): - ikey = _ustr(tok[0]).strip() - if len(tok)==1: - tokenlist[ikey] = _ParseResultsWithOffset("",i) - elif len(tok)==2 and not isinstance(tok[1],ParseResults): - tokenlist[ikey] = _ParseResultsWithOffset(tok[1],i) - else: - dictvalue = tok.copy() #ParseResults(i) - del dictvalue[0] - if len(dictvalue)!= 1 or (isinstance(dictvalue,ParseResults) and dictvalue.keys()): - tokenlist[ikey] = _ParseResultsWithOffset(dictvalue,i) - else: - tokenlist[ikey] = _ParseResultsWithOffset(dictvalue[0],i) - - if self.resultsName: - return [ tokenlist ] - else: - return tokenlist - - -class Suppress(TokenConverter): - """Converter for ignoring the results of a parsed expression.""" - def postParse( self, instring, loc, tokenlist ): - return [] - - def suppress( self ): - return self - - -class OnlyOnce(object): - """Wrapper for parse actions, to ensure they are only called once.""" - def __init__(self, methodCall): - self.callable = _trim_arity(methodCall) - self.called = False - def __call__(self,s,l,t): - if not self.called: - results = self.callable(s,l,t) - self.called = True - return results - raise ParseException(s,l,"") - def reset(self): - self.called = False - -def traceParseAction(f): - """Decorator for debugging parse actions.""" - f = _trim_arity(f) - def z(*paArgs): - thisFunc = f.func_name - s,l,t = paArgs[-3:] - if len(paArgs)>3: - thisFunc = paArgs[0].__class__.__name__ + '.' + thisFunc - sys.stderr.write( ">>entering %s(line: '%s', %d, %s)\n" % (thisFunc,line(l,s),l,t) ) - try: - ret = f(*paArgs) - except Exception: - exc = sys.exc_info()[1] - sys.stderr.write( "<", "|".join( [ _escapeRegexChars(sym) for sym in symbols] )) - try: - if len(symbols)==len("".join(symbols)): - return Regex( "[%s]" % "".join( [ _escapeRegexRangeChars(sym) for sym in symbols] ) ) - else: - return Regex( "|".join( [ re.escape(sym) for sym in symbols] ) ) - except: - warnings.warn("Exception creating Regex for oneOf, building MatchFirst", - SyntaxWarning, stacklevel=2) - - - # last resort, just use MatchFirst - return MatchFirst( [ parseElementClass(sym) for sym in symbols ] ) - -def dictOf( key, value ): - """Helper to easily and clearly define a dictionary by specifying the respective patterns - for the key and value. Takes care of defining the C{Dict}, C{ZeroOrMore}, and C{Group} tokens - in the proper order. The key pattern can include delimiting markers or punctuation, - as long as they are suppressed, thereby leaving the significant key text. The value - pattern can include named results, so that the C{Dict} results can include named token - fields. - """ - return Dict( ZeroOrMore( Group ( key + value ) ) ) - -def originalTextFor(expr, asString=True): - """Helper to return the original, untokenized text for a given expression. Useful to - restore the parsed fields of an HTML start tag into the raw tag text itself, or to - revert separate tokens with intervening whitespace back to the original matching - input text. Simpler to use than the parse action C{L{keepOriginalText}}, and does not - require the inspect module to chase up the call stack. By default, returns a - string containing the original parsed text. - - If the optional C{asString} argument is passed as C{False}, then the return value is a - C{ParseResults} containing any results names that were originally matched, and a - single token containing the original matched text from the input string. So if - the expression passed to C{L{originalTextFor}} contains expressions with defined - results names, you must set C{asString} to C{False} if you want to preserve those - results name values.""" - locMarker = Empty().setParseAction(lambda s,loc,t: loc) - endlocMarker = locMarker.copy() - endlocMarker.callPreparse = False - matchExpr = locMarker("_original_start") + expr + endlocMarker("_original_end") - if asString: - extractText = lambda s,l,t: s[t._original_start:t._original_end] - else: - def extractText(s,l,t): - del t[:] - t.insert(0, s[t._original_start:t._original_end]) - del t["_original_start"] - del t["_original_end"] - matchExpr.setParseAction(extractText) - return matchExpr - -def ungroup(expr): - """Helper to undo pyparsing's default grouping of And expressions, even - if all but one are non-empty.""" - return TokenConverter(expr).setParseAction(lambda t:t[0]) - -# convenience constants for positional expressions -empty = Empty().setName("empty") -lineStart = LineStart().setName("lineStart") -lineEnd = LineEnd().setName("lineEnd") -stringStart = StringStart().setName("stringStart") -stringEnd = StringEnd().setName("stringEnd") - -_escapedPunc = Word( _bslash, r"\[]-*.$+^?()~ ", exact=2 ).setParseAction(lambda s,l,t:t[0][1]) -_printables_less_backslash = "".join([ c for c in printables if c not in r"\]" ]) -_escapedHexChar = Regex(r"\\0?[xX][0-9a-fA-F]+").setParseAction(lambda s,l,t:unichr(int(t[0][1:],16))) -_escapedOctChar = Regex(r"\\0[0-7]+").setParseAction(lambda s,l,t:unichr(int(t[0][1:],8))) -_singleChar = _escapedPunc | _escapedHexChar | _escapedOctChar | Word(_printables_less_backslash,exact=1) -_charRange = Group(_singleChar + Suppress("-") + _singleChar) -_reBracketExpr = Literal("[") + Optional("^").setResultsName("negate") + Group( OneOrMore( _charRange | _singleChar ) ).setResultsName("body") + "]" - -_expanded = lambda p: (isinstance(p,ParseResults) and ''.join([ unichr(c) for c in range(ord(p[0]),ord(p[1])+1) ]) or p) - -def srange(s): - r"""Helper to easily define string ranges for use in Word construction. Borrows - syntax from regexp '[]' string range definitions:: - srange("[0-9]") -> "0123456789" - srange("[a-z]") -> "abcdefghijklmnopqrstuvwxyz" - srange("[a-z$_]") -> "abcdefghijklmnopqrstuvwxyz$_" - The input string must be enclosed in []'s, and the returned string is the expanded - character set joined into a single string. - The values enclosed in the []'s may be:: - a single character - an escaped character with a leading backslash (such as \- or \]) - an escaped hex character with a leading '\x' (\x21, which is a '!' character) - (\0x## is also supported for backwards compatibility) - an escaped octal character with a leading '\0' (\041, which is a '!' character) - a range of any of the above, separated by a dash ('a-z', etc.) - any combination of the above ('aeiouy', 'a-zA-Z0-9_$', etc.) - """ - try: - return "".join([_expanded(part) for part in _reBracketExpr.parseString(s).body]) - except: - return "" - -def matchOnlyAtCol(n): - """Helper method for defining parse actions that require matching at a specific - column in the input text. - """ - def verifyCol(strg,locn,toks): - if col(locn,strg) != n: - raise ParseException(strg,locn,"matched token not at column %d" % n) - return verifyCol - -def replaceWith(replStr): - """Helper method for common parse actions that simply return a literal value. Especially - useful when used with C{transformString()}. - """ - def _replFunc(*args): - return [replStr] - return _replFunc - -def removeQuotes(s,l,t): - """Helper parse action for removing quotation marks from parsed quoted strings. - To use, add this parse action to quoted string using:: - quotedString.setParseAction( removeQuotes ) - """ - return t[0][1:-1] - -def upcaseTokens(s,l,t): - """Helper parse action to convert tokens to upper case.""" - return [ tt.upper() for tt in map(_ustr,t) ] - -def downcaseTokens(s,l,t): - """Helper parse action to convert tokens to lower case.""" - return [ tt.lower() for tt in map(_ustr,t) ] - -def keepOriginalText(s,startLoc,t): - """DEPRECATED - use new helper method C{originalTextFor}. - Helper parse action to preserve original parsed text, - overriding any nested parse actions.""" - try: - endloc = getTokensEndLoc() - except ParseException: - raise ParseFatalException("incorrect usage of keepOriginalText - may only be called as a parse action") - del t[:] - t += ParseResults(s[startLoc:endloc]) - return t - -def getTokensEndLoc(): - """Method to be called from within a parse action to determine the end - location of the parsed tokens.""" - import inspect - fstack = inspect.stack() - try: - # search up the stack (through intervening argument normalizers) for correct calling routine - for f in fstack[2:]: - if f[3] == "_parseNoCache": - endloc = f[0].f_locals["loc"] - return endloc - else: - raise ParseFatalException("incorrect usage of getTokensEndLoc - may only be called from within a parse action") - finally: - del fstack - -def _makeTags(tagStr, xml): - """Internal helper to construct opening and closing tag expressions, given a tag name""" - if isinstance(tagStr,basestring): - resname = tagStr - tagStr = Keyword(tagStr, caseless=not xml) - else: - resname = tagStr.name - - tagAttrName = Word(alphas,alphanums+"_-:") - if (xml): - tagAttrValue = dblQuotedString.copy().setParseAction( removeQuotes ) - openTag = Suppress("<") + tagStr("tag") + \ - Dict(ZeroOrMore(Group( tagAttrName + Suppress("=") + tagAttrValue ))) + \ - Optional("/",default=[False]).setResultsName("empty").setParseAction(lambda s,l,t:t[0]=='/') + Suppress(">") - else: - printablesLessRAbrack = "".join( [ c for c in printables if c not in ">" ] ) - tagAttrValue = quotedString.copy().setParseAction( removeQuotes ) | Word(printablesLessRAbrack) - openTag = Suppress("<") + tagStr("tag") + \ - Dict(ZeroOrMore(Group( tagAttrName.setParseAction(downcaseTokens) + \ - Optional( Suppress("=") + tagAttrValue ) ))) + \ - Optional("/",default=[False]).setResultsName("empty").setParseAction(lambda s,l,t:t[0]=='/') + Suppress(">") - closeTag = Combine(_L("") - - openTag = openTag.setResultsName("start"+"".join(resname.replace(":"," ").title().split())).setName("<%s>" % tagStr) - closeTag = closeTag.setResultsName("end"+"".join(resname.replace(":"," ").title().split())).setName("" % tagStr) - openTag.tag = resname - closeTag.tag = resname - return openTag, closeTag - -def makeHTMLTags(tagStr): - """Helper to construct opening and closing tag expressions for HTML, given a tag name""" - return _makeTags( tagStr, False ) - -def makeXMLTags(tagStr): - """Helper to construct opening and closing tag expressions for XML, given a tag name""" - return _makeTags( tagStr, True ) - -def withAttribute(*args,**attrDict): - """Helper to create a validating parse action to be used with start tags created - with C{makeXMLTags} or C{makeHTMLTags}. Use C{withAttribute} to qualify a starting tag - with a required attribute value, to avoid false matches on common tags such as - C{} or C{
}. - - Call C{withAttribute} with a series of attribute names and values. Specify the list - of filter attributes names and values as: - - keyword arguments, as in C{(align="right")}, or - - as an explicit dict with C{**} operator, when an attribute name is also a Python - reserved word, as in C{**{"class":"Customer", "align":"right"}} - - a list of name-value tuples, as in ( ("ns1:class", "Customer"), ("ns2:align","right") ) - For attribute names with a namespace prefix, you must use the second form. Attribute - names are matched insensitive to upper/lower case. - - To verify that the attribute exists, but without specifying a value, pass - C{withAttribute.ANY_VALUE} as the value. - """ - if args: - attrs = args[:] - else: - attrs = attrDict.items() - attrs = [(k,v) for k,v in attrs] - def pa(s,l,tokens): - for attrName,attrValue in attrs: - if attrName not in tokens: - raise ParseException(s,l,"no matching attribute " + attrName) - if attrValue != withAttribute.ANY_VALUE and tokens[attrName] != attrValue: - raise ParseException(s,l,"attribute '%s' has value '%s', must be '%s'" % - (attrName, tokens[attrName], attrValue)) - return pa -withAttribute.ANY_VALUE = object() - -opAssoc = _Constants() -opAssoc.LEFT = object() -opAssoc.RIGHT = object() - -def operatorPrecedence( baseExpr, opList ): - """Helper method for constructing grammars of expressions made up of - operators working in a precedence hierarchy. Operators may be unary or - binary, left- or right-associative. Parse actions can also be attached - to operator expressions. - - Parameters: - - baseExpr - expression representing the most basic element for the nested - - opList - list of tuples, one for each operator precedence level in the - expression grammar; each tuple is of the form - (opExpr, numTerms, rightLeftAssoc, parseAction), where: - - opExpr is the pyparsing expression for the operator; - may also be a string, which will be converted to a Literal; - if numTerms is 3, opExpr is a tuple of two expressions, for the - two operators separating the 3 terms - - numTerms is the number of terms for this operator (must - be 1, 2, or 3) - - rightLeftAssoc is the indicator whether the operator is - right or left associative, using the pyparsing-defined - constants opAssoc.RIGHT and opAssoc.LEFT. - - parseAction is the parse action to be associated with - expressions matching this operator expression (the - parse action tuple member may be omitted) - """ - ret = Forward() - lastExpr = baseExpr | ( Suppress('(') + ret + Suppress(')') ) - for i,operDef in enumerate(opList): - opExpr,arity,rightLeftAssoc,pa = (operDef + (None,))[:4] - if arity == 3: - if opExpr is None or len(opExpr) != 2: - raise ValueError("if numterms=3, opExpr must be a tuple or list of two expressions") - opExpr1, opExpr2 = opExpr - thisExpr = Forward()#.setName("expr%d" % i) - if rightLeftAssoc == opAssoc.LEFT: - if arity == 1: - matchExpr = FollowedBy(lastExpr + opExpr) + Group( lastExpr + OneOrMore( opExpr ) ) - elif arity == 2: - if opExpr is not None: - matchExpr = FollowedBy(lastExpr + opExpr + lastExpr) + Group( lastExpr + OneOrMore( opExpr + lastExpr ) ) - else: - matchExpr = FollowedBy(lastExpr+lastExpr) + Group( lastExpr + OneOrMore(lastExpr) ) - elif arity == 3: - matchExpr = FollowedBy(lastExpr + opExpr1 + lastExpr + opExpr2 + lastExpr) + \ - Group( lastExpr + opExpr1 + lastExpr + opExpr2 + lastExpr ) - else: - raise ValueError("operator must be unary (1), binary (2), or ternary (3)") - elif rightLeftAssoc == opAssoc.RIGHT: - if arity == 1: - # try to avoid LR with this extra test - if not isinstance(opExpr, Optional): - opExpr = Optional(opExpr) - matchExpr = FollowedBy(opExpr.expr + thisExpr) + Group( opExpr + thisExpr ) - elif arity == 2: - if opExpr is not None: - matchExpr = FollowedBy(lastExpr + opExpr + thisExpr) + Group( lastExpr + OneOrMore( opExpr + thisExpr ) ) - else: - matchExpr = FollowedBy(lastExpr + thisExpr) + Group( lastExpr + OneOrMore( thisExpr ) ) - elif arity == 3: - matchExpr = FollowedBy(lastExpr + opExpr1 + thisExpr + opExpr2 + thisExpr) + \ - Group( lastExpr + opExpr1 + thisExpr + opExpr2 + thisExpr ) - else: - raise ValueError("operator must be unary (1), binary (2), or ternary (3)") - else: - raise ValueError("operator must indicate right or left associativity") - if pa: - matchExpr.setParseAction( pa ) - thisExpr << ( matchExpr | lastExpr ) - lastExpr = thisExpr - ret << lastExpr - return ret - -dblQuotedString = Regex(r'"(?:[^"\n\r\\]|(?:"")|(?:\\x[0-9a-fA-F]+)|(?:\\.))*"').setName("string enclosed in double quotes") -sglQuotedString = Regex(r"'(?:[^'\n\r\\]|(?:'')|(?:\\x[0-9a-fA-F]+)|(?:\\.))*'").setName("string enclosed in single quotes") -quotedString = Regex(r'''(?:"(?:[^"\n\r\\]|(?:"")|(?:\\x[0-9a-fA-F]+)|(?:\\.))*")|(?:'(?:[^'\n\r\\]|(?:'')|(?:\\x[0-9a-fA-F]+)|(?:\\.))*')''').setName("quotedString using single or double quotes") -unicodeString = Combine(_L('u') + quotedString.copy()) - -def nestedExpr(opener="(", closer=")", content=None, ignoreExpr=quotedString.copy()): - """Helper method for defining nested lists enclosed in opening and closing - delimiters ("(" and ")" are the default). - - Parameters: - - opener - opening character for a nested list (default="("); can also be a pyparsing expression - - closer - closing character for a nested list (default=")"); can also be a pyparsing expression - - content - expression for items within the nested lists (default=None) - - ignoreExpr - expression for ignoring opening and closing delimiters (default=quotedString) - - If an expression is not provided for the content argument, the nested - expression will capture all whitespace-delimited content between delimiters - as a list of separate values. - - Use the C{ignoreExpr} argument to define expressions that may contain - opening or closing characters that should not be treated as opening - or closing characters for nesting, such as quotedString or a comment - expression. Specify multiple expressions using an C{L{Or}} or C{L{MatchFirst}}. - The default is L{quotedString}, but if no expressions are to be ignored, - then pass C{None} for this argument. - """ - if opener == closer: - raise ValueError("opening and closing strings cannot be the same") - if content is None: - if isinstance(opener,basestring) and isinstance(closer,basestring): - if len(opener) == 1 and len(closer)==1: - if ignoreExpr is not None: - content = (Combine(OneOrMore(~ignoreExpr + - CharsNotIn(opener+closer+ParserElement.DEFAULT_WHITE_CHARS,exact=1)) - ).setParseAction(lambda t:t[0].strip())) - else: - content = (empty.copy()+CharsNotIn(opener+closer+ParserElement.DEFAULT_WHITE_CHARS - ).setParseAction(lambda t:t[0].strip())) - else: - if ignoreExpr is not None: - content = (Combine(OneOrMore(~ignoreExpr + - ~Literal(opener) + ~Literal(closer) + - CharsNotIn(ParserElement.DEFAULT_WHITE_CHARS,exact=1)) - ).setParseAction(lambda t:t[0].strip())) - else: - content = (Combine(OneOrMore(~Literal(opener) + ~Literal(closer) + - CharsNotIn(ParserElement.DEFAULT_WHITE_CHARS,exact=1)) - ).setParseAction(lambda t:t[0].strip())) - else: - raise ValueError("opening and closing arguments must be strings if no content expression is given") - ret = Forward() - if ignoreExpr is not None: - ret << Group( Suppress(opener) + ZeroOrMore( ignoreExpr | ret | content ) + Suppress(closer) ) - else: - ret << Group( Suppress(opener) + ZeroOrMore( ret | content ) + Suppress(closer) ) - return ret - -def indentedBlock(blockStatementExpr, indentStack, indent=True): - """Helper method for defining space-delimited indentation blocks, such as - those used to define block statements in Python source code. - - Parameters: - - blockStatementExpr - expression defining syntax of statement that - is repeated within the indented block - - indentStack - list created by caller to manage indentation stack - (multiple statementWithIndentedBlock expressions within a single grammar - should share a common indentStack) - - indent - boolean indicating whether block must be indented beyond the - the current level; set to False for block of left-most statements - (default=True) - - A valid block must contain at least one C{blockStatement}. - """ - def checkPeerIndent(s,l,t): - if l >= len(s): return - curCol = col(l,s) - if curCol != indentStack[-1]: - if curCol > indentStack[-1]: - raise ParseFatalException(s,l,"illegal nesting") - raise ParseException(s,l,"not a peer entry") - - def checkSubIndent(s,l,t): - curCol = col(l,s) - if curCol > indentStack[-1]: - indentStack.append( curCol ) - else: - raise ParseException(s,l,"not a subentry") - - def checkUnindent(s,l,t): - if l >= len(s): return - curCol = col(l,s) - if not(indentStack and curCol < indentStack[-1] and curCol <= indentStack[-2]): - raise ParseException(s,l,"not an unindent") - indentStack.pop() - - NL = OneOrMore(LineEnd().setWhitespaceChars("\t ").suppress()) - INDENT = Empty() + Empty().setParseAction(checkSubIndent) - PEER = Empty().setParseAction(checkPeerIndent) - UNDENT = Empty().setParseAction(checkUnindent) - if indent: - smExpr = Group( Optional(NL) + - #~ FollowedBy(blockStatementExpr) + - INDENT + (OneOrMore( PEER + Group(blockStatementExpr) + Optional(NL) )) + UNDENT) - else: - smExpr = Group( Optional(NL) + - (OneOrMore( PEER + Group(blockStatementExpr) + Optional(NL) )) ) - blockStatementExpr.ignore(_bslash + LineEnd()) - return smExpr - -alphas8bit = srange(r"[\0xc0-\0xd6\0xd8-\0xf6\0xf8-\0xff]") -punc8bit = srange(r"[\0xa1-\0xbf\0xd7\0xf7]") - -anyOpenTag,anyCloseTag = makeHTMLTags(Word(alphas,alphanums+"_:")) -commonHTMLEntity = Combine(_L("&") + oneOf("gt lt amp nbsp quot").setResultsName("entity") +";").streamline() -_htmlEntityMap = dict(zip("gt lt amp nbsp quot".split(),'><& "')) -replaceHTMLEntity = lambda t : t.entity in _htmlEntityMap and _htmlEntityMap[t.entity] or None - -# it's easy to get these comment structures wrong - they're very common, so may as well make them available -cStyleComment = Regex(r"/\*(?:[^*]*\*+)+?/").setName("C style comment") - -htmlComment = Regex(r"") -restOfLine = Regex(r".*").leaveWhitespace() -dblSlashComment = Regex(r"\/\/(\\\n|.)*").setName("// comment") -cppStyleComment = Regex(r"/(?:\*(?:[^*]*\*+)+?/|/[^\n]*(?:\n[^\n]*)*?(?:(?" + str(tokenlist)) - print ("tokens = " + str(tokens)) - print ("tokens.columns = " + str(tokens.columns)) - print ("tokens.tables = " + str(tokens.tables)) - print (tokens.asXML("SQL",True)) - except ParseBaseException: - err = sys.exc_info()[1] - print (teststring + "->") - print (err.line) - print (" "*(err.column-1) + "^") - print (err) - print() - - selectToken = CaselessLiteral( "select" ) - fromToken = CaselessLiteral( "from" ) - - ident = Word( alphas, alphanums + "_$" ) - columnName = delimitedList( ident, ".", combine=True ).setParseAction( upcaseTokens ) - columnNameList = Group( delimitedList( columnName ) )#.setName("columns") - tableName = delimitedList( ident, ".", combine=True ).setParseAction( upcaseTokens ) - tableNameList = Group( delimitedList( tableName ) )#.setName("tables") - simpleSQL = ( selectToken + \ - ( '*' | columnNameList ).setResultsName( "columns" ) + \ - fromToken + \ - tableNameList.setResultsName( "tables" ) ) - - test( "SELECT * from XYZZY, ABC" ) - test( "select * from SYS.XYZZY" ) - test( "Select A from Sys.dual" ) - test( "Select AA,BB,CC from Sys.dual" ) - test( "Select A, B, C from Sys.dual" ) - test( "Select A, B, C from Sys.dual" ) - test( "Xelect A, B, C from Sys.dual" ) - test( "Select A, B, C frox Sys.dual" ) - test( "Select" ) - test( "Select ^^^ frox Sys.dual" ) - test( "Select A, B, C from Sys.dual, Table2 " ) +# module pyparsing.py +# +# Copyright (c) 2003-2011 Paul T. McGuire +# +# Permission is hereby granted, free of charge, to any person obtaining +# a copy of this software and associated documentation files (the +# "Software"), to deal in the Software without restriction, including +# without limitation the rights to use, copy, modify, merge, publish, +# distribute, sublicense, and/or sell copies of the Software, and to +# permit persons to whom the Software is furnished to do so, subject to +# the following conditions: +# +# The above copyright notice and this permission notice shall be +# included in all copies or substantial portions of the Software. +# +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, +# EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF +# MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. +# IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY +# CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, +# TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE +# SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. +# +#from __future__ import generators + +__doc__ = \ +""" +pyparsing module - Classes and methods to define and execute parsing grammars + +The pyparsing module is an alternative approach to creating and executing simple grammars, +vs. the traditional lex/yacc approach, or the use of regular expressions. With pyparsing, you +don't need to learn a new syntax for defining grammars or matching expressions - the parsing module +provides a library of classes that you use to construct the grammar directly in Python. + +Here is a program to parse "Hello, World!" (or any greeting of the form C{", !"}):: + + from pyparsing import Word, alphas + + # define grammar of a greeting + greet = Word( alphas ) + "," + Word( alphas ) + "!" + + hello = "Hello, World!" + print hello, "->", greet.parseString( hello ) + +The program outputs the following:: + + Hello, World! -> ['Hello', ',', 'World', '!'] + +The Python representation of the grammar is quite readable, owing to the self-explanatory +class names, and the use of '+', '|' and '^' operators. + +The parsed results returned from C{parseString()} can be accessed as a nested list, a dictionary, or an +object with named attributes. + +The pyparsing module handles some of the problems that are typically vexing when writing text parsers: + - extra or missing whitespace (the above program will also handle "Hello,World!", "Hello , World !", etc.) + - quoted strings + - embedded comments +""" + +__version__ = "1.5.6" +__versionTime__ = "26 June 2011 10:53" +__author__ = "Paul McGuire " + +import string +from weakref import ref as wkref +import copy +import sys +import warnings +import re +import sre_constants +#~ sys.stderr.write( "testing pyparsing module, version %s, %s\n" % (__version__,__versionTime__ ) ) + +__all__ = [ +'And', 'CaselessKeyword', 'CaselessLiteral', 'CharsNotIn', 'Combine', 'Dict', 'Each', 'Empty', +'FollowedBy', 'Forward', 'GoToColumn', 'Group', 'Keyword', 'LineEnd', 'LineStart', 'Literal', +'MatchFirst', 'NoMatch', 'NotAny', 'OneOrMore', 'OnlyOnce', 'Optional', 'Or', +'ParseBaseException', 'ParseElementEnhance', 'ParseException', 'ParseExpression', 'ParseFatalException', +'ParseResults', 'ParseSyntaxException', 'ParserElement', 'QuotedString', 'RecursiveGrammarException', +'Regex', 'SkipTo', 'StringEnd', 'StringStart', 'Suppress', 'Token', 'TokenConverter', 'Upcase', +'White', 'Word', 'WordEnd', 'WordStart', 'ZeroOrMore', +'alphanums', 'alphas', 'alphas8bit', 'anyCloseTag', 'anyOpenTag', 'cStyleComment', 'col', +'commaSeparatedList', 'commonHTMLEntity', 'countedArray', 'cppStyleComment', 'dblQuotedString', +'dblSlashComment', 'delimitedList', 'dictOf', 'downcaseTokens', 'empty', 'getTokensEndLoc', 'hexnums', +'htmlComment', 'javaStyleComment', 'keepOriginalText', 'line', 'lineEnd', 'lineStart', 'lineno', +'makeHTMLTags', 'makeXMLTags', 'matchOnlyAtCol', 'matchPreviousExpr', 'matchPreviousLiteral', +'nestedExpr', 'nullDebugAction', 'nums', 'oneOf', 'opAssoc', 'operatorPrecedence', 'printables', +'punc8bit', 'pythonStyleComment', 'quotedString', 'removeQuotes', 'replaceHTMLEntity', +'replaceWith', 'restOfLine', 'sglQuotedString', 'srange', 'stringEnd', +'stringStart', 'traceParseAction', 'unicodeString', 'upcaseTokens', 'withAttribute', +'indentedBlock', 'originalTextFor', +] + +""" +Detect if we are running version 3.X and make appropriate changes +Robert A. Clark +""" +_PY3K = sys.version_info[0] > 2 +if _PY3K: + _MAX_INT = sys.maxsize + basestring = str + unichr = chr + _ustr = str + alphas = string.ascii_lowercase + string.ascii_uppercase +else: + _MAX_INT = sys.maxint + range = xrange + set = lambda s : dict( [(c,0) for c in s] ) + alphas = string.lowercase + string.uppercase + + def _ustr(obj): + """Drop-in replacement for str(obj) that tries to be Unicode friendly. It first tries + str(obj). If that fails with a UnicodeEncodeError, then it tries unicode(obj). It + then < returns the unicode object | encodes it with the default encoding | ... >. + """ + if isinstance(obj,unicode): + return obj + + try: + # If this works, then _ustr(obj) has the same behaviour as str(obj), so + # it won't break any existing code. + return str(obj) + + except UnicodeEncodeError: + # The Python docs (http://docs.python.org/ref/customization.html#l2h-182) + # state that "The return value must be a string object". However, does a + # unicode object (being a subclass of basestring) count as a "string + # object"? + # If so, then return a unicode object: + return unicode(obj) + # Else encode it... but how? There are many choices... :) + # Replace unprintables with escape codes? + #return unicode(obj).encode(sys.getdefaultencoding(), 'backslashreplace_errors') + # Replace unprintables with question marks? + #return unicode(obj).encode(sys.getdefaultencoding(), 'replace') + # ... + + alphas = string.lowercase + string.uppercase + +# build list of single arg builtins, tolerant of Python version, that can be used as parse actions +singleArgBuiltins = [] +import __builtin__ +for fname in "sum len enumerate sorted reversed list tuple set any all".split(): + try: + singleArgBuiltins.append(getattr(__builtin__,fname)) + except AttributeError: + continue + +def _xml_escape(data): + """Escape &, <, >, ", ', etc. in a string of data.""" + + # ampersand must be replaced first + from_symbols = '&><"\'' + to_symbols = ['&'+s+';' for s in "amp gt lt quot apos".split()] + for from_,to_ in zip(from_symbols, to_symbols): + data = data.replace(from_, to_) + return data + +class _Constants(object): + pass + +nums = string.digits +hexnums = nums + "ABCDEFabcdef" +alphanums = alphas + nums +_bslash = chr(92) +printables = "".join( [ c for c in string.printable if c not in string.whitespace ] ) + +class ParseBaseException(Exception): + """base exception class for all parsing runtime exceptions""" + # Performance tuning: we construct a *lot* of these, so keep this + # constructor as small and fast as possible + def __init__( self, pstr, loc=0, msg=None, elem=None ): + self.loc = loc + if msg is None: + self.msg = pstr + self.pstr = "" + else: + self.msg = msg + self.pstr = pstr + self.parserElement = elem + + def __getattr__( self, aname ): + """supported attributes by name are: + - lineno - returns the line number of the exception text + - col - returns the column number of the exception text + - line - returns the line containing the exception text + """ + if( aname == "lineno" ): + return lineno( self.loc, self.pstr ) + elif( aname in ("col", "column") ): + return col( self.loc, self.pstr ) + elif( aname == "line" ): + return line( self.loc, self.pstr ) + else: + raise AttributeError(aname) + + def __str__( self ): + return "%s (at char %d), (line:%d, col:%d)" % \ + ( self.msg, self.loc, self.lineno, self.column ) + def __repr__( self ): + return _ustr(self) + def markInputline( self, markerString = ">!<" ): + """Extracts the exception line from the input string, and marks + the location of the exception with a special symbol. + """ + line_str = self.line + line_column = self.column - 1 + if markerString: + line_str = "".join( [line_str[:line_column], + markerString, line_str[line_column:]]) + return line_str.strip() + def __dir__(self): + return "loc msg pstr parserElement lineno col line " \ + "markInputLine __str__ __repr__".split() + +class ParseException(ParseBaseException): + """exception thrown when parse expressions don't match class; + supported attributes by name are: + - lineno - returns the line number of the exception text + - col - returns the column number of the exception text + - line - returns the line containing the exception text + """ + pass + +class ParseFatalException(ParseBaseException): + """user-throwable exception thrown when inconsistent parse content + is found; stops all parsing immediately""" + pass + +class ParseSyntaxException(ParseFatalException): + """just like C{ParseFatalException}, but thrown internally when an + C{ErrorStop} ('-' operator) indicates that parsing is to stop immediately because + an unbacktrackable syntax error has been found""" + def __init__(self, pe): + super(ParseSyntaxException, self).__init__( + pe.pstr, pe.loc, pe.msg, pe.parserElement) + +#~ class ReparseException(ParseBaseException): + #~ """Experimental class - parse actions can raise this exception to cause + #~ pyparsing to reparse the input string: + #~ - with a modified input string, and/or + #~ - with a modified start location + #~ Set the values of the ReparseException in the constructor, and raise the + #~ exception in a parse action to cause pyparsing to use the new string/location. + #~ Setting the values as None causes no change to be made. + #~ """ + #~ def __init_( self, newstring, restartLoc ): + #~ self.newParseText = newstring + #~ self.reparseLoc = restartLoc + +class RecursiveGrammarException(Exception): + """exception thrown by C{validate()} if the grammar could be improperly recursive""" + def __init__( self, parseElementList ): + self.parseElementTrace = parseElementList + + def __str__( self ): + return "RecursiveGrammarException: %s" % self.parseElementTrace + +class _ParseResultsWithOffset(object): + def __init__(self,p1,p2): + self.tup = (p1,p2) + def __getitem__(self,i): + return self.tup[i] + def __repr__(self): + return repr(self.tup) + def setOffset(self,i): + self.tup = (self.tup[0],i) + +class ParseResults(object): + """Structured parse results, to provide multiple means of access to the parsed data: + - as a list (C{len(results)}) + - by list index (C{results[0], results[1]}, etc.) + - by attribute (C{results.}) + """ + #~ __slots__ = ( "__toklist", "__tokdict", "__doinit", "__name", "__parent", "__accumNames", "__weakref__" ) + def __new__(cls, toklist, name=None, asList=True, modal=True ): + if isinstance(toklist, cls): + return toklist + retobj = object.__new__(cls) + retobj.__doinit = True + return retobj + + # Performance tuning: we construct a *lot* of these, so keep this + # constructor as small and fast as possible + def __init__( self, toklist, name=None, asList=True, modal=True, isinstance=isinstance ): + if self.__doinit: + self.__doinit = False + self.__name = None + self.__parent = None + self.__accumNames = {} + if isinstance(toklist, list): + self.__toklist = toklist[:] + else: + self.__toklist = [toklist] + self.__tokdict = dict() + + if name is not None and name: + if not modal: + self.__accumNames[name] = 0 + if isinstance(name,int): + name = _ustr(name) # will always return a str, but use _ustr for consistency + self.__name = name + if not toklist in (None,'',[]): + if isinstance(toklist,basestring): + toklist = [ toklist ] + if asList: + if isinstance(toklist,ParseResults): + self[name] = _ParseResultsWithOffset(toklist.copy(),0) + else: + self[name] = _ParseResultsWithOffset(ParseResults(toklist[0]),0) + self[name].__name = name + else: + try: + self[name] = toklist[0] + except (KeyError,TypeError,IndexError): + self[name] = toklist + + def __getitem__( self, i ): + if isinstance( i, (int,slice) ): + return self.__toklist[i] + else: + if i not in self.__accumNames: + return self.__tokdict[i][-1][0] + else: + return ParseResults([ v[0] for v in self.__tokdict[i] ]) + + def __setitem__( self, k, v, isinstance=isinstance ): + if isinstance(v,_ParseResultsWithOffset): + self.__tokdict[k] = self.__tokdict.get(k,list()) + [v] + sub = v[0] + elif isinstance(k,int): + self.__toklist[k] = v + sub = v + else: + self.__tokdict[k] = self.__tokdict.get(k,list()) + [_ParseResultsWithOffset(v,0)] + sub = v + if isinstance(sub,ParseResults): + sub.__parent = wkref(self) + + def __delitem__( self, i ): + if isinstance(i,(int,slice)): + mylen = len( self.__toklist ) + del self.__toklist[i] + + # convert int to slice + if isinstance(i, int): + if i < 0: + i += mylen + i = slice(i, i+1) + # get removed indices + removed = list(range(*i.indices(mylen))) + removed.reverse() + # fixup indices in token dictionary + for name in self.__tokdict: + occurrences = self.__tokdict[name] + for j in removed: + for k, (value, position) in enumerate(occurrences): + occurrences[k] = _ParseResultsWithOffset(value, position - (position > j)) + else: + del self.__tokdict[i] + + def __contains__( self, k ): + return k in self.__tokdict + + def __len__( self ): return len( self.__toklist ) + def __bool__(self): return len( self.__toklist ) > 0 + __nonzero__ = __bool__ + def __iter__( self ): return iter( self.__toklist ) + def __reversed__( self ): return iter( self.__toklist[::-1] ) + def keys( self ): + """Returns all named result keys.""" + return self.__tokdict.keys() + + def pop( self, index=-1 ): + """Removes and returns item at specified index (default=last). + Will work with either numeric indices or dict-key indicies.""" + ret = self[index] + del self[index] + return ret + + def get(self, key, defaultValue=None): + """Returns named result matching the given key, or if there is no + such name, then returns the given C{defaultValue} or C{None} if no + C{defaultValue} is specified.""" + if key in self: + return self[key] + else: + return defaultValue + + def insert( self, index, insStr ): + """Inserts new element at location index in the list of parsed tokens.""" + self.__toklist.insert(index, insStr) + # fixup indices in token dictionary + for name in self.__tokdict: + occurrences = self.__tokdict[name] + for k, (value, position) in enumerate(occurrences): + occurrences[k] = _ParseResultsWithOffset(value, position + (position > index)) + + def items( self ): + """Returns all named result keys and values as a list of tuples.""" + return [(k,self[k]) for k in self.__tokdict] + + def values( self ): + """Returns all named result values.""" + return [ v[-1][0] for v in self.__tokdict.values() ] + + def __getattr__( self, name ): + if True: #name not in self.__slots__: + if name in self.__tokdict: + if name not in self.__accumNames: + return self.__tokdict[name][-1][0] + else: + return ParseResults([ v[0] for v in self.__tokdict[name] ]) + else: + return "" + return None + + def __add__( self, other ): + ret = self.copy() + ret += other + return ret + + def __iadd__( self, other ): + if other.__tokdict: + offset = len(self.__toklist) + addoffset = ( lambda a: (a<0 and offset) or (a+offset) ) + otheritems = other.__tokdict.items() + otherdictitems = [(k, _ParseResultsWithOffset(v[0],addoffset(v[1])) ) + for (k,vlist) in otheritems for v in vlist] + for k,v in otherdictitems: + self[k] = v + if isinstance(v[0],ParseResults): + v[0].__parent = wkref(self) + + self.__toklist += other.__toklist + self.__accumNames.update( other.__accumNames ) + return self + + def __radd__(self, other): + if isinstance(other,int) and other == 0: + return self.copy() + + def __repr__( self ): + return "(%s, %s)" % ( repr( self.__toklist ), repr( self.__tokdict ) ) + + def __str__( self ): + out = "[" + sep = "" + for i in self.__toklist: + if isinstance(i, ParseResults): + out += sep + _ustr(i) + else: + out += sep + repr(i) + sep = ", " + out += "]" + return out + + def _asStringList( self, sep='' ): + out = [] + for item in self.__toklist: + if out and sep: + out.append(sep) + if isinstance( item, ParseResults ): + out += item._asStringList() + else: + out.append( _ustr(item) ) + return out + + def asList( self ): + """Returns the parse results as a nested list of matching tokens, all converted to strings.""" + out = [] + for res in self.__toklist: + if isinstance(res,ParseResults): + out.append( res.asList() ) + else: + out.append( res ) + return out + + def asDict( self ): + """Returns the named parse results as dictionary.""" + return dict( self.items() ) + + def copy( self ): + """Returns a new copy of a C{ParseResults} object.""" + ret = ParseResults( self.__toklist ) + ret.__tokdict = self.__tokdict.copy() + ret.__parent = self.__parent + ret.__accumNames.update( self.__accumNames ) + ret.__name = self.__name + return ret + + def asXML( self, doctag=None, namedItemsOnly=False, indent="", formatted=True ): + """Returns the parse results as XML. Tags are created for tokens and lists that have defined results names.""" + nl = "\n" + out = [] + namedItems = dict( [ (v[1],k) for (k,vlist) in self.__tokdict.items() + for v in vlist ] ) + nextLevelIndent = indent + " " + + # collapse out indents if formatting is not desired + if not formatted: + indent = "" + nextLevelIndent = "" + nl = "" + + selfTag = None + if doctag is not None: + selfTag = doctag + else: + if self.__name: + selfTag = self.__name + + if not selfTag: + if namedItemsOnly: + return "" + else: + selfTag = "ITEM" + + out += [ nl, indent, "<", selfTag, ">" ] + + worklist = self.__toklist + for i,res in enumerate(worklist): + if isinstance(res,ParseResults): + if i in namedItems: + out += [ res.asXML(namedItems[i], + namedItemsOnly and doctag is None, + nextLevelIndent, + formatted)] + else: + out += [ res.asXML(None, + namedItemsOnly and doctag is None, + nextLevelIndent, + formatted)] + else: + # individual token, see if there is a name for it + resTag = None + if i in namedItems: + resTag = namedItems[i] + if not resTag: + if namedItemsOnly: + continue + else: + resTag = "ITEM" + xmlBodyText = _xml_escape(_ustr(res)) + out += [ nl, nextLevelIndent, "<", resTag, ">", + xmlBodyText, + "" ] + + out += [ nl, indent, "" ] + return "".join(out) + + def __lookup(self,sub): + for k,vlist in self.__tokdict.items(): + for v,loc in vlist: + if sub is v: + return k + return None + + def getName(self): + """Returns the results name for this token expression.""" + if self.__name: + return self.__name + elif self.__parent: + par = self.__parent() + if par: + return par.__lookup(self) + else: + return None + elif (len(self) == 1 and + len(self.__tokdict) == 1 and + self.__tokdict.values()[0][0][1] in (0,-1)): + return self.__tokdict.keys()[0] + else: + return None + + def dump(self,indent='',depth=0): + """Diagnostic method for listing out the contents of a C{ParseResults}. + Accepts an optional C{indent} argument so that this string can be embedded + in a nested display of other data.""" + out = [] + out.append( indent+_ustr(self.asList()) ) + keys = self.items() + keys.sort() + for k,v in keys: + if out: + out.append('\n') + out.append( "%s%s- %s: " % (indent,(' '*depth), k) ) + if isinstance(v,ParseResults): + if v.keys(): + out.append( v.dump(indent,depth+1) ) + else: + out.append(_ustr(v)) + else: + out.append(_ustr(v)) + return "".join(out) + + # add support for pickle protocol + def __getstate__(self): + return ( self.__toklist, + ( self.__tokdict.copy(), + self.__parent is not None and self.__parent() or None, + self.__accumNames, + self.__name ) ) + + def __setstate__(self,state): + self.__toklist = state[0] + (self.__tokdict, + par, + inAccumNames, + self.__name) = state[1] + self.__accumNames = {} + self.__accumNames.update(inAccumNames) + if par is not None: + self.__parent = wkref(par) + else: + self.__parent = None + + def __dir__(self): + return dir(super(ParseResults,self)) + self.keys() + +def col (loc,strg): + """Returns current column within a string, counting newlines as line separators. + The first column is number 1. + + Note: the default parsing behavior is to expand tabs in the input string + before starting the parsing process. See L{I{ParserElement.parseString}} for more information + on parsing strings containing s, and suggested methods to maintain a + consistent view of the parsed string, the parse location, and line and column + positions within the parsed string. + """ + return (loc} for more information + on parsing strings containing s, and suggested methods to maintain a + consistent view of the parsed string, the parse location, and line and column + positions within the parsed string. + """ + return strg.count("\n",0,loc) + 1 + +def line( loc, strg ): + """Returns the line of text containing loc within a string, counting newlines as line separators. + """ + lastCR = strg.rfind("\n", 0, loc) + nextCR = strg.find("\n", loc) + if nextCR >= 0: + return strg[lastCR+1:nextCR] + else: + return strg[lastCR+1:] + +def _defaultStartDebugAction( instring, loc, expr ): + print ("Match " + _ustr(expr) + " at loc " + _ustr(loc) + "(%d,%d)" % ( lineno(loc,instring), col(loc,instring) )) + +def _defaultSuccessDebugAction( instring, startloc, endloc, expr, toks ): + print ("Matched " + _ustr(expr) + " -> " + str(toks.asList())) + +def _defaultExceptionDebugAction( instring, loc, expr, exc ): + print ("Exception raised:" + _ustr(exc)) + +def nullDebugAction(*args): + """'Do-nothing' debug action, to suppress debugging output during parsing.""" + pass + +'decorator to trim function calls to match the arity of the target' +if not _PY3K: + def _trim_arity(func, maxargs=2): + limit = [0] + def wrapper(*args): + while 1: + try: + return func(*args[limit[0]:]) + except TypeError: + if limit[0] <= maxargs: + limit[0] += 1 + continue + raise + return wrapper +else: + def _trim_arity(func, maxargs=2): + limit = maxargs + def wrapper(*args): + #~ nonlocal limit + while 1: + try: + return func(*args[limit:]) + except TypeError: + if limit: + limit -= 1 + continue + raise + return wrapper + +class ParserElement(object): + """Abstract base level parser element class.""" + DEFAULT_WHITE_CHARS = " \n\t\r" + verbose_stacktrace = False + + def setDefaultWhitespaceChars( chars ): + """Overrides the default whitespace chars + """ + ParserElement.DEFAULT_WHITE_CHARS = chars + setDefaultWhitespaceChars = staticmethod(setDefaultWhitespaceChars) + + def __init__( self, savelist=False ): + self.parseAction = list() + self.failAction = None + #~ self.name = "" # don't define self.name, let subclasses try/except upcall + self.strRepr = None + self.resultsName = None + self.saveAsList = savelist + self.skipWhitespace = True + self.whiteChars = ParserElement.DEFAULT_WHITE_CHARS + self.copyDefaultWhiteChars = True + self.mayReturnEmpty = False # used when checking for left-recursion + self.keepTabs = False + self.ignoreExprs = list() + self.debug = False + self.streamlined = False + self.mayIndexError = True # used to optimize exception handling for subclasses that don't advance parse index + self.errmsg = "" + self.modalResults = True # used to mark results names as modal (report only last) or cumulative (list all) + self.debugActions = ( None, None, None ) #custom debug actions + self.re = None + self.callPreparse = True # used to avoid redundant calls to preParse + self.callDuringTry = False + + def copy( self ): + """Make a copy of this C{ParserElement}. Useful for defining different parse actions + for the same parsing pattern, using copies of the original parse element.""" + cpy = copy.copy( self ) + cpy.parseAction = self.parseAction[:] + cpy.ignoreExprs = self.ignoreExprs[:] + if self.copyDefaultWhiteChars: + cpy.whiteChars = ParserElement.DEFAULT_WHITE_CHARS + return cpy + + def setName( self, name ): + """Define name for this expression, for use in debugging.""" + self.name = name + self.errmsg = "Expected " + self.name + if hasattr(self,"exception"): + self.exception.msg = self.errmsg + return self + + def setResultsName( self, name, listAllMatches=False ): + """Define name for referencing matching tokens as a nested attribute + of the returned parse results. + NOTE: this returns a *copy* of the original C{ParserElement} object; + this is so that the client can define a basic element, such as an + integer, and reference it in multiple places with different names. + + You can also set results names using the abbreviated syntax, + C{expr("name")} in place of C{expr.setResultsName("name")} - + see L{I{__call__}<__call__>}. + """ + newself = self.copy() + if name.endswith("*"): + name = name[:-1] + listAllMatches=True + newself.resultsName = name + newself.modalResults = not listAllMatches + return newself + + def setBreak(self,breakFlag = True): + """Method to invoke the Python pdb debugger when this element is + about to be parsed. Set C{breakFlag} to True to enable, False to + disable. + """ + if breakFlag: + _parseMethod = self._parse + def breaker(instring, loc, doActions=True, callPreParse=True): + import pdb + pdb.set_trace() + return _parseMethod( instring, loc, doActions, callPreParse ) + breaker._originalParseMethod = _parseMethod + self._parse = breaker + else: + if hasattr(self._parse,"_originalParseMethod"): + self._parse = self._parse._originalParseMethod + return self + + def setParseAction( self, *fns, **kwargs ): + """Define action to perform when successfully matching parse element definition. + Parse action fn is a callable method with 0-3 arguments, called as C{fn(s,loc,toks)}, + C{fn(loc,toks)}, C{fn(toks)}, or just C{fn()}, where: + - s = the original string being parsed (see note below) + - loc = the location of the matching substring + - toks = a list of the matched tokens, packaged as a ParseResults object + If the functions in fns modify the tokens, they can return them as the return + value from fn, and the modified list of tokens will replace the original. + Otherwise, fn does not need to return any value. + + Note: the default parsing behavior is to expand tabs in the input string + before starting the parsing process. See L{I{parseString}} for more information + on parsing strings containing s, and suggested methods to maintain a + consistent view of the parsed string, the parse location, and line and column + positions within the parsed string. + """ + self.parseAction = list(map(_trim_arity, list(fns))) + self.callDuringTry = ("callDuringTry" in kwargs and kwargs["callDuringTry"]) + return self + + def addParseAction( self, *fns, **kwargs ): + """Add parse action to expression's list of parse actions. See L{I{setParseAction}}.""" + self.parseAction += list(map(_trim_arity, list(fns))) + self.callDuringTry = self.callDuringTry or ("callDuringTry" in kwargs and kwargs["callDuringTry"]) + return self + + def setFailAction( self, fn ): + """Define action to perform if parsing fails at this expression. + Fail acton fn is a callable function that takes the arguments + C{fn(s,loc,expr,err)} where: + - s = string being parsed + - loc = location where expression match was attempted and failed + - expr = the parse expression that failed + - err = the exception thrown + The function returns no value. It may throw C{ParseFatalException} + if it is desired to stop parsing immediately.""" + self.failAction = fn + return self + + def _skipIgnorables( self, instring, loc ): + exprsFound = True + while exprsFound: + exprsFound = False + for e in self.ignoreExprs: + try: + while 1: + loc,dummy = e._parse( instring, loc ) + exprsFound = True + except ParseException: + pass + return loc + + def preParse( self, instring, loc ): + if self.ignoreExprs: + loc = self._skipIgnorables( instring, loc ) + + if self.skipWhitespace: + wt = self.whiteChars + instrlen = len(instring) + while loc < instrlen and instring[loc] in wt: + loc += 1 + + return loc + + def parseImpl( self, instring, loc, doActions=True ): + return loc, [] + + def postParse( self, instring, loc, tokenlist ): + return tokenlist + + #~ @profile + def _parseNoCache( self, instring, loc, doActions=True, callPreParse=True ): + debugging = ( self.debug ) #and doActions ) + + if debugging or self.failAction: + #~ print ("Match",self,"at loc",loc,"(%d,%d)" % ( lineno(loc,instring), col(loc,instring) )) + if (self.debugActions[0] ): + self.debugActions[0]( instring, loc, self ) + if callPreParse and self.callPreparse: + preloc = self.preParse( instring, loc ) + else: + preloc = loc + tokensStart = preloc + try: + try: + loc,tokens = self.parseImpl( instring, preloc, doActions ) + except IndexError: + raise ParseException( instring, len(instring), self.errmsg, self ) + except ParseBaseException: + #~ print ("Exception raised:", err) + err = None + if self.debugActions[2]: + err = sys.exc_info()[1] + self.debugActions[2]( instring, tokensStart, self, err ) + if self.failAction: + if err is None: + err = sys.exc_info()[1] + self.failAction( instring, tokensStart, self, err ) + raise + else: + if callPreParse and self.callPreparse: + preloc = self.preParse( instring, loc ) + else: + preloc = loc + tokensStart = preloc + if self.mayIndexError or loc >= len(instring): + try: + loc,tokens = self.parseImpl( instring, preloc, doActions ) + except IndexError: + raise ParseException( instring, len(instring), self.errmsg, self ) + else: + loc,tokens = self.parseImpl( instring, preloc, doActions ) + + tokens = self.postParse( instring, loc, tokens ) + + retTokens = ParseResults( tokens, self.resultsName, asList=self.saveAsList, modal=self.modalResults ) + if self.parseAction and (doActions or self.callDuringTry): + if debugging: + try: + for fn in self.parseAction: + tokens = fn( instring, tokensStart, retTokens ) + if tokens is not None: + retTokens = ParseResults( tokens, + self.resultsName, + asList=self.saveAsList and isinstance(tokens,(ParseResults,list)), + modal=self.modalResults ) + except ParseBaseException: + #~ print "Exception raised in user parse action:", err + if (self.debugActions[2] ): + err = sys.exc_info()[1] + self.debugActions[2]( instring, tokensStart, self, err ) + raise + else: + for fn in self.parseAction: + tokens = fn( instring, tokensStart, retTokens ) + if tokens is not None: + retTokens = ParseResults( tokens, + self.resultsName, + asList=self.saveAsList and isinstance(tokens,(ParseResults,list)), + modal=self.modalResults ) + + if debugging: + #~ print ("Matched",self,"->",retTokens.asList()) + if (self.debugActions[1] ): + self.debugActions[1]( instring, tokensStart, loc, self, retTokens ) + + return loc, retTokens + + def tryParse( self, instring, loc ): + try: + return self._parse( instring, loc, doActions=False )[0] + except ParseFatalException: + raise ParseException( instring, loc, self.errmsg, self) + + # this method gets repeatedly called during backtracking with the same arguments - + # we can cache these arguments and save ourselves the trouble of re-parsing the contained expression + def _parseCache( self, instring, loc, doActions=True, callPreParse=True ): + lookup = (self,instring,loc,callPreParse,doActions) + if lookup in ParserElement._exprArgCache: + value = ParserElement._exprArgCache[ lookup ] + if isinstance(value, Exception): + raise value + return (value[0],value[1].copy()) + else: + try: + value = self._parseNoCache( instring, loc, doActions, callPreParse ) + ParserElement._exprArgCache[ lookup ] = (value[0],value[1].copy()) + return value + except ParseBaseException: + pe = sys.exc_info()[1] + ParserElement._exprArgCache[ lookup ] = pe + raise + + _parse = _parseNoCache + + # argument cache for optimizing repeated calls when backtracking through recursive expressions + _exprArgCache = {} + def resetCache(): + ParserElement._exprArgCache.clear() + resetCache = staticmethod(resetCache) + + _packratEnabled = False + def enablePackrat(): + """Enables "packrat" parsing, which adds memoizing to the parsing logic. + Repeated parse attempts at the same string location (which happens + often in many complex grammars) can immediately return a cached value, + instead of re-executing parsing/validating code. Memoizing is done of + both valid results and parsing exceptions. + + This speedup may break existing programs that use parse actions that + have side-effects. For this reason, packrat parsing is disabled when + you first import pyparsing. To activate the packrat feature, your + program must call the class method C{ParserElement.enablePackrat()}. If + your program uses C{psyco} to "compile as you go", you must call + C{enablePackrat} before calling C{psyco.full()}. If you do not do this, + Python will crash. For best results, call C{enablePackrat()} immediately + after importing pyparsing. + """ + if not ParserElement._packratEnabled: + ParserElement._packratEnabled = True + ParserElement._parse = ParserElement._parseCache + enablePackrat = staticmethod(enablePackrat) + + def parseString( self, instring, parseAll=False ): + """Execute the parse expression with the given string. + This is the main interface to the client code, once the complete + expression has been built. + + If you want the grammar to require that the entire input string be + successfully parsed, then set C{parseAll} to True (equivalent to ending + the grammar with C{StringEnd()}). + + Note: C{parseString} implicitly calls C{expandtabs()} on the input string, + in order to report proper column numbers in parse actions. + If the input string contains tabs and + the grammar uses parse actions that use the C{loc} argument to index into the + string being parsed, you can ensure you have a consistent view of the input + string by: + - calling C{parseWithTabs} on your grammar before calling C{parseString} + (see L{I{parseWithTabs}}) + - define your parse action using the full C{(s,loc,toks)} signature, and + reference the input string using the parse action's C{s} argument + - explictly expand the tabs in your input string before calling + C{parseString} + """ + ParserElement.resetCache() + if not self.streamlined: + self.streamline() + #~ self.saveAsList = True + for e in self.ignoreExprs: + e.streamline() + if not self.keepTabs: + instring = instring.expandtabs() + try: + loc, tokens = self._parse( instring, 0 ) + if parseAll: + loc = self.preParse( instring, loc ) + se = Empty() + StringEnd() + se._parse( instring, loc ) + except ParseBaseException: + if ParserElement.verbose_stacktrace: + raise + else: + # catch and re-raise exception from here, clears out pyparsing internal stack trace + exc = sys.exc_info()[1] + raise exc + else: + return tokens + + def scanString( self, instring, maxMatches=_MAX_INT, overlap=False ): + """Scan the input string for expression matches. Each match will return the + matching tokens, start location, and end location. May be called with optional + C{maxMatches} argument, to clip scanning after 'n' matches are found. If + C{overlap} is specified, then overlapping matches will be reported. + + Note that the start and end locations are reported relative to the string + being parsed. See L{I{parseString}} for more information on parsing + strings with embedded tabs.""" + if not self.streamlined: + self.streamline() + for e in self.ignoreExprs: + e.streamline() + + if not self.keepTabs: + instring = _ustr(instring).expandtabs() + instrlen = len(instring) + loc = 0 + preparseFn = self.preParse + parseFn = self._parse + ParserElement.resetCache() + matches = 0 + try: + while loc <= instrlen and matches < maxMatches: + try: + preloc = preparseFn( instring, loc ) + nextLoc,tokens = parseFn( instring, preloc, callPreParse=False ) + except ParseException: + loc = preloc+1 + else: + if nextLoc > loc: + matches += 1 + yield tokens, preloc, nextLoc + if overlap: + nextloc = preparseFn( instring, loc ) + if nextloc > loc: + loc = nextLoc + else: + loc += 1 + else: + loc = nextLoc + else: + loc = preloc+1 + except ParseBaseException: + if ParserElement.verbose_stacktrace: + raise + else: + # catch and re-raise exception from here, clears out pyparsing internal stack trace + exc = sys.exc_info()[1] + raise exc + + def transformString( self, instring ): + """Extension to C{scanString}, to modify matching text with modified tokens that may + be returned from a parse action. To use C{transformString}, define a grammar and + attach a parse action to it that modifies the returned token list. + Invoking C{transformString()} on a target string will then scan for matches, + and replace the matched text patterns according to the logic in the parse + action. C{transformString()} returns the resulting transformed string.""" + out = [] + lastE = 0 + # force preservation of s, to minimize unwanted transformation of string, and to + # keep string locs straight between transformString and scanString + self.keepTabs = True + try: + for t,s,e in self.scanString( instring ): + out.append( instring[lastE:s] ) + if t: + if isinstance(t,ParseResults): + out += t.asList() + elif isinstance(t,list): + out += t + else: + out.append(t) + lastE = e + out.append(instring[lastE:]) + out = [o for o in out if o] + return "".join(map(_ustr,_flatten(out))) + except ParseBaseException: + if ParserElement.verbose_stacktrace: + raise + else: + # catch and re-raise exception from here, clears out pyparsing internal stack trace + exc = sys.exc_info()[1] + raise exc + + def searchString( self, instring, maxMatches=_MAX_INT ): + """Another extension to C{scanString}, simplifying the access to the tokens found + to match the given parse expression. May be called with optional + C{maxMatches} argument, to clip searching after 'n' matches are found. + """ + try: + return ParseResults([ t for t,s,e in self.scanString( instring, maxMatches ) ]) + except ParseBaseException: + if ParserElement.verbose_stacktrace: + raise + else: + # catch and re-raise exception from here, clears out pyparsing internal stack trace + exc = sys.exc_info()[1] + raise exc + + def __add__(self, other ): + """Implementation of + operator - returns And""" + if isinstance( other, basestring ): + other = Literal( other ) + if not isinstance( other, ParserElement ): + warnings.warn("Cannot combine element of type %s with ParserElement" % type(other), + SyntaxWarning, stacklevel=2) + return None + return And( [ self, other ] ) + + def __radd__(self, other ): + """Implementation of + operator when left operand is not a C{ParserElement}""" + if isinstance( other, basestring ): + other = Literal( other ) + if not isinstance( other, ParserElement ): + warnings.warn("Cannot combine element of type %s with ParserElement" % type(other), + SyntaxWarning, stacklevel=2) + return None + return other + self + + def __sub__(self, other): + """Implementation of - operator, returns C{And} with error stop""" + if isinstance( other, basestring ): + other = Literal( other ) + if not isinstance( other, ParserElement ): + warnings.warn("Cannot combine element of type %s with ParserElement" % type(other), + SyntaxWarning, stacklevel=2) + return None + return And( [ self, And._ErrorStop(), other ] ) + + def __rsub__(self, other ): + """Implementation of - operator when left operand is not a C{ParserElement}""" + if isinstance( other, basestring ): + other = Literal( other ) + if not isinstance( other, ParserElement ): + warnings.warn("Cannot combine element of type %s with ParserElement" % type(other), + SyntaxWarning, stacklevel=2) + return None + return other - self + + def __mul__(self,other): + """Implementation of * operator, allows use of C{expr * 3} in place of + C{expr + expr + expr}. Expressions may also me multiplied by a 2-integer + tuple, similar to C{{min,max}} multipliers in regular expressions. Tuples + may also include C{None} as in: + - C{expr*(n,None)} or C{expr*(n,)} is equivalent + to C{expr*n + ZeroOrMore(expr)} + (read as "at least n instances of C{expr}") + - C{expr*(None,n)} is equivalent to C{expr*(0,n)} + (read as "0 to n instances of C{expr}") + - C{expr*(None,None)} is equivalent to C{ZeroOrMore(expr)} + - C{expr*(1,None)} is equivalent to C{OneOrMore(expr)} + + Note that C{expr*(None,n)} does not raise an exception if + more than n exprs exist in the input stream; that is, + C{expr*(None,n)} does not enforce a maximum number of expr + occurrences. If this behavior is desired, then write + C{expr*(None,n) + ~expr} + + """ + if isinstance(other,int): + minElements, optElements = other,0 + elif isinstance(other,tuple): + other = (other + (None, None))[:2] + if other[0] is None: + other = (0, other[1]) + if isinstance(other[0],int) and other[1] is None: + if other[0] == 0: + return ZeroOrMore(self) + if other[0] == 1: + return OneOrMore(self) + else: + return self*other[0] + ZeroOrMore(self) + elif isinstance(other[0],int) and isinstance(other[1],int): + minElements, optElements = other + optElements -= minElements + else: + raise TypeError("cannot multiply 'ParserElement' and ('%s','%s') objects", type(other[0]),type(other[1])) + else: + raise TypeError("cannot multiply 'ParserElement' and '%s' objects", type(other)) + + if minElements < 0: + raise ValueError("cannot multiply ParserElement by negative value") + if optElements < 0: + raise ValueError("second tuple value must be greater or equal to first tuple value") + if minElements == optElements == 0: + raise ValueError("cannot multiply ParserElement by 0 or (0,0)") + + if (optElements): + def makeOptionalList(n): + if n>1: + return Optional(self + makeOptionalList(n-1)) + else: + return Optional(self) + if minElements: + if minElements == 1: + ret = self + makeOptionalList(optElements) + else: + ret = And([self]*minElements) + makeOptionalList(optElements) + else: + ret = makeOptionalList(optElements) + else: + if minElements == 1: + ret = self + else: + ret = And([self]*minElements) + return ret + + def __rmul__(self, other): + return self.__mul__(other) + + def __or__(self, other ): + """Implementation of | operator - returns C{MatchFirst}""" + if isinstance( other, basestring ): + other = Literal( other ) + if not isinstance( other, ParserElement ): + warnings.warn("Cannot combine element of type %s with ParserElement" % type(other), + SyntaxWarning, stacklevel=2) + return None + return MatchFirst( [ self, other ] ) + + def __ror__(self, other ): + """Implementation of | operator when left operand is not a C{ParserElement}""" + if isinstance( other, basestring ): + other = Literal( other ) + if not isinstance( other, ParserElement ): + warnings.warn("Cannot combine element of type %s with ParserElement" % type(other), + SyntaxWarning, stacklevel=2) + return None + return other | self + + def __xor__(self, other ): + """Implementation of ^ operator - returns C{Or}""" + if isinstance( other, basestring ): + other = Literal( other ) + if not isinstance( other, ParserElement ): + warnings.warn("Cannot combine element of type %s with ParserElement" % type(other), + SyntaxWarning, stacklevel=2) + return None + return Or( [ self, other ] ) + + def __rxor__(self, other ): + """Implementation of ^ operator when left operand is not a C{ParserElement}""" + if isinstance( other, basestring ): + other = Literal( other ) + if not isinstance( other, ParserElement ): + warnings.warn("Cannot combine element of type %s with ParserElement" % type(other), + SyntaxWarning, stacklevel=2) + return None + return other ^ self + + def __and__(self, other ): + """Implementation of & operator - returns C{Each}""" + if isinstance( other, basestring ): + other = Literal( other ) + if not isinstance( other, ParserElement ): + warnings.warn("Cannot combine element of type %s with ParserElement" % type(other), + SyntaxWarning, stacklevel=2) + return None + return Each( [ self, other ] ) + + def __rand__(self, other ): + """Implementation of & operator when left operand is not a C{ParserElement}""" + if isinstance( other, basestring ): + other = Literal( other ) + if not isinstance( other, ParserElement ): + warnings.warn("Cannot combine element of type %s with ParserElement" % type(other), + SyntaxWarning, stacklevel=2) + return None + return other & self + + def __invert__( self ): + """Implementation of ~ operator - returns C{NotAny}""" + return NotAny( self ) + + def __call__(self, name): + """Shortcut for C{setResultsName}, with C{listAllMatches=default}:: + userdata = Word(alphas).setResultsName("name") + Word(nums+"-").setResultsName("socsecno") + could be written as:: + userdata = Word(alphas)("name") + Word(nums+"-")("socsecno") + + If C{name} is given with a trailing C{'*'} character, then C{listAllMatches} will be + passed as C{True}. + """ + return self.setResultsName(name) + + def suppress( self ): + """Suppresses the output of this C{ParserElement}; useful to keep punctuation from + cluttering up returned output. + """ + return Suppress( self ) + + def leaveWhitespace( self ): + """Disables the skipping of whitespace before matching the characters in the + C{ParserElement}'s defined pattern. This is normally only used internally by + the pyparsing module, but may be needed in some whitespace-sensitive grammars. + """ + self.skipWhitespace = False + return self + + def setWhitespaceChars( self, chars ): + """Overrides the default whitespace chars + """ + self.skipWhitespace = True + self.whiteChars = chars + self.copyDefaultWhiteChars = False + return self + + def parseWithTabs( self ): + """Overrides default behavior to expand C{}s to spaces before parsing the input string. + Must be called before C{parseString} when the input grammar contains elements that + match C{} characters.""" + self.keepTabs = True + return self + + def ignore( self, other ): + """Define expression to be ignored (e.g., comments) while doing pattern + matching; may be called repeatedly, to define multiple comment or other + ignorable patterns. + """ + if isinstance( other, Suppress ): + if other not in self.ignoreExprs: + self.ignoreExprs.append( other.copy() ) + else: + self.ignoreExprs.append( Suppress( other.copy() ) ) + return self + + def setDebugActions( self, startAction, successAction, exceptionAction ): + """Enable display of debugging messages while doing pattern matching.""" + self.debugActions = (startAction or _defaultStartDebugAction, + successAction or _defaultSuccessDebugAction, + exceptionAction or _defaultExceptionDebugAction) + self.debug = True + return self + + def setDebug( self, flag=True ): + """Enable display of debugging messages while doing pattern matching. + Set C{flag} to True to enable, False to disable.""" + if flag: + self.setDebugActions( _defaultStartDebugAction, _defaultSuccessDebugAction, _defaultExceptionDebugAction ) + else: + self.debug = False + return self + + def __str__( self ): + return self.name + + def __repr__( self ): + return _ustr(self) + + def streamline( self ): + self.streamlined = True + self.strRepr = None + return self + + def checkRecursion( self, parseElementList ): + pass + + def validate( self, validateTrace=[] ): + """Check defined expressions for valid structure, check for infinite recursive definitions.""" + self.checkRecursion( [] ) + + def parseFile( self, file_or_filename, parseAll=False ): + """Execute the parse expression on the given file or filename. + If a filename is specified (instead of a file object), + the entire file is opened, read, and closed before parsing. + """ + try: + file_contents = file_or_filename.read() + except AttributeError: + f = open(file_or_filename, "rb") + file_contents = f.read() + f.close() + try: + return self.parseString(file_contents, parseAll) + except ParseBaseException: + # catch and re-raise exception from here, clears out pyparsing internal stack trace + exc = sys.exc_info()[1] + raise exc + + def getException(self): + return ParseException("",0,self.errmsg,self) + + def __getattr__(self,aname): + if aname == "myException": + self.myException = ret = self.getException(); + return ret; + else: + raise AttributeError("no such attribute " + aname) + + def __eq__(self,other): + if isinstance(other, ParserElement): + return self is other or self.__dict__ == other.__dict__ + elif isinstance(other, basestring): + try: + self.parseString(_ustr(other), parseAll=True) + return True + except ParseBaseException: + return False + else: + return super(ParserElement,self)==other + + def __ne__(self,other): + return not (self == other) + + def __hash__(self): + return hash(id(self)) + + def __req__(self,other): + return self == other + + def __rne__(self,other): + return not (self == other) + + +class Token(ParserElement): + """Abstract C{ParserElement} subclass, for defining atomic matching patterns.""" + def __init__( self ): + super(Token,self).__init__( savelist=False ) + + def setName(self, name): + s = super(Token,self).setName(name) + self.errmsg = "Expected " + self.name + return s + + +class Empty(Token): + """An empty token, will always match.""" + def __init__( self ): + super(Empty,self).__init__() + self.name = "Empty" + self.mayReturnEmpty = True + self.mayIndexError = False + + +class NoMatch(Token): + """A token that will never match.""" + def __init__( self ): + super(NoMatch,self).__init__() + self.name = "NoMatch" + self.mayReturnEmpty = True + self.mayIndexError = False + self.errmsg = "Unmatchable token" + + def parseImpl( self, instring, loc, doActions=True ): + exc = self.myException + exc.loc = loc + exc.pstr = instring + raise exc + + +class Literal(Token): + """Token to exactly match a specified string.""" + def __init__( self, matchString ): + super(Literal,self).__init__() + self.match = matchString + self.matchLen = len(matchString) + try: + self.firstMatchChar = matchString[0] + except IndexError: + warnings.warn("null string passed to Literal; use Empty() instead", + SyntaxWarning, stacklevel=2) + self.__class__ = Empty + self.name = '"%s"' % _ustr(self.match) + self.errmsg = "Expected " + self.name + self.mayReturnEmpty = False + self.mayIndexError = False + + # Performance tuning: this routine gets called a *lot* + # if this is a single character match string and the first character matches, + # short-circuit as quickly as possible, and avoid calling startswith + #~ @profile + def parseImpl( self, instring, loc, doActions=True ): + if (instring[loc] == self.firstMatchChar and + (self.matchLen==1 or instring.startswith(self.match,loc)) ): + return loc+self.matchLen, self.match + #~ raise ParseException( instring, loc, self.errmsg ) + exc = self.myException + exc.loc = loc + exc.pstr = instring + raise exc +_L = Literal + +class Keyword(Token): + """Token to exactly match a specified string as a keyword, that is, it must be + immediately followed by a non-keyword character. Compare with C{Literal}:: + Literal("if") will match the leading C{'if'} in C{'ifAndOnlyIf'}. + Keyword("if") will not; it will only match the leading C{'if'} in C{'if x=1'}, or C{'if(y==2)'} + Accepts two optional constructor arguments in addition to the keyword string: + C{identChars} is a string of characters that would be valid identifier characters, + defaulting to all alphanumerics + "_" and "$"; C{caseless} allows case-insensitive + matching, default is C{False}. + """ + DEFAULT_KEYWORD_CHARS = alphanums+"_$" + + def __init__( self, matchString, identChars=DEFAULT_KEYWORD_CHARS, caseless=False ): + super(Keyword,self).__init__() + self.match = matchString + self.matchLen = len(matchString) + try: + self.firstMatchChar = matchString[0] + except IndexError: + warnings.warn("null string passed to Keyword; use Empty() instead", + SyntaxWarning, stacklevel=2) + self.name = '"%s"' % self.match + self.errmsg = "Expected " + self.name + self.mayReturnEmpty = False + self.mayIndexError = False + self.caseless = caseless + if caseless: + self.caselessmatch = matchString.upper() + identChars = identChars.upper() + self.identChars = set(identChars) + + def parseImpl( self, instring, loc, doActions=True ): + if self.caseless: + if ( (instring[ loc:loc+self.matchLen ].upper() == self.caselessmatch) and + (loc >= len(instring)-self.matchLen or instring[loc+self.matchLen].upper() not in self.identChars) and + (loc == 0 or instring[loc-1].upper() not in self.identChars) ): + return loc+self.matchLen, self.match + else: + if (instring[loc] == self.firstMatchChar and + (self.matchLen==1 or instring.startswith(self.match,loc)) and + (loc >= len(instring)-self.matchLen or instring[loc+self.matchLen] not in self.identChars) and + (loc == 0 or instring[loc-1] not in self.identChars) ): + return loc+self.matchLen, self.match + #~ raise ParseException( instring, loc, self.errmsg ) + exc = self.myException + exc.loc = loc + exc.pstr = instring + raise exc + + def copy(self): + c = super(Keyword,self).copy() + c.identChars = Keyword.DEFAULT_KEYWORD_CHARS + return c + + def setDefaultKeywordChars( chars ): + """Overrides the default Keyword chars + """ + Keyword.DEFAULT_KEYWORD_CHARS = chars + setDefaultKeywordChars = staticmethod(setDefaultKeywordChars) + +class CaselessLiteral(Literal): + """Token to match a specified string, ignoring case of letters. + Note: the matched results will always be in the case of the given + match string, NOT the case of the input text. + """ + def __init__( self, matchString ): + super(CaselessLiteral,self).__init__( matchString.upper() ) + # Preserve the defining literal. + self.returnString = matchString + self.name = "'%s'" % self.returnString + self.errmsg = "Expected " + self.name + + def parseImpl( self, instring, loc, doActions=True ): + if instring[ loc:loc+self.matchLen ].upper() == self.match: + return loc+self.matchLen, self.returnString + #~ raise ParseException( instring, loc, self.errmsg ) + exc = self.myException + exc.loc = loc + exc.pstr = instring + raise exc + +class CaselessKeyword(Keyword): + def __init__( self, matchString, identChars=Keyword.DEFAULT_KEYWORD_CHARS ): + super(CaselessKeyword,self).__init__( matchString, identChars, caseless=True ) + + def parseImpl( self, instring, loc, doActions=True ): + if ( (instring[ loc:loc+self.matchLen ].upper() == self.caselessmatch) and + (loc >= len(instring)-self.matchLen or instring[loc+self.matchLen].upper() not in self.identChars) ): + return loc+self.matchLen, self.match + #~ raise ParseException( instring, loc, self.errmsg ) + exc = self.myException + exc.loc = loc + exc.pstr = instring + raise exc + +class Word(Token): + """Token for matching words composed of allowed character sets. + Defined with string containing all allowed initial characters, + an optional string containing allowed body characters (if omitted, + defaults to the initial character set), and an optional minimum, + maximum, and/or exact length. The default value for C{min} is 1 (a + minimum value < 1 is not valid); the default values for C{max} and C{exact} + are 0, meaning no maximum or exact length restriction. An optional + C{exclude} parameter can list characters that might be found in + the input C{bodyChars} string; useful to define a word of all printables + except for one or two characters, for instance. + """ + def __init__( self, initChars, bodyChars=None, min=1, max=0, exact=0, asKeyword=False, excludeChars=None ): + super(Word,self).__init__() + if excludeChars: + initChars = ''.join([c for c in initChars if c not in excludeChars]) + if bodyChars: + bodyChars = ''.join([c for c in bodyChars if c not in excludeChars]) + self.initCharsOrig = initChars + self.initChars = set(initChars) + if bodyChars : + self.bodyCharsOrig = bodyChars + self.bodyChars = set(bodyChars) + else: + self.bodyCharsOrig = initChars + self.bodyChars = set(initChars) + + self.maxSpecified = max > 0 + + if min < 1: + raise ValueError("cannot specify a minimum length < 1; use Optional(Word()) if zero-length word is permitted") + + self.minLen = min + + if max > 0: + self.maxLen = max + else: + self.maxLen = _MAX_INT + + if exact > 0: + self.maxLen = exact + self.minLen = exact + + self.name = _ustr(self) + self.errmsg = "Expected " + self.name + self.mayIndexError = False + self.asKeyword = asKeyword + + if ' ' not in self.initCharsOrig+self.bodyCharsOrig and (min==1 and max==0 and exact==0): + if self.bodyCharsOrig == self.initCharsOrig: + self.reString = "[%s]+" % _escapeRegexRangeChars(self.initCharsOrig) + elif len(self.bodyCharsOrig) == 1: + self.reString = "%s[%s]*" % \ + (re.escape(self.initCharsOrig), + _escapeRegexRangeChars(self.bodyCharsOrig),) + else: + self.reString = "[%s][%s]*" % \ + (_escapeRegexRangeChars(self.initCharsOrig), + _escapeRegexRangeChars(self.bodyCharsOrig),) + if self.asKeyword: + self.reString = r"\b"+self.reString+r"\b" + try: + self.re = re.compile( self.reString ) + except: + self.re = None + + def parseImpl( self, instring, loc, doActions=True ): + if self.re: + result = self.re.match(instring,loc) + if not result: + exc = self.myException + exc.loc = loc + exc.pstr = instring + raise exc + + loc = result.end() + return loc, result.group() + + if not(instring[ loc ] in self.initChars): + #~ raise ParseException( instring, loc, self.errmsg ) + exc = self.myException + exc.loc = loc + exc.pstr = instring + raise exc + start = loc + loc += 1 + instrlen = len(instring) + bodychars = self.bodyChars + maxloc = start + self.maxLen + maxloc = min( maxloc, instrlen ) + while loc < maxloc and instring[loc] in bodychars: + loc += 1 + + throwException = False + if loc - start < self.minLen: + throwException = True + if self.maxSpecified and loc < instrlen and instring[loc] in bodychars: + throwException = True + if self.asKeyword: + if (start>0 and instring[start-1] in bodychars) or (loc4: + return s[:4]+"..." + else: + return s + + if ( self.initCharsOrig != self.bodyCharsOrig ): + self.strRepr = "W:(%s,%s)" % ( charsAsStr(self.initCharsOrig), charsAsStr(self.bodyCharsOrig) ) + else: + self.strRepr = "W:(%s)" % charsAsStr(self.initCharsOrig) + + return self.strRepr + + +class Regex(Token): + """Token for matching strings that match a given regular expression. + Defined with string specifying the regular expression in a form recognized by the inbuilt Python re module. + """ + compiledREtype = type(re.compile("[A-Z]")) + def __init__( self, pattern, flags=0): + """The parameters C{pattern} and C{flags} are passed to the C{re.compile()} function as-is. See the Python C{re} module for an explanation of the acceptable patterns and flags.""" + super(Regex,self).__init__() + + if isinstance(pattern, basestring): + if len(pattern) == 0: + warnings.warn("null string passed to Regex; use Empty() instead", + SyntaxWarning, stacklevel=2) + + self.pattern = pattern + self.flags = flags + + try: + self.re = re.compile(self.pattern, self.flags) + self.reString = self.pattern + except sre_constants.error: + warnings.warn("invalid pattern (%s) passed to Regex" % pattern, + SyntaxWarning, stacklevel=2) + raise + + elif isinstance(pattern, Regex.compiledREtype): + self.re = pattern + self.pattern = \ + self.reString = str(pattern) + self.flags = flags + + else: + raise ValueError("Regex may only be constructed with a string or a compiled RE object") + + self.name = _ustr(self) + self.errmsg = "Expected " + self.name + self.mayIndexError = False + self.mayReturnEmpty = True + + def parseImpl( self, instring, loc, doActions=True ): + result = self.re.match(instring,loc) + if not result: + exc = self.myException + exc.loc = loc + exc.pstr = instring + raise exc + + loc = result.end() + d = result.groupdict() + ret = ParseResults(result.group()) + if d: + for k in d: + ret[k] = d[k] + return loc,ret + + def __str__( self ): + try: + return super(Regex,self).__str__() + except: + pass + + if self.strRepr is None: + self.strRepr = "Re:(%s)" % repr(self.pattern) + + return self.strRepr + + +class QuotedString(Token): + """Token for matching strings that are delimited by quoting characters. + """ + def __init__( self, quoteChar, escChar=None, escQuote=None, multiline=False, unquoteResults=True, endQuoteChar=None): + """ + Defined with the following parameters: + - quoteChar - string of one or more characters defining the quote delimiting string + - escChar - character to escape quotes, typically backslash (default=None) + - escQuote - special quote sequence to escape an embedded quote string (such as SQL's "" to escape an embedded ") (default=None) + - multiline - boolean indicating whether quotes can span multiple lines (default=False) + - unquoteResults - boolean indicating whether the matched text should be unquoted (default=True) + - endQuoteChar - string of one or more characters defining the end of the quote delimited string (default=None => same as quoteChar) + """ + super(QuotedString,self).__init__() + + # remove white space from quote chars - wont work anyway + quoteChar = quoteChar.strip() + if len(quoteChar) == 0: + warnings.warn("quoteChar cannot be the empty string",SyntaxWarning,stacklevel=2) + raise SyntaxError() + + if endQuoteChar is None: + endQuoteChar = quoteChar + else: + endQuoteChar = endQuoteChar.strip() + if len(endQuoteChar) == 0: + warnings.warn("endQuoteChar cannot be the empty string",SyntaxWarning,stacklevel=2) + raise SyntaxError() + + self.quoteChar = quoteChar + self.quoteCharLen = len(quoteChar) + self.firstQuoteChar = quoteChar[0] + self.endQuoteChar = endQuoteChar + self.endQuoteCharLen = len(endQuoteChar) + self.escChar = escChar + self.escQuote = escQuote + self.unquoteResults = unquoteResults + + if multiline: + self.flags = re.MULTILINE | re.DOTALL + self.pattern = r'%s(?:[^%s%s]' % \ + ( re.escape(self.quoteChar), + _escapeRegexRangeChars(self.endQuoteChar[0]), + (escChar is not None and _escapeRegexRangeChars(escChar) or '') ) + else: + self.flags = 0 + self.pattern = r'%s(?:[^%s\n\r%s]' % \ + ( re.escape(self.quoteChar), + _escapeRegexRangeChars(self.endQuoteChar[0]), + (escChar is not None and _escapeRegexRangeChars(escChar) or '') ) + if len(self.endQuoteChar) > 1: + self.pattern += ( + '|(?:' + ')|(?:'.join(["%s[^%s]" % (re.escape(self.endQuoteChar[:i]), + _escapeRegexRangeChars(self.endQuoteChar[i])) + for i in range(len(self.endQuoteChar)-1,0,-1)]) + ')' + ) + if escQuote: + self.pattern += (r'|(?:%s)' % re.escape(escQuote)) + if escChar: + self.pattern += (r'|(?:%s.)' % re.escape(escChar)) + charset = ''.join(set(self.quoteChar[0]+self.endQuoteChar[0])).replace('^',r'\^').replace('-',r'\-') + self.escCharReplacePattern = re.escape(self.escChar)+("([%s])" % charset) + self.pattern += (r')*%s' % re.escape(self.endQuoteChar)) + + try: + self.re = re.compile(self.pattern, self.flags) + self.reString = self.pattern + except sre_constants.error: + warnings.warn("invalid pattern (%s) passed to Regex" % self.pattern, + SyntaxWarning, stacklevel=2) + raise + + self.name = _ustr(self) + self.errmsg = "Expected " + self.name + self.mayIndexError = False + self.mayReturnEmpty = True + + def parseImpl( self, instring, loc, doActions=True ): + result = instring[loc] == self.firstQuoteChar and self.re.match(instring,loc) or None + if not result: + exc = self.myException + exc.loc = loc + exc.pstr = instring + raise exc + + loc = result.end() + ret = result.group() + + if self.unquoteResults: + + # strip off quotes + ret = ret[self.quoteCharLen:-self.endQuoteCharLen] + + if isinstance(ret,basestring): + # replace escaped characters + if self.escChar: + ret = re.sub(self.escCharReplacePattern,"\g<1>",ret) + + # replace escaped quotes + if self.escQuote: + ret = ret.replace(self.escQuote, self.endQuoteChar) + + return loc, ret + + def __str__( self ): + try: + return super(QuotedString,self).__str__() + except: + pass + + if self.strRepr is None: + self.strRepr = "quoted string, starting with %s ending with %s" % (self.quoteChar, self.endQuoteChar) + + return self.strRepr + + +class CharsNotIn(Token): + """Token for matching words composed of characters *not* in a given set. + Defined with string containing all disallowed characters, and an optional + minimum, maximum, and/or exact length. The default value for C{min} is 1 (a + minimum value < 1 is not valid); the default values for C{max} and C{exact} + are 0, meaning no maximum or exact length restriction. + """ + def __init__( self, notChars, min=1, max=0, exact=0 ): + super(CharsNotIn,self).__init__() + self.skipWhitespace = False + self.notChars = notChars + + if min < 1: + raise ValueError("cannot specify a minimum length < 1; use Optional(CharsNotIn()) if zero-length char group is permitted") + + self.minLen = min + + if max > 0: + self.maxLen = max + else: + self.maxLen = _MAX_INT + + if exact > 0: + self.maxLen = exact + self.minLen = exact + + self.name = _ustr(self) + self.errmsg = "Expected " + self.name + self.mayReturnEmpty = ( self.minLen == 0 ) + self.mayIndexError = False + + def parseImpl( self, instring, loc, doActions=True ): + if instring[loc] in self.notChars: + #~ raise ParseException( instring, loc, self.errmsg ) + exc = self.myException + exc.loc = loc + exc.pstr = instring + raise exc + + start = loc + loc += 1 + notchars = self.notChars + maxlen = min( start+self.maxLen, len(instring) ) + while loc < maxlen and \ + (instring[loc] not in notchars): + loc += 1 + + if loc - start < self.minLen: + #~ raise ParseException( instring, loc, self.errmsg ) + exc = self.myException + exc.loc = loc + exc.pstr = instring + raise exc + + return loc, instring[start:loc] + + def __str__( self ): + try: + return super(CharsNotIn, self).__str__() + except: + pass + + if self.strRepr is None: + if len(self.notChars) > 4: + self.strRepr = "!W:(%s...)" % self.notChars[:4] + else: + self.strRepr = "!W:(%s)" % self.notChars + + return self.strRepr + +class White(Token): + """Special matching class for matching whitespace. Normally, whitespace is ignored + by pyparsing grammars. This class is included when some whitespace structures + are significant. Define with a string containing the whitespace characters to be + matched; default is C{" \\t\\r\\n"}. Also takes optional C{min}, C{max}, and C{exact} arguments, + as defined for the C{Word} class.""" + whiteStrs = { + " " : "", + "\t": "", + "\n": "", + "\r": "", + "\f": "", + } + def __init__(self, ws=" \t\r\n", min=1, max=0, exact=0): + super(White,self).__init__() + self.matchWhite = ws + self.setWhitespaceChars( "".join([c for c in self.whiteChars if c not in self.matchWhite]) ) + #~ self.leaveWhitespace() + self.name = ("".join([White.whiteStrs[c] for c in self.matchWhite])) + self.mayReturnEmpty = True + self.errmsg = "Expected " + self.name + + self.minLen = min + + if max > 0: + self.maxLen = max + else: + self.maxLen = _MAX_INT + + if exact > 0: + self.maxLen = exact + self.minLen = exact + + def parseImpl( self, instring, loc, doActions=True ): + if not(instring[ loc ] in self.matchWhite): + #~ raise ParseException( instring, loc, self.errmsg ) + exc = self.myException + exc.loc = loc + exc.pstr = instring + raise exc + start = loc + loc += 1 + maxloc = start + self.maxLen + maxloc = min( maxloc, len(instring) ) + while loc < maxloc and instring[loc] in self.matchWhite: + loc += 1 + + if loc - start < self.minLen: + #~ raise ParseException( instring, loc, self.errmsg ) + exc = self.myException + exc.loc = loc + exc.pstr = instring + raise exc + + return loc, instring[start:loc] + + +class _PositionToken(Token): + def __init__( self ): + super(_PositionToken,self).__init__() + self.name=self.__class__.__name__ + self.mayReturnEmpty = True + self.mayIndexError = False + +class GoToColumn(_PositionToken): + """Token to advance to a specific column of input text; useful for tabular report scraping.""" + def __init__( self, colno ): + super(GoToColumn,self).__init__() + self.col = colno + + def preParse( self, instring, loc ): + if col(loc,instring) != self.col: + instrlen = len(instring) + if self.ignoreExprs: + loc = self._skipIgnorables( instring, loc ) + while loc < instrlen and instring[loc].isspace() and col( loc, instring ) != self.col : + loc += 1 + return loc + + def parseImpl( self, instring, loc, doActions=True ): + thiscol = col( loc, instring ) + if thiscol > self.col: + raise ParseException( instring, loc, "Text not in expected column", self ) + newloc = loc + self.col - thiscol + ret = instring[ loc: newloc ] + return newloc, ret + +class LineStart(_PositionToken): + """Matches if current position is at the beginning of a line within the parse string""" + def __init__( self ): + super(LineStart,self).__init__() + self.setWhitespaceChars( ParserElement.DEFAULT_WHITE_CHARS.replace("\n","") ) + self.errmsg = "Expected start of line" + + def preParse( self, instring, loc ): + preloc = super(LineStart,self).preParse(instring,loc) + if instring[preloc] == "\n": + loc += 1 + return loc + + def parseImpl( self, instring, loc, doActions=True ): + if not( loc==0 or + (loc == self.preParse( instring, 0 )) or + (instring[loc-1] == "\n") ): #col(loc, instring) != 1: + #~ raise ParseException( instring, loc, "Expected start of line" ) + exc = self.myException + exc.loc = loc + exc.pstr = instring + raise exc + return loc, [] + +class LineEnd(_PositionToken): + """Matches if current position is at the end of a line within the parse string""" + def __init__( self ): + super(LineEnd,self).__init__() + self.setWhitespaceChars( ParserElement.DEFAULT_WHITE_CHARS.replace("\n","") ) + self.errmsg = "Expected end of line" + + def parseImpl( self, instring, loc, doActions=True ): + if loc len(instring): + return loc, [] + else: + exc = self.myException + exc.loc = loc + exc.pstr = instring + raise exc + +class WordStart(_PositionToken): + """Matches if the current position is at the beginning of a Word, and + is not preceded by any character in a given set of C{wordChars} + (default=C{printables}). To emulate the C{\b} behavior of regular expressions, + use C{WordStart(alphanums)}. C{WordStart} will also match at the beginning of + the string being parsed, or at the beginning of a line. + """ + def __init__(self, wordChars = printables): + super(WordStart,self).__init__() + self.wordChars = set(wordChars) + self.errmsg = "Not at the start of a word" + + def parseImpl(self, instring, loc, doActions=True ): + if loc != 0: + if (instring[loc-1] in self.wordChars or + instring[loc] not in self.wordChars): + exc = self.myException + exc.loc = loc + exc.pstr = instring + raise exc + return loc, [] + +class WordEnd(_PositionToken): + """Matches if the current position is at the end of a Word, and + is not followed by any character in a given set of C{wordChars} + (default=C{printables}). To emulate the C{\b} behavior of regular expressions, + use C{WordEnd(alphanums)}. C{WordEnd} will also match at the end of + the string being parsed, or at the end of a line. + """ + def __init__(self, wordChars = printables): + super(WordEnd,self).__init__() + self.wordChars = set(wordChars) + self.skipWhitespace = False + self.errmsg = "Not at the end of a word" + + def parseImpl(self, instring, loc, doActions=True ): + instrlen = len(instring) + if instrlen>0 and loc maxExcLoc: + maxException = err + maxExcLoc = err.loc + except IndexError: + if len(instring) > maxExcLoc: + maxException = ParseException(instring,len(instring),e.errmsg,self) + maxExcLoc = len(instring) + else: + if loc2 > maxMatchLoc: + maxMatchLoc = loc2 + maxMatchExp = e + + if maxMatchLoc < 0: + if maxException is not None: + raise maxException + else: + raise ParseException(instring, loc, "no defined alternatives to match", self) + + return maxMatchExp._parse( instring, loc, doActions ) + + def __ixor__(self, other ): + if isinstance( other, basestring ): + other = Literal( other ) + return self.append( other ) #Or( [ self, other ] ) + + def __str__( self ): + if hasattr(self,"name"): + return self.name + + if self.strRepr is None: + self.strRepr = "{" + " ^ ".join( [ _ustr(e) for e in self.exprs ] ) + "}" + + return self.strRepr + + def checkRecursion( self, parseElementList ): + subRecCheckList = parseElementList[:] + [ self ] + for e in self.exprs: + e.checkRecursion( subRecCheckList ) + + +class MatchFirst(ParseExpression): + """Requires that at least one C{ParseExpression} is found. + If two expressions match, the first one listed is the one that will match. + May be constructed using the C{'|'} operator. + """ + def __init__( self, exprs, savelist = False ): + super(MatchFirst,self).__init__(exprs, savelist) + if exprs: + self.mayReturnEmpty = False + for e in self.exprs: + if e.mayReturnEmpty: + self.mayReturnEmpty = True + break + else: + self.mayReturnEmpty = True + + def parseImpl( self, instring, loc, doActions=True ): + maxExcLoc = -1 + maxException = None + for e in self.exprs: + try: + ret = e._parse( instring, loc, doActions ) + return ret + except ParseException, err: + if err.loc > maxExcLoc: + maxException = err + maxExcLoc = err.loc + except IndexError: + if len(instring) > maxExcLoc: + maxException = ParseException(instring,len(instring),e.errmsg,self) + maxExcLoc = len(instring) + + # only got here if no expression matched, raise exception for match that made it the furthest + else: + if maxException is not None: + raise maxException + else: + raise ParseException(instring, loc, "no defined alternatives to match", self) + + def __ior__(self, other ): + if isinstance( other, basestring ): + other = Literal( other ) + return self.append( other ) #MatchFirst( [ self, other ] ) + + def __str__( self ): + if hasattr(self,"name"): + return self.name + + if self.strRepr is None: + self.strRepr = "{" + " | ".join( [ _ustr(e) for e in self.exprs ] ) + "}" + + return self.strRepr + + def checkRecursion( self, parseElementList ): + subRecCheckList = parseElementList[:] + [ self ] + for e in self.exprs: + e.checkRecursion( subRecCheckList ) + + +class Each(ParseExpression): + """Requires all given C{ParseExpression}s to be found, but in any order. + Expressions may be separated by whitespace. + May be constructed using the C{'&'} operator. + """ + def __init__( self, exprs, savelist = True ): + super(Each,self).__init__(exprs, savelist) + self.mayReturnEmpty = True + for e in self.exprs: + if not e.mayReturnEmpty: + self.mayReturnEmpty = False + break + self.skipWhitespace = True + self.initExprGroups = True + + def parseImpl( self, instring, loc, doActions=True ): + if self.initExprGroups: + opt1 = [ e.expr for e in self.exprs if isinstance(e,Optional) ] + opt2 = [ e for e in self.exprs if e.mayReturnEmpty and e not in opt1 ] + self.optionals = opt1 + opt2 + self.multioptionals = [ e.expr for e in self.exprs if isinstance(e,ZeroOrMore) ] + self.multirequired = [ e.expr for e in self.exprs if isinstance(e,OneOrMore) ] + self.required = [ e for e in self.exprs if not isinstance(e,(Optional,ZeroOrMore,OneOrMore)) ] + self.required += self.multirequired + self.initExprGroups = False + tmpLoc = loc + tmpReqd = self.required[:] + tmpOpt = self.optionals[:] + matchOrder = [] + + keepMatching = True + while keepMatching: + tmpExprs = tmpReqd + tmpOpt + self.multioptionals + self.multirequired + failed = [] + for e in tmpExprs: + try: + tmpLoc = e.tryParse( instring, tmpLoc ) + except ParseException: + failed.append(e) + else: + matchOrder.append(e) + if e in tmpReqd: + tmpReqd.remove(e) + elif e in tmpOpt: + tmpOpt.remove(e) + if len(failed) == len(tmpExprs): + keepMatching = False + + if tmpReqd: + missing = ", ".join( [ _ustr(e) for e in tmpReqd ] ) + raise ParseException(instring,loc,"Missing one or more required elements (%s)" % missing ) + + # add any unmatched Optionals, in case they have default values defined + matchOrder += [e for e in self.exprs if isinstance(e,Optional) and e.expr in tmpOpt] + + resultlist = [] + for e in matchOrder: + loc,results = e._parse(instring,loc,doActions) + resultlist.append(results) + + finalResults = ParseResults([]) + for r in resultlist: + dups = {} + for k in r.keys(): + if k in finalResults.keys(): + tmp = ParseResults(finalResults[k]) + tmp += ParseResults(r[k]) + dups[k] = tmp + finalResults += ParseResults(r) + for k,v in dups.items(): + finalResults[k] = v + return loc, finalResults + + def __str__( self ): + if hasattr(self,"name"): + return self.name + + if self.strRepr is None: + self.strRepr = "{" + " & ".join( [ _ustr(e) for e in self.exprs ] ) + "}" + + return self.strRepr + + def checkRecursion( self, parseElementList ): + subRecCheckList = parseElementList[:] + [ self ] + for e in self.exprs: + e.checkRecursion( subRecCheckList ) + + +class ParseElementEnhance(ParserElement): + """Abstract subclass of C{ParserElement}, for combining and post-processing parsed tokens.""" + def __init__( self, expr, savelist=False ): + super(ParseElementEnhance,self).__init__(savelist) + if isinstance( expr, basestring ): + expr = Literal(expr) + self.expr = expr + self.strRepr = None + if expr is not None: + self.mayIndexError = expr.mayIndexError + self.mayReturnEmpty = expr.mayReturnEmpty + self.setWhitespaceChars( expr.whiteChars ) + self.skipWhitespace = expr.skipWhitespace + self.saveAsList = expr.saveAsList + self.callPreparse = expr.callPreparse + self.ignoreExprs.extend(expr.ignoreExprs) + + def parseImpl( self, instring, loc, doActions=True ): + if self.expr is not None: + return self.expr._parse( instring, loc, doActions, callPreParse=False ) + else: + raise ParseException("",loc,self.errmsg,self) + + def leaveWhitespace( self ): + self.skipWhitespace = False + self.expr = self.expr.copy() + if self.expr is not None: + self.expr.leaveWhitespace() + return self + + def ignore( self, other ): + if isinstance( other, Suppress ): + if other not in self.ignoreExprs: + super( ParseElementEnhance, self).ignore( other ) + if self.expr is not None: + self.expr.ignore( self.ignoreExprs[-1] ) + else: + super( ParseElementEnhance, self).ignore( other ) + if self.expr is not None: + self.expr.ignore( self.ignoreExprs[-1] ) + return self + + def streamline( self ): + super(ParseElementEnhance,self).streamline() + if self.expr is not None: + self.expr.streamline() + return self + + def checkRecursion( self, parseElementList ): + if self in parseElementList: + raise RecursiveGrammarException( parseElementList+[self] ) + subRecCheckList = parseElementList[:] + [ self ] + if self.expr is not None: + self.expr.checkRecursion( subRecCheckList ) + + def validate( self, validateTrace=[] ): + tmp = validateTrace[:]+[self] + if self.expr is not None: + self.expr.validate(tmp) + self.checkRecursion( [] ) + + def __str__( self ): + try: + return super(ParseElementEnhance,self).__str__() + except: + pass + + if self.strRepr is None and self.expr is not None: + self.strRepr = "%s:(%s)" % ( self.__class__.__name__, _ustr(self.expr) ) + return self.strRepr + + +class FollowedBy(ParseElementEnhance): + """Lookahead matching of the given parse expression. C{FollowedBy} + does *not* advance the parsing position within the input string, it only + verifies that the specified parse expression matches at the current + position. C{FollowedBy} always returns a null token list.""" + def __init__( self, expr ): + super(FollowedBy,self).__init__(expr) + self.mayReturnEmpty = True + + def parseImpl( self, instring, loc, doActions=True ): + self.expr.tryParse( instring, loc ) + return loc, [] + + +class NotAny(ParseElementEnhance): + """Lookahead to disallow matching with the given parse expression. C{NotAny} + does *not* advance the parsing position within the input string, it only + verifies that the specified parse expression does *not* match at the current + position. Also, C{NotAny} does *not* skip over leading whitespace. C{NotAny} + always returns a null token list. May be constructed using the '~' operator.""" + def __init__( self, expr ): + super(NotAny,self).__init__(expr) + #~ self.leaveWhitespace() + self.skipWhitespace = False # do NOT use self.leaveWhitespace(), don't want to propagate to exprs + self.mayReturnEmpty = True + self.errmsg = "Found unwanted token, "+_ustr(self.expr) + + def parseImpl( self, instring, loc, doActions=True ): + try: + self.expr.tryParse( instring, loc ) + except (ParseException,IndexError): + pass + else: + #~ raise ParseException(instring, loc, self.errmsg ) + exc = self.myException + exc.loc = loc + exc.pstr = instring + raise exc + return loc, [] + + def __str__( self ): + if hasattr(self,"name"): + return self.name + + if self.strRepr is None: + self.strRepr = "~{" + _ustr(self.expr) + "}" + + return self.strRepr + + +class ZeroOrMore(ParseElementEnhance): + """Optional repetition of zero or more of the given expression.""" + def __init__( self, expr ): + super(ZeroOrMore,self).__init__(expr) + self.mayReturnEmpty = True + + def parseImpl( self, instring, loc, doActions=True ): + tokens = [] + try: + loc, tokens = self.expr._parse( instring, loc, doActions, callPreParse=False ) + hasIgnoreExprs = ( len(self.ignoreExprs) > 0 ) + while 1: + if hasIgnoreExprs: + preloc = self._skipIgnorables( instring, loc ) + else: + preloc = loc + loc, tmptokens = self.expr._parse( instring, preloc, doActions ) + if tmptokens or tmptokens.keys(): + tokens += tmptokens + except (ParseException,IndexError): + pass + + return loc, tokens + + def __str__( self ): + if hasattr(self,"name"): + return self.name + + if self.strRepr is None: + self.strRepr = "[" + _ustr(self.expr) + "]..." + + return self.strRepr + + def setResultsName( self, name, listAllMatches=False ): + ret = super(ZeroOrMore,self).setResultsName(name,listAllMatches) + ret.saveAsList = True + return ret + + +class OneOrMore(ParseElementEnhance): + """Repetition of one or more of the given expression.""" + def parseImpl( self, instring, loc, doActions=True ): + # must be at least one + loc, tokens = self.expr._parse( instring, loc, doActions, callPreParse=False ) + try: + hasIgnoreExprs = ( len(self.ignoreExprs) > 0 ) + while 1: + if hasIgnoreExprs: + preloc = self._skipIgnorables( instring, loc ) + else: + preloc = loc + loc, tmptokens = self.expr._parse( instring, preloc, doActions ) + if tmptokens or tmptokens.keys(): + tokens += tmptokens + except (ParseException,IndexError): + pass + + return loc, tokens + + def __str__( self ): + if hasattr(self,"name"): + return self.name + + if self.strRepr is None: + self.strRepr = "{" + _ustr(self.expr) + "}..." + + return self.strRepr + + def setResultsName( self, name, listAllMatches=False ): + ret = super(OneOrMore,self).setResultsName(name,listAllMatches) + ret.saveAsList = True + return ret + +class _NullToken(object): + def __bool__(self): + return False + __nonzero__ = __bool__ + def __str__(self): + return "" + +_optionalNotMatched = _NullToken() +class Optional(ParseElementEnhance): + """Optional matching of the given expression. + A default return string can also be specified, if the optional expression + is not found. + """ + def __init__( self, exprs, default=_optionalNotMatched ): + super(Optional,self).__init__( exprs, savelist=False ) + self.defaultValue = default + self.mayReturnEmpty = True + + def parseImpl( self, instring, loc, doActions=True ): + try: + loc, tokens = self.expr._parse( instring, loc, doActions, callPreParse=False ) + except (ParseException,IndexError): + if self.defaultValue is not _optionalNotMatched: + if self.expr.resultsName: + tokens = ParseResults([ self.defaultValue ]) + tokens[self.expr.resultsName] = self.defaultValue + else: + tokens = [ self.defaultValue ] + else: + tokens = [] + return loc, tokens + + def __str__( self ): + if hasattr(self,"name"): + return self.name + + if self.strRepr is None: + self.strRepr = "[" + _ustr(self.expr) + "]" + + return self.strRepr + + +class SkipTo(ParseElementEnhance): + """Token for skipping over all undefined text until the matched expression is found. + If C{include} is set to true, the matched expression is also parsed (the skipped text + and matched expression are returned as a 2-element list). The C{ignore} + argument is used to define grammars (typically quoted strings and comments) that + might contain false matches. + """ + def __init__( self, other, include=False, ignore=None, failOn=None ): + super( SkipTo, self ).__init__( other ) + self.ignoreExpr = ignore + self.mayReturnEmpty = True + self.mayIndexError = False + self.includeMatch = include + self.asList = False + if failOn is not None and isinstance(failOn, basestring): + self.failOn = Literal(failOn) + else: + self.failOn = failOn + self.errmsg = "No match found for "+_ustr(self.expr) + + def parseImpl( self, instring, loc, doActions=True ): + startLoc = loc + instrlen = len(instring) + expr = self.expr + failParse = False + while loc <= instrlen: + try: + if self.failOn: + try: + self.failOn.tryParse(instring, loc) + except ParseBaseException: + pass + else: + failParse = True + raise ParseException(instring, loc, "Found expression " + str(self.failOn)) + failParse = False + if self.ignoreExpr is not None: + while 1: + try: + loc = self.ignoreExpr.tryParse(instring,loc) + # print "found ignoreExpr, advance to", loc + except ParseBaseException: + break + expr._parse( instring, loc, doActions=False, callPreParse=False ) + skipText = instring[startLoc:loc] + if self.includeMatch: + loc,mat = expr._parse(instring,loc,doActions,callPreParse=False) + if mat: + skipRes = ParseResults( skipText ) + skipRes += mat + return loc, [ skipRes ] + else: + return loc, [ skipText ] + else: + return loc, [ skipText ] + except (ParseException,IndexError): + if failParse: + raise + else: + loc += 1 + exc = self.myException + exc.loc = loc + exc.pstr = instring + raise exc + +class Forward(ParseElementEnhance): + """Forward declaration of an expression to be defined later - + used for recursive grammars, such as algebraic infix notation. + When the expression is known, it is assigned to the C{Forward} variable using the '<<' operator. + + Note: take care when assigning to C{Forward} not to overlook precedence of operators. + Specifically, '|' has a lower precedence than '<<', so that:: + fwdExpr << a | b | c + will actually be evaluated as:: + (fwdExpr << a) | b | c + thereby leaving b and c out as parseable alternatives. It is recommended that you + explicitly group the values inserted into the C{Forward}:: + fwdExpr << (a | b | c) + """ + def __init__( self, other=None ): + super(Forward,self).__init__( other, savelist=False ) + + def __lshift__( self, other ): + if isinstance( other, basestring ): + other = Literal(other) + self.expr = other + self.mayReturnEmpty = other.mayReturnEmpty + self.strRepr = None + self.mayIndexError = self.expr.mayIndexError + self.mayReturnEmpty = self.expr.mayReturnEmpty + self.setWhitespaceChars( self.expr.whiteChars ) + self.skipWhitespace = self.expr.skipWhitespace + self.saveAsList = self.expr.saveAsList + self.ignoreExprs.extend(self.expr.ignoreExprs) + return None + + def leaveWhitespace( self ): + self.skipWhitespace = False + return self + + def streamline( self ): + if not self.streamlined: + self.streamlined = True + if self.expr is not None: + self.expr.streamline() + return self + + def validate( self, validateTrace=[] ): + if self not in validateTrace: + tmp = validateTrace[:]+[self] + if self.expr is not None: + self.expr.validate(tmp) + self.checkRecursion([]) + + def __str__( self ): + if hasattr(self,"name"): + return self.name + + self._revertClass = self.__class__ + self.__class__ = _ForwardNoRecurse + try: + if self.expr is not None: + retString = _ustr(self.expr) + else: + retString = "None" + finally: + self.__class__ = self._revertClass + return self.__class__.__name__ + ": " + retString + + def copy(self): + if self.expr is not None: + return super(Forward,self).copy() + else: + ret = Forward() + ret << self + return ret + +class _ForwardNoRecurse(Forward): + def __str__( self ): + return "..." + +class TokenConverter(ParseElementEnhance): + """Abstract subclass of C{ParseExpression}, for converting parsed results.""" + def __init__( self, expr, savelist=False ): + super(TokenConverter,self).__init__( expr )#, savelist ) + self.saveAsList = False + +class Upcase(TokenConverter): + """Converter to upper case all matching tokens.""" + def __init__(self, *args): + super(Upcase,self).__init__(*args) + warnings.warn("Upcase class is deprecated, use upcaseTokens parse action instead", + DeprecationWarning,stacklevel=2) + + def postParse( self, instring, loc, tokenlist ): + return list(map( string.upper, tokenlist )) + + +class Combine(TokenConverter): + """Converter to concatenate all matching tokens to a single string. + By default, the matching patterns must also be contiguous in the input string; + this can be disabled by specifying C{'adjacent=False'} in the constructor. + """ + def __init__( self, expr, joinString="", adjacent=True ): + super(Combine,self).__init__( expr ) + # suppress whitespace-stripping in contained parse expressions, but re-enable it on the Combine itself + if adjacent: + self.leaveWhitespace() + self.adjacent = adjacent + self.skipWhitespace = True + self.joinString = joinString + self.callPreparse = True + + def ignore( self, other ): + if self.adjacent: + ParserElement.ignore(self, other) + else: + super( Combine, self).ignore( other ) + return self + + def postParse( self, instring, loc, tokenlist ): + retToks = tokenlist.copy() + del retToks[:] + retToks += ParseResults([ "".join(tokenlist._asStringList(self.joinString)) ], modal=self.modalResults) + + if self.resultsName and len(retToks.keys())>0: + return [ retToks ] + else: + return retToks + +class Group(TokenConverter): + """Converter to return the matched tokens as a list - useful for returning tokens of C{ZeroOrMore} and C{OneOrMore} expressions.""" + def __init__( self, expr ): + super(Group,self).__init__( expr ) + self.saveAsList = True + + def postParse( self, instring, loc, tokenlist ): + return [ tokenlist ] + +class Dict(TokenConverter): + """Converter to return a repetitive expression as a list, but also as a dictionary. + Each element can also be referenced using the first token in the expression as its key. + Useful for tabular report scraping when the first column can be used as a item key. + """ + def __init__( self, exprs ): + super(Dict,self).__init__( exprs ) + self.saveAsList = True + + def postParse( self, instring, loc, tokenlist ): + for i,tok in enumerate(tokenlist): + if len(tok) == 0: + continue + ikey = tok[0] + if isinstance(ikey,int): + ikey = _ustr(tok[0]).strip() + if len(tok)==1: + tokenlist[ikey] = _ParseResultsWithOffset("",i) + elif len(tok)==2 and not isinstance(tok[1],ParseResults): + tokenlist[ikey] = _ParseResultsWithOffset(tok[1],i) + else: + dictvalue = tok.copy() #ParseResults(i) + del dictvalue[0] + if len(dictvalue)!= 1 or (isinstance(dictvalue,ParseResults) and dictvalue.keys()): + tokenlist[ikey] = _ParseResultsWithOffset(dictvalue,i) + else: + tokenlist[ikey] = _ParseResultsWithOffset(dictvalue[0],i) + + if self.resultsName: + return [ tokenlist ] + else: + return tokenlist + + +class Suppress(TokenConverter): + """Converter for ignoring the results of a parsed expression.""" + def postParse( self, instring, loc, tokenlist ): + return [] + + def suppress( self ): + return self + + +class OnlyOnce(object): + """Wrapper for parse actions, to ensure they are only called once.""" + def __init__(self, methodCall): + self.callable = _trim_arity(methodCall) + self.called = False + def __call__(self,s,l,t): + if not self.called: + results = self.callable(s,l,t) + self.called = True + return results + raise ParseException(s,l,"") + def reset(self): + self.called = False + +def traceParseAction(f): + """Decorator for debugging parse actions.""" + f = _trim_arity(f) + def z(*paArgs): + thisFunc = f.func_name + s,l,t = paArgs[-3:] + if len(paArgs)>3: + thisFunc = paArgs[0].__class__.__name__ + '.' + thisFunc + sys.stderr.write( ">>entering %s(line: '%s', %d, %s)\n" % (thisFunc,line(l,s),l,t) ) + try: + ret = f(*paArgs) + except Exception: + exc = sys.exc_info()[1] + sys.stderr.write( "<", "|".join( [ _escapeRegexChars(sym) for sym in symbols] )) + try: + if len(symbols)==len("".join(symbols)): + return Regex( "[%s]" % "".join( [ _escapeRegexRangeChars(sym) for sym in symbols] ) ) + else: + return Regex( "|".join( [ re.escape(sym) for sym in symbols] ) ) + except: + warnings.warn("Exception creating Regex for oneOf, building MatchFirst", + SyntaxWarning, stacklevel=2) + + + # last resort, just use MatchFirst + return MatchFirst( [ parseElementClass(sym) for sym in symbols ] ) + +def dictOf( key, value ): + """Helper to easily and clearly define a dictionary by specifying the respective patterns + for the key and value. Takes care of defining the C{Dict}, C{ZeroOrMore}, and C{Group} tokens + in the proper order. The key pattern can include delimiting markers or punctuation, + as long as they are suppressed, thereby leaving the significant key text. The value + pattern can include named results, so that the C{Dict} results can include named token + fields. + """ + return Dict( ZeroOrMore( Group ( key + value ) ) ) + +def originalTextFor(expr, asString=True): + """Helper to return the original, untokenized text for a given expression. Useful to + restore the parsed fields of an HTML start tag into the raw tag text itself, or to + revert separate tokens with intervening whitespace back to the original matching + input text. Simpler to use than the parse action C{L{keepOriginalText}}, and does not + require the inspect module to chase up the call stack. By default, returns a + string containing the original parsed text. + + If the optional C{asString} argument is passed as C{False}, then the return value is a + C{ParseResults} containing any results names that were originally matched, and a + single token containing the original matched text from the input string. So if + the expression passed to C{L{originalTextFor}} contains expressions with defined + results names, you must set C{asString} to C{False} if you want to preserve those + results name values.""" + locMarker = Empty().setParseAction(lambda s,loc,t: loc) + endlocMarker = locMarker.copy() + endlocMarker.callPreparse = False + matchExpr = locMarker("_original_start") + expr + endlocMarker("_original_end") + if asString: + extractText = lambda s,l,t: s[t._original_start:t._original_end] + else: + def extractText(s,l,t): + del t[:] + t.insert(0, s[t._original_start:t._original_end]) + del t["_original_start"] + del t["_original_end"] + matchExpr.setParseAction(extractText) + return matchExpr + +def ungroup(expr): + """Helper to undo pyparsing's default grouping of And expressions, even + if all but one are non-empty.""" + return TokenConverter(expr).setParseAction(lambda t:t[0]) + +# convenience constants for positional expressions +empty = Empty().setName("empty") +lineStart = LineStart().setName("lineStart") +lineEnd = LineEnd().setName("lineEnd") +stringStart = StringStart().setName("stringStart") +stringEnd = StringEnd().setName("stringEnd") + +_escapedPunc = Word( _bslash, r"\[]-*.$+^?()~ ", exact=2 ).setParseAction(lambda s,l,t:t[0][1]) +_printables_less_backslash = "".join([ c for c in printables if c not in r"\]" ]) +_escapedHexChar = Regex(r"\\0?[xX][0-9a-fA-F]+").setParseAction(lambda s,l,t:unichr(int(t[0][1:],16))) +_escapedOctChar = Regex(r"\\0[0-7]+").setParseAction(lambda s,l,t:unichr(int(t[0][1:],8))) +_singleChar = _escapedPunc | _escapedHexChar | _escapedOctChar | Word(_printables_less_backslash,exact=1) +_charRange = Group(_singleChar + Suppress("-") + _singleChar) +_reBracketExpr = Literal("[") + Optional("^").setResultsName("negate") + Group( OneOrMore( _charRange | _singleChar ) ).setResultsName("body") + "]" + +_expanded = lambda p: (isinstance(p,ParseResults) and ''.join([ unichr(c) for c in range(ord(p[0]),ord(p[1])+1) ]) or p) + +def srange(s): + r"""Helper to easily define string ranges for use in Word construction. Borrows + syntax from regexp '[]' string range definitions:: + srange("[0-9]") -> "0123456789" + srange("[a-z]") -> "abcdefghijklmnopqrstuvwxyz" + srange("[a-z$_]") -> "abcdefghijklmnopqrstuvwxyz$_" + The input string must be enclosed in []'s, and the returned string is the expanded + character set joined into a single string. + The values enclosed in the []'s may be:: + a single character + an escaped character with a leading backslash (such as \- or \]) + an escaped hex character with a leading '\x' (\x21, which is a '!' character) + (\0x## is also supported for backwards compatibility) + an escaped octal character with a leading '\0' (\041, which is a '!' character) + a range of any of the above, separated by a dash ('a-z', etc.) + any combination of the above ('aeiouy', 'a-zA-Z0-9_$', etc.) + """ + try: + return "".join([_expanded(part) for part in _reBracketExpr.parseString(s).body]) + except: + return "" + +def matchOnlyAtCol(n): + """Helper method for defining parse actions that require matching at a specific + column in the input text. + """ + def verifyCol(strg,locn,toks): + if col(locn,strg) != n: + raise ParseException(strg,locn,"matched token not at column %d" % n) + return verifyCol + +def replaceWith(replStr): + """Helper method for common parse actions that simply return a literal value. Especially + useful when used with C{transformString()}. + """ + def _replFunc(*args): + return [replStr] + return _replFunc + +def removeQuotes(s,l,t): + """Helper parse action for removing quotation marks from parsed quoted strings. + To use, add this parse action to quoted string using:: + quotedString.setParseAction( removeQuotes ) + """ + return t[0][1:-1] + +def upcaseTokens(s,l,t): + """Helper parse action to convert tokens to upper case.""" + return [ tt.upper() for tt in map(_ustr,t) ] + +def downcaseTokens(s,l,t): + """Helper parse action to convert tokens to lower case.""" + return [ tt.lower() for tt in map(_ustr,t) ] + +def keepOriginalText(s,startLoc,t): + """DEPRECATED - use new helper method C{originalTextFor}. + Helper parse action to preserve original parsed text, + overriding any nested parse actions.""" + try: + endloc = getTokensEndLoc() + except ParseException: + raise ParseFatalException("incorrect usage of keepOriginalText - may only be called as a parse action") + del t[:] + t += ParseResults(s[startLoc:endloc]) + return t + +def getTokensEndLoc(): + """Method to be called from within a parse action to determine the end + location of the parsed tokens.""" + import inspect + fstack = inspect.stack() + try: + # search up the stack (through intervening argument normalizers) for correct calling routine + for f in fstack[2:]: + if f[3] == "_parseNoCache": + endloc = f[0].f_locals["loc"] + return endloc + else: + raise ParseFatalException("incorrect usage of getTokensEndLoc - may only be called from within a parse action") + finally: + del fstack + +def _makeTags(tagStr, xml): + """Internal helper to construct opening and closing tag expressions, given a tag name""" + if isinstance(tagStr,basestring): + resname = tagStr + tagStr = Keyword(tagStr, caseless=not xml) + else: + resname = tagStr.name + + tagAttrName = Word(alphas,alphanums+"_-:") + if (xml): + tagAttrValue = dblQuotedString.copy().setParseAction( removeQuotes ) + openTag = Suppress("<") + tagStr("tag") + \ + Dict(ZeroOrMore(Group( tagAttrName + Suppress("=") + tagAttrValue ))) + \ + Optional("/",default=[False]).setResultsName("empty").setParseAction(lambda s,l,t:t[0]=='/') + Suppress(">") + else: + printablesLessRAbrack = "".join( [ c for c in printables if c not in ">" ] ) + tagAttrValue = quotedString.copy().setParseAction( removeQuotes ) | Word(printablesLessRAbrack) + openTag = Suppress("<") + tagStr("tag") + \ + Dict(ZeroOrMore(Group( tagAttrName.setParseAction(downcaseTokens) + \ + Optional( Suppress("=") + tagAttrValue ) ))) + \ + Optional("/",default=[False]).setResultsName("empty").setParseAction(lambda s,l,t:t[0]=='/') + Suppress(">") + closeTag = Combine(_L("") + + openTag = openTag.setResultsName("start"+"".join(resname.replace(":"," ").title().split())).setName("<%s>" % tagStr) + closeTag = closeTag.setResultsName("end"+"".join(resname.replace(":"," ").title().split())).setName("" % tagStr) + openTag.tag = resname + closeTag.tag = resname + return openTag, closeTag + +def makeHTMLTags(tagStr): + """Helper to construct opening and closing tag expressions for HTML, given a tag name""" + return _makeTags( tagStr, False ) + +def makeXMLTags(tagStr): + """Helper to construct opening and closing tag expressions for XML, given a tag name""" + return _makeTags( tagStr, True ) + +def withAttribute(*args,**attrDict): + """Helper to create a validating parse action to be used with start tags created + with C{makeXMLTags} or C{makeHTMLTags}. Use C{withAttribute} to qualify a starting tag + with a required attribute value, to avoid false matches on common tags such as + C{} or C{
}. + + Call C{withAttribute} with a series of attribute names and values. Specify the list + of filter attributes names and values as: + - keyword arguments, as in C{(align="right")}, or + - as an explicit dict with C{**} operator, when an attribute name is also a Python + reserved word, as in C{**{"class":"Customer", "align":"right"}} + - a list of name-value tuples, as in ( ("ns1:class", "Customer"), ("ns2:align","right") ) + For attribute names with a namespace prefix, you must use the second form. Attribute + names are matched insensitive to upper/lower case. + + To verify that the attribute exists, but without specifying a value, pass + C{withAttribute.ANY_VALUE} as the value. + """ + if args: + attrs = args[:] + else: + attrs = attrDict.items() + attrs = [(k,v) for k,v in attrs] + def pa(s,l,tokens): + for attrName,attrValue in attrs: + if attrName not in tokens: + raise ParseException(s,l,"no matching attribute " + attrName) + if attrValue != withAttribute.ANY_VALUE and tokens[attrName] != attrValue: + raise ParseException(s,l,"attribute '%s' has value '%s', must be '%s'" % + (attrName, tokens[attrName], attrValue)) + return pa +withAttribute.ANY_VALUE = object() + +opAssoc = _Constants() +opAssoc.LEFT = object() +opAssoc.RIGHT = object() + +def operatorPrecedence( baseExpr, opList ): + """Helper method for constructing grammars of expressions made up of + operators working in a precedence hierarchy. Operators may be unary or + binary, left- or right-associative. Parse actions can also be attached + to operator expressions. + + Parameters: + - baseExpr - expression representing the most basic element for the nested + - opList - list of tuples, one for each operator precedence level in the + expression grammar; each tuple is of the form + (opExpr, numTerms, rightLeftAssoc, parseAction), where: + - opExpr is the pyparsing expression for the operator; + may also be a string, which will be converted to a Literal; + if numTerms is 3, opExpr is a tuple of two expressions, for the + two operators separating the 3 terms + - numTerms is the number of terms for this operator (must + be 1, 2, or 3) + - rightLeftAssoc is the indicator whether the operator is + right or left associative, using the pyparsing-defined + constants opAssoc.RIGHT and opAssoc.LEFT. + - parseAction is the parse action to be associated with + expressions matching this operator expression (the + parse action tuple member may be omitted) + """ + ret = Forward() + lastExpr = baseExpr | ( Suppress('(') + ret + Suppress(')') ) + for i,operDef in enumerate(opList): + opExpr,arity,rightLeftAssoc,pa = (operDef + (None,))[:4] + if arity == 3: + if opExpr is None or len(opExpr) != 2: + raise ValueError("if numterms=3, opExpr must be a tuple or list of two expressions") + opExpr1, opExpr2 = opExpr + thisExpr = Forward()#.setName("expr%d" % i) + if rightLeftAssoc == opAssoc.LEFT: + if arity == 1: + matchExpr = FollowedBy(lastExpr + opExpr) + Group( lastExpr + OneOrMore( opExpr ) ) + elif arity == 2: + if opExpr is not None: + matchExpr = FollowedBy(lastExpr + opExpr + lastExpr) + Group( lastExpr + OneOrMore( opExpr + lastExpr ) ) + else: + matchExpr = FollowedBy(lastExpr+lastExpr) + Group( lastExpr + OneOrMore(lastExpr) ) + elif arity == 3: + matchExpr = FollowedBy(lastExpr + opExpr1 + lastExpr + opExpr2 + lastExpr) + \ + Group( lastExpr + opExpr1 + lastExpr + opExpr2 + lastExpr ) + else: + raise ValueError("operator must be unary (1), binary (2), or ternary (3)") + elif rightLeftAssoc == opAssoc.RIGHT: + if arity == 1: + # try to avoid LR with this extra test + if not isinstance(opExpr, Optional): + opExpr = Optional(opExpr) + matchExpr = FollowedBy(opExpr.expr + thisExpr) + Group( opExpr + thisExpr ) + elif arity == 2: + if opExpr is not None: + matchExpr = FollowedBy(lastExpr + opExpr + thisExpr) + Group( lastExpr + OneOrMore( opExpr + thisExpr ) ) + else: + matchExpr = FollowedBy(lastExpr + thisExpr) + Group( lastExpr + OneOrMore( thisExpr ) ) + elif arity == 3: + matchExpr = FollowedBy(lastExpr + opExpr1 + thisExpr + opExpr2 + thisExpr) + \ + Group( lastExpr + opExpr1 + thisExpr + opExpr2 + thisExpr ) + else: + raise ValueError("operator must be unary (1), binary (2), or ternary (3)") + else: + raise ValueError("operator must indicate right or left associativity") + if pa: + matchExpr.setParseAction( pa ) + thisExpr << ( matchExpr | lastExpr ) + lastExpr = thisExpr + ret << lastExpr + return ret + +dblQuotedString = Regex(r'"(?:[^"\n\r\\]|(?:"")|(?:\\x[0-9a-fA-F]+)|(?:\\.))*"').setName("string enclosed in double quotes") +sglQuotedString = Regex(r"'(?:[^'\n\r\\]|(?:'')|(?:\\x[0-9a-fA-F]+)|(?:\\.))*'").setName("string enclosed in single quotes") +quotedString = Regex(r'''(?:"(?:[^"\n\r\\]|(?:"")|(?:\\x[0-9a-fA-F]+)|(?:\\.))*")|(?:'(?:[^'\n\r\\]|(?:'')|(?:\\x[0-9a-fA-F]+)|(?:\\.))*')''').setName("quotedString using single or double quotes") +unicodeString = Combine(_L('u') + quotedString.copy()) + +def nestedExpr(opener="(", closer=")", content=None, ignoreExpr=quotedString.copy()): + """Helper method for defining nested lists enclosed in opening and closing + delimiters ("(" and ")" are the default). + + Parameters: + - opener - opening character for a nested list (default="("); can also be a pyparsing expression + - closer - closing character for a nested list (default=")"); can also be a pyparsing expression + - content - expression for items within the nested lists (default=None) + - ignoreExpr - expression for ignoring opening and closing delimiters (default=quotedString) + + If an expression is not provided for the content argument, the nested + expression will capture all whitespace-delimited content between delimiters + as a list of separate values. + + Use the C{ignoreExpr} argument to define expressions that may contain + opening or closing characters that should not be treated as opening + or closing characters for nesting, such as quotedString or a comment + expression. Specify multiple expressions using an C{L{Or}} or C{L{MatchFirst}}. + The default is L{quotedString}, but if no expressions are to be ignored, + then pass C{None} for this argument. + """ + if opener == closer: + raise ValueError("opening and closing strings cannot be the same") + if content is None: + if isinstance(opener,basestring) and isinstance(closer,basestring): + if len(opener) == 1 and len(closer)==1: + if ignoreExpr is not None: + content = (Combine(OneOrMore(~ignoreExpr + + CharsNotIn(opener+closer+ParserElement.DEFAULT_WHITE_CHARS,exact=1)) + ).setParseAction(lambda t:t[0].strip())) + else: + content = (empty.copy()+CharsNotIn(opener+closer+ParserElement.DEFAULT_WHITE_CHARS + ).setParseAction(lambda t:t[0].strip())) + else: + if ignoreExpr is not None: + content = (Combine(OneOrMore(~ignoreExpr + + ~Literal(opener) + ~Literal(closer) + + CharsNotIn(ParserElement.DEFAULT_WHITE_CHARS,exact=1)) + ).setParseAction(lambda t:t[0].strip())) + else: + content = (Combine(OneOrMore(~Literal(opener) + ~Literal(closer) + + CharsNotIn(ParserElement.DEFAULT_WHITE_CHARS,exact=1)) + ).setParseAction(lambda t:t[0].strip())) + else: + raise ValueError("opening and closing arguments must be strings if no content expression is given") + ret = Forward() + if ignoreExpr is not None: + ret << Group( Suppress(opener) + ZeroOrMore( ignoreExpr | ret | content ) + Suppress(closer) ) + else: + ret << Group( Suppress(opener) + ZeroOrMore( ret | content ) + Suppress(closer) ) + return ret + +def indentedBlock(blockStatementExpr, indentStack, indent=True): + """Helper method for defining space-delimited indentation blocks, such as + those used to define block statements in Python source code. + + Parameters: + - blockStatementExpr - expression defining syntax of statement that + is repeated within the indented block + - indentStack - list created by caller to manage indentation stack + (multiple statementWithIndentedBlock expressions within a single grammar + should share a common indentStack) + - indent - boolean indicating whether block must be indented beyond the + the current level; set to False for block of left-most statements + (default=True) + + A valid block must contain at least one C{blockStatement}. + """ + def checkPeerIndent(s,l,t): + if l >= len(s): return + curCol = col(l,s) + if curCol != indentStack[-1]: + if curCol > indentStack[-1]: + raise ParseFatalException(s,l,"illegal nesting") + raise ParseException(s,l,"not a peer entry") + + def checkSubIndent(s,l,t): + curCol = col(l,s) + if curCol > indentStack[-1]: + indentStack.append( curCol ) + else: + raise ParseException(s,l,"not a subentry") + + def checkUnindent(s,l,t): + if l >= len(s): return + curCol = col(l,s) + if not(indentStack and curCol < indentStack[-1] and curCol <= indentStack[-2]): + raise ParseException(s,l,"not an unindent") + indentStack.pop() + + NL = OneOrMore(LineEnd().setWhitespaceChars("\t ").suppress()) + INDENT = Empty() + Empty().setParseAction(checkSubIndent) + PEER = Empty().setParseAction(checkPeerIndent) + UNDENT = Empty().setParseAction(checkUnindent) + if indent: + smExpr = Group( Optional(NL) + + #~ FollowedBy(blockStatementExpr) + + INDENT + (OneOrMore( PEER + Group(blockStatementExpr) + Optional(NL) )) + UNDENT) + else: + smExpr = Group( Optional(NL) + + (OneOrMore( PEER + Group(blockStatementExpr) + Optional(NL) )) ) + blockStatementExpr.ignore(_bslash + LineEnd()) + return smExpr + +alphas8bit = srange(r"[\0xc0-\0xd6\0xd8-\0xf6\0xf8-\0xff]") +punc8bit = srange(r"[\0xa1-\0xbf\0xd7\0xf7]") + +anyOpenTag,anyCloseTag = makeHTMLTags(Word(alphas,alphanums+"_:")) +commonHTMLEntity = Combine(_L("&") + oneOf("gt lt amp nbsp quot").setResultsName("entity") +";").streamline() +_htmlEntityMap = dict(zip("gt lt amp nbsp quot".split(),'><& "')) +replaceHTMLEntity = lambda t : t.entity in _htmlEntityMap and _htmlEntityMap[t.entity] or None + +# it's easy to get these comment structures wrong - they're very common, so may as well make them available +cStyleComment = Regex(r"/\*(?:[^*]*\*+)+?/").setName("C style comment") + +htmlComment = Regex(r"") +restOfLine = Regex(r".*").leaveWhitespace() +dblSlashComment = Regex(r"\/\/(\\\n|.)*").setName("// comment") +cppStyleComment = Regex(r"/(?:\*(?:[^*]*\*+)+?/|/[^\n]*(?:\n[^\n]*)*?(?:(?" + str(tokenlist)) + print ("tokens = " + str(tokens)) + print ("tokens.columns = " + str(tokens.columns)) + print ("tokens.tables = " + str(tokens.tables)) + print (tokens.asXML("SQL",True)) + except ParseBaseException: + err = sys.exc_info()[1] + print (teststring + "->") + print (err.line) + print (" "*(err.column-1) + "^") + print (err) + print() + + selectToken = CaselessLiteral( "select" ) + fromToken = CaselessLiteral( "from" ) + + ident = Word( alphas, alphanums + "_$" ) + columnName = delimitedList( ident, ".", combine=True ).setParseAction( upcaseTokens ) + columnNameList = Group( delimitedList( columnName ) )#.setName("columns") + tableName = delimitedList( ident, ".", combine=True ).setParseAction( upcaseTokens ) + tableNameList = Group( delimitedList( tableName ) )#.setName("tables") + simpleSQL = ( selectToken + \ + ( '*' | columnNameList ).setResultsName( "columns" ) + \ + fromToken + \ + tableNameList.setResultsName( "tables" ) ) + + test( "SELECT * from XYZZY, ABC" ) + test( "select * from SYS.XYZZY" ) + test( "Select A from Sys.dual" ) + test( "Select AA,BB,CC from Sys.dual" ) + test( "Select A, B, C from Sys.dual" ) + test( "Select A, B, C from Sys.dual" ) + test( "Xelect A, B, C from Sys.dual" ) + test( "Select A, B, C frox Sys.dual" ) + test( "Select" ) + test( "Select ^^^ frox Sys.dual" ) + test( "Select A, B, C from Sys.dual, Table2 " ) diff --git a/utest/test_parser.py b/utest/test_parser.py index 33ba5454e..89dae10e1 100755 --- a/utest/test_parser.py +++ b/utest/test_parser.py @@ -146,7 +146,7 @@ def test_comments(self): // comment 1 struct foo { //comment 2 // comment 3 - uint32_t a; //comment 5 + uint32_t a; //comment 5 // comment 6 }; // comment 4