mirror of https://github.com/ecmwf/eccodes.git
Fixed an issue parsing binary operators that include a macro
This commit is contained in:
parent
9944b8da24
commit
45ed80e49c
|
@ -0,0 +1,65 @@
|
|||
# Start the line with # to exclude the file
|
||||
grib_accessor_class_abstract_long_vector.cc
|
||||
grib_accessor_class_ascii.cc
|
||||
grib_accessor_class_bits.cc
|
||||
grib_accessor_class_bytes.cc
|
||||
grib_accessor_class_double.cc
|
||||
grib_accessor_class_long.cc
|
||||
grib_accessor_class_proj_string.cc
|
||||
grib_accessor_class_blob.cc
|
||||
# [SLOW FAILS] grib_accessor_class_bufr_data_array.cc
|
||||
grib_accessor_class_bufr_data_element.cc
|
||||
grib_accessor_class_bufr_elements_table.cc
|
||||
grib_accessor_class_bufr_extract_area_subsets.cc
|
||||
grib_accessor_class_bufr_extract_datetime_subsets.cc
|
||||
grib_accessor_class_bufr_extract_subsets.cc
|
||||
grib_accessor_class_bufr_simple_thinning.cc
|
||||
grib_accessor_class_change_alternative_row_scanning.cc
|
||||
grib_accessor_class_change_scanning_direction.cc
|
||||
grib_accessor_class_codetable_title.cc
|
||||
grib_accessor_class_codetable_units.cc
|
||||
grib_accessor_class_concept.cc
|
||||
grib_accessor_class_data_apply_bitmap.cc
|
||||
grib_accessor_class_data_apply_boustrophedonic.cc
|
||||
grib_accessor_class_data_apply_boustrophedonic_bitmap.cc
|
||||
grib_accessor_class_data_secondary_bitmap.cc
|
||||
grib_accessor_class_data_shsimple_packing.cc
|
||||
grib_accessor_class_dictionary.cc
|
||||
grib_accessor_class_g1_half_byte_codeflag.cc
|
||||
grib_accessor_class_g2_mars_labeling.cc
|
||||
grib_accessor_class_g2step_range.cc
|
||||
grib_accessor_class_gaussian_grid_name.cc
|
||||
grib_accessor_class_gds_not_present_bitmap.cc
|
||||
grib_accessor_class_group.cc
|
||||
grib_accessor_class_hash_array.cc
|
||||
grib_accessor_class_headers_only.cc
|
||||
grib_accessor_class_ifs_param.cc
|
||||
grib_accessor_class_iterator.cc
|
||||
grib_accessor_class_label.cc
|
||||
grib_accessor_class_md5.cc
|
||||
grib_accessor_class_message_copy.cc
|
||||
grib_accessor_class_nearest.cc
|
||||
grib_accessor_class_non_alpha.cc
|
||||
grib_accessor_class_number_of_values_data_raw_packing.cc
|
||||
grib_accessor_class_pack_bufr_values.cc
|
||||
grib_accessor_class_packing_type.cc
|
||||
grib_accessor_class_position.cc
|
||||
grib_accessor_class_raw.cc
|
||||
grib_accessor_class_section.cc
|
||||
grib_accessor_class_section_pointer.cc
|
||||
grib_accessor_class_smart_table_column.cc
|
||||
grib_accessor_class_step_human_readable.cc
|
||||
grib_accessor_class_to_double.cc
|
||||
grib_accessor_class_to_integer.cc
|
||||
grib_accessor_class_to_string.cc
|
||||
grib_accessor_class_transient_darray.cc
|
||||
grib_accessor_class_uint16.cc
|
||||
grib_accessor_class_uint32.cc
|
||||
grib_accessor_class_uint32_little_endian.cc
|
||||
grib_accessor_class_uint64.cc
|
||||
grib_accessor_class_uint64_little_endian.cc
|
||||
grib_accessor_class_uint8.cc
|
||||
grib_accessor_class_unpack_bufr_values.cc
|
||||
grib_accessor_class_values.cc
|
||||
grib_accessor_class_variable.cc
|
||||
grib_accessor_class_when.cc
|
|
@ -15,7 +15,7 @@ grib_accessor_class_double.cc
|
|||
grib_accessor_class_long.cc
|
||||
grib_accessor_class_proj_string.cc
|
||||
grib_accessor_class_blob.cc
|
||||
# [SLOW] grib_accessor_class_bufr_data_array.cc
|
||||
# [SLOW FAILS] grib_accessor_class_bufr_data_array.cc
|
||||
grib_accessor_class_bufr_data_element.cc
|
||||
grib_accessor_class_bufr_elements_table.cc
|
||||
grib_accessor_class_bufr_extract_area_subsets.cc
|
||||
|
@ -91,7 +91,7 @@ grib_accessor_class_count_total.cc
|
|||
grib_accessor_class_data_ccsds_packing.cc
|
||||
grib_accessor_class_data_g1secondary_bitmap.cc
|
||||
grib_accessor_class_data_g1shsimple_packing.cc
|
||||
# [SLOW] grib_accessor_class_data_g22order_packing.cc
|
||||
# [SLOW CONVERTS] grib_accessor_class_data_g22order_packing.cc
|
||||
grib_accessor_class_data_g2secondary_bitmap.cc
|
||||
grib_accessor_class_data_g2shsimple_packing.cc
|
||||
grib_accessor_class_data_png_packing.cc
|
||||
|
|
|
@ -1,6 +1,8 @@
|
|||
|
||||
import utils.debug as debug
|
||||
import ast_object.ast_macro_details as ast_macro_details
|
||||
import os
|
||||
import ast_object.ast_utils as ast_utils
|
||||
|
||||
# Represents a coherent unit of code that needs to be parsed together: usually a single .cc file
|
||||
#
|
||||
|
@ -35,7 +37,9 @@ class AstCode:
|
|||
return self._macro_details
|
||||
|
||||
def add_macro_definition(self, def_node):
    """Log a macro definition node and register it with the macro details.

    def_node is the macro-definition cursor; its spelling, the base name of
    its source file and its extent are written to the debug log before the
    node is passed to self._macro_details.add_definition().
    """
    # location.file can be None (presumably for built-in / command-line
    # macros - TODO confirm); a debug message must not abort the parse.
    source_file = def_node.location.file
    file_name = os.path.basename(source_file.name) if source_file else None
    debug.line("add_macro_definition", f"Adding MACRO DEFN spelling=[{def_node.spelling}] loc=[{file_name}] extent={ast_utils.node_extent(def_node)}")
    self._macro_details.add_definition(def_node)
|
||||
|
||||
def add_macro_instantiation(self, inst_node):
    """Log a macro instantiation node and register it with the macro details.

    inst_node is the macro-instantiation cursor; its spelling, the base name
    of its source file and its extent are written to the debug log before the
    node is passed to self._macro_details.add_instantiation().
    """
    # location.file can be None for locations libclang cannot map to a file;
    # a debug message must not abort the parse.
    source_file = inst_node.location.file
    file_name = os.path.basename(source_file.name) if source_file else None
    debug.line("add_macro_instantiation", f"Adding MACRO INST spelling=[{inst_node.spelling}] loc=[{file_name}] extent={ast_utils.node_extent(inst_node)}")
    self._macro_details.add_instantiation(inst_node)
|
||||
|
|
|
@ -32,13 +32,13 @@ class AstCodeCreator:
|
|||
# ALWAYS appear at the top of the global declaration
|
||||
self._ast_code.add_global_function_entry(node)
|
||||
else:
|
||||
debug.line("parse_node", f"Ignoring [no file info] node spelling=[{node.spelling}] kind=[{node.kind}]")
|
||||
#debug.line("parse_node", f"Ignoring [no file info] node spelling=[{node.spelling}] kind=[{node.kind}]")
|
||||
return
|
||||
elif node.kind == clang.cindex.CursorKind.MACRO_INSTANTIATION:
|
||||
if node.location.file and node.location.file.name == self._cfilepath + self._cfilename:
|
||||
self._ast_code.add_macro_instantiation(node)
|
||||
elif node.location.file and node.location.file.name != self._cfilepath + self._cfilename:
|
||||
debug.line("parse_node", f"Ignoring [non-local] node spelling=[{node.spelling}] file=[{os.path.basename(node.location.file.name)}]")
|
||||
#debug.line("parse_node", f"Ignoring [non-local] node spelling=[{node.spelling}] file=[{os.path.basename(node.location.file.name)}]")
|
||||
return
|
||||
elif node.kind == clang.cindex.CursorKind.INCLUSION_DIRECTIVE:
|
||||
pass
|
||||
|
@ -108,10 +108,4 @@ class AstCodeCreator:
|
|||
|
||||
self.parse_root()
|
||||
|
||||
# Debug - dump macros
|
||||
for node in self._ast_code.macro_details.def_nodes:
|
||||
debug.line("parse", f"MACRO DEFN spelling=[{node.spelling}] loc=[{os.path.basename(node.location.file.name)}]")
|
||||
for node in self._ast_code.macro_details.inst_nodes:
|
||||
debug.line("parse", f"MACRO INST spelling=[{node.spelling}] loc=[{os.path.basename(node.location.file.name)}] extent=[{node.extent.start.line}:{node.extent.start.column} -> {node.extent.end.line}:{node.extent.end.column}]")
|
||||
|
||||
return self._ast_code
|
||||
|
|
|
@ -73,7 +73,7 @@ class AstParser:
|
|||
# Note - Prefer to call this as it handles macro expansions
|
||||
def parse_ast_node(self, node):
|
||||
|
||||
debug.line("parse_ast_node", f"[{node.kind}] spelling=[{node.spelling}] type=[{node.type.spelling}] extent=[{node.extent.start.line}:{node.extent.start.column}]->[{node.extent.end.line}:{node.extent.end.column}]")
|
||||
debug.line("parse_ast_node", f"[{node.kind}] spelling=[{node.spelling}] type=[{node.type.spelling}] extent={ast_utils.node_extent(node)}")
|
||||
|
||||
# Handle macros
|
||||
macro_instantiation_node = self._macro_details.instantiation_node_for(node)
|
||||
|
@ -116,7 +116,7 @@ class AstParser:
|
|||
# =================================== Macros Convert functions [BEGIN] ===================================
|
||||
|
||||
def parse_macro_definition(self, node):
|
||||
debug.line("parse_macro_definition", f"MACRO spelling=[{node.spelling}] kind=[{node.kind}] extent=[{node.extent.start.line}:{node.extent.start.column} -> {node.extent.end.line}:{node.extent.end.column}]")
|
||||
debug.line("parse_macro_definition", f"MACRO spelling=[{node.spelling}] kind=[{node.kind}] extent={ast_utils.node_extent(node)}")
|
||||
tokens = [token.spelling for token in node.get_tokens()]
|
||||
debug.line("parse_macro_definition", f"MACRO tokens=[{tokens}]")
|
||||
tokens_count = len(tokens)
|
||||
|
@ -211,11 +211,11 @@ class AstParser:
|
|||
# macro_node is the original macro code in the C file
|
||||
# expanded_node is the code after the pre-processor has applied the macro expansion
|
||||
def parse_macro_instantiation(self, macro_node, expanded_node):
|
||||
debug.line("parse_macro_instantiation", f"MACRO macro_node spelling=[{macro_node.spelling}] kind=[{macro_node.kind}] extent=[{macro_node.extent.start.line}:{macro_node.extent.start.column} -> {macro_node.extent.end.line}:{macro_node.extent.end.column}]")
|
||||
debug.line("parse_macro_instantiation", f"MACRO macro_node spelling=[{macro_node.spelling}] kind=[{macro_node.kind}] extent={ast_utils.node_extent(expanded_node)}")
|
||||
debug.line("parse_macro_instantiation", f"MACRO macro_node dump:")
|
||||
ast_utils.dump_node(macro_node, 2, "truncate")
|
||||
|
||||
debug.line("parse_macro_instantiation", f"MACRO expanded_node spelling=[{expanded_node.spelling}] kind=[{expanded_node.kind}] extent=[{expanded_node.extent.start.line}:{expanded_node.extent.start.column} -> {expanded_node.extent.end.line}:{expanded_node.extent.end.column}]")
|
||||
debug.line("parse_macro_instantiation", f"MACRO expanded_node spelling=[{expanded_node.spelling}] kind=[{expanded_node.kind}] extent={ast_utils.node_extent(expanded_node)}")
|
||||
debug.line("parse_macro_instantiation", f"MACRO expanded_node dump:")
|
||||
ast_utils.dump_node(expanded_node, 2, "truncate")
|
||||
|
||||
|
@ -253,8 +253,8 @@ class AstParser:
|
|||
# Just iteratively call parse_ast_node
|
||||
def parse_COMPOUND_STMT(self, node):
|
||||
|
||||
debug.line("parse_COMPOUND_STMT", f"Dumping node for MACRO INFO:")
|
||||
ast_utils.dump_node(node, 2)
|
||||
#debug.line("parse_COMPOUND_STMT", f"Dumping node for MACRO INFO:")
|
||||
#ast_utils.dump_node(node, 2)
|
||||
|
||||
stmt_lines = compound_statement.CompoundStatement()
|
||||
|
||||
|
@ -724,42 +724,60 @@ class AstParser:
|
|||
return c_unary_op
|
||||
|
||||
def parse_BINARY_OPERATOR(self, node):
|
||||
|
||||
debug.line("parse_BINARY_OPERATOR", f"DEBUG NODE DUMP:")
|
||||
ast_utils.dump_node(node)
|
||||
#debug.line("parse_BINARY_OPERATOR", f"DEBUG NODE DUMP:")
|
||||
#ast_utils.dump_node(node)
|
||||
|
||||
children = list(node.get_children())
|
||||
assert len(children) == 2, f"Expected exactly two children for binary operator"
|
||||
|
||||
left_operand, right_operand = children
|
||||
node_tokens = list(node.get_tokens())
|
||||
left_operand_tokens = list(left_operand.get_tokens())
|
||||
right_operand_tokens = list(right_operand.get_tokens())
|
||||
|
||||
debug.line("parse_BINARY_OPERATOR", f"BINARY left_operand [{left_operand.kind}] spelling=[{left_operand.spelling}] type=[{left_operand.type.spelling}] extent=[{left_operand.extent.start.line}:{left_operand.extent.start.column}]->[{left_operand.extent.end.line}:{left_operand.extent.end.column}]")
|
||||
debug.line("parse_BINARY_OPERATOR", f"BINARY right_operand [{right_operand.kind}] spelling=[{right_operand.spelling}] type=[{right_operand.type.spelling}] extent=[{right_operand.extent.start.line}:{right_operand.extent.start.column}]->[{right_operand.extent.end.line}:{right_operand.extent.end.column}]")
|
||||
|
||||
# Tokenize and find the operator
|
||||
tokens = [token.spelling for token in node.get_tokens()]
|
||||
left_tokens = [token.spelling for token in left_operand.get_tokens()]
|
||||
right_tokens = [token.spelling for token in right_operand.get_tokens()]
|
||||
|
||||
# Find the operator by excluding operand tokens
|
||||
tokens_count = len(tokens)
|
||||
left_tokens_count = len(left_tokens)
|
||||
operator_token = tokens[left_tokens_count]
|
||||
debug.line("parse_BINARY_OPERATOR", f"Node spelling=[{node.spelling}] tokens=[{[token.spelling for token in node_tokens]}] extent={ast_utils.node_extent(node)}")
|
||||
debug.line("parse_BINARY_OPERATOR", f"left_operand [{left_operand.kind}] spelling=[{left_operand.spelling}] tokens=[{[token.spelling for token in left_operand_tokens]}] type=[{left_operand.type.spelling}] extent={ast_utils.node_extent(left_operand)}")
|
||||
debug.line("parse_BINARY_OPERATOR", f"right_operand [{right_operand.kind}] spelling=[{right_operand.spelling}] tokens=[{[token.spelling for token in right_operand_tokens]}] type=[{right_operand.type.spelling}] extent={ast_utils.node_extent(right_operand)}")
|
||||
|
||||
left_operand_cvalue = self.parse_ast_node(left_operand)
|
||||
|
||||
right_tokens_count = len(right_tokens)
|
||||
if tokens_count != left_tokens_count + right_tokens_count + 1:
|
||||
# The top level tokens don't match the right_operand tokens. This will happen if the top-level
|
||||
# contains a macro definition. We should be able to handle this, so we'll just record the fact here!
|
||||
debug.line("parse_BINARY_OPERATOR", f"Right operand tokens don't match: assuming a macro")
|
||||
right_operand_cvalue = self.parse_ast_node(right_operand)
|
||||
|
||||
debug.line("parse_BINARY_OPERATOR", f"Create c_binary_op: left_operand_cvalue=[{debug.as_debug_string(left_operand_cvalue)}] operator_token=[{debug.as_debug_string(operator_token)}] right_operand_cvalue=[{debug.as_debug_string(right_operand_cvalue)}]")
|
||||
|
||||
if not right_operand_cvalue:
|
||||
return literal.Literal(f"// [Ignoring C Code] {' '.join([token.spelling for token in node.get_tokens()])}")
|
||||
return literal.Literal(f"// [Ignoring C Code] {' '.join([token.spelling for token in node_tokens])}")
|
||||
|
||||
c_binary_op = binary_operation.BinaryOperation(left_operand_cvalue, operator_token, right_operand_cvalue)
|
||||
debug.line("parse_BINARY_OPERATOR", f"left_operand_cvalue=[{debug.as_debug_string(left_operand_cvalue)}]")
|
||||
debug.line("parse_BINARY_OPERATOR", f"right_operand_cvalue=[{debug.as_debug_string(right_operand_cvalue)}]")
|
||||
|
||||
# Get operator
|
||||
operator_token = None
|
||||
|
||||
# Step 1: See if we have child node tokens
|
||||
node_tokens_count = len(node_tokens)
|
||||
left_tokens_count = len(left_operand_tokens)
|
||||
right_tokens_count = len(right_operand_tokens)
|
||||
|
||||
if node_tokens_count > 0 and left_tokens_count > 0:
|
||||
if node_tokens_count >= left_tokens_count + right_tokens_count + 1:
|
||||
operator_token = node_tokens[left_tokens_count]
|
||||
|
||||
debug.line("parse_BINARY_OPERATOR", f"[Step 1] [child tokens] node_tokens_count=[{node_tokens_count}] left_tokens_count=[{left_tokens_count}] right_tokens_count=[{right_tokens_count}]")
|
||||
debug.line("parse_BINARY_OPERATOR", f"[Step 1] [child tokens] operator_token=[{operator_token.spelling if operator_token else None}]")
|
||||
|
||||
if not operator_token:
|
||||
# Step 2: Deduce it from the node tokens
|
||||
operator_extent = clang.cindex.SourceRange.from_locations(left_operand.extent.end, right_operand.extent.start)
|
||||
debug.line("parse_BINARY_OPERATOR", f"operator_extent=[{ast_utils.source_range_string(operator_extent)}]")
|
||||
operator_token = ast_utils.find_token_from_extent(node_tokens, operator_extent)
|
||||
|
||||
debug.line("parse_BINARY_OPERATOR", f"[Step 2] [node_tokens] operator_token=[{operator_token.spelling if operator_token else None}]")
|
||||
|
||||
if not operator_token:
|
||||
# Step 3: Search ALL translation unit tokens (this will be slow for large C files - may need to optimise)
|
||||
operator_token = ast_utils.find_token_from_extent(node.translation_unit.cursor.get_tokens(), operator_extent)
|
||||
|
||||
debug.line("parse_BINARY_OPERATOR", f"[Step 3] [ALL tokens] operator_token=[{operator_token.spelling if operator_token else None}]")
|
||||
assert operator_token
|
||||
|
||||
c_binary_op = binary_operation.BinaryOperation(left_operand_cvalue, operator_token.spelling, right_operand_cvalue)
|
||||
return c_binary_op
|
||||
|
||||
def parse_COMPOUND_ASSIGNMENT_OPERATOR(self, node):
|
||||
|
|
|
@ -13,6 +13,17 @@ import code_object.array_access as array_access
|
|||
|
||||
# Utilities for working with C AST Nodes
|
||||
|
||||
# Return a string representation e.g. [117:10->117:45]
|
||||
|
||||
def node_extent(node):
    """Format the extent of an AST node as "[line:col->line:col]".

    Example result: [117:10->117:45]
    """
    begin = node.extent.start
    finish = node.extent.end
    return "[{}:{}->{}:{}]".format(begin.line, begin.column, finish.line, finish.column)
|
||||
|
||||
def token_extent(node):
    """Format the extent of a token as "[line:col->line:col]".

    The argument is named "node" but only its .extent attribute is read,
    so any object exposing extent.start / extent.end works.
    """
    begin = node.extent.start
    finish = node.extent.end
    return "[{}:{}->{}:{}]".format(begin.line, begin.column, finish.line, finish.column)
|
||||
|
||||
def source_range_string(src_range):
    """Format a source range (an object with .start and .end locations) as
    "[line:col->line:col]".
    """
    begin = src_range.start
    finish = src_range.end
    return "[{}:{}->{}:{}]".format(begin.line, begin.column, finish.line, finish.column)
|
||||
|
||||
# tokens string can be:
|
||||
# "flat" to show a flat summary
|
||||
# "list" to show a detailed list
|
||||
|
@ -20,12 +31,12 @@ import code_object.array_access as array_access
|
|||
# "" to not show tokens
|
||||
def dump_node(cnode, depth=0, tokens="truncate"):
|
||||
truncate_depth = 10
|
||||
debug.line("dump_node", f"{' ' * depth}[{depth}:{cnode.kind}] spelling=[{cnode.spelling}] type=[{cnode.type.spelling}] extent=[{cnode.extent.start.line}:{cnode.extent.start.column}]->[{cnode.extent.end.line}:{cnode.extent.end.column}]")
|
||||
debug.line("dump_node", f"{' ' * depth}[{depth}:{cnode.kind}] spelling=[{cnode.spelling}] type=[{cnode.type.spelling}] extent={node_extent(cnode)}")
|
||||
if tokens == "flat":
|
||||
debug.line("dump_node", f"{' ' * depth} -> tokens=[{[token.spelling for token in cnode.get_tokens()]}]")
|
||||
elif tokens == "list":
|
||||
for token in cnode.get_tokens():
|
||||
debug.line("dump_node", f"{' ' * depth} -> token=[{token.spelling}] extent=[{token.extent.start.line}:{token.extent.start.column} -> {token.extent.end.line}:{token.extent.end.column}]")
|
||||
debug.line("dump_node", f"{' ' * depth} -> token=[{token.spelling}] extent={token_extent(token)}")
|
||||
elif tokens == "truncate":
|
||||
token_list = [token.spelling for token in cnode.get_tokens()]
|
||||
debug.line("dump_node", f"{' ' * depth} -> tokens[:{truncate_depth}]=[{token_list[:truncate_depth]}]")
|
||||
|
@ -34,6 +45,16 @@ def dump_node(cnode, depth=0, tokens="truncate"):
|
|||
for child in cnode.get_children():
|
||||
dump_node(child, depth+1, tokens)
|
||||
|
||||
def find_token_from_extent(tokens, extent):
    """Return the first token that lies entirely inside extent, or None.

    tokens is any iterable of objects with an .extent (start/end locations
    carrying .line and .column); extent is a range with the same shape.

    Positions are compared as (line, column) pairs, so an extent that spans
    several lines (e.g. an operator placed on a continuation line between
    its two operands) can still contain a single-line token. For extents
    confined to one line the result is the same as a plain column check.
    """
    def position(location):
        return (location.line, location.column)

    lower = position(extent.start)
    upper = position(extent.end)

    for token in tokens:
        starts_inside = lower <= position(token.extent.start)
        ends_inside = position(token.extent.end) <= upper
        if starts_inside and ends_inside:
            return token

    return None
|
||||
|
||||
# Create a C FuncSig object from a FUNCTION_DECL node
|
||||
def create_cfuncsig(cnode):
|
||||
if cnode.kind == clang.cindex.CursorKind.FUNCTION_TEMPLATE:
|
||||
|
|
Loading…
Reference in New Issue