Fixed an issue parsing binary operators that include a macro

This commit is contained in:
kevstone 2024-02-16 21:23:55 +00:00
parent 9944b8da24
commit 45ed80e49c
6 changed files with 146 additions and 44 deletions

View File

@ -0,0 +1,65 @@
# Start the line with # to exclude the file
grib_accessor_class_abstract_long_vector.cc
grib_accessor_class_ascii.cc
grib_accessor_class_bits.cc
grib_accessor_class_bytes.cc
grib_accessor_class_double.cc
grib_accessor_class_long.cc
grib_accessor_class_proj_string.cc
grib_accessor_class_blob.cc
# [SLOW FAILS] grib_accessor_class_bufr_data_array.cc
grib_accessor_class_bufr_data_element.cc
grib_accessor_class_bufr_elements_table.cc
grib_accessor_class_bufr_extract_area_subsets.cc
grib_accessor_class_bufr_extract_datetime_subsets.cc
grib_accessor_class_bufr_extract_subsets.cc
grib_accessor_class_bufr_simple_thinning.cc
grib_accessor_class_change_alternative_row_scanning.cc
grib_accessor_class_change_scanning_direction.cc
grib_accessor_class_codetable_title.cc
grib_accessor_class_codetable_units.cc
grib_accessor_class_concept.cc
grib_accessor_class_data_apply_bitmap.cc
grib_accessor_class_data_apply_boustrophedonic.cc
grib_accessor_class_data_apply_boustrophedonic_bitmap.cc
grib_accessor_class_data_secondary_bitmap.cc
grib_accessor_class_data_shsimple_packing.cc
grib_accessor_class_dictionary.cc
grib_accessor_class_g1_half_byte_codeflag.cc
grib_accessor_class_g2_mars_labeling.cc
grib_accessor_class_g2step_range.cc
grib_accessor_class_gaussian_grid_name.cc
grib_accessor_class_gds_not_present_bitmap.cc
grib_accessor_class_group.cc
grib_accessor_class_hash_array.cc
grib_accessor_class_headers_only.cc
grib_accessor_class_ifs_param.cc
grib_accessor_class_iterator.cc
grib_accessor_class_label.cc
grib_accessor_class_md5.cc
grib_accessor_class_message_copy.cc
grib_accessor_class_nearest.cc
grib_accessor_class_non_alpha.cc
grib_accessor_class_number_of_values_data_raw_packing.cc
grib_accessor_class_pack_bufr_values.cc
grib_accessor_class_packing_type.cc
grib_accessor_class_position.cc
grib_accessor_class_raw.cc
grib_accessor_class_section.cc
grib_accessor_class_section_pointer.cc
grib_accessor_class_smart_table_column.cc
grib_accessor_class_step_human_readable.cc
grib_accessor_class_to_double.cc
grib_accessor_class_to_integer.cc
grib_accessor_class_to_string.cc
grib_accessor_class_transient_darray.cc
grib_accessor_class_uint16.cc
grib_accessor_class_uint32.cc
grib_accessor_class_uint32_little_endian.cc
grib_accessor_class_uint64.cc
grib_accessor_class_uint64_little_endian.cc
grib_accessor_class_uint8.cc
grib_accessor_class_unpack_bufr_values.cc
grib_accessor_class_values.cc
grib_accessor_class_variable.cc
grib_accessor_class_when.cc

View File

@ -15,7 +15,7 @@ grib_accessor_class_double.cc
grib_accessor_class_long.cc
grib_accessor_class_proj_string.cc
grib_accessor_class_blob.cc
# [SLOW] grib_accessor_class_bufr_data_array.cc
# [SLOW FAILS] grib_accessor_class_bufr_data_array.cc
grib_accessor_class_bufr_data_element.cc
grib_accessor_class_bufr_elements_table.cc
grib_accessor_class_bufr_extract_area_subsets.cc
@ -91,7 +91,7 @@ grib_accessor_class_count_total.cc
grib_accessor_class_data_ccsds_packing.cc
grib_accessor_class_data_g1secondary_bitmap.cc
grib_accessor_class_data_g1shsimple_packing.cc
# [SLOW] grib_accessor_class_data_g22order_packing.cc
# [SLOW CONVERTS] grib_accessor_class_data_g22order_packing.cc
grib_accessor_class_data_g2secondary_bitmap.cc
grib_accessor_class_data_g2shsimple_packing.cc
grib_accessor_class_data_png_packing.cc

View File

@ -1,6 +1,8 @@
import utils.debug as debug
import ast_object.ast_macro_details as ast_macro_details
import os
import ast_object.ast_utils as ast_utils
# Represents a coherent unit of code that needs to be parsed together: usually a single .cc file
#
@ -35,7 +37,9 @@ class AstCode:
return self._macro_details
def add_macro_definition(self, def_node):
    # Record a clang MACRO_DEFINITION cursor against this code unit.
    # def_node: a clang cursor; its spelling, file location and extent are
    # logged for debugging, then storage is delegated to the macro-details
    # collection held by this object.
    debug.line("add_macro_definition", f"Adding MACRO DEFN spelling=[{def_node.spelling}] loc=[{os.path.basename(def_node.location.file.name)}] extent={ast_utils.node_extent(def_node)}")
    self._macro_details.add_definition(def_node)
def add_macro_instantiation(self, inst_node):
    # Record a clang MACRO_INSTANTIATION cursor against this code unit.
    # inst_node: a clang cursor; its spelling, file location and extent are
    # logged for debugging, then storage is delegated to the macro-details
    # collection held by this object.
    debug.line("add_macro_instantiation", f"Adding MACRO INST spelling=[{inst_node.spelling}] loc=[{os.path.basename(inst_node.location.file.name)}] extent={ast_utils.node_extent(inst_node)}")
    self._macro_details.add_instantiation(inst_node)

View File

@ -32,13 +32,13 @@ class AstCodeCreator:
# ALWAYS appear at the top of the global declaration
self._ast_code.add_global_function_entry(node)
else:
debug.line("parse_node", f"Ignoring [no file info] node spelling=[{node.spelling}] kind=[{node.kind}]")
#debug.line("parse_node", f"Ignoring [no file info] node spelling=[{node.spelling}] kind=[{node.kind}]")
return
elif node.kind == clang.cindex.CursorKind.MACRO_INSTANTIATION:
if node.location.file and node.location.file.name == self._cfilepath + self._cfilename:
self._ast_code.add_macro_instantiation(node)
elif node.location.file and node.location.file.name != self._cfilepath + self._cfilename:
debug.line("parse_node", f"Ignoring [non-local] node spelling=[{node.spelling}] file=[{os.path.basename(node.location.file.name)}]")
#debug.line("parse_node", f"Ignoring [non-local] node spelling=[{node.spelling}] file=[{os.path.basename(node.location.file.name)}]")
return
elif node.kind == clang.cindex.CursorKind.INCLUSION_DIRECTIVE:
pass
@ -108,10 +108,4 @@ class AstCodeCreator:
self.parse_root()
# Debug - dump macros
for node in self._ast_code.macro_details.def_nodes:
debug.line("parse", f"MACRO DEFN spelling=[{node.spelling}] loc=[{os.path.basename(node.location.file.name)}]")
for node in self._ast_code.macro_details.inst_nodes:
debug.line("parse", f"MACRO INST spelling=[{node.spelling}] loc=[{os.path.basename(node.location.file.name)}] extent=[{node.extent.start.line}:{node.extent.start.column} -> {node.extent.end.line}:{node.extent.end.column}]")
return self._ast_code

View File

@ -73,7 +73,7 @@ class AstParser:
# Note - Prefer to call this as it handles macro expansions
def parse_ast_node(self, node):
debug.line("parse_ast_node", f"[{node.kind}] spelling=[{node.spelling}] type=[{node.type.spelling}] extent=[{node.extent.start.line}:{node.extent.start.column}]->[{node.extent.end.line}:{node.extent.end.column}]")
debug.line("parse_ast_node", f"[{node.kind}] spelling=[{node.spelling}] type=[{node.type.spelling}] extent={ast_utils.node_extent(node)}")
# Handle macros
macro_instantiation_node = self._macro_details.instantiation_node_for(node)
@ -116,7 +116,7 @@ class AstParser:
# =================================== Macros Convert functions [BEGIN] ===================================
def parse_macro_definition(self, node):
debug.line("parse_macro_definition", f"MACRO spelling=[{node.spelling}] kind=[{node.kind}] extent=[{node.extent.start.line}:{node.extent.start.column} -> {node.extent.end.line}:{node.extent.end.column}]")
debug.line("parse_macro_definition", f"MACRO spelling=[{node.spelling}] kind=[{node.kind}] extent={ast_utils.node_extent(node)}")
tokens = [token.spelling for token in node.get_tokens()]
debug.line("parse_macro_definition", f"MACRO tokens=[{tokens}]")
tokens_count = len(tokens)
@ -211,11 +211,11 @@ class AstParser:
# macro_node is the original macro code in the C file
# expanded_node is the code after the pre-processor has applied the macro expansion
def parse_macro_instantiation(self, macro_node, expanded_node):
debug.line("parse_macro_instantiation", f"MACRO macro_node spelling=[{macro_node.spelling}] kind=[{macro_node.kind}] extent=[{macro_node.extent.start.line}:{macro_node.extent.start.column} -> {macro_node.extent.end.line}:{macro_node.extent.end.column}]")
debug.line("parse_macro_instantiation", f"MACRO macro_node spelling=[{macro_node.spelling}] kind=[{macro_node.kind}] extent={ast_utils.node_extent(expanded_node)}")
debug.line("parse_macro_instantiation", f"MACRO macro_node dump:")
ast_utils.dump_node(macro_node, 2, "truncate")
debug.line("parse_macro_instantiation", f"MACRO expanded_node spelling=[{expanded_node.spelling}] kind=[{expanded_node.kind}] extent=[{expanded_node.extent.start.line}:{expanded_node.extent.start.column} -> {expanded_node.extent.end.line}:{expanded_node.extent.end.column}]")
debug.line("parse_macro_instantiation", f"MACRO expanded_node spelling=[{expanded_node.spelling}] kind=[{expanded_node.kind}] extent={ast_utils.node_extent(expanded_node)}")
debug.line("parse_macro_instantiation", f"MACRO expanded_node dump:")
ast_utils.dump_node(expanded_node, 2, "truncate")
@ -253,8 +253,8 @@ class AstParser:
# Just iteratively call parse_ast_node
def parse_COMPOUND_STMT(self, node):
debug.line("parse_COMPOUND_STMT", f"Dumping node for MACRO INFO:")
ast_utils.dump_node(node, 2)
#debug.line("parse_COMPOUND_STMT", f"Dumping node for MACRO INFO:")
#ast_utils.dump_node(node, 2)
stmt_lines = compound_statement.CompoundStatement()
@ -724,42 +724,60 @@ class AstParser:
return c_unary_op
def parse_BINARY_OPERATOR(self, node):
debug.line("parse_BINARY_OPERATOR", f"DEBUG NODE DUMP:")
ast_utils.dump_node(node)
#debug.line("parse_BINARY_OPERATOR", f"DEBUG NODE DUMP:")
#ast_utils.dump_node(node)
children = list(node.get_children())
assert len(children) == 2, f"Expected exactly two children for binary operator"
left_operand, right_operand = children
node_tokens = list(node.get_tokens())
left_operand_tokens = list(left_operand.get_tokens())
right_operand_tokens = list(right_operand.get_tokens())
debug.line("parse_BINARY_OPERATOR", f"BINARY left_operand [{left_operand.kind}] spelling=[{left_operand.spelling}] type=[{left_operand.type.spelling}] extent=[{left_operand.extent.start.line}:{left_operand.extent.start.column}]->[{left_operand.extent.end.line}:{left_operand.extent.end.column}]")
debug.line("parse_BINARY_OPERATOR", f"BINARY right_operand [{right_operand.kind}] spelling=[{right_operand.spelling}] type=[{right_operand.type.spelling}] extent=[{right_operand.extent.start.line}:{right_operand.extent.start.column}]->[{right_operand.extent.end.line}:{right_operand.extent.end.column}]")
# Tokenize and find the operator
tokens = [token.spelling for token in node.get_tokens()]
left_tokens = [token.spelling for token in left_operand.get_tokens()]
right_tokens = [token.spelling for token in right_operand.get_tokens()]
# Find the operator by excluding operand tokens
tokens_count = len(tokens)
left_tokens_count = len(left_tokens)
operator_token = tokens[left_tokens_count]
debug.line("parse_BINARY_OPERATOR", f"Node spelling=[{node.spelling}] tokens=[{[token.spelling for token in node_tokens]}] extent={ast_utils.node_extent(node)}")
debug.line("parse_BINARY_OPERATOR", f"left_operand [{left_operand.kind}] spelling=[{left_operand.spelling}] tokens=[{[token.spelling for token in left_operand_tokens]}] type=[{left_operand.type.spelling}] extent={ast_utils.node_extent(left_operand)}")
debug.line("parse_BINARY_OPERATOR", f"right_operand [{right_operand.kind}] spelling=[{right_operand.spelling}] tokens=[{[token.spelling for token in right_operand_tokens]}] type=[{right_operand.type.spelling}] extent={ast_utils.node_extent(right_operand)}")
left_operand_cvalue = self.parse_ast_node(left_operand)
right_tokens_count = len(right_tokens)
if tokens_count != left_tokens_count + right_tokens_count + 1:
# The top level tokens don't match the right_operand tokens. This will happen if the top-level
# contains a macro definition. We should be able to handle this, so we'll just record the fact here!
debug.line("parse_BINARY_OPERATOR", f"Right operand tokens don't match: assuming a macro")
right_operand_cvalue = self.parse_ast_node(right_operand)
debug.line("parse_BINARY_OPERATOR", f"Create c_binary_op: left_operand_cvalue=[{debug.as_debug_string(left_operand_cvalue)}] operator_token=[{debug.as_debug_string(operator_token)}] right_operand_cvalue=[{debug.as_debug_string(right_operand_cvalue)}]")
if not right_operand_cvalue:
return literal.Literal(f"// [Ignoring C Code] {' '.join([token.spelling for token in node.get_tokens()])}")
return literal.Literal(f"// [Ignoring C Code] {' '.join([token.spelling for token in node_tokens])}")
c_binary_op = binary_operation.BinaryOperation(left_operand_cvalue, operator_token, right_operand_cvalue)
debug.line("parse_BINARY_OPERATOR", f"left_operand_cvalue=[{debug.as_debug_string(left_operand_cvalue)}]")
debug.line("parse_BINARY_OPERATOR", f"right_operand_cvalue=[{debug.as_debug_string(right_operand_cvalue)}]")
# Get operator
operator_token = None
# Step 1: See if we have child node tokens
node_tokens_count = len(node_tokens)
left_tokens_count = len(left_operand_tokens)
right_tokens_count = len(right_operand_tokens)
if node_tokens_count > 0 and left_tokens_count > 0:
if node_tokens_count >= left_tokens_count + right_tokens_count + 1:
operator_token = node_tokens[left_tokens_count]
debug.line("parse_BINARY_OPERATOR", f"[Step 1] [child tokens] node_tokens_count=[{node_tokens_count}] left_tokens_count=[{left_tokens_count}] right_tokens_count=[{right_tokens_count}]")
debug.line("parse_BINARY_OPERATOR", f"[Step 1] [child tokens] operator_token=[{operator_token.spelling if operator_token else None}]")
if not operator_token:
# Step 2: Deduce it from the node tokens
operator_extent = clang.cindex.SourceRange.from_locations(left_operand.extent.end, right_operand.extent.start)
debug.line("parse_BINARY_OPERATOR", f"operator_extent=[{ast_utils.source_range_string(operator_extent)}]")
operator_token = ast_utils.find_token_from_extent(node_tokens, operator_extent)
debug.line("parse_BINARY_OPERATOR", f"[Step 2] [node_tokens] operator_token=[{operator_token.spelling if operator_token else None}]")
if not operator_token:
# Step 3: Search ALL translation unit tokens (this will be slow for large C files - may need to optimise)
operator_token = ast_utils.find_token_from_extent(node.translation_unit.cursor.get_tokens(), operator_extent)
debug.line("parse_BINARY_OPERATOR", f"[Step 3] [ALL tokens] operator_token=[{operator_token.spelling if operator_token else None}]")
assert operator_token
c_binary_op = binary_operation.BinaryOperation(left_operand_cvalue, operator_token.spelling, right_operand_cvalue)
return c_binary_op
def parse_COMPOUND_ASSIGNMENT_OPERATOR(self, node):

View File

@ -13,6 +13,17 @@ import code_object.array_access as array_access
# Utilities for working with C AST Nodes
# Return a string representation e.g. [117:10->117:45]
def node_extent(node):
    """Return the node's extent as a compact string, e.g. [117:10->117:45]."""
    start = node.extent.start
    end = node.extent.end
    return "[{}:{}->{}:{}]".format(start.line, start.column, end.line, end.column)
def token_extent(node):
    """Return a token's extent as a compact string, e.g. [117:10->117:45]."""
    ext = node.extent
    return "[%s:%s->%s:%s]" % (ext.start.line, ext.start.column, ext.end.line, ext.end.column)
def source_range_string(src_range):
    """Return a SourceRange as a compact string, e.g. [117:10->117:45]."""
    begin, finish = src_range.start, src_range.end
    return "[{0.line}:{0.column}->{1.line}:{1.column}]".format(begin, finish)
# tokens string can be:
# "flat" to show a flat summary
# "list" to show a detailed list
@ -20,12 +31,12 @@ import code_object.array_access as array_access
# "" to not show tokens
def dump_node(cnode, depth=0, tokens="truncate"):
truncate_depth = 10
debug.line("dump_node", f"{' ' * depth}[{depth}:{cnode.kind}] spelling=[{cnode.spelling}] type=[{cnode.type.spelling}] extent=[{cnode.extent.start.line}:{cnode.extent.start.column}]->[{cnode.extent.end.line}:{cnode.extent.end.column}]")
debug.line("dump_node", f"{' ' * depth}[{depth}:{cnode.kind}] spelling=[{cnode.spelling}] type=[{cnode.type.spelling}] extent={node_extent(cnode)}")
if tokens == "flat":
debug.line("dump_node", f"{' ' * depth} -> tokens=[{[token.spelling for token in cnode.get_tokens()]}]")
elif tokens == "list":
for token in cnode.get_tokens():
debug.line("dump_node", f"{' ' * depth} -> token=[{token.spelling}] extent=[{token.extent.start.line}:{token.extent.start.column} -> {token.extent.end.line}:{token.extent.end.column}]")
debug.line("dump_node", f"{' ' * depth} -> token=[{token.spelling}] extent={token_extent(token)}")
elif tokens == "truncate":
token_list = [token.spelling for token in cnode.get_tokens()]
debug.line("dump_node", f"{' ' * depth} -> tokens[:{truncate_depth}]=[{token_list[:truncate_depth]}]")
@ -34,6 +45,16 @@ def dump_node(cnode, depth=0, tokens="truncate"):
for child in cnode.get_children():
dump_node(child, depth+1, tokens)
def find_token_from_extent(tokens, extent):
    """Return the first token that lies within extent, or None.

    A token matches when its start and end lines equal the extent's start and
    end lines respectively, and its columns fall inside the extent's columns.
    Note this only matches tokens whose own extent touches both boundary lines,
    which is fine for the single-line spans this is used for.
    """
    candidates = (
        tok for tok in tokens
        if tok.extent.start.line == extent.start.line
        and tok.extent.end.line == extent.end.line
        and tok.extent.start.column >= extent.start.column
        and tok.extent.end.column <= extent.end.column
    )
    return next(candidates, None)
# Create a C FuncSig object from a FUNCTION_DECL node
def create_cfuncsig(cnode):
if cnode.kind == clang.cindex.CursorKind.FUNCTION_TEMPLATE: