diff --git a/examples/nemo/scripts/omp_gpu_trans.py b/examples/nemo/scripts/omp_gpu_trans.py index 4927556c62..36be4c15a4 100755 --- a/examples/nemo/scripts/omp_gpu_trans.py +++ b/examples/nemo/scripts/omp_gpu_trans.py @@ -86,6 +86,13 @@ "zdftke.f90", ] +HOISTING_ISSUES = [ + "trabbl.f90", + "trazdf.f90", + "zdfsh2.f90", + "p4zrem.f90", +] + def trans(psyir): ''' Add OpenMP Target and Loop directives to all loops, including the @@ -101,6 +108,7 @@ def trans(psyir): # file-by-file exhaustive tests. only_do_file = os.environ.get('ONLY_FILE', False) if only_do_file and psyir.name not in (only_do_file, + "sbc_phy.f90", "lib_fortran.f90", "solfrac_mod.f90"): return @@ -149,7 +157,7 @@ def trans(psyir): enhance_tree_information(subroutine) normalise_loops( subroutine, - hoist_local_arrays=True, + hoist_local_arrays=psyir.name not in HOISTING_ISSUES, convert_array_notation=True, loopify_array_intrinsics=True, convert_range_loops=True, diff --git a/examples/nemo/scripts/utils.py b/examples/nemo/scripts/utils.py index 2208ddc3c4..71e91d7ede 100755 --- a/examples/nemo/scripts/utils.py +++ b/examples/nemo/scripts/utils.py @@ -455,10 +455,12 @@ def insert_explicit_loop_parallelism( # And if successful, the region directive on top. if region_directive_trans: region_directive_trans.apply(loop.parent.parent) - except TransformationError: + except TransformationError as err: # This loop cannot be transformed, proceed to next loop. # The parallelisation restrictions will be explained with a comment # associted to the loop in the generated output. + if not loop.preceding_comment: + loop.append_preceding_comment(str(err.value)) continue @@ -518,10 +520,9 @@ def add_profile_region(nodes): # of a single statement return if isinstance(nodes[0], IfBlock) and \ - "was_single_stmt" in nodes[0].annotations and \ - isinstance(nodes[0].if_body[0], CodeBlock): + "was_single_stmt" in nodes[0].annotations: # We also don't put single statements consisting of - # 'IF(condition) CALL blah()' inside profiling regions + # 'IF(condition) statement' inside profiling regions return try: ProfileTrans().apply(nodes) diff --git a/src/psyclone/psyir/transformations/hoist_local_arrays_trans.py b/src/psyclone/psyir/transformations/hoist_local_arrays_trans.py index a736f3a538..9019136b7b 100644 --- a/src/psyclone/psyir/transformations/hoist_local_arrays_trans.py +++ b/src/psyclone/psyir/transformations/hoist_local_arrays_trans.py @@ -47,14 +47,15 @@ CodeBlock, ACCRoutineDirective, Literal, IntrinsicCall, BinaryOperation, Reference) from psyclone.psyir.symbols import ( - ArrayType, Symbol, INTEGER_TYPE, DataSymbol, DataTypeSymbol) + ArrayType, Symbol, INTEGER_TYPE, DataSymbol, DataTypeSymbol, + UnresolvedInterface) from psyclone.psyir.transformations.transformation_error \ import TransformationError class HoistLocalArraysTrans(Transformation): - '''This transformation takes a Routine and promotes any local, 'automatic' - arrays to Container scope: + '''This transformation takes a Routine and promotes any local arrays to + Container scope: >>> from psyclone.psyir.backend.fortran import FortranWriter >>> from psyclone.psyir.frontend.fortran import FortranReader @@ -140,9 +141,8 @@ def apply(self, node, options=None): container = node.ancestor(Container) - # Identify all arrays that are local to the target routine, - # do not explicitly use dynamic memory allocation and are not - # accessed within a CodeBlock. + # Identify all arrays that are local to the target routine + # (automatic interface) and not accessed within a CodeBlock. automatic_arrays = self._get_local_arrays(node) if not automatic_arrays: @@ -154,17 +154,73 @@ def apply(self, node, options=None): tags_dict = node.symbol_table.get_reverse_tags_dict() for sym in automatic_arrays: - # Keep a copy of the original shape of the array. - orig_shape = sym.datatype.shape[:] - # Modify the *existing* symbol so that any references to it - # remain valid. - new_type = copy.copy(sym.datatype) - # pylint: disable=protected-access - new_type._shape = len(orig_shape)*[ArrayType.Extent.DEFERRED] - # pylint: enable=protected-access - sym.datatype = new_type - # Ensure that the promoted symbol is private to the container. - sym.visibility = Symbol.Visibility.PRIVATE + # Check if the original is already an ALLOCATABLE variable + already_allocatable = any(dim == ArrayType.Extent.DEFERRED + for dim in sym.shape) + + # Find or Create the array reference that will be the argument to + # the new memory allocation statement. + if already_allocatable: + # If it was already an allocatable, we should be able to find + # it + original_allocate = None + not_supported = False + for ref in node.walk(ArrayReference): + if ( + isinstance(ref.parent, IntrinsicCall) and + (ref.parent.intrinsic == + IntrinsicCall.Intrinsic.ALLOCATE) and + ref.symbol is sym + ): + if original_allocate is not None: + # This would be the second match, so just warn the + # user and skip this symbol + original_allocate.append_preceding_comment( + f"PSyclone warning: {self.name} found more " + f"than one ALLOCATE for this variable, but " + f"currently it just supports cases with " + f"single allocations") + not_supported = True + break + original_allocate = ref.parent + # alloc-options are captured as argument_names in the + # PSyIR + if any(original_allocate.argument_names): + original_allocate.append_preceding_comment( + f"PSyclone warning: {self.name} found an " + f"ALLOCATE with alloc-options, this is " + f"not supported") + not_supported = True + break + aref = ref.copy() + orig_shape = [] + for child in aref.children: + if isinstance(child, Range): + lbound = child.start + ubound = child.stop + else: + lbound = Literal("1", INTEGER_TYPE) + ubound = child + orig_shape.append( + ArrayType.ArrayBounds(lbound, ubound) + ) + if not_supported or original_allocate is None: + continue + + else: + # Keep a copy of the original shape of the array. + orig_shape = sym.datatype.shape[:] + # Modify the *existing* symbol so that any references to it + # remain valid. + new_type = copy.copy(sym.datatype) + # pylint: disable=protected-access + new_type._shape = len(orig_shape)*[ArrayType.Extent.DEFERRED] + # pylint: enable=protected-access + sym.datatype = new_type + dim_list = [Range.create(dim.lower.copy(), dim.upper.copy()) + for dim in orig_shape] + aref = ArrayReference.create(sym, dim_list) + # We must allow for the situation where there's a clash with a # symbol name already present at container scope. (The validate() # method will already have checked for tag clashes.) @@ -176,11 +232,8 @@ def apply(self, node, options=None): node.symbol_table.rename_symbol(sym, new_name) container.symbol_table.add(sym, tag=tags_dict.get(sym)) - # Create the array reference that will be the argument to the - # new memory allocation statement. - dim_list = [Range.create(dim.lower.copy(), dim.upper.copy()) - for dim in orig_shape] - aref = ArrayReference.create(sym, dim_list) + # Ensure that the promoted symbol is private to the container. + sym.visibility = Symbol.Visibility.PRIVATE # Add a conditional expression to avoid repeating the allocation # if its already done @@ -192,7 +245,6 @@ def apply(self, node, options=None): # Add runtime checks to verify that the boundaries haven't changed # (we skip literals as we know they can't have changed) - check_added = False for idx, dim in enumerate(orig_shape): if not isinstance(dim.lower, Literal): expr = BinaryOperation.create( @@ -208,7 +260,6 @@ def apply(self, node, options=None): cond_expr = BinaryOperation.create( BinaryOperation.Operator.OR, cond_expr, expr) - check_added = True if not isinstance(dim.upper, Literal): expr = BinaryOperation.create( BinaryOperation.Operator.NE, @@ -223,26 +274,60 @@ def apply(self, node, options=None): cond_expr = BinaryOperation.create( BinaryOperation.Operator.OR, cond_expr, expr) - check_added = True - body = [] - if check_added: - body.append( + if_stmt = IfBlock.create(cond_expr, [ + IntrinsicCall.create(IntrinsicCall.Intrinsic.ALLOCATE, [aref]) + ]) + # If any bound-check was added, also insert a deallocate statement + if isinstance(cond_expr, BinaryOperation): + if_stmt.if_body.addchild( IfBlock.create( allocated_expr.copy(), [IntrinsicCall.create( IntrinsicCall.Intrinsic.DEALLOCATE, - [Reference(sym)])])) - body.append( - IntrinsicCall.create(IntrinsicCall.Intrinsic.ALLOCATE, - [aref])) - # Insert the conditional allocation at the start of the supplied - # routine. - node.children.insert(0, IfBlock.create(cond_expr, body)) - - # Finally, remove the hoisted symbols (and any associated tags) - # from the routine scope. - for sym in automatic_arrays: + [Reference(sym)])]), + index=0 + ) + + if already_allocatable: + # Find and remove the deallocate statements + for ref in node.walk(Reference): + if ( + isinstance(ref.parent, IntrinsicCall) and + (ref.parent.intrinsic == + IntrinsicCall.Intrinsic.DEALLOCATE) and + ref.symbol is sym + ): + # If its the only reference in the deallocate + # statement, remove the full statement, otherwise + # just the relevant reference + deallocate_stmt = ref.parent + if len(deallocate_stmt.arguments) == 1: + deallocate_stmt.detach() + else: + for child in deallocate_stmt.arguments: + if child == ref: + child.detach() + # Now insert guarded allocate expression + original_allocate.parent.children.insert( + original_allocate.position, if_stmt) + # Remove the unguarded allocate, if its the only reference in + # a allocate statement, remove the full statement, otherwise + # just the relevant reference + if len(original_allocate.arguments) == 1: + original_allocate.detach() + else: + for child in original_allocate.arguments: + if child == aref: + child.detach() + + else: + # Insert the conditional allocation at the start of the + # supplied routine. + node.children.insert(0, if_stmt) + + # Finally, remove the hoisted symbols (and any associated tags) + # from the routine scope. # TODO #898: Currently the SymbolTable.remove() method does not # support DataSymbols. # pylint: disable=protected-access @@ -256,8 +341,7 @@ def _get_local_arrays(node): ''' Identify all arrays that are local to the target routine, all their bounds/kind/type symbols are also local, do not represent a function's - return value, are not constant and do not explicitly use - dynamic memory allocation. Also excludes any such arrays that are + return value, are not constant. Also excludes any such arrays that are accessed within CodeBlocks or RESHAPE intrinsics. :param node: target PSyIR node. @@ -267,6 +351,20 @@ def _get_local_arrays(node): :rtype: list[:py:class:`psyclone.psyir.symbols.DataSymbol`] ''' + def dependent_symbol_could_be_local(symbol: Symbol) -> bool: + ''' The symbol exists in the local scope, or it is an Unresolved + interface symbol but there are no wildcard import that could make + it local. + + ''' + if symbol.name not in node.symbol_table: + return False + if isinstance(symbol.interface, UnresolvedInterface): + if all(container.wildcard_import is False + for container in node.symbol_table.containersymbols): + return False + return True + local_arrays = {} for sym in node.symbol_table.automatic_datasymbols: # Check that the array is not the functions return symbol, or @@ -274,13 +372,17 @@ def _get_local_arrays(node): if (sym is node.return_symbol or not sym.is_array or sym.is_constant): continue - if isinstance(sym.datatype.intrinsic, DataTypeSymbol): + if (isinstance(sym.datatype.intrinsic, DataTypeSymbol) and + dependent_symbol_could_be_local(sym.datatype.intrinsic)): continue - if isinstance(sym.datatype.precision, DataSymbol): + if (isinstance(sym.datatype.precision, DataSymbol) and + dependent_symbol_could_be_local(sym.datatype.precision)): continue # Check whether all of the bounds of the array are defined - an # allocatable array will have array dimensions of # ArrayType.Extent.DEFERRED + if any(dim == ArrayType.Extent.DEFERRED for dim in sym.shape): + local_arrays[sym.name] = sym if all(isinstance(dim, ArrayType.ArrayBounds) for dim in sym.shape): local_arrays[sym.name] = sym diff --git a/src/psyclone/tests/psyir/transformations/hoist_local_arrays_trans_test.py b/src/psyclone/tests/psyir/transformations/hoist_local_arrays_trans_test.py index 4d078a7acd..29f3b50a59 100644 --- a/src/psyclone/tests/psyir/transformations/hoist_local_arrays_trans_test.py +++ b/src/psyclone/tests/psyir/transformations/hoist_local_arrays_trans_test.py @@ -39,8 +39,9 @@ import pytest -from psyclone.psyir.nodes import Routine, Container, FileContainer -from psyclone.psyir.symbols import ArrayType, Symbol +from psyclone.psyir.nodes import ( + Routine, Container, FileContainer, IntrinsicCall, Literal) +from psyclone.psyir.symbols import ArrayType, Symbol, INTEGER_TYPE from psyclone.psyir.transformations import (HoistLocalArraysTrans, TransformationError) from psyclone.tests.utilities import Compile @@ -404,8 +405,9 @@ def test_get_local_arrays(fortran_reader): routine = psyir.walk(Routine)[0] hoist_trans = HoistLocalArraysTrans() symbols = hoist_trans._get_local_arrays(routine) - assert len(symbols) == 1 + assert len(symbols) == 2 assert symbols[0] is routine.symbol_table.lookup("wrk") + assert symbols[1] is routine.symbol_table.lookup("wrk2") def test_get_local_arrays_codeblock(fortran_reader): @@ -602,6 +604,112 @@ def test_validate_tagged_symbol_clash(fortran_reader): str(err.value)) +def test_apply_with_allocatables(fortran_reader, fortran_writer, tmpdir): + ''' Test the apply method correctly handles an automatic arrays with + allocatable attributes and simple allocatable statements. ''' + code = """ + module my_mod + contains + subroutine test(arg, var) + integer :: i + real, allocatable, dimension(:), intent(in) :: arg + integer, intent(in) :: var + real, allocatable, dimension(:) :: a, b, c ! This will be hoisted + real, allocatable, dimension(:) :: d, e ! This won't be hoisted + + if (var == 3) then ! Allocate is only done inside this condition + ALLOCATE(a(10), b(10:var)) + ALLOCATE(c(var-5:var+5)) + + ! d uses a ALLOCATE parameter, which is not supported + ALLOCATE(d(10), MOLD=arg) + + ! e is allocated and deallocated two times + ALLOCATE(e(10)) + DEALLOCATE(e) + ALLOCATE(e(20)) + DEALLOCATE(e) + + do i=1,10 + a(i) = 1.0 + end do + DEALLOCATE(a,c) + DEALLOCATE(b,d) + endif + end subroutine test + end module my_mod + """ + psyir = fortran_reader.psyir_from_source(code) + alloc1 = psyir.walk(IntrinsicCall)[0] + # the fparser reader always puts ranges in the allocate indices, but for + # 'a' force it to be somthing else + alloc1.arguments[0].children[0].replace_with( + Literal("10", INTEGER_TYPE) + ) + routine = psyir.walk(Routine)[0] + hoist_trans = HoistLocalArraysTrans() + hoist_trans.apply(routine) + output = fortran_writer(psyir) + # Check that: + # - Local allocatable declarations has been hoisted unless they have + # multiple allocation statements or one with named arguments + # - Their ALLOCATEs have been guarded in-place + # - Their DEALLOCATEs have been removed + # - Warning messages for non-hoisted allocatables have been added + assert output == """\ +module my_mod + implicit none + real, allocatable, dimension(:), private :: a + real, allocatable, dimension(:), private :: b + real, allocatable, dimension(:), private :: c + public + + contains + subroutine test(arg, var) + real, allocatable, dimension(:), intent(in) :: arg + integer, intent(in) :: var + integer :: i + real, allocatable, dimension(:) :: d + real, allocatable, dimension(:) :: e + + if (var == 3) then + if (.NOT.ALLOCATED(a)) then + ALLOCATE(a(10)) + end if + if (.NOT.ALLOCATED(b) .OR. UBOUND(b, dim=1) /= var) then + if (ALLOCATED(b)) then + DEALLOCATE(b) + end if + ALLOCATE(b(10:var)) + end if + if (.NOT.ALLOCATED(c) .OR. LBOUND(c, dim=1) /= var - 5 .OR. \ +UBOUND(c, dim=1) /= var + 5) then + if (ALLOCATED(c)) then + DEALLOCATE(c) + end if + ALLOCATE(c(var - 5:var + 5)) + end if + ! PSyclone warning: HoistLocalArraysTrans found an ALLOCATE with \ +alloc-options, this is not supported + ALLOCATE(d(1:10), MOLD=arg) + ! PSyclone warning: HoistLocalArraysTrans found more than one ALLOCATE \ +for this variable, but currently it just supports cases with single allocations + ALLOCATE(e(1:10)) + DEALLOCATE(e) + ALLOCATE(e(1:20)) + DEALLOCATE(e) + do i = 1, 10, 1 + a(i) = 1.0 + enddo + DEALLOCATE(d) + end if + + end subroutine test + +end module my_mod +""" + assert Compile(tmpdir).string_compiles(output) + # str