I don't think the problem is that simple.
I tried to write a smaller example, using the values of ROW_I from when it failed, but it did not reproduce the delays.
I think the processor sees that, typically, ROW_I(k+1) = ROW_I(k)+1, so when this does not hold after a lot of calculations, things get upset.
The latest test is basically:
subroutine gaussean_reduction (sk, nszf, nband)
!
! Reduce stiffness matrix using gaussean reduction
!
integer4 nszf, nband
real8 sk(nband,nszf)
!
real8 c
integer4 n, b, i, j, k, iband, ii, ik, ib
real8 sum_zero, sum_nonzero, sum_coeff, sum_iband, sum_mband
real8 sec_start, sec_end, t2
!
integer8 del_tick, sum_tick, tick_1, tick_2
external del_tick, sum_tick
integer4, allocatable, dimension(:) :: row_i
Real8, allocatable, dimension(:) :: row_f
!
integer4, allocatable, dimension(:) :: stat_iband
integer8, allocatable, dimension(:) :: stat_tick
!
!--- initial statistics
write (,) ' '
write (,) 'Reduce [SK]'
write (,*) ' Eqn del_sec Tick_i Tick_r'
sum_coeff = 0 ! coefficients in matrix
sum_nonzero = 0 ! non-zero coefficients in reduced matriz
sum_zero = 0 ! zero coefficients in original matriz
sum_iband = 0 ! active coeffieients in each row
sum_mband = 0 ! row length envelope
!
do n = 1,nszf ! nszf=139020
do b = 1,min (nband,nszf-n+1)
sum_coeff = sum_coeff + 1
if (sk(b,n)==0) sum_zero = sum_zero+1
end do
end do
!
allocate ( row_i(nband) )
allocate ( row_f(nband) )
!
allocate ( stat_iband(nszf) )
allocate ( stat_tick(nszf) )
!
stat_tick(1) = sum_tick ()
call elapsed_time (sec_start)
t2 = sec_start
tick_1 = del_tick()
tick_1 = 0
tick_2 = 0
!
do n = 1,nszf ! nszf=139020
!
if (mod(n,nszf/20)==0 .or. n < 5) then
call elapsed_time (sec_end)
write ( *,fmt='(a,i7,f10.4,2i10)') ' at equation',n, sec_end-t2, tick_1, tick_2
write (14,fmt='(a,i7,f10.4,2i10)') ' at equation',n, sec_end-t2, tick_1, tick_2
t2 = sec_end
tick_1 = 0
tick_2 = 0
end if
iband = 1
row_i(1) = 1
row_f(1) = sk(1,n)
do b = 2,min (nband,nszf-n+1)
if (sk(b,n)==0) cycle
sum_nonzero = sum_nonzero+1
iband = iband+1
row_i(iband) = b
row_f(b) = sk(b,n)
sk(b,n) = sk(b,n)/sk(1,n)
end do
sum_iband = sum_iband + dble(iband)
sum_mband = sum_mband + dble(row_i(iband))
tick_1 = tick_1 + del_tick()
!