Thanks jalih, this works. Pity that thread locking does not work, though. Also, one more thing puzzled me when I parallelized the same code as in the .NET example in the other thread, substituting your matrix-multiplication DO loop with the DO loop from here
https://forums.silverfrost.com/Forum/Topic/2239
namely that the speedup is only 3+ times versus 7+ times in the .NET case. Your matrix case was also only around 3.9 times. Any clues why? Here is the code, for convenience and simplicity:
module test
  implicit none
  INCLUDE <windows.ins>

  ! Thread helper routines bound by alias name. Their implementation is not
  ! part of this file; the argument-passing modes below are all that is known.
  STDCALL attach_thread 'attach_thread' (REF, VAL):integer*4
  STDCALL wait_object 'wait_object' (VAL):integer*4
  STDCALL close_handle 'close_handle' (VAL):integer*4
  STDCALL create_mutex 'create_mutex' (VAL):integer*4
  STDCALL release_mutex 'release_mutex' (VAL):integer*4

  ! Number of worker threads; each worker performs 1/threads of the total work.
  integer, parameter :: threads = 8

  ! Scratch value used by the single-threaded timing loop in the main program.
  ! The worker threads deliberately do NOT touch it (see subroutine thread).
  real :: d

contains

  ! Worker thread entry point.
  !
  ! Runs this thread's share of the benchmark loop (total_iterations/threads
  ! passes of alog(exp(.))) and then terminates the calling thread.
  !
  ! ptr : argument supplied when the thread is started; not used here.
  subroutine thread(ptr)
    integer, intent(in) :: ptr
    ! Total amount of work; matches the iteration count of the single-threaded
    ! loop in the main program so the two timings are comparable.
    integer, parameter :: total_iterations = 200000000
    integer :: i
    ! Thread-private accumulator. Previously every thread updated the shared
    ! module variable d inside the loop, which is an unsynchronised data race.
    ! NOTE(review): sharing one memory location between all threads may also
    ! be what limited the observed speedup - confirm by re-timing.
    real :: acc

    acc = 2.22
    do i = 1, total_iterations/threads
      acc = alog(exp(acc))
    end do

    call ExitThread(0)
  end subroutine thread

end module test
WINAPP
use test
implicit none

! Benchmark driver: times the alog(exp(.)) loop once on a single thread and
! once with the work started on `threads` worker threads, printing both
! elapsed times.

! Iteration count of the single-threaded reference loop.
integer, parameter :: total_iterations = 200000000
integer :: i, x
integer :: thandle(threads)
! One stable argument slot per thread. Passing loc(i) would hand every thread
! the address of the loop counter, which keeps changing after the thread has
! been started.
integer :: thread_id(threads)
real :: start, finish

write(*,*) 'Single threaded :'
call clock(start)
d = 2.22
do i = 1, total_iterations
  d = alog(exp(d))
end do
call clock(finish)
write(*,*) 'Total time in seconds:', finish-start

write(*,*) 'Multi threaded with 8 threads:'
call clock(start)

! Start threads
do i = 1, threads
  thread_id(i) = i
  thandle(i) = attach_thread(thread, loc(thread_id(i)))
end do

! Wait for threads to finish
do i = 1, threads
  x = wait_object(thandle(i))
end do

call clock(finish)
write(*,*) 'Total time in seconds:', finish-start

! Release the thread handles now that every thread has been waited on.
! NOTE(review): assumes close_handle releases the handle returned by
! attach_thread and that wait_object does not already do so - confirm
! against the helper library.
do i = 1, threads
  x = close_handle(thandle(i))
end do

write(*,*) 'All done.'
END