QR_MUMPS
dqrm_factorization_core.F90
Go to the documentation of this file.
1 !! ##############################################################################################
2 !!
3 !! Copyright 2012 CNRS, INPT
4 !!
5 !! This file is part of qr_mumps.
6 !!
7 !! qr_mumps is free software: you can redistribute it and/or modify
8 !! it under the terms of the GNU Lesser General Public License as
9 !! published by the Free Software Foundation, either version 3 of
10 !! the License, or (at your option) any later version.
11 !!
12 !! qr_mumps is distributed in the hope that it will be useful,
13 !! but WITHOUT ANY WARRANTY; without even the implied warranty of
14 !! MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 !! GNU Lesser General Public License for more details.
16 !!
17 !! You can find a copy of the GNU Lesser General Public License
18 !! in the qr_mumps/doc directory.
19 !!
20 !! ##############################################################################################
21 
22 
23 !! ##############################################################################################
33 
34 
35 #include "qrm_common.h"
36 
37 
41 
66 
67 subroutine dqrm_factorization_core(qrm_mat)
68 
69  use dqrm_spmat_mod
70  use qrm_mem_mod
71  use qrm_queue_mod
72  use qrm_task_mod
73  use qrm_error_mod
74  use qrm_common_mod
76 #if defined(trace)
77  use qrm_trace_mod
78 #endif
79  !$ use omp_lib
80  implicit none
81 
82  type(dqrm_spmat_type), target :: qrm_mat
83 
84  integer :: qrm_nth, nth, thn, info, i
85 
86 #if defined(trace)
87  integer :: pnl_id, upd_id, act_id, asm_id, clean_id, sched_id
88 #endif
89  type(qrm_task_type) :: task
90  type(qrm_task_queue_handle) :: tq_h
91  real(kind(1.d0)), allocatable :: work(:)
92  type(qrm_queue) :: active_q, ready_q
93  real(kind(1.d0)) :: flops, times(6), t1, t2
94  logical :: got_task
95  type(qrm_adata_type), pointer :: adata
96  type(dqrm_fdata_type), pointer :: fdata
97 
98  ! error management
99  integer :: err_act
100  character(len=*), parameter :: name='qrm_factorization_core'
101 
102  call qrm_err_act_save(err_act)
103 
104 #if defined(trace)
105  call qrm_trace_init(1)
106  call qrm_trace_create_event('Panel', pnl_id)
107  call qrm_trace_create_event('Update', upd_id)
108  call qrm_trace_create_event('Activate', act_id)
109  call qrm_trace_create_event('Assemble', asm_id)
110  call qrm_trace_create_event('Clean', clean_id)
111 #endif
112 
113  ! simplify
114  adata => qrm_mat%adata
115  fdata => qrm_mat%fdata
116 
117  ! The factorization uses a double system of queues:
118  ! 1) front queues: these are used to hold the fronts that are
119  ! currently being treated. There is an active front queue that contains
120  ! all the fronts that are currently active and a another one holding all
121  ! the fronts that can be activated (i.e., whose children are all active).
122  ! When scheduling tasks, a thread first tries to schedule tasks related to
123  ! the active fronts. If non is available the thread will schedule the activation
124  ! of the first front in the ready queue.
125  ! 2) task queues: each thread has its own queue of tasks (as defined in the
126  ! qrm_task_mod). Tasks are pushed on the queue of the thread that own the front.
127  ! Then, in the main loop below, each thread tries to fetch a task from its own
128  ! queue but in the case where it is empty, the thread can steal a task from
129  ! others.
130 
131  ! initialize front queues
132  call qrm_queue_init(active_q, adata%nnodes, qrm_fifo_)
133  call qrm_queue_init(ready_q , adata%nnodes, qrm_lifo_)
134  ! push all the leaves in the ready queue. this queue is lifo (in order
135  ! to achieve as much as possible a post-order traversal) so leaves are pushed
136  ! in reverse order.
137  do i=adata%nleaves, 1, -1
138  call qrm_queue_push(ready_q, adata%leaves(i))
139  fdata%front_list(adata%leaves(i))%status = qrm_activable_
140  end do
141 
142 
143  ! initialize to safe values for the case where openmp is not used
144 
145 #if defined (_OPENMP)
146  call omp_set_num_threads(1)
147  qrm_nth = qrm_mat%icntl(qrm_nthreads_)
148 #endif
149 
150  !$omp parallel &
151  !$omp & num_threads(qrm_nth) &
152  !$omp & private(task, nth, thn, work, info, got_task) &
153  !$omp & shared(active_q, ready_q, tq_h) &
154  !$omp & reduction(+:flops)
155 
156 #if defined (_OPENMP)
157  nth = omp_get_num_threads()
158  thn = omp_get_thread_num()
159 #else
160  nth = 1
161  thn = 0
162 #endif
163 
164  call qrm_par_mem_init()
165 
166  flops = 0.d0
167 
168  !$omp master
169  __qrm_prnt_dbg('("Num threads: ",i3)')nth
170  !$omp end master
171 
172  ! initialize the task queues
173  call qrm_init_task_queue(tq_h)
174 
175  ! allocate work area for panel and update
176  call qrm_aalloc(work, qrm_mat%icntl(qrm_nb_)*qrm_mat%icntl(qrm_nb_))
177  __qrm_check_ret(name,'qrm_alloc',9998)
178 
179  info = 0
180 
181  !$omp barrier
182  taskloop: do
183  ! this is the main loop. At each iteration of this loop a thread
184  ! does the following things:
185  ! 1) checks if errors are present on the error stack
186  ! 2) pushes some tasks on the queues if queues begin to run out of entries
187  ! 3) gets a task from the queues (its own queue first and then from others)
188  ! 4) executes the action corresponding to the fetched task
189 
190  if(qrm_err_stack%nelem .gt. 0) goto 9998
191 
192  ! fill up the queue if there are too few tasks
193  if(qrm_task_queue_card(tq_h) .lt. nth) then
194  call fill_queue()
195  end if
196 
197  got_task = qrm_get_task(tq_h, task)
198 
199  if(.not. got_task) cycle taskloop ! queue is empty
200 
201  ! perform the action corresponding to the fetched task
202  select case(task%id)
203  case(qrm_task_exit_)
204  !$omp barrier
205  ! done, exit
206  exit
207  case(qrm_task_pnl_)
208  ! factorize a panel
209  call panel (task, thn, work, flops)
210  case(qrm_task_upd_)
211  ! update a block-column
212  call update (task, thn, work, flops)
213  case(qrm_task_act_)
214  ! activate a front
215  call activate(task, thn, flops)
216  case(qrm_task_asm_)
217  ! assemble column into father front
218  call assemble(task, thn)
219  case(qrm_task_cln_)
220  ! clean the front
221  call clean (task, thn)
222  end select
223  end do taskloop
224 
225 9998 continue
226  call qrm_adealloc(work)
227  call qrm_clean_task_queue(tq_h)
228  call qrm_par_mem_finalize()
229  !$omp end parallel
230 
231 #if defined(trace)
232  call qrm_trace_log_dump('trace.svg')
233 #endif
234 
235  ! free the front queues
236  call qrm_queue_free(active_q)
237  call qrm_queue_free(ready_q)
238 
239  ! check if errors were generated
240  if(qrm_err_stack%nelem .gt. 0) then
241  call qrm_err_push(20, name)
242  goto 9999
243  end if
244 
245  qrm_mat%gstats(qrm_facto_flops_) = floor(flops,kind(qrm_mat%gstats(1)))
246 
247  call qrm_err_act_restore(err_act)
248  return
249 
250 9999 continue ! error management
251  call qrm_err_act_restore(err_act)
252  if(err_act .eq. qrm_abort_) then
253  call qrm_err_check()
254  end if
255 
256  return
257 
258 contains
259 
260 
261 
262 
263 
264 !==========================================================================================
265 !==========================================================================================
266  subroutine fill_queue()
267  implicit none
268 
269  type(dqrm_front_type), pointer :: front
270  integer :: f
271  integer :: thn
272  logical :: found
273 
274  thn=0
275  !$ thn=omp_get_thread_num()
276  found = .false.
277  ! loop over the queue of active nodes
278  f = 0
279  active_fronts: do
280  f = qrm_queue_next(active_q, f)
281  if(f .eq. 0) exit active_fronts
282  front => fdata%front_list(f)
283  ! if(front%owner .ne. thn) cycle active_fronts ! front f does not belong to me
284 #if defined(_OPENMP)
285  if(.not. omp_test_lock(front%lock)) cycle active_fronts ! front f does not belong to me
286 #endif
287  found = found .or. front_sched_ops(front)
288 #if defined(_OPENMP)
289  call omp_unset_lock(front%lock)
290 #endif
291  end do active_fronts
292 
293  ! if nothing was found above, then look for fronts to activate
294  ! otherwise return
295  if(found) return
296 
297  ! schedule the activation of a new front
298  f = 0
299  ready_fronts: do
300  f = qrm_queue_next(ready_q, f)
301  if(f .eq. 0) exit ready_fronts
302  front => fdata%front_list(f)
303  found = found .or. front_sched_act(front)
304  if (found) exit ready_fronts
305  end do ready_fronts
306  ! if nothing was found above, then check if the facto is over
307  ! otherwise return
308  if(found) return
309 
310  call check_facto_over()
311 
312  return
313  end subroutine fill_queue
314 
315 
316 
317 
318 
319 
320 !==========================================================================================
321 !==========================================================================================
322  function front_sched_ops(front)
323  ! schedules operations for a specific (active) front
324  implicit none
325  logical :: front_sched_ops
326  type(dqrm_front_type) :: front
327 
328  type(qrm_task_type) :: tsk
329  type(dqrm_front_type), pointer :: father, cfront
330  integer :: p, c, i, j, childpt, child
331 
332  ! initialize
333  front_sched_ops = .false.
334 
335  if( (front%status .le. 0) .or. &
336  &(front%status .eq. qrm_done_)) &
337  & return
338 
339  ! check if the front can be cleaned
340  if(minval(front%ptable) .eq. qrm_done_) then
341  tsk = qrm_task_type(qrm_task_cln_, front%num, 0, 0, .false.)
342  if(qrm_sched_task(tq_h, tsk, 't', front%owner)) then
343  ! mark the column as assigned
344  front_sched_ops = .true.
345  front%status = qrm_busy_
346  return ! if the front can be cleaned
347  ! there's nothing more we can schedule on it
348  end if
349  end if
350 
351 #if defined(fullasm)
352  if(maxval(front%ptable) .lt. 0) return
353 #endif
354 
355  ! first look for panels
356  if (front%lastpnl .lt. front%np) then
357  ! there are still panels left to do in this front
358  ! check if it's possible to reduce panel lastpnl+1
359  if (front%ptable(front%lastpnl+1) .eq. front%lastpnl) then
360  ! column lastpnl+1 is up-to-date wrt panel lastpnl.
361  ! that means it can be reduced
362  tsk = qrm_task_type(qrm_task_pnl_, front%num, front%lastpnl+1, 0,.false.)
363  if(.not. qrm_sched_task(tq_h, tsk, 'h', front%owner)) then
364  ! cannot schedule the task (queue is full)
365  return
366  else
367  ! mark the column as assigned
368  front_sched_ops = .true.
369  front%ptable(front%lastpnl+1) = qrm_busy_
370  end if
371  end if
372  end if
373 
374 
375 
376  ! now look for updates
377  do p=1, front%lastpnl
378  ! for every reduced panel, check if there are updates to be done
379  do c=p+1, front%nc
380  if(front%ptable(c) .eq. qrm_busy_) cycle ! column is busy
381  ! just need to check whether c is up-to-date wrt p-1
382  if(front%ptable(c) .eq. p-1) then
383  tsk = qrm_task_type(qrm_task_upd_, front%num, p, c, .false.)
384  if(.not. qrm_sched_task(tq_h, tsk, 't', front%owner)) then
385  ! cannot schedule the task (queue is full)
386  return
387  else
388  ! mark the column as assigned
389  front_sched_ops = .true.
390  front%ptable(c) = qrm_busy_
391  end if
392  end if
393  end do
394  end do
395 
396 
397  ! now check if there's something to assemble
398  p = adata%parent(front%num)
399  if (p .eq. 0) return ! front is a root node. no need to assemble
400  father => fdata%front_list(p)
401  ! check is father node is active
402  if(father%status .eq. qrm_active_) then
403  do c=front%npiv/front%nb+1, front%nc
404  if (front%ptable(c) .eq. min(c, front%np)) then
405  tsk = qrm_task_type(qrm_task_asm_, front%num, 0, c, .false.)
406  if(.not. qrm_sched_task(tq_h, tsk, 'h', front%owner)) then
407  ! cannot schedule the task (queue is full)
408  exit
409  else
410  ! mark the column as assigned
411  front_sched_ops = .true.
412  front%ptable(c) = qrm_busy_
413  father%status = qrm_busy_
414  ! better not schedule more than one assembly at once
415  ! because we may end up executing mutiple assemblies
416  ! at the same time which results in memory conflicts.
417  exit
418  end if
419  end if
420  end do
421  end if
422 
423 
424  return
425  end function front_sched_ops
426 
427 
428 
429 
430 
431 
432 
433 
434 !==========================================================================================
435 !==========================================================================================
436  function front_sched_act(front)
437  ! check if a specific front can be activated and eventually
438  ! schedules its activation
439  implicit none
440  logical :: front_sched_act
441  type(dqrm_front_type) :: front
442 
443  type(qrm_task_type) :: tsk
444 
445  ! initialize
446  front_sched_act = .false.
447 
448  !$ if(omp_test_lock(front%lock)) then
449  if(front%status .eq. qrm_activable_) then
450  tsk = qrm_task_type(qrm_task_act_, front%num, 0, 0, .false.)
451  if(qrm_sched_task(tq_h, tsk, 't')) then
452  ! mark the column as assigned
453  front_sched_act = .true.
454  front%status = qrm_busy_
455  end if
456  end if
457  !$ call omp_unset_lock(front%lock)
458  !$ end if
459 
460  return
461  end function front_sched_act
462 
463 
464 
465 
466 
467 
468 !==========================================================================================
469 !==========================================================================================
470  subroutine check_facto_over()
471  ! checks whether the facto is over and eventually schedules a
472  ! termination task
473  implicit none
474 
475  type(qrm_task_type) :: tsk
476  logical :: found
477 
478  ! all the fronts are done
479  if(fdata%done .eq. fdata%nfronts) then
480  tsk = qrm_task_type(qrm_task_exit_, 0, 0, 0, .false.)
481  found = qrm_sched_task(tq_h, tsk, 't')
482  end if
483 
484  return
485  end subroutine check_facto_over
486 
487 
488 
489 
490 
491 
492 
493 
494 
495 !==========================================================================================
496 !==========================================================================================
497  subroutine panel(task, thn, work, flops)
498  ! this subroutine performs the panel reduction
499  ! block-column task%pnl in front task%front
500  !
501 
502  use dqrm_fdata_mod
503  use qrm_common_mod
504  implicit none
505 
506  real(kind(1.d0)) :: t1, t2
507  type(qrm_task_type) :: task
508  integer :: thn
509  real(kind(1.d0)) :: work(:)
510  real(kind(1.d0)) :: flops
511 
512  type(dqrm_front_type), pointer :: front
513  integer :: f, np, nc, nb, &
514  & m, n, k, i, info, j, ib, fcol, lcol, ii, jj
515  type(qrm_task_type) :: tsk
516 
517  front => fdata%front_list(task%front)
518  f = adata%parent(front%num)
519 
520  call qrm_get(qrm_mat, 'qrm_ib', ib)
521 
522 #if defined(debug)
523  __qrm_prnt_dbg('(i3," =--> Panel : ",i4," -- ",i4)')thn,task%front,task%pnl
524 #endif
525 
526 #if defined(trace)
527  call qrm_trace_event_start(pnl_id, thn)
528 #endif
529 
530  fcol = (task%pnl-1)*front%nb+1 ! the first pivot in the panel
531  lcol = min(task%pnl*front%nb,front%n) ! the last pivot in the panel
532 
533  ! flops = flops+pureflopscount(front%stair, front%n, j, front%nb)
534 
535  do i=fcol, min(task%pnl*front%nb,front%ne), ib
536  n = min(ib, lcol-i+1) ! the width of the panel
537  m = max(front%stair(min(i+n-1,front%ne))- i+1,0) ! the height of the panel
538  k = min(m,n)
539  j = i+n
540 
541  call dgeqrf(m, n, front%front(i,i), front%m, front%tau(i), work(1), &
542  & ib**2, info)
543 
544  call dlarft( 'f', 'c', m, k, front%front(i,i), front%m, front%tau(i), &
545  & front%t(1,i), front%ib )
546 
547  flops = flops+qrm_count_flops(m, n, n, 'panel')
548 
549  n = lcol-j+1
550  if ( n .le. 0) exit
551  call dlarfb('l', 't', 'f', 'c', m, n, k, front%front(i,i), &
552  & front%m, front%t(1,i), front%ib, &
553  & front%front(i,j), front%m, work(1), front%nb)
554 
555  flops = flops+qrm_count_flops(m, n, k, 'update')
556 
557  end do
558 
559 #if defined(trace)
560  call qrm_trace_event_stop(pnl_id, thn)
561 #endif
562 
563  ! once all the computations are done, update the status of the
564  ! factorization
565 
566  !$ call omp_set_lock(front%lock)
567  if(task%pnl .lt. front%nc) then
568  if(front%ptable(task%pnl+1) .eq. task%pnl-1) then
569  tsk = qrm_task_type(qrm_task_upd_, front%num, task%pnl, task%pnl+1, .false.)
570  if(qrm_sched_task(tq_h, tsk, 'h', front%owner)) then
571  front%ptable(task%pnl+1) = qrm_busy_
572  end if
573  end if
574  end if
575 
576  if (f .eq. 0) then
577  ! if column does not have to be assembled, it's marked done
578  front%ptable(task%pnl) = qrm_done_
579  else
580  if ((task%pnl .le. front%npiv/front%nb) .or. &
581  & (front%npiv .eq. front%ne)) then
582  ! colums corresponding to pivots do not have to be assembled
583  front%ptable(task%pnl) = qrm_done_
584  else
585  front%ptable(task%pnl) = task%pnl
586  end if
587  end if
588  ! check if the front is completely factorized
589  if(task%pnl .eq. front%nc) then
590  front%status = qrm_factorized_
591  end if
592  front%lastpnl=task%pnl
593  !$ call omp_unset_lock(front%lock)
594 
595  return
596 
597  end subroutine panel
598 
599 
600 
601 
602 
603 
604 
605 !==========================================================================================
606 !==========================================================================================
607  subroutine update(task, thn, work, flops)
608  ! this subroutine performs the update of
609  ! block-column task%col wrt panel task%pnl
610  ! in front task%front
611  !
612 
613  use dqrm_fdata_mod
614  use qrm_common_mod
615  implicit none
616 
617  type(qrm_task_type) :: task
618  integer :: thn
619  real(kind(1.d0)) :: work(:)
620 
621  real(kind(1.d0)) :: flops
622  type(qrm_task_type) :: tsk
623  real(kind(1.d0)) :: t1, t2
624 
625 
626  type(dqrm_front_type), pointer :: front
627  integer :: f, i, j, m, n, k, ib, fcol, lcol
628 
629  front => fdata%front_list(task%front)
630  f = adata%parent(front%num)
631 
632 #if defined(debug)
633  __qrm_prnt_dbg('(i3," =--> Update : ",i4," -- ",i4,2x,i4)')thn,task%front,task%pnl,task%col
634 #endif
635 
636 #if defined(trace)
637  call qrm_trace_event_start(upd_id, thn)
638 #endif
639 
640  call qrm_get(qrm_mat, 'qrm_ib', ib)
641 
642  fcol = (task%pnl-1)*front%nb+1 ! the first pivot in the panel
643  lcol = min(task%pnl*front%nb, front%ne) ! the last pivot in the panel
644  j = (task%col-1)*front%nb+1 ! the first column in the update
645  n = min(front%nb, front%n-j+1) ! the number of columns to update
646 
647  do i=fcol, lcol, ib
648  k = min(ib, lcol-i+1) ! the width of the reflectors set
649  m = max(front%stair(min(i+k-1,front%ne))- i+1,0) ! the height of the panel
650  k = min(k,m)
651 
652  call dlarfb('l', 't', 'f', 'c', m, n , k, front%front(i,i), &
653  & front%m, front%t(1,i), front%ib, &
654  & front%front(i,j), front%m, work(1), front%nb)
655 
656  flops = flops+qrm_count_flops(m, n, k, 'update')
657 
658  end do
659 
660  ! once all the computations are done, update the status of the
661  ! factorization
662 
663 #if defined(trace)
664  call qrm_trace_event_stop(upd_id, thn)
665 #endif
666 
667  !$ call omp_set_lock(front%lock)
668  if((f .eq. 0) .and. (task%pnl .eq. front%np)) then
669  ! we just updated wrt the last panel and no assembly is required:
670  ! mark as done
671  front%ptable(task%col) = qrm_done_
672  else if ((task%pnl .eq. front%np) .and. &
673  & (front%npiv .eq. front%ne)) then
674  front%ptable(task%col) = qrm_done_
675  else
676  ! mark the column as up-to-date wrt panel task%pnl
677  front%ptable(task%col) = task%pnl
678  end if
679  i = minval(front%ptable)
680  if(i .ge. front%np) front%status = qrm_factorized_
681 
682  if((task%col .eq. (task%pnl+1)) .and. (task%col .le. front%np)) then
683  tsk = qrm_task_type(qrm_task_pnl_, front%num, task%col, 0, .false.)
684  if(qrm_sched_task(tq_h, tsk, 'h', front%owner)) then
685  front%ptable(task%col) = qrm_busy_
686  end if
687  end if
688  !$ call omp_unset_lock(front%lock)
689 
690  return
691 
692  end subroutine update
693 
694 
695 
696 
697 
698 
699 !==========================================================================================
700 !==========================================================================================
701  subroutine activate(task, thn, flops)
702  ! this subroutine activates front task%front
703 
704  use dqrm_fdata_mod
706  use qrm_queue_mod
707  implicit none
708 
709  type(qrm_task_type) :: task
710  integer :: thn
711  real(kind(1.d0)) :: flops
712 
713  type(dqrm_front_type), pointer :: front
714  integer :: msize, i, f
715  ! error management
716  integer :: err_act
717  character(len=*), parameter :: name='activate'
718 
719  call qrm_err_act_save(err_act)
720 
721  front => fdata%front_list(task%front)
722 
723 #if defined(_OPENMP)
724  call omp_set_lock(front%lock)
725 #endif
726 
727 #if defined(debug)
728  __qrm_prnt_dbg('(i3," =--> Activate: ",i4)')thn,task%front
729 #endif
730 
731 #if defined(trace)
732  call qrm_trace_event_start(act_id, thn)
733 #endif
734 
735  call dqrm_activate_front(qrm_mat, front%num, flops)
736  __qrm_check_ret(name,'qrm_activate_front',9999)
737 
738  call qrm_queue_rm(ready_q, task%front)
739  call qrm_queue_push(active_q, task%front)
740  if((front%m .eq. 0) .or. (front%n .eq. 0)) then
741  front%status = qrm_done_
742  !$ call omp_set_lock(fdata%lock)
743  fdata%done = fdata%done+1
744  !$ call omp_unset_lock(fdata%lock)
745  else
746  front%status = qrm_active_
747  end if
748  !$ call omp_unset_lock(front%lock)
749 
750 #if defined(trace)
751  call qrm_trace_event_stop(act_id, thn)
752 #endif
753 
754  f = adata%parent(front%num)
755  if(f .ne. 0) then
756  front => fdata%front_list(f)
757  !$ call omp_set_lock(front%lock)
758  if (front%status .eq. qrm_ready_) then
759  call qrm_queue_push(ready_q, f)
760  front%status = qrm_activable_
761  end if
762  !$ call omp_unset_lock(front%lock)
763  end if
764 
765  call qrm_err_act_restore(err_act)
766  return
767 
768 9999 continue ! error management
769  !$ call omp_unset_lock(front%lock)
770  call qrm_err_act_restore(err_act)
771  if(err_act .eq. qrm_abort_) then
772  call qrm_err_check()
773  end if
774 
775  return
776 
777  end subroutine activate
778 
779 
780 
781 
782 
783 
784 
785 !==========================================================================================
786 !==========================================================================================
787  subroutine assemble(task, thn)
788  ! this subroutine performs the assembly of column
789  ! task%col of front task%front into its father
790  !
791  use dqrm_fdata_mod
792  implicit none
793 
794  type(qrm_task_type) :: task
795  integer :: thn
796 
797  type(dqrm_front_type), pointer :: front, father
798  integer :: f, i, j, nb, row, col, frow, fcol, b
799  integer :: nc, mc, ptr
800  type(qrm_task_type) :: tsk
801  integer, allocatable :: tmp(:)
802  ! error management
803  integer :: err_act
804  character(len=*), parameter :: name='assemble'
805 
806  call qrm_err_act_save(err_act)
807 
808  front => fdata%front_list(task%front)
809  f = adata%parent(front%num)
810  father => fdata%front_list(f)
811 
812 #if defined(debug)
813  __qrm_prnt_dbg('(i3," =--> Assemble: ",i4," -- ",i4)')thn,task%front,task%col
814 #endif
815 
816 #if defined(trace)
817  call qrm_trace_event_start(asm_id, thn)
818 #endif
819 
820  call qrm_aalloc(tmp, father%nc)
821  __qrm_check_ret(name,'qrm_aalloc',9999)
822  tmp = 0
823 
824  do col = (task%col-1)*front%nb+1, min(front%n, task%col*front%nb)
825  if(col .le. front%npiv) cycle
826  fcol = front%colmap(col-front%npiv)
827  do row = front%npiv+1, min(col, min(front%m, front%n))
828  frow = front%rowmap(row - front%npiv)
829  father%front(frow, fcol) = front%front(row,col)
830  end do
831  b = (fcol-1)/father%nb+1
832  tmp(b) = tmp(b)+1
833  end do
834 
835  !$ call omp_set_lock(father%lock)
836  father%ptable = father%ptable+tmp
837  father%status = qrm_active_
838  !$ call omp_unset_lock(father%lock)
839 
840  call qrm_adealloc(tmp)
841 
842 #if defined(trace)
843  call qrm_trace_event_stop(asm_id, thn)
844 #endif
845 
846  !$ call omp_set_lock(front%lock)
847  front%ptable(task%col) = qrm_done_
848  !$ call omp_unset_lock(front%lock)
849 
850  call qrm_err_act_restore(err_act)
851  return
852 
853 9999 continue ! error management
854  call qrm_err_act_restore(err_act)
855  if(err_act .eq. qrm_abort_) then
856  call qrm_err_check()
857  end if
858 
859  return
860 
861  end subroutine assemble
862 
863 
864 
865 !==========================================================================================
866 !==========================================================================================
867  subroutine clean(task, thn)
868  ! cleanup: deallocates all the memory areas
869  ! needed for the front factorization.
870  ! The corresponding R and (eventually) H parts
871  ! are stored into dedicated memory areas
872 
873  use dqrm_fdata_mod
874  implicit none
875 
876  type(qrm_task_type) :: task
877  integer :: thn
878 
879  type(dqrm_front_type), pointer :: front, father
880  integer :: f, mn, h, i, j
881  ! error management
882  integer :: err_act
883  character(len=*), parameter :: name='clean'
884 
885  call qrm_err_act_save(err_act)
886 
887  front => fdata%front_list(task%front)
888 
889 #if defined(debug)
890  __qrm_prnt_dbg('(i3," =--> Clean : ",i4)')thn,task%front
891 #endif
892 
893 #if defined(trace)
894  call qrm_trace_event_start(clean_id, thn)
895 #endif
896 
897  !$ call omp_set_lock(front%lock)
898  call dqrm_clean_front(qrm_mat, task%front)
899  !$ call omp_unset_lock(front%lock)
900  __qrm_check_ret(name,'qrm_clean_front',9999)
901 
902 #if defined(trace)
903  call qrm_trace_event_stop(clean_id, thn)
904 #endif
905 
906  ! front is done, remove it from the queue of active nodes
907  call qrm_queue_rm(active_q, front%num)
908 
909  !$ call omp_set_lock(fdata%lock)
910  fdata%done = fdata%done+1
911  !$ call omp_unset_lock(fdata%lock)
912 
913  !$ call omp_set_lock(front%lock)
914  front%status = qrm_done_
915  !$ call omp_unset_lock(front%lock)
916 
917  call qrm_err_act_restore(err_act)
918  return
919 
920 9999 continue ! error management
921  call qrm_err_act_restore(err_act)
922  if(err_act .eq. qrm_abort_) then
923  call qrm_err_check()
924  end if
925 
926  return
927 
928  end subroutine clean
929 
930 
931 
932 
933 
934 end subroutine dqrm_factorization_core
935 
936 
937 
subroutine qrm_clean_task_queue(h)
Destroyes a set of queues.
Generic interface for the qrm_adealloc_i, qrm_adealloc_2i, qrm_adealloc_s, qrm_adealloc_2s, qrm_adealloc_3s, qrm_adealloc_d, qrm_adealloc_2d, qrm_adealloc_3d, qrm_adealloc_c, qrm_adealloc_2c, qrm_adealloc_3c, qrm_adealloc_z, qrm_adealloc_2z, qrm_adealloc_3z, routines.
This module contains all the facilities for front queues.
This type defines the handle for the queues attached to a family of threads.
This module contains the interfaces of all non-typed routines.
subroutine clean(task, thn)
A data type meant to to define a queue.
subroutine qrm_err_push(code, sub, ied, aed)
This subroutine pushes an error on top of the stack.
This module contains all the generic interfaces for the typed routines in the factorization phase...
subroutine qrm_par_mem_finalize()
Generif interface for the ::dqrm_pgeti, ::dqrm_pgetr and.
integer, parameter qrm_task_asm_
integer, parameter qrm_task_pnl_
subroutine assemble(task, thn)
logical function front_sched_act(front)
integer, parameter qrm_done_
subroutine qrm_par_mem_init()
This routine has to be called at the beginning of a parallel section. Afterwards, each thread will up...
subroutine qrm_err_act_save(err_act)
Saves a copy of the qrm_err_act variable.
This module contains the definition of a task type that is used for scheduling tasks during the facto...
logical function front_sched_ops(front)
subroutine dqrm_factorization_core(qrm_mat)
This is the main factorization routine. It performs the factorization of all the fronts that have bee...
logical function qrm_sched_task(h, tsk, pol, q)
Pushes a task on a queue.
subroutine dqrm_activate_front(qrm_mat, fnum, flops)
This routine activates a front.
This module contains all the error management routines and data.
This module contains the definition of the basic sparse matrix type and of the associated methods...
integer, parameter qrm_task_exit_
subroutine update(task, thn, work, flops)
subroutine qrm_queue_rm(q, n)
Removes (without returning it) an element from a queue.
subroutine activate(task, thn, flops)
subroutine qrm_init_task_queue(h)
Inititalizes a set of queues attached to a family of threads referenced through the handle h...
integer, parameter qrm_activable_
Generic interface for the qrm_aalloc_i, qrm_aalloc_2i, qrm_aalloc_s, qrm_aalloc_2s, qrm_aalloc_3s, qrm_aalloc_d, qrm_aalloc_2d, qrm_aalloc_3d, qrm_aalloc_c, qrm_aalloc_2c, qrm_aalloc_3c, qrm_aalloc_z, qrm_aalloc_2z, qrm_aalloc_3z, routines.
Definition: qrm_mem_mod.F90:78
This module contains all the facilities for visualizing the execution profile of a parallel code...
integer, parameter qrm_abort_
Possible actions to be performed upon detection of an error.
integer, parameter qrm_factorized_
logical function qrm_get_task(h, tsk)
Pops a task from a queue. Tasks are always popped from the head of the queue. The return value is ...
This type defines a computational task.
subroutine dqrm_clean_front(qrm_mat, fnum)
This routine performs the cleaning of a front.
subroutine qrm_err_check()
This subroutine checks the errors stack. If something is found all the entries in the stack are poppe...
subroutine qrm_queue_push(q, elem)
Pushes an element on a queue.
type(qrm_err_stack_type), save qrm_err_stack
The errors stack.
subroutine panel(task, thn, work, flops)
integer, parameter qrm_task_act_
integer, parameter qrm_fifo_
parameter to define the policy of the queue: FIFO
Generic interface for the ::qrm_count_realflops ::qrm_count_pureflops.
integer function qrm_task_queue_card(h)
Returns the number of tasks present on a set of queues referenced by a handle.
This type defines the data structure used to store a matrix.
integer, parameter qrm_task_upd_
subroutine check_facto_over()
This type defines a data structure containing all the data related to a front.
This module contains the definition of all the data related to the factorization phase.
integer, parameter qrm_ready_
subroutine qrm_queue_free(q)
Frees a queue.
integer, parameter qrm_lifo_
parameter to define the policy of the queue: LIFO
integer, parameter qrm_task_cln_
integer, parameter qrm_active_
This module implements the memory handling routines. Pretty mucch allocations and deallocations...
Definition: qrm_mem_mod.F90:38
integer, parameter qrm_busy_
subroutine qrm_err_act_restore(err_act)
Restores the value of the qrm_err_act variable.
integer function qrm_queue_next(q, n)
Returns the element that follows n in the queue q. Very useful for sweeping through a queue...
subroutine fill_queue()