13 SUBROUTINE fft_par_real(communicator, V1_in, V_out, opt_nb_plane)
20 REAL(KIND=8),
DIMENSION(:,:,:),
INTENT(IN) :: V1_in
21 REAL(KIND=8),
DIMENSION(:,:,:),
ALLOCATABLE :: V_out
24 INTEGER,
OPTIONAL :: opt_nb_plane
25 INTEGER :: np, bloc_size, nb_field, &
26 m_max, m_max_c, rang, nb_procs, MPID, m_max_pad, N_r_pad
27 INTEGER(KIND=8) :: fftw_plan_multi_c2r
28 COMPLEX(KIND=8),
ALLOCATABLE,
DIMENSION(:,:,:) :: cu
29 INTEGER :: nb, nf, shiftc, shiftl, jindex, longueur_tranche, i, n, code
30 REAL(KIND=8),
ALLOCATABLE,
DIMENSION(:,:,:) :: dist_field, combined_field
31 INTEGER :: fft_dim, howmany, istride, ostride, idist, odist
32 INTEGER,
DIMENSION(1) :: dim, inembed, onembed
35 #include "petsc/finclude/petsc.h" 36 mpi_comm :: communicator
38 CALL mpi_comm_size(communicator, nb_procs, code)
39 CALL mpi_comm_rank(communicator, rang, code)
42 nb_field =
SIZE(v1_in,2)
43 m_max_c =
SIZE(v1_in,3)
44 m_max = m_max_c*nb_procs
45 IF (mod(nb_field,2)/=0 .OR. m_max_c==0)
THEN 46 CALL error_petsc(.OR.
'Bug in FFT_PAR_REAL: MOD(nb_field,2)/=0 m_max_c==0')
51 IF (modulo(np,nb_procs)==0)
THEN 52 bloc_size = np/nb_procs
54 CALL error_petsc(
'Bug in FFT_PAR_REAL: np is not a multiple of nb_procs')
57 IF (
PRESENT(opt_nb_plane))
THEN 58 IF (opt_nb_plane> 2*m_max-1)
THEN 59 m_max_pad = (opt_nb_plane+1)/2
68 ALLOCATE(cu(m_max_pad,nb_field/2, bloc_size))
69 ALLOCATE(dist_field(m_max_c,nb_field,np))
70 ALLOCATE(combined_field(m_max_c,nb_field,np))
73 dist_field(i,:,:) = transpose(v1_in(:,:,i))
76 longueur_tranche=bloc_size*m_max_c*nb_field
78 mpid=mpi_double_precision
79 CALL mpi_alltoall (dist_field,longueur_tranche, mpid, combined_field, longueur_tranche, &
80 mpid, communicator, code)
85 shiftc = (nb-1)*bloc_size
86 shiftl = (nb-1)*m_max_c
93 cu(shiftl+1:shiftl+m_max_c,nf,n) = cmplx(combined_field(:,2*nf-1,jindex),&
94 -combined_field(:,2*nf,jindex),kind=8)/2
98 cu(1,:,:) = 2*cmplx(
REAL(cu(1,:,:),KIND=8),0.d0,KIND=8)
110 onembed(1)= m_max_pad
111 howmany = bloc_size*nb_field/2
117 IF (
ALLOCATED(v_out))
DEALLOCATE(v_out)
118 ALLOCATE(v_out(n_r_pad,nb_field/2,bloc_size))
120 CALL dfftw_plan_many_dft_c2r(fftw_plan_multi_c2r, fft_dim, dim, howmany, cu, &
121 onembed, ostride, odist, v_out, inembed, istride, idist, fftw_estimate)
122 CALL dfftw_execute(fftw_plan_multi_c2r)
124 DEALLOCATE(cu, dist_field, combined_field)
128 SUBROUTINE fft_par_cross_prod_bug(communicator,V1_in, V2_in, V_out, nb_procs, bloc_size, m_max_pad, temps)
135 REAL(KIND=8),
DIMENSION(:,:,:),
INTENT(IN) :: V1_in, V2_in
136 REAL(KIND=8),
DIMENSION(:,:,:),
INTENT(OUT) :: V_out
137 REAL(KIND=8),
DIMENSION(:),
OPTIONAL,
INTENT(INOUT) :: temps
138 INTEGER,
INTENT(IN) :: nb_procs, bloc_size, m_max_pad
140 INTEGER :: np, np_tot, nb_field, &
141 m_max, m_max_c, MPID, N_r_pad
142 INTEGER(KIND=8) :: fftw_plan_multi_c2r, fftw_plan_multi_r2c
143 COMPLEX(KIND=8),
DIMENSION(m_max_pad,SIZE(V1_in,2),bloc_size) :: cu
144 COMPLEX(KIND=8),
DIMENSION(2*m_max_pad-1,SIZE(V1_in,2)/2,bloc_size) :: prod_cu
145 REAL(KIND=8),
DIMENSION(2*m_max_pad-1,SIZE(V1_in,2),bloc_size) :: ru
146 REAL(KIND=8),
DIMENSION(2*m_max_pad-1,SIZE(V1_in,2)/2,bloc_size) :: prod_ru
147 COMPLEX(KIND=8),
DIMENSION(SIZE(V1_in,2)/2,bloc_size) :: intermediate
148 REAL(KIND=8),
DIMENSION(SIZE(V1_in,3),2*SIZE(V1_in,2),bloc_size*nb_procs) :: dist_field, combined_field
149 COMPLEX(KIND=8),
DIMENSION(SIZE(V1_in,2)/2,bloc_size,SIZE(V1_in,3)*nb_procs) :: combined_prod_cu, dist_prod_cu
150 COMPLEX(KIND=8),
DIMENSION(SIZE(V1_in,3),bloc_size*nb_procs,SIZE(V1_in,2)/2) :: out_prod_cu
153 INTEGER :: nb, nf, shiftc, shiftl, jindex, longueur_tranche, i, n, code
157 INTEGER :: fft_dim, howmany, istride, ostride, idist, odist
158 INTEGER,
DIMENSION(1) :: dim, inembed, onembed
168 #include "petsc/finclude/petsc.h" 169 mpi_comm :: communicator
171 IF (
PRESENT(temps)) temps = 0.d0
173 nb_field=
SIZE(v1_in,2)
174 m_max_c =
SIZE(v1_in,3)
175 m_max = m_max_c*nb_procs
176 np_tot = nb_procs*bloc_size
178 n_r_pad=2*m_max_pad-1
180 IF (mod(nb_field,2)/=0 .OR. m_max_c==0)
THEN 193 dist_field(i,1:nb_field,1:np) = transpose(v1_in(:,:,i))
194 dist_field(i,nb_field+1:2*nb_field,1:np) = transpose(v2_in(:,:,i))
196 IF (
PRESENT(temps)) temps(3) = temps(3) + mpi_wtime() -t
198 IF (np/=np_tot) dist_field(:,:,np+1:np_tot) = 1.d100
200 longueur_tranche=bloc_size*m_max_c*nb_field*2
203 mpid=mpi_double_precision
204 CALL mpi_alltoall (dist_field,longueur_tranche, mpid, combined_field, longueur_tranche, &
205 mpid, communicator, code)
206 IF (
PRESENT(temps)) temps(1) = temps(1) + mpi_wtime() -t
214 shiftc = (nb-1)*bloc_size
215 shiftl = (nb-1)*m_max_c
223 cu(shiftl+1:shiftl+m_max_c,nf,n) = cmplx(combined_field(:,2*nf-1,jindex),&
224 -combined_field(:,2*nf,jindex),kind=8)/2
228 cu(1,:,:) = 2*cmplx(
REAL(cu(1,:,:),KIND=8),0.d0,KIND=8)
234 IF (
PRESENT(temps)) temps(3) = temps(3) + mpi_wtime() -t
237 fft_dim=1; istride=1; ostride=1;
241 idist=n_r_pad; inembed(1)=n_r_pad; dim(1)=n_r_pad
242 odist=m_max_pad; onembed(1)=m_max_pad
245 howmany=bloc_size*nb_field
248 CALL dfftw_plan_many_dft_c2r(fftw_plan_multi_c2r, fft_dim, dim, howmany, cu, &
249 onembed, ostride, odist, ru, inembed, istride, idist, fftw_estimate)
251 CALL dfftw_execute(fftw_plan_multi_c2r)
254 IF (nb_field==6)
THEN 255 prod_ru(:,1,:) = ru(:,2,:)*ru(:,6,:) - ru(:,3,:)*ru(:,5,:)
256 prod_ru(:,2,:) = ru(:,3,:)*ru(:,4,:) - ru(:,1,:)*ru(:,6,:)
257 prod_ru(:,3,:) = ru(:,1,:)*ru(:,5,:) - ru(:,2,:)*ru(:,4,:)
262 CALL dfftw_plan_many_dft_r2c(fftw_plan_multi_r2c, fft_dim, dim, howmany, prod_ru, &
263 inembed, istride, idist, prod_cu, onembed, ostride, odist, fftw_estimate)
265 CALL dfftw_execute(fftw_plan_multi_r2c)
268 prod_cu = prod_cu/n_r_pad
270 IF (
PRESENT(temps)) temps(2) = temps(2) + mpi_wtime() -t
275 combined_prod_cu(:,:,1)=prod_cu(1,:,:)
278 combined_prod_cu(:,:,n)=2*conjg(prod_cu(n,:,:))
281 IF (
PRESENT(temps)) temps(3) = temps(3) + mpi_wtime() -t
284 longueur_tranche=bloc_size*m_max_c*nb_field
285 mpid=mpi_double_precision
286 CALL mpi_alltoall (combined_prod_cu,longueur_tranche,mpid, dist_prod_cu,longueur_tranche, &
287 mpid,communicator,code)
288 IF (
PRESENT(temps)) temps(1) = temps(1) + mpi_wtime() -t
293 shiftc = (nb-1)*bloc_size
294 shiftl = (nb-1)*m_max_c
295 intermediate = dist_prod_cu(:,:,shiftl+i)
297 IF (n+shiftc > np ) cycle
298 DO i_field = 1, nb_field/2
299 v_out(n+shiftc, i_field*2-1, i) =
REAL (intermediate(i_field,n),KIND=8)
300 v_out(n+shiftc, i_field*2 , i) = aimag(intermediate(i_field,n))
305 IF (
PRESENT(temps)) temps(3) = temps(3) + mpi_wtime() -t
310 SUBROUTINE fft_par_cross_prod_dcl(communicator,V1_in, V2_in, V_out, nb_procs, bloc_size, m_max_pad, temps, padding)
317 REAL(KIND=8),
DIMENSION(:,:,:),
INTENT(IN) :: V1_in, V2_in
318 REAL(KIND=8),
DIMENSION(:,:,:),
INTENT(OUT) :: V_out
319 REAL(KIND=8),
DIMENSION(:),
OPTIONAL,
INTENT(INOUT) :: temps
320 LOGICAL,
OPTIONAL,
INTENT(IN) :: padding
321 INTEGER,
INTENT(IN) :: bloc_size, m_max_pad, nb_procs
322 INTEGER :: np, np_tot, nb_field, m_max, m_max_c, MPID, N_r_pad
323 INTEGER(KIND=8) :: fftw_plan_multi_c2r, fftw_plan_multi_r2c
325 COMPLEX(KIND=8),
DIMENSION(m_max_pad, SIZE(V1_in,2), bloc_size) :: cu
326 REAL(KIND=8),
DIMENSION(2*m_max_pad-1,SIZE(V1_in,2),bloc_size) :: ru
327 COMPLEX(KIND=8),
DIMENSION(m_max_pad,SIZE(V1_in,2)/2,bloc_size) :: prod_cu
328 REAL(KIND=8),
DIMENSION(2*m_max_pad-1,SIZE(V1_in,2)/2,bloc_size) :: prod_ru
329 COMPLEX(KIND=8),
DIMENSION(SIZE(V1_in,2)/2,bloc_size) :: intermediate
330 REAL(KIND=8),
DIMENSION(SIZE(V1_in,3),2*SIZE(V1_in,2),bloc_size*nb_procs) :: dist_field, combined_field
331 COMPLEX(KIND=8),
DIMENSION(SIZE(V1_in,2)/2,bloc_size,SIZE(V1_in,3)*nb_procs) :: combined_prod_cu
332 COMPLEX(KIND=8),
DIMENSION(SIZE(V1_in,2)/2,bloc_size,SIZE(V1_in,3)*nb_procs) :: dist_prod_cu
335 INTEGER :: nb, nf, shiftc, shiftl, jindex, longueur_tranche, i, n, code
339 INTEGER :: fft_dim, howmany, istride, ostride, idist, odist
340 INTEGER,
DIMENSION(1) :: dim, inembed, onembed
343 #include "petsc/finclude/petsc.h" 344 mpi_comm :: communicator
346 IF (
PRESENT(temps)) temps = 0.d0
349 nb_field=
SIZE(v1_in,2)
350 m_max_c =
SIZE(v1_in,3)
351 m_max = m_max_c*nb_procs
352 n_r_pad=2*m_max_pad-1
353 np_tot = nb_procs*bloc_size
355 IF (mod(nb_field,2)/=0 .OR. m_max_c==0)
THEN 373 dist_field(i,1:nb_field,1:np) = transpose(v1_in(:,:,i))
374 dist_field(i,nb_field+1:2*nb_field,1:np) = transpose(v2_in(:,:,i))
376 IF (
PRESENT(temps)) temps(3) = temps(3) + mpi_wtime() -t
378 IF (np/=np_tot) dist_field(:,:,np+1:np_tot) = 1.d100
380 longueur_tranche=bloc_size*m_max_c*nb_field*2
383 mpid=mpi_double_precision
384 CALL mpi_alltoall (dist_field,longueur_tranche, mpid, combined_field, longueur_tranche, &
385 mpid, communicator, code)
386 IF (
PRESENT(temps)) temps(1) = temps(1) + mpi_wtime() -t
394 shiftc = (nb-1)*bloc_size
395 shiftl = (nb-1)*m_max_c
403 cu(shiftl+1:shiftl+m_max_c,nf,n) = cmplx(combined_field(:,2*nf-1,jindex),&
404 -combined_field(:,2*nf,jindex),kind=8)/2
408 cu(1,:,:) = 2*cmplx(
REAL(cu(1,:,:),KIND=8),0.d0,KIND=8)
414 IF (
PRESENT(temps)) temps(3) = temps(3) + mpi_wtime() -t
417 fft_dim=1; istride=1; ostride=1;
421 idist=n_r_pad; inembed(1)=n_r_pad; dim(1)=n_r_pad
422 odist=m_max_pad; onembed(1)=m_max_pad
425 howmany=bloc_size*nb_field
429 CALL dfftw_plan_many_dft_c2r(fftw_plan_multi_c2r, fft_dim, dim, howmany, cu, &
430 onembed, ostride, odist, ru, inembed, istride, idist, fftw_estimate)
432 CALL dfftw_execute(fftw_plan_multi_c2r)
435 IF (nb_field==6)
THEN 436 prod_ru(:,1,:) = ru(:,2,:)*ru(:,6,:) - ru(:,3,:)*ru(:,5,:)
437 prod_ru(:,2,:) = ru(:,3,:)*ru(:,4,:) - ru(:,1,:)*ru(:,6,:)
438 prod_ru(:,3,:) = ru(:,1,:)*ru(:,5,:) - ru(:,2,:)*ru(:,4,:)
443 CALL dfftw_plan_many_dft_r2c(fftw_plan_multi_r2c, fft_dim, dim, howmany, prod_ru, &
444 inembed, istride, idist, prod_cu, onembed, ostride, odist, fftw_estimate)
446 CALL dfftw_execute(fftw_plan_multi_r2c)
449 prod_cu = prod_cu/n_r_pad
451 IF (
PRESENT(temps)) temps(2) = temps(2) + mpi_wtime() -t
456 combined_prod_cu(:,:,1)=prod_cu(1,:,:)
459 combined_prod_cu(:,:,n)=2*conjg(prod_cu(n,:,:))
462 IF (
PRESENT(temps)) temps(3) = temps(3) + mpi_wtime() -t
465 longueur_tranche=bloc_size*m_max_c*nb_field
466 mpid=mpi_double_precision
467 CALL mpi_alltoall (combined_prod_cu,longueur_tranche,mpid, dist_prod_cu,longueur_tranche, &
468 mpid,communicator,code)
469 IF (
PRESENT(temps)) temps(1) = temps(1) + mpi_wtime() -t
474 shiftc = (nb-1)*bloc_size
475 shiftl = (nb-1)*m_max_c
476 intermediate = dist_prod_cu(:,:,shiftl+i)
478 IF (n+shiftc > np ) cycle
479 DO i_field = 1, nb_field/2
480 v_out(n+shiftc, i_field*2-1, i) =
REAL (intermediate(i_field,n),KIND=8)
481 v_out(n+shiftc, i_field*2 , i) = aimag(intermediate(i_field,n))
486 IF (
PRESENT(temps)) temps(3) = temps(3) + mpi_wtime() -t
492 bloc_size, m_max_pad,
l_g, opt_norm_out, opt_m_vel, opt_norm, temps, padding)
500 REAL(KIND=8),
DIMENSION(:,:,:),
INTENT(IN) :: V1_in, V2_in
501 REAL(KIND=8),
DIMENSION(:,:,:),
INTENT(OUT) :: V_out
502 REAL(KIND=8),
DIMENSION(:,:),
OPTIONAL,
INTENT(OUT) :: opt_norm_out
503 REAL(KIND=8),
DIMENSION(:,:),
OPTIONAL,
INTENT(IN) :: opt_norm
504 REAL(KIND=8),
DIMENSION(:),
OPTIONAL,
INTENT(INOUT) :: temps
505 LOGICAL,
OPTIONAL,
INTENT(IN) :: padding
506 INTEGER,
INTENT(IN) :: bloc_size, m_max_pad, nb_procs
507 REAL(KIND=8),
OPTIONAL,
INTENT(IN) :: opt_M_vel
508 INTEGER,
INTENT(IN) :: l_G
509 INTEGER,
INTENT(IN) :: pb
510 INTEGER :: np, np_tot, nb_field1, nb_field2, m_max, m_max_c, MPID, N_r_pad
511 INTEGER(KIND=8) :: fftw_plan_multi_c2r, fftw_plan_multi_r2c
513 COMPLEX(KIND=8),
DIMENSION(m_max_pad, (SIZE(V1_in,2)+SIZE(V2_in,2))/2, bloc_size) :: cu
514 REAL(KIND=8),
DIMENSION(2*m_max_pad-1,(SIZE(V1_in,2)+SIZE(V2_in,2))/2,bloc_size) :: ru
515 COMPLEX(KIND=8),
DIMENSION(m_max_pad,SIZE(V1_in,2)/2,bloc_size) :: prod_cu
516 REAL(KIND=8),
DIMENSION(2*m_max_pad-1,SIZE(V1_in,2)/2,bloc_size) :: prod_ru
517 COMPLEX(KIND=8),
DIMENSION(SIZE(V1_in,2)/2, bloc_size) :: intermediate
518 REAL(KIND=8),
DIMENSION(2*m_max_pad-1, bloc_size) :: norm_grad_phi
519 REAL(KIND=8),
DIMENSION(2*m_max_pad-1, bloc_size) :: norm_vel
520 REAL(KIND=8),
DIMENSION(2*m_max_pad-1, bloc_size/l_G) :: norm_vel_int
522 REAL(KIND=8),
DIMENSION(SIZE(V1_in,3),SIZE(V1_in,2)+SIZE(V2_in,2),bloc_size*nb_procs) :: dist_field, combined_field
523 COMPLEX(KIND=8),
DIMENSION(SIZE(V1_in,2)/2,bloc_size,SIZE(V1_in,3)*nb_procs) :: combined_prod_cu
524 COMPLEX(KIND=8),
DIMENSION(SIZE(V1_in,2)/2,bloc_size,SIZE(V1_in,3)*nb_procs) :: dist_prod_cu
527 INTEGER :: nb, nf, shiftc, shiftl, jindex, longueur_tranche, i, n, code, l
531 INTEGER :: fft_dim, howmany, istride, ostride, idist, odist
532 INTEGER,
DIMENSION(1) :: dim, inembed, onembed
535 #include "petsc/finclude/petsc.h" 536 mpi_comm :: communicator
538 IF (
PRESENT(temps)) temps = 0.d0
541 nb_field1=
SIZE(v1_in,2)
542 nb_field2=
SIZE(v2_in,2)
543 m_max_c =
SIZE(v1_in,3)
544 m_max = m_max_c*nb_procs
545 n_r_pad=2*m_max_pad-1
546 np_tot = nb_procs*bloc_size
548 IF (mod(nb_field1,2)/=0 .OR. mod(nb_field2,2)/=0 .OR. m_max_c==0)
THEN 566 dist_field(i,1:nb_field1,1:np) = transpose(v1_in(:,:,i))
567 dist_field(i,nb_field1+1:nb_field1+nb_field2,1:np) = transpose(v2_in(:,:,i))
569 IF (
PRESENT(temps)) temps(3) = temps(3) + mpi_wtime() -t
571 IF (np/=np_tot) dist_field(:,:,np+1:np_tot) = 1.d100
573 longueur_tranche=bloc_size*m_max_c*(nb_field1+nb_field2)
576 mpid=mpi_double_precision
577 CALL mpi_alltoall (dist_field,longueur_tranche, mpid, combined_field, longueur_tranche, &
578 mpid, communicator, code)
579 IF (
PRESENT(temps)) temps(1) = temps(1) + mpi_wtime() -t
587 shiftc = (nb-1)*bloc_size
588 shiftl = (nb-1)*m_max_c
590 DO nf = 1, (nb_field1+nb_field2)/2
596 cu(shiftl+1:shiftl+m_max_c,nf,n) = cmplx(combined_field(:,2*nf-1,jindex),&
597 -combined_field(:,2*nf,jindex),kind=8)/2
601 cu(1,:,:) = 2*cmplx(
REAL(cu(1,:,:),KIND=8),0.d0,KIND=8)
607 IF (
PRESENT(temps)) temps(3) = temps(3) + mpi_wtime() -t
610 fft_dim=1; istride=1; ostride=1;
614 idist=n_r_pad; inembed(1)=n_r_pad; dim(1)=n_r_pad
615 odist=m_max_pad; onembed(1)=m_max_pad
618 howmany=bloc_size*(nb_field1+nb_field2)/2
621 CALL dfftw_plan_many_dft_c2r(fftw_plan_multi_c2r, fft_dim, dim, howmany, cu, &
622 onembed, ostride, odist, ru, inembed, istride, idist, fftw_estimate)
624 CALL dfftw_execute(fftw_plan_multi_c2r)
628 IF (nb_field1 == 6 .AND. nb_field2 == 6)
THEN 629 norm_vel(:,:) = sqrt(ru(:,4,:)**2 + ru(:,5,:)**2 + ru(:,6,:)**2)
630 DO i = 1, 2*m_max_pad - 1
631 DO l = 1, bloc_size/l_g
632 x = maxval(norm_vel(i,(l-1)*l_g+1:l*l_g))
633 norm_vel_int(i,l) = x
636 DO l = 1, bloc_size/l_g
637 DO i = 2, 2*m_max_pad - 2
638 norm_vel(i,(l-1)*l_g+1:l*l_g) = maxval(norm_vel_int(i-1:i+1,l))
640 norm_vel(1,(l-1)*l_g+1:l*l_g) = max(norm_vel_int(1,l),norm_vel_int(2,l),norm_vel_int(2*m_max_pad - 1,l))
641 norm_vel(2*m_max_pad - 1,(l-1)*l_g+1:l*l_g) = &
642 max(norm_vel_int(2*m_max_pad - 2,l),norm_vel_int(2*m_max_pad - 1,l),norm_vel_int(1,l))
644 prod_ru(:,1,:) = (opt_m_vel - norm_vel(:,:))*ru(:,1,:)
645 prod_ru(:,2,:) = (opt_m_vel - norm_vel(:,:))*ru(:,2,:)
646 prod_ru(:,3,:) = (opt_m_vel - norm_vel(:,:))*ru(:,3,:)
648 opt_norm_out = norm_vel
651 IF (nb_field1 == 6 .AND. nb_field2 == 2)
THEN 652 norm_grad_phi(:,:) = sqrt(ru(:,1,:)**2 + ru(:,2,:)**2 + ru(:,3,:)**2) + 1.d-14
653 prod_ru(:,1,:) = opt_norm(:,:)*ru(:,4,:)*(1.d0 - ru(:,4,:))*ru(:,1,:)/norm_grad_phi(:,:)
654 prod_ru(:,2,:) = opt_norm(:,:)*ru(:,4,:)*(1.d0 - ru(:,4,:))*ru(:,2,:)/norm_grad_phi(:,:)
655 prod_ru(:,3,:) = opt_norm(:,:)*ru(:,4,:)*(1.d0 - ru(:,4,:))*ru(:,3,:)/norm_grad_phi(:,:)
658 CALL error_petsc(
'error in problem type while calling FFT_PAR_COMPRESSIVE_VISC_DCL ')
661 howmany = bloc_size*nb_field1/2
663 CALL dfftw_plan_many_dft_r2c(fftw_plan_multi_r2c, fft_dim, dim, howmany, prod_ru, &
664 inembed, istride, idist, prod_cu, onembed, ostride, odist, fftw_estimate)
666 CALL dfftw_execute(fftw_plan_multi_r2c)
669 prod_cu = prod_cu/n_r_pad
671 IF (
PRESENT(temps)) temps(2) = temps(2) + mpi_wtime() -t
676 combined_prod_cu(:,:,1)=prod_cu(1,:,:)
679 combined_prod_cu(:,:,n)=2*conjg(prod_cu(n,:,:))
682 IF (
PRESENT(temps)) temps(3) = temps(3) + mpi_wtime() -t
684 longueur_tranche=bloc_size*m_max_c*nb_field1
687 mpid=mpi_double_precision
688 CALL mpi_alltoall (combined_prod_cu,longueur_tranche,mpid, dist_prod_cu,longueur_tranche, &
689 mpid,communicator,code)
691 IF (
PRESENT(temps)) temps(1) = temps(1) + mpi_wtime() -t
697 shiftc = (nb-1)*bloc_size
698 shiftl = (nb-1)*m_max_c
699 intermediate = dist_prod_cu(:,:,shiftl+i)
701 IF (n+shiftc > np ) cycle
702 DO i_field = 1, nb_field1/2
703 v_out(n+shiftc, i_field*2-1, i) =
REAL (intermediate(i_field,n),KIND=8)
704 v_out(n+shiftc, i_field*2 , i) = aimag(intermediate(i_field,n))
709 IF (
PRESENT(temps)) temps(3) = temps(3) + mpi_wtime() -t
713 SUBROUTINE fft_par_dot_prod_dcl(communicator,V1_in, V2_in, c_out, nb_procs, bloc_size, m_max_pad, temps)
721 REAL(KIND=8),
DIMENSION(:,:,:),
INTENT(IN) :: V1_in, V2_in
722 REAL(KIND=8),
DIMENSION(:,:,:),
INTENT(OUT) :: c_out
723 REAL(KIND=8),
DIMENSION(:),
OPTIONAL,
INTENT(INOUT) :: temps
724 INTEGER,
INTENT(IN) :: nb_procs, bloc_size, m_max_pad
725 COMPLEX(KIND=8),
DIMENSION(m_max_pad, SIZE(V1_in,2), bloc_size) :: cu
726 REAL(KIND=8),
DIMENSION(2*m_max_pad-1,SIZE(V1_in,2),bloc_size) :: ru
727 COMPLEX(KIND=8),
DIMENSION(m_max_pad,bloc_size) :: prod_cu
728 REAL(KIND=8),
DIMENSION(2*m_max_pad-1,bloc_size) :: prod_ru
729 COMPLEX(KIND=8),
DIMENSION(bloc_size) :: intermediate
730 REAL(KIND=8),
DIMENSION(SIZE(V1_in,3),2*SIZE(V1_in,2),bloc_size*nb_procs) :: dist_field, combined_field
731 COMPLEX(KIND=8),
DIMENSION(bloc_size,SIZE(V1_in,3)*nb_procs) :: combined_prod_cu
732 COMPLEX(KIND=8),
DIMENSION(bloc_size,SIZE(V1_in,3)*nb_procs) :: dist_prod_cu
734 INTEGER :: np, np_tot, nb_field, m_max, m_max_c, MPID, N_r_pad
735 INTEGER(KIND=8) :: fftw_plan_multi_c2r, fftw_plan_multi_r2c
736 INTEGER :: nb, nf, shiftc, shiftl, jindex, longueur_tranche, i, n, code
739 INTEGER :: fft_dim, howmany, istride, ostride, idist, odist
740 INTEGER,
DIMENSION(1) :: dim, inembed, onembed
743 #include "petsc/finclude/petsc.h" 744 mpi_comm :: communicator
746 IF (
PRESENT(temps)) temps = 0.d0
749 nb_field=
SIZE(v1_in,2)
750 m_max_c =
SIZE(v1_in,3)
751 m_max = m_max_c*nb_procs
752 np_tot = nb_procs*bloc_size
753 n_r_pad=2*m_max_pad-1
755 IF (mod(nb_field,2)/=0 .OR. m_max_c==0)
THEN 768 dist_field(i,1:nb_field,1:np) = transpose(v1_in(:,:,i))
769 dist_field(i,nb_field+1:2*nb_field,1:np) = transpose(v2_in(:,:,i))
771 IF (
PRESENT(temps)) temps(3) = temps(3) + mpi_wtime() -t
773 IF (np/=np_tot) dist_field(:,:,np+1:np_tot) = 1.d100
775 longueur_tranche=bloc_size*m_max_c*nb_field*2
778 mpid=mpi_double_precision
779 CALL mpi_alltoall (dist_field,longueur_tranche, mpid, combined_field, longueur_tranche, &
780 mpid, communicator, code)
781 IF (
PRESENT(temps)) temps(1) = temps(1) + mpi_wtime() -t
789 shiftc = (nb-1)*bloc_size
790 shiftl = (nb-1)*m_max_c
798 cu(shiftl+1:shiftl+m_max_c,nf,n) = cmplx(combined_field(:,2*nf-1,jindex),&
799 -combined_field(:,2*nf,jindex),kind=8)/2
803 cu(1,:,:) = 2*cmplx(
REAL(cu(1,:,:),KIND=8),0.d0,KIND=8)
809 IF (
PRESENT(temps)) temps(3) = temps(3) + mpi_wtime() -t
812 fft_dim=1; istride=1; ostride=1;
816 idist=n_r_pad; inembed(1)=n_r_pad; dim(1)=n_r_pad
817 odist=m_max_pad; onembed(1)=m_max_pad
820 howmany=bloc_size*nb_field
824 CALL dfftw_plan_many_dft_c2r(fftw_plan_multi_c2r, fft_dim, dim, howmany, cu, &
825 onembed, ostride, odist, ru, inembed, istride, idist, fftw_estimate)
827 CALL dfftw_execute(fftw_plan_multi_c2r)
830 IF (nb_field==6)
THEN 831 prod_ru(:,:) = ru(:,1,:)*ru(:,4,:) + ru(:,2,:)*ru(:,5,:) + ru(:,3,:)*ru(:,6,:)
835 howmany = bloc_size*1
836 CALL dfftw_plan_many_dft_r2c(fftw_plan_multi_r2c, fft_dim, dim, howmany, prod_ru, &
837 inembed, istride, idist, prod_cu, onembed, ostride, odist, fftw_estimate)
839 CALL dfftw_execute(fftw_plan_multi_r2c)
842 prod_cu = prod_cu/n_r_pad
844 IF (
PRESENT(temps)) temps(2) = temps(2) + mpi_wtime() -t
848 combined_prod_cu(:,1)=prod_cu(1,:)
850 combined_prod_cu(:,n)=2*conjg(prod_cu(n,:))
853 IF (
PRESENT(temps)) temps(3) = temps(3) + mpi_wtime() -t
856 longueur_tranche=bloc_size*m_max_c*2
857 mpid=mpi_double_precision
858 CALL mpi_alltoall (combined_prod_cu,longueur_tranche,mpid, dist_prod_cu,longueur_tranche, &
859 mpid,communicator,code)
860 IF (
PRESENT(temps)) temps(1) = temps(1) + mpi_wtime() -t
865 shiftc = (nb-1)*bloc_size
866 shiftl = (nb-1)*m_max_c
867 intermediate = dist_prod_cu(:,shiftl+i)
869 IF (n+shiftc > np ) cycle
870 c_out(n+shiftc, 1, i) =
REAL (intermediate(n),KIND=8)
871 c_out(n+shiftc, 2 , i) = aimag(intermediate(n))
875 IF (
PRESENT(temps)) temps(3) = temps(3) + mpi_wtime() -t
879 SUBROUTINE fft_par_prod_dcl(communicator, c1_in, c2_in, c_out, nb_procs, bloc_size, m_max_pad, temps)
888 REAL(KIND=8),
DIMENSION(:,:,:),
INTENT(IN) :: c1_in, c2_in
889 REAL(KIND=8),
DIMENSION(:,:,:),
INTENT(OUT) :: c_out
890 REAL(KIND=8),
DIMENSION(:),
OPTIONAL,
INTENT(INOUT) :: temps
891 INTEGER,
INTENT(IN) :: nb_procs, bloc_size, m_max_pad
893 COMPLEX(KIND=8),
DIMENSION(m_max_pad, 2, bloc_size) :: cu
894 REAL(KIND=8),
DIMENSION(2*m_max_pad-1, 2, bloc_size) :: ru
895 COMPLEX(KIND=8),
DIMENSION(m_max_pad, bloc_size) :: prod_cu
896 REAL(KIND=8),
DIMENSION(2*m_max_pad-1,bloc_size) :: prod_ru
897 REAL(KIND=8),
DIMENSION(SIZE(c1_in,3),4, bloc_size*nb_procs) :: dist_field, combined_field
898 COMPLEX(KIND=8),
DIMENSION(bloc_size,SIZE(c1_in,3)*nb_procs) :: dist_prod_cu, combined_prod_cu
899 COMPLEX(KIND=8),
DIMENSION(bloc_size) :: intermediate
901 INTEGER :: np, np_tot, m_max, m_max_c, MPID, N_r_pad
902 INTEGER(KIND=8) :: fftw_plan_multi_c2r, fftw_plan_multi_r2c
903 INTEGER :: nb, nf, shiftc, shiftl, jindex, longueur_tranche, i, n, code
906 INTEGER :: fft_dim, howmany, istride, ostride, idist, odist
907 INTEGER,
DIMENSION(1) :: dim, inembed, onembed
910 #include "petsc/finclude/petsc.h" 911 mpi_comm :: communicator
913 IF (
PRESENT(temps)) temps = 0.d0
916 m_max_c =
SIZE(c1_in,3)
917 m_max = m_max_c*nb_procs
918 np_tot = nb_procs*bloc_size
919 n_r_pad=2*m_max_pad-1
933 dist_field(i,1:2,1:np) = transpose(c1_in(:,:,i))
934 dist_field(i,3:4,1:np) = transpose(c2_in(:,:,i))
936 IF (
PRESENT(temps)) temps(3) = temps(3) + mpi_wtime() -t
938 IF (np/=np_tot) dist_field(:,:,np+1:np_tot) = 1.d100
940 longueur_tranche=bloc_size*m_max_c*4
943 mpid=mpi_double_precision
944 CALL mpi_alltoall (dist_field,longueur_tranche, mpid, combined_field, longueur_tranche, &
945 mpid, communicator, code)
946 IF (
PRESENT(temps)) temps(1) = temps(1) + mpi_wtime() -t
954 shiftc = (nb-1)*bloc_size
955 shiftl = (nb-1)*m_max_c
963 cu(shiftl+1:shiftl+m_max_c,nf,n) = cmplx(combined_field(:,2*nf-1,jindex),&
964 -combined_field(:,2*nf,jindex),kind=8)/2
968 cu(1,:,:) = 2*cmplx(
REAL(cu(1,:,:),KIND=8),0.d0,KIND=8)
974 IF (
PRESENT(temps)) temps(3) = temps(3) + mpi_wtime() -t
977 fft_dim=1; istride=1; ostride=1;
981 idist=n_r_pad; inembed(1)=n_r_pad; dim(1)=n_r_pad
982 odist=m_max_pad; onembed(1)=m_max_pad
988 CALL dfftw_plan_many_dft_c2r(fftw_plan_multi_c2r, fft_dim, dim, howmany, cu, &
989 onembed, ostride, odist, ru, inembed, istride, idist, fftw_estimate)
990 CALL dfftw_execute(fftw_plan_multi_c2r)
993 prod_ru(:,:) = ru(:,1,:)*ru(:,2,:)
996 howmany = bloc_size*1
997 CALL dfftw_plan_many_dft_r2c(fftw_plan_multi_r2c, fft_dim, dim, howmany, prod_ru, &
998 inembed, istride, idist, prod_cu, onembed, ostride, odist, fftw_estimate)
999 CALL dfftw_execute(fftw_plan_multi_r2c)
1002 prod_cu = prod_cu/n_r_pad
1004 IF (
PRESENT(temps)) temps(2) = temps(2) + mpi_wtime() -t
1008 combined_prod_cu(:,1)=prod_cu(1,:)
1010 combined_prod_cu(:,n)=2*conjg(prod_cu(n,:))
1013 IF (
PRESENT(temps)) temps(3) = temps(3) + mpi_wtime() -t
1016 longueur_tranche=bloc_size*m_max_c*2
1017 mpid=mpi_double_precision
1018 CALL mpi_alltoall (combined_prod_cu,longueur_tranche,mpid, dist_prod_cu,longueur_tranche, &
1019 mpid,communicator,code)
1020 IF (
PRESENT(temps)) temps(1) = temps(1) + mpi_wtime() -t
1025 shiftc = (nb-1)*bloc_size
1026 shiftl = (nb-1)*m_max_c
1027 intermediate = dist_prod_cu(:,shiftl+i)
1029 IF (n+shiftc > np ) cycle
1030 c_out(n+shiftc, 1, i) =
REAL (intermediate(n),KIND=8)
1031 c_out(n+shiftc, 2 , i) = aimag(intermediate(n))
1035 IF (
PRESENT(temps)) temps(3) = temps(3) + mpi_wtime() -t
1039 SUBROUTINE fft_par_dot_prod_bis(communicator,V1_in, V2_in, V_out, nb_procs, bloc_size, m_max_pad, temps, padding)
1047 REAL(KIND=8),
DIMENSION(:,:,:),
INTENT(IN) :: V1_in, V2_in
1048 REAL(KIND=8),
DIMENSION(:,:,:),
INTENT(OUT) :: V_out
1049 INTEGER,
INTENT(IN) :: bloc_size, m_max_pad, nb_procs
1050 COMPLEX(KIND=8),
DIMENSION(m_max_pad, SIZE(V1_in,2), bloc_size) :: cu
1051 REAL(KIND=8),
DIMENSION(2*m_max_pad-1,SIZE(V1_in,2),bloc_size) :: ru
1052 COMPLEX(KIND=8),
DIMENSION(m_max_pad,bloc_size) :: prod_cu
1053 REAL(KIND=8),
DIMENSION(2*m_max_pad-1,bloc_size) :: prod_ru
1054 COMPLEX(KIND=8),
DIMENSION(bloc_size) :: intermediate
1055 REAL(KIND=8),
DIMENSION(SIZE(V1_in,3),2*SIZE(V1_in,2),bloc_size*nb_procs) :: dist_field, combined_field
1056 COMPLEX(KIND=8),
DIMENSION(bloc_size,SIZE(V1_in,3)*nb_procs) :: combined_prod_cu
1057 COMPLEX(KIND=8),
DIMENSION(bloc_size,SIZE(V1_in,3)*nb_procs) :: dist_prod_cu
1058 REAL(KIND=8),
DIMENSION(:),
OPTIONAL,
INTENT(INOUT) :: temps
1059 LOGICAL,
OPTIONAL,
INTENT(IN) :: padding
1060 INTEGER :: np, np_tot, nb_field, m_max, m_max_c, MPID, N_r_pad
1061 INTEGER(KIND=8) :: fftw_plan_multi_c2r, fftw_plan_multi_r2c
1063 INTEGER :: nb, nf, shiftc, shiftl, jindex, longueur_tranche, i, n, code
1067 INTEGER :: fft_dim, howmany, istride, ostride, idist, odist
1068 INTEGER,
DIMENSION(1) :: dim, inembed, onembed
1071 #include "petsc/finclude/petsc.h" 1072 mpi_comm :: communicator
1074 IF (
PRESENT(temps)) temps = 0.d0
1077 nb_field=
SIZE(v1_in,2)
1078 m_max_c =
SIZE(v1_in,3)
1079 m_max = m_max_c*nb_procs
1080 n_r_pad=2*m_max_pad-1
1081 np_tot = nb_procs*bloc_size
1083 IF (mod(nb_field,2)/=0 .OR. m_max_c==0)
THEN 1101 dist_field(i,1:nb_field,1:np) = transpose(v1_in(:,:,i))
1102 dist_field(i,nb_field+1:2*nb_field,1:np) = transpose(v2_in(:,:,i))
1104 IF (
PRESENT(temps)) temps(3) = temps(3) + mpi_wtime() -t
1106 IF (np/=np_tot) dist_field(:,:,np+1:np_tot) = 1.d100
1108 longueur_tranche=bloc_size*m_max_c*nb_field*2
1111 mpid=mpi_double_precision
1112 CALL mpi_alltoall (dist_field,longueur_tranche, mpid, combined_field, longueur_tranche, &
1113 mpid, communicator, code)
1114 IF (
PRESENT(temps)) temps(1) = temps(1) + mpi_wtime() -t
1122 shiftc = (nb-1)*bloc_size
1123 shiftl = (nb-1)*m_max_c
1131 cu(shiftl+1:shiftl+m_max_c,nf,n) = cmplx(combined_field(:,2*nf-1,jindex),&
1132 -combined_field(:,2*nf,jindex),kind=8)/2
1136 cu(1,:,:) = 2*cmplx(
REAL(cu(1,:,:),KIND=8),0.d0,KIND=8)
1142 IF (
PRESENT(temps)) temps(3) = temps(3) + mpi_wtime() -t
1145 fft_dim=1; istride=1; ostride=1;
1149 idist=n_r_pad; inembed(1)=n_r_pad; dim(1)=n_r_pad
1150 odist=m_max_pad; onembed(1)=m_max_pad
1153 howmany=bloc_size*nb_field
1157 CALL dfftw_plan_many_dft_c2r(fftw_plan_multi_c2r, fft_dim, dim, howmany, cu, &
1158 onembed, ostride, odist, ru, inembed, istride, idist, fftw_estimate)
1160 CALL dfftw_execute(fftw_plan_multi_c2r)
1163 IF (nb_field==6)
THEN 1164 prod_ru(:,:) = ru(:,1,:)*ru(:,4,:) + ru(:,2,:)*ru(:,5,:) + ru(:,3,:)*ru(:,6,:)
1169 CALL dfftw_plan_many_dft_r2c(fftw_plan_multi_r2c, fft_dim, dim, howmany, prod_ru, &
1170 inembed, istride, idist, prod_cu, onembed, ostride, odist, fftw_estimate)
1172 CALL dfftw_execute(fftw_plan_multi_r2c)
1175 prod_cu = prod_cu/n_r_pad
1177 IF (
PRESENT(temps)) temps(2) = temps(2) + mpi_wtime() -t
1182 combined_prod_cu(:,1)=prod_cu(1,:)
1185 combined_prod_cu(:,n)=2*conjg(prod_cu(n,:))
1188 IF (
PRESENT(temps)) temps(3) = temps(3) + mpi_wtime() -t
1191 longueur_tranche=bloc_size*m_max_c*2
1192 mpid=mpi_double_precision
1193 CALL mpi_alltoall (combined_prod_cu,longueur_tranche,mpid, dist_prod_cu,longueur_tranche, &
1194 mpid,communicator,code)
1195 IF (
PRESENT(temps)) temps(1) = temps(1) + mpi_wtime() -t
1200 shiftc = (nb-1)*bloc_size
1201 shiftl = (nb-1)*m_max_c
1202 intermediate = dist_prod_cu(:,shiftl+i)
1204 IF (n+shiftc > np ) cycle
1205 v_out(n+shiftc, 1, i) =
REAL (intermediate(n),KIND=8)
1206 v_out(n+shiftc, 2, i) = aimag(intermediate(n))
1210 IF (
PRESENT(temps)) temps(3) = temps(3) + mpi_wtime() -t
1224 REAL(KIND=8),
DIMENSION(:,:,:),
INTENT(IN) :: V1_in, V2_in
1225 REAL(KIND=8),
DIMENSION(:,:,:),
INTENT(OUT) :: V_out
1226 REAL(KIND=8),
DIMENSION(:),
OPTIONAL,
INTENT(INOUT) :: temps
1227 LOGICAL,
OPTIONAL,
INTENT(IN) :: padding
1229 LOGICAL,
SAVE :: once=.true.
1230 INTEGER,
SAVE :: np_ref, np, np_tot, bloc_size, nb_field, &
1231 m_max, m_max_c, N_r, rang, nb_procs, MPID, m_max_pad, N_r_pad
1232 INTEGER(KIND=8),
SAVE :: fftw_plan_multi_c2r, fftw_plan_multi_r2c
1233 COMPLEX(KIND=8),
ALLOCATABLE,
DIMENSION(:,:,:),
SAVE :: cu, prod_cu
1234 REAL(KIND=8),
ALLOCATABLE,
DIMENSION(:,:,:),
SAVE :: ru, prod_ru
1238 INTEGER :: np_glob, np_loc, reste, np_alloc, nn, nb_bloc, n_sup, n_inf, n_up, n_cache
1239 INTEGER :: nb, nf, shiftc, shiftl, jindex, longueur_tranche, i, n, code
1241 REAL(KIND=8),
ALLOCATABLE,
DIMENSION(:,:,:) :: dist_field, combined_field
1242 COMPLEX(KIND=8),
ALLOCATABLE,
DIMENSION(:,:,:) :: combined_prod_cu, dist_prod_cu, out_prod_cu
1245 COMPLEX(KIND=8),
ALLOCATABLE,
DIMENSION(:,:) :: intermediate
1250 INTEGER :: fft_dim, howmany, istride, ostride, idist, odist
1251 INTEGER,
DIMENSION(1) :: dim, inembed, onembed
1261 #include "petsc/finclude/petsc.h" 1262 mpi_comm :: communicator
1264 CALL mpi_comm_size(communicator,nb_procs,code)
1265 CALL mpi_comm_rank(communicator,rang,code)
1267 IF (
PRESENT(temps)) temps = 0.d0
1270 IF (
SIZE(v1_in,1).NE.np_ref)
THEN 1272 np_ref =
SIZE(v1_in,1)
1279 np_glob =
SIZE(v1_in,1)
1280 m_max_c =
SIZE(v1_in,3)
1281 np_loc = n_cache/(12*m_max_c)
1282 nb_bloc = max(np_glob/np_loc,1)
1284 100 np_loc = np_glob/nb_bloc
1285 reste = np_glob - np_loc*nb_bloc
1286 np_alloc = np_loc + reste
1287 reste = np_alloc*nb_bloc - np_glob
1288 IF (reste>np_alloc)
THEN 1289 nb_bloc = nb_bloc - 1
1298 nb_field=
SIZE(v1_in,2)
1299 m_max_c =
SIZE(v1_in,3)
1300 m_max = m_max_c*nb_procs
1302 IF (mod(nb_field,2)/=0 .OR. m_max_c==0)
THEN 1311 IF (modulo(np,nb_procs)==0)
THEN 1312 bloc_size = np/nb_procs
1314 bloc_size = np/nb_procs + 1
1316 np_tot = nb_procs*bloc_size
1322 IF (
PRESENT(padding))
THEN 1324 m_max_pad = 3*m_max/2
1326 WRITE(*,*)
' NO PADDING ' 1330 m_max_pad = 3*m_max/2
1332 n_r_pad=2*m_max_pad-1
1334 IF (
ALLOCATED(ru))
DEALLOCATE(ru,cu,prod_ru,prod_cu)
1335 ALLOCATE(cu(m_max_pad,nb_field,bloc_size))
1336 ALLOCATE(ru(n_r_pad, nb_field,bloc_size))
1337 ALLOCATE(prod_cu(m_max_pad,nb_field/2,bloc_size))
1338 ALLOCATE(prod_ru(n_r_pad, nb_field/2,bloc_size))
1340 ALLOCATE(intermediate(nb_field/2,bloc_size))
1341 ALLOCATE( dist_field(m_max_c,2*nb_field,np_tot))
1342 ALLOCATE(combined_field(m_max_c,2*nb_field,np_tot))
1343 ALLOCATE(dist_prod_cu(nb_field/2,bloc_size,m_max))
1344 ALLOCATE(combined_prod_cu(nb_field/2,bloc_size,m_max))
1345 ALLOCATE(out_prod_cu(m_max_c,np_tot,nb_field/2))
1356 IF (nn == nb_bloc)
THEN 1357 n_up = np_glob - n_inf + 1
1361 n_sup = n_inf + n_up - 1
1364 dist_field(i,1:nb_field,1:n_up) = transpose(v1_in(n_inf:n_sup,:,i))
1365 dist_field(i,nb_field+1:2*nb_field,1:n_up) = transpose(v2_in(n_inf:n_sup,:,i))
1367 IF (
PRESENT(temps)) temps(3) = temps(3) + mpi_wtime() -t
1369 IF (np/=np_tot) dist_field(:,:,np+1:np_tot) = 1.d100
1371 longueur_tranche=bloc_size*m_max_c*nb_field*2
1374 mpid=mpi_double_precision
1375 CALL mpi_alltoall (dist_field,longueur_tranche, mpid, combined_field, longueur_tranche, &
1376 mpid, communicator, code)
1377 IF (
PRESENT(temps)) temps(1) = temps(1) + mpi_wtime() -t
1385 shiftc = (nb-1)*bloc_size
1386 shiftl = (nb-1)*m_max_c
1394 cu(shiftl+1:shiftl+m_max_c,nf,n) = cmplx(combined_field(:,2*nf-1,jindex),&
1395 -combined_field(:,2*nf,jindex),kind=8)/2
1399 cu(1,:,:) = 2*cmplx(
REAL(cu(1,:,:),KIND=8),0.d0,KIND=8)
1405 IF (
PRESENT(temps)) temps(3) = temps(3) + mpi_wtime() -t
1408 fft_dim=1; istride=1; ostride=1;
1412 idist=n_r_pad; inembed(1)=n_r_pad; dim(1)=n_r_pad
1413 odist=m_max_pad; onembed(1)=m_max_pad
1416 howmany=bloc_size*nb_field
1420 IF (once)
CALL dfftw_plan_many_dft_c2r(fftw_plan_multi_c2r, fft_dim, dim, howmany, cu, &
1421 onembed, ostride, odist, ru, inembed, istride, idist, fftw_estimate)
1423 CALL dfftw_execute(fftw_plan_multi_c2r)
1426 IF (nb_field==6)
THEN 1427 prod_ru(:,1,:) = ru(:,2,:)*ru(:,6,:) - ru(:,3,:)*ru(:,5,:)
1428 prod_ru(:,2,:) = ru(:,3,:)*ru(:,4,:) - ru(:,1,:)*ru(:,6,:)
1429 prod_ru(:,3,:) = ru(:,1,:)*ru(:,5,:) - ru(:,2,:)*ru(:,4,:)
1434 IF (once)
CALL dfftw_plan_many_dft_r2c(fftw_plan_multi_r2c, fft_dim, dim, howmany, prod_ru, &
1435 inembed, istride, idist, prod_cu, onembed, ostride, odist, fftw_estimate)
1437 CALL dfftw_execute(fftw_plan_multi_r2c)
1440 prod_cu = prod_cu/n_r_pad
1442 IF (
PRESENT(temps)) temps(2) = temps(2) + mpi_wtime() -t
1447 combined_prod_cu(:,:,1)=prod_cu(1,:,:)
1450 combined_prod_cu(:,:,n)=2*conjg(prod_cu(n,:,:))
1453 IF (
PRESENT(temps)) temps(3) = temps(3) + mpi_wtime() -t
1456 longueur_tranche=bloc_size*m_max_c*nb_field
1457 mpid=mpi_double_precision
1458 CALL mpi_alltoall (combined_prod_cu,longueur_tranche,mpid, dist_prod_cu,longueur_tranche, &
1459 mpid,communicator,code)
1460 IF (
PRESENT(temps)) temps(1) = temps(1) + mpi_wtime() -t
1465 shiftc = (nb-1)*bloc_size
1466 shiftl = (nb-1)*m_max_c
1467 intermediate = dist_prod_cu(:,:,shiftl+i)
1469 IF (n_inf-1+n+shiftc > np_glob ) cycle
1470 DO i_field = 1, nb_field/2
1471 v_out(n_inf-1+n+shiftc, i_field*2-1, i) =
REAL (intermediate(i_field,n),KIND=8)
1472 v_out(n_inf-1+n+shiftc, i_field*2 , i) = aimag(intermediate(i_field,n))
1477 IF (
PRESENT(temps)) temps(3) = temps(3) + mpi_wtime() -t
1483 DEALLOCATE(dist_field, combined_field, dist_prod_cu, combined_prod_cu, intermediate, out_prod_cu)
1487 SUBROUTINE fft_par_dot_prod(communicator,V1_in, V2_in, c_out, temps, padding)
1495 REAL(KIND=8),
DIMENSION(:,:,:),
INTENT(IN) :: V1_in, V2_in
1496 REAL(KIND=8),
DIMENSION(:,:,:),
INTENT(OUT) :: c_out
1497 REAL(KIND=8),
DIMENSION(:),
OPTIONAL,
INTENT(INOUT) :: temps
1498 LOGICAL,
OPTIONAL,
INTENT(IN) :: padding
1500 LOGICAL,
SAVE :: once=.true.
1501 INTEGER,
SAVE :: np_ref, np, np_tot, bloc_size, nb_field, &
1502 m_max, m_max_c, N_r, rang, nb_procs, MPID, m_max_pad, N_r_pad
1503 INTEGER(KIND=8),
SAVE :: fftw_plan_multi_c2r, fftw_plan_multi_r2c
1504 COMPLEX(KIND=8),
ALLOCATABLE,
DIMENSION(:,:,:),
SAVE :: cu
1505 REAL(KIND=8),
ALLOCATABLE,
DIMENSION(:,:,:),
SAVE :: ru
1506 COMPLEX(KIND=8),
ALLOCATABLE,
DIMENSION(:,:),
SAVE :: prod_cu
1507 REAL(KIND=8),
ALLOCATABLE,
DIMENSION(:,:),
SAVE :: prod_ru
1510 INTEGER :: np_glob, np_loc, reste, np_alloc, nn, nb_bloc, n_sup, n_inf, n_up, n_cache
1511 INTEGER :: nb, nf, shiftc, shiftl, jindex, longueur_tranche, i, n, code
1513 REAL(KIND=8),
ALLOCATABLE,
DIMENSION(:,:,:) :: dist_field, combined_field
1514 COMPLEX(KIND=8),
ALLOCATABLE,
DIMENSION(:,:) :: combined_prod_cu, dist_prod_cu, out_prod_cu
1517 COMPLEX(KIND=8),
ALLOCATABLE,
DIMENSION(:) :: intermediate
1521 INTEGER :: fft_dim, howmany, istride, ostride, idist, odist
1522 INTEGER,
DIMENSION(1) :: dim, inembed, onembed
1532 #include "petsc/finclude/petsc.h" 1533 mpi_comm :: communicator
1535 CALL mpi_comm_size(communicator,nb_procs,code)
1536 CALL mpi_comm_rank(communicator,rang,code)
1538 IF (
PRESENT(temps)) temps = 0.d0
1541 IF (
SIZE(v1_in,1).NE.np_ref)
THEN 1543 np_ref =
SIZE(v1_in,1)
1549 np_glob =
SIZE(v1_in,1)
1550 m_max_c =
SIZE(v1_in,3)
1551 np_loc = n_cache/(12*m_max_c)
1552 nb_bloc = max(np_glob/np_loc,1)
1554 100 np_loc = np_glob/nb_bloc
1555 reste = np_glob - np_loc*nb_bloc
1556 np_alloc = np_loc + reste
1557 reste = np_alloc*nb_bloc - np_glob
1558 IF (reste>np_alloc)
THEN 1559 nb_bloc = nb_bloc - 1
1568 nb_field=
SIZE(v1_in,2)
1569 m_max_c =
SIZE(v1_in,3)
1570 m_max = m_max_c*nb_procs
1572 IF (mod(nb_field,2)/=0 .OR. m_max_c==0)
THEN 1581 IF (modulo(np,nb_procs)==0)
THEN 1582 bloc_size = np/nb_procs
1584 bloc_size = np/nb_procs + 1
1586 np_tot = nb_procs*bloc_size
1592 IF (
PRESENT(padding))
THEN 1594 m_max_pad = 3*m_max/2
1596 WRITE(*,*)
' NO PADDING ' 1600 m_max_pad = 3*m_max/2
1602 n_r_pad=2*m_max_pad-1
1604 IF (
ALLOCATED(ru))
DEALLOCATE(ru,cu,prod_ru,prod_cu)
1605 ALLOCATE(cu(m_max_pad,nb_field,bloc_size))
1606 ALLOCATE(ru(n_r_pad, nb_field,bloc_size))
1607 ALLOCATE(prod_cu(m_max_pad,bloc_size))
1608 ALLOCATE(prod_ru(n_r_pad, bloc_size))
1610 ALLOCATE(intermediate(bloc_size))
1611 ALLOCATE( dist_field(m_max_c,2*nb_field,np_tot))
1612 ALLOCATE(combined_field(m_max_c,2*nb_field,np_tot))
1613 ALLOCATE(dist_prod_cu(bloc_size,m_max))
1614 ALLOCATE(combined_prod_cu(bloc_size,m_max))
1615 ALLOCATE(out_prod_cu(m_max_c,np_tot))
1625 IF (nn == nb_bloc)
THEN 1626 n_up = np_glob - n_inf + 1
1630 n_sup = n_inf + n_up - 1
1633 dist_field(i,1:nb_field,1:n_up) = transpose(v1_in(n_inf:n_sup,:,i))
1634 dist_field(i,nb_field+1:2*nb_field,1:n_up) = transpose(v2_in(n_inf:n_sup,:,i))
1636 IF (
PRESENT(temps)) temps(3) = temps(3) + mpi_wtime() -t
1638 IF (np/=np_tot) dist_field(:,:,np+1:np_tot) = 1.d100
1640 longueur_tranche=bloc_size*m_max_c*nb_field*2
1643 mpid=mpi_double_precision
1644 CALL mpi_alltoall (dist_field,longueur_tranche, mpid, combined_field, longueur_tranche, &
1645 mpid, communicator, code)
1646 IF (
PRESENT(temps)) temps(1) = temps(1) + mpi_wtime() -t
1654 shiftc = (nb-1)*bloc_size
1655 shiftl = (nb-1)*m_max_c
1663 cu(shiftl+1:shiftl+m_max_c,nf,n) = cmplx(combined_field(:,2*nf-1,jindex),&
1664 -combined_field(:,2*nf,jindex),kind=8)/2
1668 cu(1,:,:) = 2*cmplx(
REAL(cu(1,:,:),KIND=8),0.d0,KIND=8)
1674 IF (
PRESENT(temps)) temps(3) = temps(3) + mpi_wtime() -t
1677 fft_dim=1; istride=1; ostride=1;
1681 idist=n_r_pad; inembed(1)=n_r_pad; dim(1)=n_r_pad
1682 odist=m_max_pad; onembed(1)=m_max_pad
1685 howmany=bloc_size*nb_field
1689 IF (once)
CALL dfftw_plan_many_dft_c2r(fftw_plan_multi_c2r, fft_dim, dim, howmany, cu, &
1690 onembed, ostride, odist, ru, inembed, istride, idist, fftw_estimate)
1692 CALL dfftw_execute(fftw_plan_multi_c2r)
1695 IF (nb_field==6)
THEN 1696 prod_ru(:,:) = ru(:,1,:)*ru(:,4,:) + ru(:,2,:)*ru(:,5,:) + ru(:,3,:)*ru(:,6,:)
1700 howmany = bloc_size*1
1701 IF (once)
CALL dfftw_plan_many_dft_r2c(fftw_plan_multi_r2c, fft_dim, dim, howmany, prod_ru, &
1702 inembed, istride, idist, prod_cu, onembed, ostride, odist, fftw_estimate)
1704 CALL dfftw_execute(fftw_plan_multi_r2c)
1707 prod_cu = prod_cu/n_r_pad
1709 IF (
PRESENT(temps)) temps(2) = temps(2) + mpi_wtime() -t
1713 combined_prod_cu(:,1)=prod_cu(1,:)
1715 combined_prod_cu(:,n)=2*conjg(prod_cu(n,:))
1718 IF (
PRESENT(temps)) temps(3) = temps(3) + mpi_wtime() -t
1721 longueur_tranche=bloc_size*m_max_c*2
1722 mpid=mpi_double_precision
1723 CALL mpi_alltoall (combined_prod_cu,longueur_tranche,mpid, dist_prod_cu,longueur_tranche, &
1724 mpid,communicator,code)
1725 IF (
PRESENT(temps)) temps(1) = temps(1) + mpi_wtime() -t
1731 shiftc = (nb-1)*bloc_size
1732 shiftl = (nb-1)*m_max_c
1733 intermediate = dist_prod_cu(:,shiftl+i)
1735 IF (n_inf-1+n+shiftc > np_glob ) cycle
1736 c_out(n_inf-1+n+shiftc, 1, i) =
REAL (intermediate(n),KIND=8)
1737 c_out(n_inf-1+n+shiftc, 2 , i) = aimag(intermediate(n))
1741 IF (
PRESENT(temps)) temps(3) = temps(3) + mpi_wtime() -t
1747 DEALLOCATE(dist_field, combined_field, dist_prod_cu, combined_prod_cu, intermediate, out_prod_cu)
1751 SUBROUTINE fft_par_prod(communicator, c1_in, c2_in, c_out, temps, padding)
1759 REAL(KIND=8),
DIMENSION(:,:,:),
INTENT(IN) :: c1_in, c2_in
1760 REAL(KIND=8),
DIMENSION(:,:,:),
INTENT(OUT) :: c_out
1761 REAL(KIND=8),
DIMENSION(:),
OPTIONAL,
INTENT(INOUT) :: temps
1762 LOGICAL,
OPTIONAL,
INTENT(IN) :: padding
1764 LOGICAL,
SAVE :: once=.true.
1765 INTEGER,
SAVE :: np_ref, np, np_tot, bloc_size, &
1766 m_max, m_max_c, N_r, rang, nb_procs, MPID, m_max_pad, N_r_pad
1767 INTEGER(KIND=8),
SAVE :: fftw_plan_multi_c2r, fftw_plan_multi_r2c
1768 COMPLEX(KIND=8),
ALLOCATABLE,
DIMENSION(:,:,:),
SAVE :: cu
1769 REAL(KIND=8),
ALLOCATABLE,
DIMENSION(:,:,:),
SAVE :: ru
1770 COMPLEX(KIND=8),
ALLOCATABLE,
DIMENSION(:,:),
SAVE :: prod_cu
1771 REAL(KIND=8),
ALLOCATABLE,
DIMENSION(:,:),
SAVE :: prod_ru
1774 INTEGER :: np_glob, np_loc, reste, np_alloc, nn, nb_bloc, n_sup, n_inf, n_up, n_cache
1775 INTEGER :: nb, nf, shiftc, shiftl, jindex, longueur_tranche, i, n, code
1777 REAL(KIND=8),
ALLOCATABLE,
DIMENSION(:,:,:) :: dist_field, combined_field
1778 COMPLEX(KIND=8),
ALLOCATABLE,
DIMENSION(:,:) :: combined_prod_cu, dist_prod_cu, out_prod_cu
1781 COMPLEX(KIND=8),
ALLOCATABLE,
DIMENSION(:) :: intermediate
1785 INTEGER :: fft_dim, howmany, istride, ostride, idist, odist
1786 INTEGER,
DIMENSION(1) :: dim, inembed, onembed
1796 #include "petsc/finclude/petsc.h" 1797 mpi_comm :: communicator
1799 CALL mpi_comm_size(communicator,nb_procs,code)
1800 CALL mpi_comm_rank(communicator,rang,code)
1802 IF (
PRESENT(temps)) temps = 0.d0
1805 IF (
SIZE(c1_in,1).NE.np_ref)
THEN 1807 np_ref =
SIZE(c1_in,1)
1813 np_glob =
SIZE(c1_in,1)
1814 m_max_c =
SIZE(c1_in,3)
1815 np_loc = n_cache/(12*m_max_c)
1816 nb_bloc = max(np_glob/np_loc,1)
1818 100 np_loc = np_glob/nb_bloc
1819 reste = np_glob - np_loc*nb_bloc
1820 np_alloc = np_loc + reste
1821 reste = np_alloc*nb_bloc - np_glob
1822 IF (reste>np_alloc)
THEN 1823 nb_bloc = nb_bloc - 1
1832 m_max_c =
SIZE(c1_in,3)
1833 m_max = m_max_c*nb_procs
1835 IF (m_max_c==0)
THEN 1844 IF (modulo(np,nb_procs)==0)
THEN 1845 bloc_size = np/nb_procs
1847 bloc_size = np/nb_procs + 1
1849 np_tot = nb_procs*bloc_size
1855 IF (
PRESENT(padding))
THEN 1857 m_max_pad = 3*m_max/2
1859 WRITE(*,*)
' NO PADDING ' 1863 m_max_pad = 3*m_max/2
1865 n_r_pad=2*m_max_pad-1
1867 IF (
ALLOCATED(ru))
DEALLOCATE(ru,cu,prod_ru,prod_cu)
1868 ALLOCATE(cu(m_max_pad,2,bloc_size))
1869 ALLOCATE(ru(n_r_pad, 2,bloc_size))
1870 ALLOCATE(prod_cu(m_max_pad,bloc_size))
1871 ALLOCATE(prod_ru(n_r_pad, bloc_size))
1873 ALLOCATE(intermediate(bloc_size))
1874 ALLOCATE( dist_field(m_max_c,4,np_tot))
1875 ALLOCATE(combined_field(m_max_c,4,np_tot))
1876 ALLOCATE(dist_prod_cu(bloc_size,m_max))
1877 ALLOCATE(combined_prod_cu(bloc_size,m_max))
1878 ALLOCATE(out_prod_cu(m_max_c,np_tot))
1888 IF (nn == nb_bloc)
THEN 1889 n_up = np_glob - n_inf + 1
1893 n_sup = n_inf + n_up - 1
1896 dist_field(i,1:2,1:n_up) = transpose(c1_in(n_inf:n_sup,:,i))
1897 dist_field(i,3:4,1:n_up) = transpose(c2_in(n_inf:n_sup,:,i))
1899 IF (
PRESENT(temps)) temps(3) = temps(3) + mpi_wtime() -t
1901 IF (np/=np_tot) dist_field(:,:,np+1:np_tot) = 1.d100
1903 longueur_tranche=bloc_size*m_max_c*4
1906 mpid=mpi_double_precision
1907 CALL mpi_alltoall (dist_field,longueur_tranche, mpid, combined_field, longueur_tranche, &
1908 mpid, communicator, code)
1909 IF (
PRESENT(temps)) temps(1) = temps(1) + mpi_wtime() -t
1917 shiftc = (nb-1)*bloc_size
1918 shiftl = (nb-1)*m_max_c
1926 cu(shiftl+1:shiftl+m_max_c,nf,n) = cmplx(combined_field(:,2*nf-1,jindex),&
1927 -combined_field(:,2*nf,jindex),kind=8)/2
1931 cu(1,:,:) = 2*cmplx(
REAL(cu(1,:,:),KIND=8),0.d0,KIND=8)
1937 IF (
PRESENT(temps)) temps(3) = temps(3) + mpi_wtime() -t
1940 fft_dim=1; istride=1; ostride=1;
1944 idist=n_r_pad; inembed(1)=n_r_pad; dim(1)=n_r_pad
1945 odist=m_max_pad; onembed(1)=m_max_pad
1951 IF (once)
CALL dfftw_plan_many_dft_c2r(fftw_plan_multi_c2r, fft_dim, dim, howmany, cu, &
1952 onembed, ostride, odist, ru, inembed, istride, idist, fftw_estimate)
1953 CALL dfftw_execute(fftw_plan_multi_c2r)
1956 prod_ru(:,:) = ru(:,1,:)*ru(:,2,:)
1959 howmany = bloc_size*1
1960 IF (once)
CALL dfftw_plan_many_dft_r2c(fftw_plan_multi_r2c, fft_dim, dim, howmany, prod_ru, &
1961 inembed, istride, idist, prod_cu, onembed, ostride, odist, fftw_estimate)
1962 CALL dfftw_execute(fftw_plan_multi_r2c)
1965 prod_cu = prod_cu/n_r_pad
1967 IF (
PRESENT(temps)) temps(2) = temps(2) + mpi_wtime() -t
1971 combined_prod_cu(:,1)=prod_cu(1,:)
1973 combined_prod_cu(:,n)=2*conjg(prod_cu(n,:))
1976 IF (
PRESENT(temps)) temps(3) = temps(3) + mpi_wtime() -t
1979 longueur_tranche=bloc_size*m_max_c*2
1980 mpid=mpi_double_precision
1981 CALL mpi_alltoall (combined_prod_cu,longueur_tranche,mpid, dist_prod_cu,longueur_tranche, &
1982 mpid,communicator,code)
1983 IF (
PRESENT(temps)) temps(1) = temps(1) + mpi_wtime() -t
1988 shiftc = (nb-1)*bloc_size
1989 shiftl = (nb-1)*m_max_c
1990 intermediate = dist_prod_cu(:,shiftl+i)
1992 IF (n_inf-1+n+shiftc > np_glob ) cycle
1993 c_out(n_inf-1+n+shiftc, 1, i) =
REAL (intermediate(n),KIND=8)
1994 c_out(n_inf-1+n+shiftc, 2 , i) = aimag(intermediate(n))
1998 IF (
PRESENT(temps)) temps(3) = temps(3) + mpi_wtime() -t
2004 DEALLOCATE(dist_field, combined_field, dist_prod_cu, combined_prod_cu, intermediate, out_prod_cu)
2015 REAL(KIND=8),
DIMENSION(:,:,:),
INTENT(IN) :: c_in
2016 REAL(KIND=8),
DIMENSION(:,:,:),
INTENT(OUT) :: c_out
2017 REAL(KIND=8),
DIMENSION(:),
OPTIONAL,
INTENT(INOUT) :: temps
2018 LOGICAL,
OPTIONAL,
INTENT(IN) :: padding
2020 LOGICAL,
SAVE :: once=.true.
2021 INTEGER,
SAVE :: np_ref, np, np_tot, bloc_size, &
2022 m_max, m_max_c, N_r, rang, nb_procs, MPID, m_max_pad, N_r_pad
2023 INTEGER(KIND=8),
SAVE :: fftw_plan_multi_c2r, fftw_plan_multi_r2c
2024 COMPLEX(KIND=8),
ALLOCATABLE,
DIMENSION(:,:),
SAVE :: cu
2025 REAL(KIND=8),
ALLOCATABLE,
DIMENSION(:,:),
SAVE :: ru
2026 COMPLEX(KIND=8),
ALLOCATABLE,
DIMENSION(:,:),
SAVE :: prod_cu
2027 REAL(KIND=8),
ALLOCATABLE,
DIMENSION(:,:),
SAVE :: prod_ru
2030 INTEGER :: np_glob, np_loc, reste, np_alloc, nn, nb_bloc, n_sup, n_inf, n_up, n_cache
2031 INTEGER :: nb, shiftc, shiftl, jindex, longueur_tranche, i, n, code
2033 REAL(KIND=8),
ALLOCATABLE,
DIMENSION(:,:,:) :: dist_field, combined_field
2034 COMPLEX(KIND=8),
ALLOCATABLE,
DIMENSION(:,:) :: combined_prod_cu, dist_prod_cu
2037 COMPLEX(KIND=8),
ALLOCATABLE,
DIMENSION(:) :: intermediate
2041 INTEGER :: fft_dim, howmany, istride, ostride, idist, odist
2042 INTEGER,
DIMENSION(1) :: dim, inembed, onembed
2052 #include "petsc/finclude/petsc.h" 2053 mpi_comm :: communicator
2055 CALL mpi_comm_size(communicator,nb_procs,code)
2056 CALL mpi_comm_rank(communicator,rang,code)
2058 IF (
PRESENT(temps)) temps = 0.d0
2061 IF (
SIZE(c_in,1).NE.np_ref)
THEN 2063 np_ref =
SIZE(c_in,1)
2069 np_glob =
SIZE(c_in,1)
2070 m_max_c =
SIZE(c_in,3)
2071 np_loc = n_cache/(12*m_max_c)
2072 nb_bloc = max(np_glob/np_loc,1)
2074 100 np_loc = np_glob/nb_bloc
2075 reste = np_glob - np_loc*nb_bloc
2076 np_alloc = np_loc + reste
2077 reste = np_alloc*nb_bloc - np_glob
2078 IF (reste>np_alloc)
THEN 2079 nb_bloc = nb_bloc - 1
2088 m_max_c =
SIZE(c_in,3)
2089 m_max = m_max_c*nb_procs
2091 IF (m_max_c==0)
THEN 2100 IF (modulo(np,nb_procs)==0)
THEN 2101 bloc_size = np/nb_procs
2103 bloc_size = np/nb_procs + 1
2105 np_tot = nb_procs*bloc_size
2111 IF (
PRESENT(padding))
THEN 2113 m_max_pad = 3*m_max/2
2115 WRITE(*,*)
' NO PADDING ' 2119 m_max_pad = 3*m_max/2
2121 n_r_pad=2*m_max_pad-1
2123 IF (
ALLOCATED(ru))
DEALLOCATE(ru,cu,prod_ru,prod_cu)
2124 ALLOCATE(cu(m_max_pad,bloc_size))
2125 ALLOCATE(ru(n_r_pad, bloc_size))
2126 ALLOCATE(prod_cu(m_max_pad,bloc_size))
2127 ALLOCATE(prod_ru(n_r_pad, bloc_size))
2129 ALLOCATE(intermediate(bloc_size))
2130 ALLOCATE( dist_field(m_max_c,2,np_tot))
2131 ALLOCATE(combined_field(m_max_c,2,np_tot))
2132 ALLOCATE(dist_prod_cu(bloc_size,m_max))
2133 ALLOCATE(combined_prod_cu(bloc_size,m_max))
2144 IF (nn == nb_bloc)
THEN 2145 n_up = np_glob - n_inf + 1
2149 n_sup = n_inf + n_up - 1
2152 dist_field(i,:,1:n_up) = transpose(c_in(n_inf:n_sup,:,i))
2154 IF (
PRESENT(temps)) temps(3) = temps(3) + mpi_wtime() -t
2156 IF (np/=np_tot) dist_field(:,:,np+1:np_tot) = 1.d100
2158 longueur_tranche=bloc_size*m_max_c*2
2161 mpid=mpi_double_precision
2162 CALL mpi_alltoall (dist_field,longueur_tranche, mpid, combined_field, longueur_tranche, &
2163 mpid, communicator, code)
2164 IF (
PRESENT(temps)) temps(1) = temps(1) + mpi_wtime() -t
2172 shiftc = (nb-1)*bloc_size
2173 shiftl = (nb-1)*m_max_c
2178 cu(shiftl+1:shiftl+m_max_c,n) = cmplx(combined_field(:,1,jindex),&
2179 -combined_field(:,2,jindex),kind=8)/2
2182 cu(1,:) = 2*cmplx(
REAL(cu(1,:),KIND=8),0.d0,KIND=8)
2188 IF (
PRESENT(temps)) temps(3) = temps(3) + mpi_wtime() -t
2191 fft_dim=1; istride=1; ostride=1;
2195 idist=n_r_pad; inembed(1)=n_r_pad; dim(1)=n_r_pad
2196 odist=m_max_pad; onembed(1)=m_max_pad
2201 IF (once)
CALL dfftw_plan_many_dft_c2r(fftw_plan_multi_c2r, fft_dim, dim, howmany, cu, &
2202 onembed, ostride, odist, ru, inembed, istride, idist, fftw_estimate);
2203 CALL dfftw_execute(fftw_plan_multi_c2r)
2206 prod_ru = ru*(ru**2-1)
2210 IF (once)
CALL dfftw_plan_many_dft_r2c(fftw_plan_multi_r2c, fft_dim, dim, howmany, prod_ru, &
2211 inembed, istride, idist, prod_cu, onembed, ostride, odist, fftw_estimate);
2212 CALL dfftw_execute(fftw_plan_multi_r2c)
2215 prod_cu = prod_cu/n_r_pad
2217 IF (
PRESENT(temps)) temps(2) = temps(2) + mpi_wtime() -t
2221 combined_prod_cu(:,1)=prod_cu(1,:)
2223 combined_prod_cu(:,n)=2*conjg(prod_cu(n,:))
2226 IF (
PRESENT(temps)) temps(3) = temps(3) + mpi_wtime() -t
2229 longueur_tranche=bloc_size*m_max_c*2
2230 mpid=mpi_double_precision
2231 CALL mpi_alltoall (combined_prod_cu,longueur_tranche,mpid, dist_prod_cu,longueur_tranche, &
2232 mpid,communicator,code)
2233 IF (
PRESENT(temps)) temps(1) = temps(1) + mpi_wtime() -t
2238 shiftc = (nb-1)*bloc_size
2239 shiftl = (nb-1)*m_max_c
2240 intermediate = dist_prod_cu(:,shiftl+i)
2242 IF (n_inf-1+n+shiftc > np_glob ) cycle
2243 c_out(n_inf-1+n+shiftc, 1, i) =
REAL (intermediate(n),KIND=8)
2244 c_out(n_inf-1+n+shiftc, 2, i) = aimag(intermediate(n))
2248 IF (
PRESENT(temps)) temps(3) = temps(3) + mpi_wtime() -t
2254 DEALLOCATE(dist_field, combined_field, dist_prod_cu, combined_prod_cu, intermediate)
2259 SUBROUTINE ref(communicator,V1_in, V2_in, V_out, temps)
2267 REAL(KIND=8),
DIMENSION(:,:,:),
INTENT(IN) :: V1_in, V2_in
2268 REAL(KIND=8),
DIMENSION(:,:,:),
INTENT(OUT) :: V_out
2269 REAL(KIND=8),
DIMENSION(:),
OPTIONAL,
INTENT(INOUT) :: temps
2272 LOGICAL,
SAVE :: once=.true.
2273 INTEGER,
SAVE :: np, np_tot, bloc_size, nb_field, m_max, m_max_c, N_r, rang, nb_procs, MPID
2274 INTEGER(KIND=8),
SAVE :: fftw_plan_multi_c2r, fftw_plan_multi_r2c
2275 COMPLEX(KIND=8),
ALLOCATABLE,
DIMENSION(:,:,:),
SAVE :: cu, prod_cu
2276 REAL(KIND=8),
ALLOCATABLE,
DIMENSION(:,:,:),
SAVE :: ru, prod_ru
2280 INTEGER :: nb, nf, shiftc, shiftl, jindex, longueur_tranche, i, n, code
2282 REAL(KIND=8),
ALLOCATABLE,
DIMENSION(:,:,:),
SAVE :: dist_field, combined_field
2283 COMPLEX(KIND=8),
ALLOCATABLE,
DIMENSION(:,:,:),
SAVE :: combined_prod_cu, dist_prod_cu, out_prod_cu
2286 COMPLEX(KIND=8),
ALLOCATABLE,
DIMENSION(:,:) :: intermediate
2291 INTEGER :: fft_dim, howmany, istride, ostride, idist, odist
2292 INTEGER,
DIMENSION(1) :: dim, inembed, onembed
2295 #include "petsc/finclude/petsc.h" 2296 mpi_comm :: communicator
2301 IF (
PRESENT(temps)) temps = 0.d0
2304 IF ((
SIZE(v1_in,1).NE.np) .OR. (
SIZE(v1_in,2).NE.nb_field) .OR. (
SIZE(v1_in,3).NE.m_max_c))
THEN 2310 CALL mpi_comm_size(communicator,nb_procs,code)
2311 CALL mpi_comm_rank(communicator,rang,code)
2314 nb_field=
SIZE(v1_in,2)
2315 m_max_c =
SIZE(v1_in,3)
2316 m_max = m_max_c*nb_procs
2318 IF (mod(nb_field,2)/=0 .OR. m_max_c==0)
THEN 2326 IF (modulo(np,nb_procs)==0)
THEN 2327 bloc_size = np/nb_procs
2329 bloc_size = np/nb_procs + 1
2331 np_tot = nb_procs*bloc_size
2333 IF (
ALLOCATED(ru))
DEALLOCATE(ru,cu,prod_ru,prod_cu)
2334 ALLOCATE(cu(m_max,nb_field,bloc_size))
2335 ALLOCATE(ru(n_r, nb_field,bloc_size))
2336 ALLOCATE(prod_cu(m_max,nb_field/2,bloc_size))
2337 ALLOCATE(prod_ru(n_r, nb_field/2,bloc_size))
2342 ALLOCATE(intermediate(nb_field/2,bloc_size))
2343 ALLOCATE( dist_field(m_max_c,2*nb_field,np_tot))
2344 ALLOCATE(combined_field(m_max_c,2*nb_field,np_tot))
2345 ALLOCATE(dist_prod_cu(nb_field/2,bloc_size,m_max), combined_prod_cu(nb_field/2,bloc_size,m_max))
2346 ALLOCATE(out_prod_cu(m_max_c,np_tot,nb_field/2))
2358 dist_field(i,1:nb_field,1:np) = transpose(v1_in(:,:,i))
2359 dist_field(i,nb_field+1:2*nb_field,1:np) = transpose(v2_in(:,:,i))
2367 IF (
PRESENT(temps)) temps(3) = temps(3) + mpi_wtime() -t
2369 IF (np/=np_tot) dist_field(:,:,np+1:np_tot) = 1.d100
2371 longueur_tranche=bloc_size*m_max_c*nb_field*2
2374 mpid=mpi_double_precision
2375 CALL mpi_alltoall (dist_field,longueur_tranche, mpid, combined_field, longueur_tranche, &
2376 mpid, communicator, code)
2377 IF (
PRESENT(temps)) temps(1) = temps(1) + mpi_wtime() -t
2383 shiftc = (nb-1)*bloc_size
2384 shiftl = (nb-1)*m_max_c
2394 cu(shiftl+1:shiftl+m_max_c,nf,n) = 0.5d0*cmplx(combined_field(:,2*nf-1,jindex),-combined_field(:,2*nf,jindex),kind=8)
2398 cu(1,:,:) = 2.d0*cmplx(
REAL(cu(1,:,:),KIND=8),0.d0,KIND=8)
2399 IF (
PRESENT(temps)) temps(3) = temps(3) + mpi_wtime() -t
2402 fft_dim=1; istride=1; ostride=1;
2403 idist=n_r; inembed(1)=n_r; dim(1)=n_r
2404 odist=m_max; onembed(1)=m_max
2405 IF (rang==(nb_procs-1))
THEN 2406 howmany= (np - bloc_size*(nb_procs-1))*nb_field
2408 howmany=bloc_size*nb_field
2412 IF (once)
CALL dfftw_plan_many_dft_c2r(fftw_plan_multi_c2r, fft_dim, dim, howmany, cu, &
2413 onembed, ostride, odist, ru, inembed, istride, idist, fftw_estimate);
2414 CALL dfftw_execute(fftw_plan_multi_c2r)
2424 IF (nb_field==6)
THEN 2425 prod_ru(:,1,:) = ru(:,2,:)*ru(:,6,:) - ru(:,3,:)*ru(:,5,:)
2426 prod_ru(:,2,:) = ru(:,3,:)*ru(:,4,:) - ru(:,1,:)*ru(:,6,:)
2427 prod_ru(:,3,:) = ru(:,1,:)*ru(:,5,:) - ru(:,2,:)*ru(:,4,:)
2432 IF (once)
CALL dfftw_plan_many_dft_r2c(fftw_plan_multi_r2c, fft_dim, dim, howmany, prod_ru, &
2433 inembed, istride, idist, prod_cu, onembed, ostride, odist, fftw_estimate);
2434 CALL dfftw_execute(fftw_plan_multi_r2c)
2436 prod_cu = prod_cu/n_r
2438 IF (
PRESENT(temps)) temps(2) = temps(2) + mpi_wtime() -t
2443 combined_prod_cu(:,:,1)=prod_cu(1,:,:)
2446 combined_prod_cu(:,:,n)=2.d0*conjg(prod_cu(n,:,:))
2448 IF (
PRESENT(temps)) temps(3) = temps(3) + mpi_wtime() -t
2451 longueur_tranche=bloc_size*m_max_c*nb_field
2452 mpid=mpi_double_precision
2453 CALL mpi_alltoall (combined_prod_cu,longueur_tranche,mpid, dist_prod_cu,longueur_tranche, &
2454 mpid,communicator,code)
2455 IF (
PRESENT(temps)) temps(1) = temps(1) + mpi_wtime() -t
2462 DO i_field = 1, nb_field/2
2465 shiftc = (nb-1)*bloc_size
2466 shiftl = (nb-1)*m_max_c
2467 out_prod_cu(:,n+shiftc,i_field) = dist_prod_cu(i_field,n,shiftl+1:shiftl+m_max_c)
2472 DO i_field = 1, nb_field/2
2474 v_out(:, i_field*2-1, i) =
REAL(out_prod_cu(i, 1:np, i_field),KIND=8)
2475 v_out(:, i_field*2, i) = aimag(out_prod_cu(i, 1:np, i_field))
2481 shiftc = (nb-1)*bloc_size
2482 shiftl = (nb-1)*m_max_c
2483 intermediate = dist_prod_cu(:,:,shiftl+i)
2485 IF (n+shiftc > np ) cycle
2486 DO i_field = 1, nb_field/2
2487 v_out(n+shiftc, i_field*2-1, i) =
REAL (intermediate(i_field,n),KIND=8)
2488 v_out(n+shiftc, i_field*2 , i) = aimag(intermediate(i_field,n))
2497 IF (
PRESENT(temps)) temps(3) = temps(3) + mpi_wtime() -t
2499 DEALLOCATE(dist_field, combined_field, dist_prod_cu, combined_prod_cu, intermediate, out_prod_cu)
2500 IF (once) once = .false.
2503 SUBROUTINE fft_heaviside_dcl(communicator,V1_in, V_out, nb_procs, bloc_size, m_max_pad, temps, padding)
2510 REAL(KIND=8),
DIMENSION(:,:,:),
INTENT(IN) :: V1_in
2511 REAL(KIND=8),
DIMENSION(:,:,:),
INTENT(OUT) :: V_out
2512 REAL(KIND=8),
DIMENSION(:),
OPTIONAL,
INTENT(INOUT) :: temps
2513 LOGICAL,
OPTIONAL,
INTENT(IN) :: padding
2514 INTEGER,
INTENT(IN) :: bloc_size, m_max_pad, nb_procs
2515 INTEGER :: np, np_tot, nb_field, m_max, m_max_c, MPID, N_r_pad
2516 INTEGER(KIND=8) :: fftw_plan_multi_c2r, fftw_plan_multi_r2c
2519 COMPLEX(KIND=8),
DIMENSION(m_max_pad, bloc_size) :: cu
2520 REAL(KIND=8),
DIMENSION(2*m_max_pad-1, bloc_size) :: ru
2521 COMPLEX(KIND=8),
DIMENSION(m_max_pad,bloc_size) :: prod_cu
2522 REAL(KIND=8),
DIMENSION(2*m_max_pad-1,bloc_size) :: prod_ru
2523 COMPLEX(KIND=8),
DIMENSION(bloc_size) :: intermediate
2524 REAL(KIND=8),
DIMENSION(SIZE(V1_in,3),SIZE(V1_in,2),bloc_size*nb_procs) :: dist_field, combined_field
2525 COMPLEX(KIND=8),
DIMENSION(bloc_size,SIZE(V1_in,3)*nb_procs) :: combined_prod_cu
2526 COMPLEX(KIND=8),
DIMENSION(bloc_size,SIZE(V1_in,3)*nb_procs) :: dist_prod_cu
2528 INTEGER :: n1, n2, rank
2529 INTEGER :: nb, nf, shiftc, shiftl, jindex, longueur_tranche, i, n, code
2533 INTEGER :: fft_dim, howmany, istride, ostride, idist, odist
2534 INTEGER,
DIMENSION(1) :: dim, inembed, onembed
2537 #include "petsc/finclude/petsc.h" 2538 mpi_comm :: communicator
2539 petscerrorcode :: ierr
2541 IF (
PRESENT(temps)) temps = 0.d0
2544 nb_field=
SIZE(v1_in,2)
2545 m_max_c =
SIZE(v1_in,3)
2546 m_max = m_max_c*nb_procs
2547 n_r_pad=2*m_max_pad-1
2548 np_tot = nb_procs*bloc_size
2550 IF (mod(nb_field,2)/=0 .OR. m_max_c==0)
THEN 2568 dist_field(i,1:nb_field,1:np) = transpose(v1_in(:,:,i))
2570 IF (
PRESENT(temps)) temps(3) = temps(3) + mpi_wtime() -t
2572 IF (np/=np_tot) dist_field(:,:,np+1:np_tot) = 1.d100
2574 longueur_tranche=bloc_size*m_max_c*nb_field
2577 mpid=mpi_double_precision
2578 CALL mpi_alltoall (dist_field,longueur_tranche, mpid, combined_field, longueur_tranche, &
2579 mpid, communicator, code)
2581 IF (
PRESENT(temps)) temps(1) = temps(1) + mpi_wtime() -t
2589 shiftc = (nb-1)*bloc_size
2590 shiftl = (nb-1)*m_max_c
2597 cu(shiftl+1:shiftl+m_max_c,n) = cmplx(combined_field(:,1,jindex),&
2598 -combined_field(:,2,jindex),kind=8)/2
2601 cu(1,:) = 2*cmplx(
REAL(cu(1,:),KIND=8),0.d0,KIND=8)
2607 IF (
PRESENT(temps)) temps(3) = temps(3) + mpi_wtime() -t
2610 fft_dim=1; istride=1; ostride=1;
2614 idist=n_r_pad; inembed(1)=n_r_pad; dim(1)=n_r_pad
2615 odist=m_max_pad; onembed(1)=m_max_pad
2618 howmany=bloc_size*nb_field/2
2622 CALL dfftw_plan_many_dft_c2r(fftw_plan_multi_c2r, fft_dim, dim, howmany, cu, &
2623 onembed, ostride, odist, ru, inembed, istride, idist, fftw_estimate)
2625 CALL dfftw_execute(fftw_plan_multi_c2r)
2627 IF (nb_field==2)
THEN 2628 DO n1 = 1, 2*m_max_pad-1
2629 DO n2 = 1, bloc_size
2631 IF (ru(n1,n2) > 0.5)
THEN 2632 prod_ru(n1,n2) = 1.d0
2634 prod_ru(n1,n2) = 0.d0
2642 CALL dfftw_plan_many_dft_r2c(fftw_plan_multi_r2c, fft_dim, dim, howmany, prod_ru, &
2643 inembed, istride, idist, prod_cu, onembed, ostride, odist, fftw_estimate)
2645 CALL dfftw_execute(fftw_plan_multi_r2c)
2648 prod_cu = prod_cu/n_r_pad
2650 IF (
PRESENT(temps)) temps(2) = temps(2) + mpi_wtime() -t
2654 combined_prod_cu(:,1)=prod_cu(1,:)
2657 combined_prod_cu(:,n)=2*conjg(prod_cu(n,:))
2660 IF (
PRESENT(temps)) temps(3) = temps(3) + mpi_wtime() -t
2663 longueur_tranche=bloc_size*m_max_c*nb_field
2664 mpid=mpi_double_precision
2665 CALL mpi_alltoall (combined_prod_cu,longueur_tranche,mpid, dist_prod_cu,longueur_tranche, &
2666 mpid,communicator,code)
2667 IF (
PRESENT(temps)) temps(1) = temps(1) + mpi_wtime() -t
2672 shiftc = (nb-1)*bloc_size
2673 shiftl = (nb-1)*m_max_c
2674 intermediate = dist_prod_cu(:,shiftl+i)
2676 IF (n+shiftc > np ) cycle
2677 v_out(n+shiftc, 1, i) =
REAL (intermediate(n),KIND=8)
2678 v_out(n+shiftc, 2, i) = aimag(intermediate(n))
2682 IF (
PRESENT(temps)) temps(3) = temps(3) + mpi_wtime() -t
subroutine fft_par_dot_prod_bis(communicator, V1_in, V2_in, V_out, nb_procs, bloc_size, m_max_pad, temps, padding)
subroutine, public fft_par_allen_cahn(communicator, c_in, c_out, temps, padding)
subroutine fft_par_cross_prod_bug(communicator, V1_in, V2_in, V_out, nb_procs, bloc_size, m_max_pad, temps)
subroutine, public fft_par_dot_prod(communicator, V1_in, V2_in, c_out, temps, padding)
subroutine, public fft_par_dot_prod_dcl(communicator, V1_in, V2_in, c_out, nb_procs, bloc_size, m_max_pad, temps)
subroutine error_petsc(string)
subroutine, public fft_heaviside_dcl(communicator, V1_in, V_out, nb_procs, bloc_size, m_max_pad, temps, padding)
subroutine, public ref(communicator, V1_in, V2_in, V_out, temps)
subroutine, public fft_par_compressive_visc_dcl(communicator, V1_in, V2_in, V_out, pb, nb_procs, bloc_size, m_max_pad, l_G, opt_norm_out, opt_M_vel, opt_norm, temps, padding)
subroutine, public fft_par_real(communicator, V1_in, V_out, opt_nb_plane)
subroutine, public fft_par_cross_prod_dcl(communicator, V1_in, V2_in, V_out, nb_procs, bloc_size, m_max_pad, temps, padding)
subroutine, public fft_par_prod_dcl(communicator, c1_in, c2_in, c_out, nb_procs, bloc_size, m_max_pad, temps)
subroutine, public fft_par_prod(communicator, c1_in, c2_in, c_out, temps, padding)
subroutine, public fft_par_cross_prod(communicator, V1_in, V2_in, V_out, temps, padding)