SFEMaNS  version 5.3
Reference documentation for SFEMaNS
fft_parallel.f90
Go to the documentation of this file.
1 !
2 !Authors: Katarzyna Boronska, Jean-Luc Guermond, Copyrights 2007
3 !
5 #include "petsc/finclude/petsc.h"
6  USE petsc
7  IMPLICIT NONE
17  PRIVATE
18 !===
19 !Comments about the use of select_mode.
20 !The first mode must be 0 and all the other modes must be multiple of one non-zero integer
21 !If select_mode is used, the FFT does not care and computes the real values
22 !as if al the modes were contiguous. It does not matter since the values
23 !in the real spaces are computed at the same fake angles, but they are sent back
24 !in the fourier space on the correct fourier modes.
25 !===
26 CONTAINS
27  SUBROUTINE fft_par_real(communicator, V1_in, V_out, opt_nb_plane)
28  USE my_util
29  IMPLICIT NONE
30  include 'fftw3.f'
31  ! Format: V_1in(1:np,1:6,1:m_max_c)
32  ! INPUT ARE COSINE AND SINE COEFFICIENTS
33  ! THEY ARE PUT IN COMPLEX FORMAT: c_0 = a_0 + i*0 and c_n = (a_n-i*b_n)/2
34  REAL(KIND=8), DIMENSION(:,:,:), INTENT(IN) :: V1_in
35  REAL(KIND=8), DIMENSION(:,:,:), ALLOCATABLE :: V_out
36  ! FL-CN possible faute ??? 19/03/2013
37  !REAL(KIND=8), DIMENSION(:,:,:), POINTER :: V_out
38  INTEGER, OPTIONAL :: opt_nb_plane
39  INTEGER :: np, bloc_size, nb_field, &
40  m_max, m_max_c, rang, nb_procs, MPID, m_max_pad, N_r_pad
41  INTEGER(KIND=8) :: fftw_plan_multi_c2r
42  COMPLEX(KIND=8), ALLOCATABLE, DIMENSION(:,:,:) :: cu
43  INTEGER :: nb, nf, shiftc, shiftl, jindex, longueur_tranche, i, n, code
44  REAL(KIND=8), ALLOCATABLE, DIMENSION(:,:,:) :: dist_field, combined_field
45  INTEGER :: fft_dim, howmany, istride, ostride, idist, odist
46  INTEGER, DIMENSION(1) :: dim, inembed, onembed
47  ! Recall complexes must be rescaled
48  ! End FFTW parameters
49 !#include "petsc/finclude/petsc.h"
50  mpi_comm :: communicator
51 
52  CALL mpi_comm_size(communicator, nb_procs, code)
53  CALL mpi_comm_rank(communicator, rang, code)
54 
55  np = SIZE(v1_in,1)
56  nb_field = SIZE(v1_in,2) ! Number of fields
57  m_max_c = SIZE(v1_in,3) ! Number of complex (cosines + sines) coefficients per point
58  m_max = m_max_c*nb_procs ! Number of comlex coefficients per point per processor
59  IF (mod(nb_field,2)/=0 .OR. m_max_c==0) THEN
60  CALL error_petsc(.OR.'Bug in FFT_PAR_REAL: MOD(nb_field,2)/=0 m_max_c==0')
61  END IF
62 
63  !===Bloc_size is the number of points that are handled by one processor
64  !===once the Fourier modes are all collected on the processor
65  IF (modulo(np,nb_procs)==0) THEN
66  bloc_size = np/nb_procs
67  ELSE
68  bloc_size=1
69  CALL error_petsc('Bug in FFT_PAR_REAL: np is not a multiple of nb_procs')
70  END IF
71 
72  IF (PRESENT(opt_nb_plane)) THEN
73  IF (opt_nb_plane> 2*m_max-1) THEN
74  m_max_pad = (opt_nb_plane+1)/2
75  ELSE
76  m_max_pad = m_max
77  END IF
78  ELSE
79  m_max_pad = m_max
80  END IF
81  n_r_pad=2*m_max_pad-1
82 
83  ALLOCATE(cu(m_max_pad,nb_field/2, bloc_size))
84  ALLOCATE(dist_field(m_max_c,nb_field,np))
85  ALLOCATE(combined_field(m_max_c,nb_field,np))
86 
87  DO i = 1, m_max_c
88  dist_field(i,:,:) = transpose(v1_in(:,:,i))
89  END DO
90 
91  longueur_tranche=bloc_size*m_max_c*nb_field
92 
93  mpid=mpi_double_precision
94  CALL mpi_alltoall (dist_field,longueur_tranche, mpid, combined_field, longueur_tranche, &
95  mpid, communicator, code)
96 
97  cu = 0.d0
98  DO n = 1, bloc_size
99  DO nb = 1, nb_procs
100  shiftc = (nb-1)*bloc_size
101  shiftl = (nb-1)*m_max_c
102  jindex = n + shiftc
103  DO nf = 1, nb_field/2
104  !===Put real and imaginary parts in a complex
105  !===nf=1,2,3 => V1_in
106  !===INPUT ARE COSINE AND SINE COEFFICIENTS
107  !===THEY ARE PUT IN COMPLEX FORMAT: c_0 = a_0 + i*0 and c_n = (a_n-i*b_n)/2
108  cu(shiftl+1:shiftl+m_max_c,nf,n) = 0.5d0*cmplx(combined_field(:,2*nf-1,jindex),&
109  -combined_field(:,2*nf,jindex),kind=8)
110  END DO
111  END DO
112  END DO
113  cu(1,:,:) = 2*cmplx(REAL(cu(1,:,:),KIND=8),0.d0,KIND=8)
114  !===Padding is done by initialization of cu: cu = 0
115  !===This is equivalent to cu(m_max+1:m_max_pad,:,:) = 0.d0
116 
117  !===Set the parameters for dfftw
118  fft_dim = 1
119  istride = 1
120  ostride = 1
121  idist = n_r_pad
122  inembed(1)= n_r_pad
123  dim(1) = n_r_pad
124  odist = m_max_pad
125  onembed(1)= m_max_pad
126  howmany = bloc_size*nb_field/2
127 
128  ! FL-CN possible faute ??? 19/03/2013
129  ! IF (ASSOCIATED(V_out)) NULLIFY(V_out)
130  ! IF (ASSOCIATED(V_out)) DEALLOCATE(V_out)
131  ! pb sur la ligne suivante
132  IF (ALLOCATED(v_out)) DEALLOCATE(v_out)
133  ALLOCATE(v_out(n_r_pad,nb_field/2,bloc_size))
134 
135  CALL dfftw_plan_many_dft_c2r(fftw_plan_multi_c2r, fft_dim, dim, howmany, cu, &
136  onembed, ostride, odist, v_out, inembed, istride, idist, fftw_estimate)
137  CALL dfftw_execute(fftw_plan_multi_c2r)
138 
139  ! clean up
140  CALL dfftw_destroy_plan(fftw_plan_multi_c2r)
141 
142  DEALLOCATE(cu, dist_field, combined_field)
143 
144  END SUBROUTINE fft_par_real
145 
146  SUBROUTINE fft_par_cross_prod_dcl(communicator,V1_in, V2_in, V_out, nb_procs, bloc_size, m_max_pad, temps)
147  !This a de-aliased version of the code, FEB 4, 2011, JLG
148  IMPLICIT NONE
149  include 'fftw3.f'
150  ! Format: V_1in(1:np,1:6,1:m_max_c)
151  ! INPUT ARE COSINE AND SINE COEFFICIENTS
152  ! THEY ARE PUT IN COMPLEX FORMAT: c_0 = a_0 + i*0 and c_n = (a_n-i*b_n)/2
153  REAL(KIND=8), DIMENSION(:,:,:), INTENT(IN) :: V1_in, V2_in
154  REAL(KIND=8), DIMENSION(:,:,:), INTENT(OUT) :: V_out
155  REAL(KIND=8), DIMENSION(:), OPTIONAL, INTENT(INOUT) :: temps
156  INTEGER, INTENT(IN) :: bloc_size, m_max_pad, nb_procs
157  INTEGER :: np, np_tot, nb_field, m_max, m_max_c, MPID, N_r_pad
158  INTEGER(KIND=8) :: fftw_plan_multi_c2r, fftw_plan_multi_r2c
159  COMPLEX(KIND=8), DIMENSION(m_max_pad, SIZE(V1_in,2), bloc_size) :: cu
160  REAL(KIND=8), DIMENSION(2*m_max_pad-1,SIZE(V1_in,2),bloc_size) :: ru
161  COMPLEX(KIND=8), DIMENSION(m_max_pad,SIZE(V1_in,2)/2,bloc_size) :: prod_cu
162  REAL(KIND=8), DIMENSION(2*m_max_pad-1,SIZE(V1_in,2)/2,bloc_size) :: prod_ru
163  COMPLEX(KIND=8), DIMENSION(SIZE(V1_in,2)/2,bloc_size) :: intermediate
164  REAL(KIND=8), DIMENSION(SIZE(V1_in,3),2*SIZE(V1_in,2),bloc_size*nb_procs) :: dist_field, combined_field
165  COMPLEX(KIND=8), DIMENSION(SIZE(V1_in,2)/2,bloc_size,SIZE(V1_in,3)*nb_procs) :: combined_prod_cu
166  COMPLEX(KIND=8), DIMENSION(SIZE(V1_in,2)/2,bloc_size,SIZE(V1_in,3)*nb_procs) :: dist_prod_cu
167  INTEGER :: i_field
168  INTEGER :: nb, nf, shiftc, shiftl, jindex, longueur_tranche, i, n, code
169  REAL(KIND=8) :: t
170  ! FFTW parameters
171  INTEGER :: fft_dim, howmany, istride, ostride, idist, odist
172  INTEGER, DIMENSION(1) :: dim, inembed, onembed
173  ! Recall complexes must be rescaled
174  ! End FFTW parameters
175 !#include "petsc/finclude/petsc.h"
176  mpi_comm :: communicator
177 
178  IF (PRESENT(temps)) temps = 0.d0
179 
180  np = SIZE(v1_in,1)
181  nb_field= SIZE(v1_in,2) ! Number of fields
182  m_max_c = SIZE(v1_in,3) ! Number of complex (cosines + sines) coefficients per point
183  m_max = m_max_c*nb_procs! Number of comlex coefficients per point per processor
184  n_r_pad=2*m_max_pad-1
185  np_tot = nb_procs*bloc_size
186 
187  IF (mod(nb_field,2)/=0 .OR. m_max_c==0) THEN
188  WRITE(*,*) ' BUG '
189  stop
190  END IF
191 
192  ! Bloc_size is the number of points that are handled by one processor
193  ! once the Fourier modes are all collected
194  ! Computation of bloc_size and np_tot
195  ! fin de la repartition des points
196 
197  ! Packing all 3 complex components of both v1 and v2 input fields
198  ! into dist_field, where the dimension indexing the nodal points varies the least rapidly,
199  ! so that after distributing the data to the processes, each one will obtain a part
200  ! on nodal points
201  ! TRANSPOSE pr que la variable i associee aux modes soit la 1ere sur laquelle on va faire la FFT
202  t = mpi_wtime()
203 
204  DO i = 1, m_max_c
205  dist_field(i,1:nb_field,1:np) = transpose(v1_in(:,:,i))
206  dist_field(i,nb_field+1:2*nb_field,1:np) = transpose(v2_in(:,:,i))
207  END DO
208  IF (PRESENT(temps)) temps(3) = temps(3) + mpi_wtime() -t
209 
210  IF (np/=np_tot) dist_field(:,:,np+1:np_tot) = 1.d100
211 
212  longueur_tranche=bloc_size*m_max_c*nb_field*2
213 
214  t = mpi_wtime()
215  mpid=mpi_double_precision
216  CALL mpi_alltoall (dist_field,longueur_tranche, mpid, combined_field, longueur_tranche, &
217  mpid, communicator, code)
218  IF (PRESENT(temps)) temps(1) = temps(1) + mpi_wtime() -t
219 
220  t = mpi_wtime()
221  !JLG, FEB 4, 2011
222  cu = 0.d0
223  !JLG, FEB 4, 2011
224  DO n = 1, bloc_size
225  DO nb = 1, nb_procs
226  shiftc = (nb-1)*bloc_size
227  shiftl = (nb-1)*m_max_c
228  jindex = n + shiftc
229  DO nf = 1, nb_field
230  ! Put real and imaginary parts in a complex
231  ! nf=1,2,3 => V1_in
232  ! nf=4,5,6 => V2_in
233  ! INPUT ARE COSINE AND SINE COEFFICIENTS
234  ! THEY ARE PUT IN COMPLEX FORMAT: c_0 = a_0 + i*0 and c_n = (a_n-i*b_n)/2
235  cu(shiftl+1:shiftl+m_max_c,nf,n) = 0.5d0*cmplx(combined_field(:,2*nf-1,jindex),&
236  -combined_field(:,2*nf,jindex),kind=8)
237  END DO
238  END DO
239  END DO
240  cu(1,:,:) = 2*cmplx(REAL(cu(1,:,:),KIND=8),0.d0,KIND=8)
241  !JLG, FEB 4, 2011
242  !Padding is done by initialization of cu: cu = 0
243  !This is eequivalent to cu(m_max+1:m_max_pad,:,:) = 0.d0
244  !JLG, FEB 4, 2011
245 
246  IF (PRESENT(temps)) temps(3) = temps(3) + mpi_wtime() -t
247 
248  ! Set the parameters for dfftw
249  fft_dim=1; istride=1; ostride=1;
250  !JLG, FEB 4, 2011
251 !!$ idist=N_r; inembed(1)=N_r; DIM(1)=N_r
252 !!$ odist=m_max; onembed(1)=m_max
253  idist=n_r_pad; inembed(1)=n_r_pad; dim(1)=n_r_pad
254  odist=m_max_pad; onembed(1)=m_max_pad
255  !JLG, FEB 4, 2011
256 
257  howmany=bloc_size*nb_field
258 
259  t = mpi_wtime()
260  CALL dfftw_plan_many_dft_c2r(fftw_plan_multi_c2r, fft_dim, dim, howmany, cu, &
261  onembed, ostride, odist, ru, inembed, istride, idist, fftw_estimate)
262  !write(*,*) ' FFT_PAR_CROSS_PROD: fftw_plan_multi_c2r', fftw_plan_multi_c2r
263  CALL dfftw_execute(fftw_plan_multi_c2r)
264 
265  ! clean up
266  CALL dfftw_destroy_plan(fftw_plan_multi_c2r)
267 
268  ! CROSS PRODDUCT
269  IF (nb_field==6) THEN
270  prod_ru(:,1,:) = ru(:,2,:)*ru(:,6,:) - ru(:,3,:)*ru(:,5,:)
271  prod_ru(:,2,:) = ru(:,3,:)*ru(:,4,:) - ru(:,1,:)*ru(:,6,:)
272  prod_ru(:,3,:) = ru(:,1,:)*ru(:,5,:) - ru(:,2,:)*ru(:,4,:)
273  END IF
274  ! CROSS PRODUCT
275 
276  howmany = howmany/2
277  CALL dfftw_plan_many_dft_r2c(fftw_plan_multi_r2c, fft_dim, dim, howmany, prod_ru, &
278  inembed, istride, idist, prod_cu, onembed, ostride, odist, fftw_estimate)
279  !write(*,*) ' FFT_PAR_CROSS_PROD: fftw_plan_multi_r2c', fftw_plan_multi_r2c
280  CALL dfftw_execute(fftw_plan_multi_r2c)
281 
282  ! clean up
283  CALL dfftw_destroy_plan(fftw_plan_multi_r2c)
284 
285  !JLG, FEB 4, 2011
286 !!$ prod_cu = prod_cu/N_r !Scaling
287  prod_cu = (1.d0/n_r_pad)*prod_cu !Scaling
288  !JLG, FEB 4, 2011
289  IF (PRESENT(temps)) temps(2) = temps(2) + mpi_wtime() -t
290 
291  !Now we need to redistribute the Fourier coefficients on each processor
292 
293  t = mpi_wtime()
294  combined_prod_cu(:,:,1)=prod_cu(1,:,:)
295  DO n=2, m_max
296  !combined_prod_cu(:,:,n)=prod_cu(n,:,:)
297  combined_prod_cu(:,:,n)=2*conjg(prod_cu(n,:,:))
298  END DO
299 
300  IF (PRESENT(temps)) temps(3) = temps(3) + mpi_wtime() -t
301 
302  t = mpi_wtime()
303  longueur_tranche=bloc_size*m_max_c*nb_field
304  mpid=mpi_double_precision
305  CALL mpi_alltoall (combined_prod_cu,longueur_tranche,mpid, dist_prod_cu,longueur_tranche, &
306  mpid,communicator,code)
307  IF (PRESENT(temps)) temps(1) = temps(1) + mpi_wtime() -t
308  ! dimensions:
309  t = mpi_wtime()
310  DO i = 1, m_max_c
311  DO nb = 1, nb_procs
312  shiftc = (nb-1)*bloc_size
313  shiftl = (nb-1)*m_max_c
314  intermediate = dist_prod_cu(:,:,shiftl+i)
315  DO n = 1, bloc_size
316  IF (n+shiftc > np ) cycle
317  DO i_field = 1, nb_field/2
318  v_out(n+shiftc, i_field*2-1, i) = REAL (intermediate(i_field,n),KIND=8)
319  v_out(n+shiftc, i_field*2 , i) = aimag(intermediate(i_field,n))
320  END DO
321  END DO
322  END DO
323  END DO
324 
325  IF (PRESENT(temps)) temps(3) = temps(3) + mpi_wtime() -t
326 
327  END SUBROUTINE fft_par_cross_prod_dcl
328 
329  SUBROUTINE fft_par_dot_prod_dcl(communicator,V1_in, V2_in, c_out, nb_procs, bloc_size, m_max_pad, temps)
330  !FFT (FFT(-1) V1 . FFT(-1) V2) = c_out
331  !This a de-aliased version of the code, FEB 4, 2011, JLG
332  IMPLICIT NONE
333  include 'fftw3.f'
334  ! Format: V_1in(1:np,1:6,1:m_max_c)
335  ! INPUT ARE COSINE AND SINE COEFFICIENTS
336  ! THEY ARE PUT IN COMPLEX FORMAT: c_0 = a_0 + i*0 and c_n = (a_n-i*b_n)/2
337  REAL(KIND=8), DIMENSION(:,:,:), INTENT(IN) :: V1_in, V2_in
338  REAL(KIND=8), DIMENSION(:,:,:), INTENT(OUT) :: c_out
339  REAL(KIND=8), DIMENSION(:), OPTIONAL, INTENT(INOUT) :: temps
340  INTEGER, INTENT(IN) :: nb_procs, bloc_size, m_max_pad
341  COMPLEX(KIND=8), DIMENSION(m_max_pad, SIZE(V1_in,2), bloc_size) :: cu
342  REAL(KIND=8), DIMENSION(2*m_max_pad-1,SIZE(V1_in,2),bloc_size) :: ru
343  COMPLEX(KIND=8), DIMENSION(m_max_pad,bloc_size) :: prod_cu
344  REAL(KIND=8), DIMENSION(2*m_max_pad-1,bloc_size) :: prod_ru
345  COMPLEX(KIND=8), DIMENSION(bloc_size) :: intermediate
346  REAL(KIND=8), DIMENSION(SIZE(V1_in,3),2*SIZE(V1_in,2),bloc_size*nb_procs) :: dist_field, combined_field
347  COMPLEX(KIND=8), DIMENSION(bloc_size,SIZE(V1_in,3)*nb_procs) :: combined_prod_cu
348  COMPLEX(KIND=8), DIMENSION(bloc_size,SIZE(V1_in,3)*nb_procs) :: dist_prod_cu
349  INTEGER :: np, np_tot, nb_field, m_max, m_max_c, MPID, N_r_pad
350  INTEGER(KIND=8) :: fftw_plan_multi_c2r, fftw_plan_multi_r2c
351  INTEGER :: nb, nf, shiftc, shiftl, jindex, longueur_tranche, i, n, code
352  REAL(KIND=8) :: t
353  ! FFTW parameters
354  INTEGER :: fft_dim, howmany, istride, ostride, idist, odist
355  INTEGER, DIMENSION(1) :: dim, inembed, onembed
356  ! Recall complexes must be rescaled
357  ! End FFTW parameters
358 !#include "petsc/finclude/petsc.h"
359  mpi_comm :: communicator
360 
361  IF (PRESENT(temps)) temps = 0.d0
362 
363  np = SIZE(v1_in,1)
364  nb_field= SIZE(v1_in,2) ! Number of fields
365  m_max_c = SIZE(v1_in,3) ! Number of complex (cosines + sines) coefficients per point
366  m_max = m_max_c*nb_procs! Number of comlex coefficients per point per processor
367  np_tot = nb_procs*bloc_size
368  n_r_pad=2*m_max_pad-1
369 
370  IF (mod(nb_field,2)/=0 .OR. m_max_c==0) THEN
371  WRITE(*,*) ' BUG '
372  stop
373  END IF
374 
375  ! Packing all 3 complex components of both v1 and v2 input fields
376  ! into dist_field, where the dimension indexing the nodal points varies the least rapidly,
377  ! so that after distributing the data to the processes, each one will obtain a part
378  ! on nodal points
379  ! TRANSPOSE pr que la variable i associee aux modes soit la 1ere sur laquelle on va faire la FFT
380  t = mpi_wtime()
381 
382  DO i = 1, m_max_c
383  dist_field(i,1:nb_field,1:np) = transpose(v1_in(:,:,i))
384  dist_field(i,nb_field+1:2*nb_field,1:np) = transpose(v2_in(:,:,i))
385  END DO
386  IF (PRESENT(temps)) temps(3) = temps(3) + mpi_wtime() -t
387 
388  IF (np/=np_tot) dist_field(:,:,np+1:np_tot) = 1.d100
389 
390  longueur_tranche=bloc_size*m_max_c*nb_field*2
391 
392  t = mpi_wtime()
393  mpid=mpi_double_precision
394  CALL mpi_alltoall (dist_field,longueur_tranche, mpid, combined_field, longueur_tranche, &
395  mpid, communicator, code)
396  IF (PRESENT(temps)) temps(1) = temps(1) + mpi_wtime() -t
397 
398  t = mpi_wtime()
399  !JLG, FEB 4, 2011
400  cu = 0.d0
401  !JLG, FEB 4, 2011
402  DO n = 1, bloc_size
403  DO nb = 1, nb_procs
404  shiftc = (nb-1)*bloc_size
405  shiftl = (nb-1)*m_max_c
406  jindex = n + shiftc
407  DO nf = 1, nb_field
408  ! Put real and imaginary parts in a complex
409  ! nf=1,2,3 => V1_in
410  ! nf=4,5,6 => V2_in
411  ! INPUT ARE COSINE AND SINE COEFFICIENTS
412  ! THEY ARE PUT IN COMPLEX FORMAT: c_0 = a_0 + i*0 and c_n = (a_n-i*b_n)/2
413  cu(shiftl+1:shiftl+m_max_c,nf,n) = 0.5d0*cmplx(combined_field(:,2*nf-1,jindex),&
414  -combined_field(:,2*nf,jindex),kind=8)
415  END DO
416  END DO
417  END DO
418  cu(1,:,:) = 2*cmplx(REAL(cu(1,:,:),KIND=8),0.d0,KIND=8)
419  !JLG, FEB 4, 2011
420  !Padding is done by initialization of cu: cu = 0
421  !This is eequivalent to cu(m_max+1:m_max_pad,:,:) = 0.d0
422  !JLG, FEB 4, 2011
423 
424  IF (PRESENT(temps)) temps(3) = temps(3) + mpi_wtime() -t
425 
426  ! Set the parameters for dfftw
427  fft_dim=1; istride=1; ostride=1;
428  !JLG, FEB 4, 2011
429 !!$ idist=N_r; inembed(1)=N_r; DIM(1)=N_r
430 !!$ odist=m_max; onembed(1)=m_max
431  idist=n_r_pad; inembed(1)=n_r_pad; dim(1)=n_r_pad
432  odist=m_max_pad; onembed(1)=m_max_pad
433  !JLG, FEB 4, 2011
434 
435  howmany=bloc_size*nb_field
436 
437 
438  t = mpi_wtime()
439  CALL dfftw_plan_many_dft_c2r(fftw_plan_multi_c2r, fft_dim, dim, howmany, cu, &
440  onembed, ostride, odist, ru, inembed, istride, idist, fftw_estimate)
441  !write(*,*) ' FFT_PAR_DOT_PROD: fftw_plan_multi_c2r', fftw_plan_multi_c2r
442  CALL dfftw_execute(fftw_plan_multi_c2r)
443 
444  ! clean up
445  CALL dfftw_destroy_plan(fftw_plan_multi_c2r)
446 
447  ! DOT PRODDUCT
448  IF (nb_field==6) THEN
449  prod_ru(:,:) = ru(:,1,:)*ru(:,4,:) + ru(:,2,:)*ru(:,5,:) + ru(:,3,:)*ru(:,6,:)
450  END IF
451  ! DOT PRODUCT
452 
453  howmany = bloc_size*1
454  CALL dfftw_plan_many_dft_r2c(fftw_plan_multi_r2c, fft_dim, dim, howmany, prod_ru, &
455  inembed, istride, idist, prod_cu, onembed, ostride, odist, fftw_estimate)
456  !write(*,*) ' FFT_PAR_DOT_PROD: fftw_plan_multi_r2c', fftw_plan_multi_r2c
457  CALL dfftw_execute(fftw_plan_multi_r2c)
458 
459  ! clean up
460  CALL dfftw_destroy_plan(fftw_plan_multi_r2c)
461 
462  !JLG, FEB 4, 2011
463 !!$ prod_cu = prod_cu/N_r !Scaling
464  prod_cu = prod_cu*(1.d0/n_r_pad) !Scaling
465  !JLG, FEB 4, 2011
466  IF (PRESENT(temps)) temps(2) = temps(2) + mpi_wtime() -t
467 
468  !Now we need to redistribute the Fourier coefficients on each processor
469  t = mpi_wtime()
470  combined_prod_cu(:,1)=prod_cu(1,:)
471  DO n=2, m_max
472  combined_prod_cu(:,n)=2*conjg(prod_cu(n,:))
473  END DO
474 
475  IF (PRESENT(temps)) temps(3) = temps(3) + mpi_wtime() -t
476 
477  t = mpi_wtime()
478  longueur_tranche=bloc_size*m_max_c*2
479  mpid=mpi_double_precision
480  CALL mpi_alltoall (combined_prod_cu,longueur_tranche,mpid, dist_prod_cu,longueur_tranche, &
481  mpid,communicator,code)
482  IF (PRESENT(temps)) temps(1) = temps(1) + mpi_wtime() -t
483 
484  t = mpi_wtime()
485  DO i = 1, m_max_c
486  DO nb = 1, nb_procs
487  shiftc = (nb-1)*bloc_size
488  shiftl = (nb-1)*m_max_c
489  intermediate = dist_prod_cu(:,shiftl+i)
490  DO n = 1, bloc_size
491  IF (n+shiftc > np ) cycle
492  c_out(n+shiftc, 1, i) = REAL (intermediate(n),KIND=8)
493  c_out(n+shiftc, 2 , i) = aimag(intermediate(n))
494  END DO
495  END DO
496  END DO
497  IF (PRESENT(temps)) temps(3) = temps(3) + mpi_wtime() -t
498 
499  END SUBROUTINE fft_par_dot_prod_dcl
500 
501  SUBROUTINE fft_par_prod_dcl(communicator, c1_in, c2_in, c_out, nb_procs, bloc_size, m_max_pad, temps)
502  !FFT (FFT(-1) V1 . FFT(-1) V2) = c_out
503  !This a de-aliased version of the code, FEB 4, 2011, JLG
504  USE my_util
505  IMPLICIT NONE
506  include 'fftw3.f'
507  ! Format: c_1in(1:np,1:2,1:m_max_c)
508  ! INPUT ARE COSINE AND SINE COEFFICIENTS
509  ! THEY ARE PUT IN COMPLEX FORMAT: c_0 = a_0 + i*0 and c_n = (a_n-i*b_n)/2
510  REAL(KIND=8), DIMENSION(:,:,:), INTENT(IN) :: c1_in, c2_in
511  REAL(KIND=8), DIMENSION(:,:,:), INTENT(OUT) :: c_out
512  REAL(KIND=8), DIMENSION(:), OPTIONAL, INTENT(INOUT) :: temps
513  INTEGER, INTENT(IN) :: nb_procs, bloc_size, m_max_pad
514  COMPLEX(KIND=8), DIMENSION(m_max_pad, 2, bloc_size) :: cu
515  REAL(KIND=8), DIMENSION(2*m_max_pad-1, 2, bloc_size) :: ru
516  COMPLEX(KIND=8), DIMENSION(m_max_pad, bloc_size) :: prod_cu
517  REAL(KIND=8), DIMENSION(2*m_max_pad-1,bloc_size) :: prod_ru
518  REAL(KIND=8), DIMENSION(SIZE(c1_in,3),4, bloc_size*nb_procs) :: dist_field, combined_field
519  COMPLEX(KIND=8), DIMENSION(bloc_size,SIZE(c1_in,3)*nb_procs) :: dist_prod_cu, combined_prod_cu
520  COMPLEX(KIND=8), DIMENSION(bloc_size) :: intermediate
521  INTEGER :: np, np_tot, m_max, m_max_c, MPID, N_r_pad
522  INTEGER(KIND=8) :: fftw_plan_multi_c2r, fftw_plan_multi_r2c
523  INTEGER :: nb, nf, shiftc, shiftl, jindex, longueur_tranche, i, n, code
524  REAL(KIND=8) :: t
525  ! FFTW parameters
526  INTEGER :: fft_dim, howmany, istride, ostride, idist, odist
527  INTEGER, DIMENSION(1) :: dim, inembed, onembed
528  ! Recall complexes must be rescaled
529  ! End FFTW parameters
530 !#include "petsc/finclude/petsc.h"
531  mpi_comm :: communicator
532 
533  IF (PRESENT(temps)) temps = 0.d0
534 
535  np = SIZE(c1_in,1)
536  m_max_c = SIZE(c1_in,3) ! Number of complex (cosines + sines) coefficients per point
537  m_max = m_max_c*nb_procs! Number of comlex coefficients per point per processor
538  np_tot = nb_procs*bloc_size
539  n_r_pad=2*m_max_pad-1
540 
541  IF (m_max_c==0) THEN
542  WRITE(*,*) ' BUG '
543  stop
544  END IF
545 
546  ! Packing all 3 complex components of both v1 and v2 input fields
547  ! into dist_field, where the dimension indexing the nodal points varies the least rapidly,
548  ! so that after distributing the data to the processes, each one will obtain a part
549  ! on nodal points
550  ! TRANSPOSE pr que la variable i associee aux modes soit la 1ere sur laquelle on va faire la FFT
551  t = mpi_wtime()
552  DO i = 1, m_max_c
553  dist_field(i,1:2,1:np) = transpose(c1_in(:,:,i))
554  dist_field(i,3:4,1:np) = transpose(c2_in(:,:,i))
555  END DO
556  IF (PRESENT(temps)) temps(3) = temps(3) + mpi_wtime() -t
557 
558  IF (np/=np_tot) dist_field(:,:,np+1:np_tot) = 1.d100
559 
560  longueur_tranche=bloc_size*m_max_c*4
561 
562  t = mpi_wtime()
563  mpid=mpi_double_precision
564  CALL mpi_alltoall (dist_field,longueur_tranche, mpid, combined_field, longueur_tranche, &
565  mpid, communicator, code)
566  IF (PRESENT(temps)) temps(1) = temps(1) + mpi_wtime() -t
567 
568  t = mpi_wtime()
569  !JLG, FEB 4, 2011
570  cu = 0.d0
571  !JLG, FEB 4, 2011
572  DO n = 1, bloc_size
573  DO nb = 1, nb_procs
574  shiftc = (nb-1)*bloc_size
575  shiftl = (nb-1)*m_max_c
576  jindex = n + shiftc
577  DO nf = 1, 2
578  ! Put real and imaginary parts in a complex
579  ! nf=1 => c1_in
580  ! nf=2 => c2_in
581  ! INPUT ARE COSINE AND SINE COEFFICIENTS
582  ! THEY ARE PUT IN COMPLEX FORMAT: c_0 = a_0 + i*0 and c_n = (a_n-i*b_n)/2
583  cu(shiftl+1:shiftl+m_max_c,nf,n) = 0.5d0*cmplx(combined_field(:,2*nf-1,jindex),&
584  -combined_field(:,2*nf,jindex),kind=8)
585  END DO
586  END DO
587  END DO
588  cu(1,:,:) = 2*cmplx(REAL(cu(1,:,:),KIND=8),0.d0,KIND=8)
589  !JLG, FEB 4, 2011
590  !Padding is done by initialization of cu: cu = 0
591  !This is eequivalent to cu(m_max+1:m_max_pad,:,:) = 0.d0
592  !JLG, FEB 4, 2011
593 
594  IF (PRESENT(temps)) temps(3) = temps(3) + mpi_wtime() -t
595 
596  ! Set the parameters for dfftw
597  fft_dim=1; istride=1; ostride=1;
598  !JLG, FEB 4, 2011
599 !!$ idist=N_r; inembed(1)=N_r; DIM(1)=N_r
600 !!$ odist=m_max; onembed(1)=m_max
601  idist=n_r_pad; inembed(1)=n_r_pad; dim(1)=n_r_pad
602  odist=m_max_pad; onembed(1)=m_max_pad
603  !JLG, FEB 4, 2011
604 
605  howmany=bloc_size*2
606 
607  t = mpi_wtime()
608  CALL dfftw_plan_many_dft_c2r(fftw_plan_multi_c2r, fft_dim, dim, howmany, cu, &
609  onembed, ostride, odist, ru, inembed, istride, idist, fftw_estimate)
610  CALL dfftw_execute(fftw_plan_multi_c2r)
611 
612  ! clean up
613  CALL dfftw_destroy_plan(fftw_plan_multi_c2r)
614 
615  !PRODDUCT
616  prod_ru(:,:) = ru(:,1,:)*ru(:,2,:)
617  !PRODUCT
618 
619  howmany = bloc_size*1
620  CALL dfftw_plan_many_dft_r2c(fftw_plan_multi_r2c, fft_dim, dim, howmany, prod_ru, &
621  inembed, istride, idist, prod_cu, onembed, ostride, odist, fftw_estimate)
622  CALL dfftw_execute(fftw_plan_multi_r2c)
623 
624  ! clean up
625  CALL dfftw_destroy_plan(fftw_plan_multi_r2c)
626 
627  !JLG, FEB 4, 2011
628 !!$ prod_cu = prod_cu/N_r !Scaling
629  prod_cu = prod_cu*(1.d0/n_r_pad) !Scaling
630  !JLG, FEB 4, 2011
631  IF (PRESENT(temps)) temps(2) = temps(2) + mpi_wtime() -t
632 
633  !Now we need to redistribute the Fourier coefficients on each processor
634  t = mpi_wtime()
635  combined_prod_cu(:,1)=prod_cu(1,:)
636  DO n=2, m_max
637  combined_prod_cu(:,n)=2*conjg(prod_cu(n,:))
638  END DO
639 
640  IF (PRESENT(temps)) temps(3) = temps(3) + mpi_wtime() -t
641 
642  t = mpi_wtime()
643  longueur_tranche=bloc_size*m_max_c*2
644  mpid=mpi_double_precision
645  CALL mpi_alltoall (combined_prod_cu,longueur_tranche,mpid, dist_prod_cu,longueur_tranche, &
646  mpid,communicator,code)
647  IF (PRESENT(temps)) temps(1) = temps(1) + mpi_wtime() -t
648 
649  t = mpi_wtime()
650  DO i = 1, m_max_c
651  DO nb = 1, nb_procs
652  shiftc = (nb-1)*bloc_size
653  shiftl = (nb-1)*m_max_c
654  intermediate = dist_prod_cu(:,shiftl+i)
655  DO n = 1, bloc_size
656  IF (n+shiftc > np ) cycle
657  c_out(n+shiftc, 1, i) = REAL (intermediate(n),KIND=8)
658  c_out(n+shiftc, 2 , i) = aimag(intermediate(n))
659  END DO
660  END DO
661  END DO
662  IF (PRESENT(temps)) temps(3) = temps(3) + mpi_wtime() -t
663 
664  END SUBROUTINE fft_par_prod_dcl
665 
666  SUBROUTINE fft_heaviside_dcl(communicator, V1_in, values, V_out, nb_procs, bloc_size, &
667  m_max_pad, coeff_tanh, temps)
668  !This a de-aliased version of the code, FEB 4, 2011, JLG
669  IMPLICIT NONE
670  include 'fftw3.f'
671  ! Format: V_1in(1:np,1:6,1:m_max_c)
672  ! INPUT ARE COSINE AND SINE COEFFICIENTS
673  ! THEY ARE PUT IN COMPLEX FORMAT: c_0 = a_0 + i*0 and c_n = (a_n-i*b_n)/2
674  REAL(KIND=8), DIMENSION(:,:,:,:),INTENT(IN) :: V1_in
675  REAL(KIND=8), DIMENSION(:), INTENT(IN) :: values
676  REAL(KIND=8), INTENT(IN) :: coeff_tanh
677  REAL(KIND=8), DIMENSION(:,:,:), INTENT(OUT) :: V_out
678  REAL(KIND=8), DIMENSION(:), OPTIONAL, INTENT(INOUT) :: temps
679  INTEGER, INTENT(IN) :: bloc_size, m_max_pad, nb_procs
680  INTEGER :: np, np_tot, nb_field, m_max, m_max_c, MPID, N_r_pad
681  INTEGER(KIND=8) :: fftw_plan_multi_c2r, fftw_plan_multi_r2c
682  COMPLEX(KIND=8), DIMENSION(m_max_pad, bloc_size) :: cu
683  REAL(KIND=8), DIMENSION(2*m_max_pad-1, bloc_size) :: ru
684  COMPLEX(KIND=8), DIMENSION(m_max_pad,bloc_size) :: prod_cu
685  REAL(KIND=8), DIMENSION(2*m_max_pad-1,bloc_size) :: prod_ru
686  COMPLEX(KIND=8), DIMENSION(bloc_size) :: intermediate
687  REAL(KIND=8), DIMENSION(SIZE(V1_in,4),SIZE(V1_in,3),bloc_size*nb_procs) :: dist_field, combined_field
688  COMPLEX(KIND=8), DIMENSION(bloc_size,SIZE(V1_in,4)*nb_procs) :: combined_prod_cu
689  COMPLEX(KIND=8), DIMENSION(bloc_size,SIZE(V1_in,4)*nb_procs) :: dist_prod_cu
690  REAL(KIND=8), DIMENSION(SIZE(values),2*m_max_pad-1, bloc_size) :: V1_real_loc
691  INTEGER :: n1, n2
692  INTEGER :: nb, shiftc, shiftl, jindex, longueur_tranche, i, n, code, interface_nb
693  REAL(KIND=8) :: t
694  ! FFTW parameters
695  INTEGER :: fft_dim, howmany, istride, ostride, idist, odist
696  INTEGER, DIMENSION(1) :: dim, inembed, onembed
697  ! Recall complexes must be rescaled
698  ! End FFTW parameters
699 !#include "petsc/finclude/petsc.h"
700  mpi_comm :: communicator
701 
702  IF (PRESENT(temps)) temps = 0.d0
703 
704  np = SIZE(v1_in,2)
705  nb_field= SIZE(v1_in,3) ! Number of fields
706  m_max_c = SIZE(v1_in,4) ! Number of complex (cosines + sines) coefficients per point
707  m_max = m_max_c*nb_procs! Number of comlex coefficients per point per processor
708  n_r_pad=2*m_max_pad-1
709  np_tot = nb_procs*bloc_size
710 
711  IF (mod(nb_field,2)/=0 .OR. m_max_c==0) THEN
712  WRITE(*,*) ' BUG '
713  stop
714  END IF
715 
716  ! Bloc_size is the number of points that are handled by one processor
717  ! once the Fourier modes are all collected
718  ! Computation of bloc_size and np_tot
719  ! fin de la repartition des points
720 
721  ! Packing all 3 complex components of both v1 and v2 input fields
722  ! into dist_field, where the dimension indexing the nodal points varies the least rapidly,
723  ! so that after distributing the data to the processes, each one will obtain a part
724  ! on nodal points
725  ! TRANSPOSE pr que la variable i associee aux modes soit la 1ere sur laquelle on va faire la FFT
726  t = mpi_wtime()
727 
728  DO interface_nb = 1, SIZE(values)-1
729  dist_field = 0.d0
730  DO i = 1, m_max_c
731  dist_field(i,1:nb_field,1:np) = transpose(v1_in(interface_nb,:,:,i))
732  END DO
733  IF (PRESENT(temps)) temps(3) = temps(3) + mpi_wtime() -t
734 
735  IF (np/=np_tot) dist_field(:,:,np+1:np_tot) = 1.d100
736 
737  longueur_tranche=bloc_size*m_max_c*nb_field
738 
739  t = mpi_wtime()
740  mpid=mpi_double_precision
741  CALL mpi_alltoall (dist_field,longueur_tranche, mpid, combined_field, longueur_tranche, &
742  mpid, communicator, code)
743 
744  IF (PRESENT(temps)) temps(1) = temps(1) + mpi_wtime() -t
745 
746  t = mpi_wtime()
747  !JLG, FEB 4, 2011
748  cu = 0.d0
749  !JLG, FEB 4, 2011
750  DO n = 1, bloc_size
751  DO nb = 1, nb_procs
752  shiftc = (nb-1)*bloc_size
753  shiftl = (nb-1)*m_max_c
754  jindex = n + shiftc
755  ! Put real and imaginary parts in a complex
756  ! nf=1,2,3 => V1_in
757  ! nf=4,5,6 => V2_in
758  ! INPUT ARE COSINE AND SINE COEFFICIENTS
759  ! THEY ARE PUT IN COMPLEX FORMAT: c_0 = a_0 + i*0 and c_n = (a_n-i*b_n)/2
760  cu(shiftl+1:shiftl+m_max_c,n) = 0.5d0*cmplx(combined_field(:,1,jindex),&
761  -combined_field(:,2,jindex),kind=8)
762  END DO
763  END DO
764  cu(1,:) = 2*cmplx(REAL(cu(1,:),KIND=8),0.d0,KIND=8)
765 
766 
767  IF (PRESENT(temps)) temps(3) = temps(3) + mpi_wtime() -t
768 
769  ! Set the parameters for dfftw
770  fft_dim=1; istride=1; ostride=1;
771  !JLG, FEB 4, 2011
772 !!$ idist=N_r; inembed(1)=N_r; DIM(1)=N_r
773 !!$ odist=m_max; onembed(1)=m_max
774  idist=n_r_pad; inembed(1)=n_r_pad; dim(1)=n_r_pad
775  odist=m_max_pad; onembed(1)=m_max_pad
776  !JLG, FEB 4, 2011
777 
778  howmany=bloc_size*nb_field/2
779 
780  t = mpi_wtime()
781  CALL dfftw_plan_many_dft_c2r(fftw_plan_multi_c2r, fft_dim, dim, howmany, cu, &
782  onembed, ostride, odist, ru, inembed, istride, idist, fftw_estimate)
783  !write(*,*) ' FFT_PAR_CROSS_PROD: fftw_plan_multi_c2r', fftw_plan_multi_c2r
784  CALL dfftw_execute(fftw_plan_multi_c2r)
785 
786  ! clean up
787  CALL dfftw_destroy_plan(fftw_plan_multi_c2r)
788  v1_real_loc(interface_nb,:,:)=ru
789  END DO
790 
791  ! CROSS PRODDUCT
792  IF (nb_field==2) THEN
793  DO n1 = 1, 2*m_max_pad-1
794  DO n2 = 1, bloc_size
795  prod_ru(n1,n2) = values(1)
796  DO interface_nb = 1, SIZE(values)-1
797  !reconstruction convex
798  prod_ru(n1,n2) = prod_ru(n1,n2)*(1.d0-regul(v1_real_loc(interface_nb,n1,n2),coeff_tanh)) + &
799  values(interface_nb+1)*regul(v1_real_loc(interface_nb,n1,n2),coeff_tanh)
800  END DO
801  END DO
802  END DO
803  END IF
804  ! CROSS PRODUCT
805 
806  howmany = howmany
807  CALL dfftw_plan_many_dft_r2c(fftw_plan_multi_r2c, fft_dim, dim, howmany, prod_ru, &
808  inembed, istride, idist, prod_cu, onembed, ostride, odist, fftw_estimate)
809  !write(*,*) ' FFT_PAR_CROSS_PROD: fftw_plan_multi_r2c', fftw_plan_multi_r2c
810  CALL dfftw_execute(fftw_plan_multi_r2c)
811 
812  ! clean up
813  CALL dfftw_destroy_plan(fftw_plan_multi_r2c)
814 
815  !JLG, FEB 4, 2011
816 !!$ prod_cu = prod_cu/N_r !Scaling
817  prod_cu = prod_cu*(1.d0/n_r_pad) !Scaling
818  !JLG, FEB 4, 2011
819  IF (PRESENT(temps)) temps(2) = temps(2) + mpi_wtime() -t
820  !Now we need to redistribute the Fourier coefficients on each processor
821 
822  t = mpi_wtime()
823  combined_prod_cu(:,1)=prod_cu(1,:)
824  DO n=2, m_max
825  !combined_prod_cu(:,:,n)=prod_cu(n,:,:)
826  combined_prod_cu(:,n)=2*conjg(prod_cu(n,:))
827  END DO
828 
829  IF (PRESENT(temps)) temps(3) = temps(3) + mpi_wtime() -t
830 
831  t = mpi_wtime()
832  longueur_tranche=bloc_size*m_max_c*nb_field
833  mpid=mpi_double_precision
834  CALL mpi_alltoall (combined_prod_cu,longueur_tranche,mpid, dist_prod_cu,longueur_tranche, &
835  mpid,communicator,code)
836  IF (PRESENT(temps)) temps(1) = temps(1) + mpi_wtime() -t
837  ! dimensions:
838  t = mpi_wtime()
839  DO i = 1, m_max_c
840  DO nb = 1, nb_procs
841  shiftc = (nb-1)*bloc_size
842  shiftl = (nb-1)*m_max_c
843  intermediate = dist_prod_cu(:,shiftl+i)
844  DO n = 1, bloc_size
845  IF (n+shiftc > np ) cycle
846  v_out(n+shiftc, 1, i) = REAL (intermediate(n),KIND=8)
847  v_out(n+shiftc, 2, i) = aimag(intermediate(n))
848  END DO
849  END DO
850  END DO
851  IF (PRESENT(temps)) temps(3) = temps(3) + mpi_wtime() -t
852 
853  END SUBROUTINE fft_heaviside_dcl
854 
855  SUBROUTINE fft_scalar_vect_dcl(communicator,V1_in, V2_in, V_out, pb, nb_procs, &
856  bloc_size, m_max_pad, temps)
857  !This a de-aliased version of the code, FEB 4, 2011, JLG
858  USE my_util
859  IMPLICIT NONE
860  include 'fftw3.f'
861  ! Format: V_1in(1:np,1:6,1:m_max_c)
862  ! INPUT ARE COSINE AND SINE COEFFICIENTS
863  ! THEY ARE PUT IN COMPLEX FORMAT: c_0 = a_0 + i*0 and c_n = (a_n-i*b_n)/2
864  REAL(KIND=8), DIMENSION(:,:,:), INTENT(IN) :: V1_in ! VECTOR
865  REAL(KIND=8), DIMENSION(:,:,:), INTENT(IN) :: V2_in ! SCALAR
866  REAL(KIND=8), DIMENSION(:,:,:), INTENT(OUT) :: V_out
867  REAL(KIND=8), DIMENSION(:), OPTIONAL, INTENT(INOUT) :: temps
868  INTEGER, INTENT(IN) :: bloc_size, m_max_pad, nb_procs
869  INTEGER, INTENT(IN) :: pb
870  INTEGER :: np, np_tot, nb_field1, nb_field2, m_max, m_max_c, MPID, N_r_pad
871  INTEGER(KIND=8) :: fftw_plan_multi_c2r, fftw_plan_multi_r2c
872  COMPLEX(KIND=8), DIMENSION(m_max_pad, (SIZE(V1_in,2)+SIZE(V2_in,2))/2, bloc_size) :: cu
873  REAL(KIND=8), DIMENSION(2*m_max_pad-1,(SIZE(V1_in,2)+SIZE(V2_in,2))/2,bloc_size) :: ru
874  COMPLEX(KIND=8), DIMENSION(m_max_pad,SIZE(V1_in,2)/2,bloc_size) :: prod_cu
875  REAL(KIND=8), DIMENSION(2*m_max_pad-1,SIZE(V1_in,2)/2,bloc_size) :: prod_ru
876  COMPLEX(KIND=8), DIMENSION(SIZE(V1_in,2)/2, bloc_size) :: intermediate
877  REAL(KIND=8), DIMENSION(SIZE(V1_in,3),SIZE(V1_in,2)+SIZE(V2_in,2),bloc_size*nb_procs) :: dist_field
878  REAL(KIND=8), DIMENSION(SIZE(V1_in,3),SIZE(V1_in,2)+SIZE(V2_in,2),bloc_size*nb_procs) :: combined_field
879  COMPLEX(KIND=8), DIMENSION(SIZE(V1_in,2)/2,bloc_size,SIZE(V1_in,3)*nb_procs) :: combined_prod_cu
880  COMPLEX(KIND=8), DIMENSION(SIZE(V1_in,2)/2,bloc_size,SIZE(V1_in,3)*nb_procs) :: dist_prod_cu
881  INTEGER :: i_field
882  INTEGER :: nb, nf, shiftc, shiftl, jindex, longueur_tranche, i, n, code
883  REAL(KIND=8) :: t
884  ! FFTW parameters
885  INTEGER :: fft_dim, howmany, istride, ostride, idist, odist
886  INTEGER, DIMENSION(1) :: dim, inembed, onembed
887  ! Recall complexes must be rescaled
888  ! End FFTW parameters
889 !#include "petsc/finclude/petsc.h"
890  mpi_comm :: communicator
891 
892  IF (PRESENT(temps)) temps = 0.d0
893 
894  np = SIZE(v1_in,1)
895  nb_field1= SIZE(v1_in,2) ! Number of fields
896  nb_field2= SIZE(v2_in,2) ! Number of fields
897  m_max_c = SIZE(v1_in,3) ! Number of complex (cosines + sines) coefficients per point
898  m_max = m_max_c*nb_procs! Number of comlex coefficients per point per processor
899  n_r_pad=2*m_max_pad-1
900  np_tot = nb_procs*bloc_size
901 
902  IF (mod(nb_field1,2)/=0 .OR. mod(nb_field2,2)/=0 .OR. m_max_c==0) THEN
903  WRITE(*,*) ' BUG '
904  stop
905  END IF
906 
907  ! Bloc_size is the number of points that are handled by one processor
908  ! once the Fourier modes are all collected
909  ! Computation of bloc_size and np_tot
910  ! fin de la repartition des points
911 
912  ! Packing all 3 complex components of both v1 and v2 input fields
913  ! into dist_field, where the dimension indexing the nodal points varies the least rapidly,
914  ! so that after distributing the data to the processes, each one will obtain a part
915  ! on nodal points
916  ! TRANSPOSE pr que la variable i associee aux modes soit la 1ere sur laquelle on va faire la FFT
917  t = mpi_wtime()
918 
919  DO i = 1, m_max_c
920  dist_field(i,1:nb_field1,1:np) = transpose(v1_in(:,:,i))
921  dist_field(i,nb_field1+1:nb_field1+nb_field2,1:np) = transpose(v2_in(:,:,i))
922  END DO
923  IF (PRESENT(temps)) temps(3) = temps(3) + mpi_wtime() -t
924 
925  IF (np/=np_tot) dist_field(:,:,np+1:np_tot) = 1.d100
926 
927  longueur_tranche=bloc_size*m_max_c*(nb_field1+nb_field2)
928 
929  t = mpi_wtime()
930  mpid=mpi_double_precision
931  CALL mpi_alltoall (dist_field,longueur_tranche, mpid, combined_field, longueur_tranche, &
932  mpid, communicator, code)
933  IF (PRESENT(temps)) temps(1) = temps(1) + mpi_wtime() -t
934 
935  t = mpi_wtime()
936  !JLG, FEB 4, 2011
937  cu = 0.d0
938  !JLG, FEB 4, 2011
939  DO n = 1, bloc_size
940  DO nb = 1, nb_procs
941  shiftc = (nb-1)*bloc_size
942  shiftl = (nb-1)*m_max_c
943  jindex = n + shiftc
944  DO nf = 1, (nb_field1+nb_field2)/2
945  ! Put real and imaginary parts in a complex
946  ! nf=1,2,3 => V1_in
947  ! nf=4 => V2_in
948  ! INPUT ARE COSINE AND SINE COEFFICIENTS
949  ! THEY ARE PUT IN COMPLEX FORMAT: c_0 = a_0 + i*0 and c_n = (a_n-i*b_n)/2
950  cu(shiftl+1:shiftl+m_max_c,nf,n) = 0.5d0*cmplx(combined_field(:,2*nf-1,jindex),&
951  -combined_field(:,2*nf,jindex),kind=8)
952  END DO
953  END DO
954  END DO
955  cu(1,:,:) = 2*cmplx(REAL(cu(1,:,:),KIND=8),0.d0,KIND=8)
956  !JLG, FEB 4, 2011
957  !Padding is done by initialization of cu: cu = 0
958  !This is eequivalent to cu(m_max+1:m_max_pad,:,:) = 0.d0
959  !JLG, FEB 4, 2011
960 
961  IF (PRESENT(temps)) temps(3) = temps(3) + mpi_wtime() -t
962 
963  ! Set the parameters for dfftw
964  fft_dim=1; istride=1; ostride=1;
965  !JLG, FEB 4, 2011
966 !!$ idist=N_r; inembed(1)=N_r; DIM(1)=N_r
967 !!$ odist=m_max; onembed(1)=m_max
968  idist=n_r_pad; inembed(1)=n_r_pad; dim(1)=n_r_pad
969  odist=m_max_pad; onembed(1)=m_max_pad
970  !JLG, FEB 4, 2011
971 
972  howmany=bloc_size*(nb_field1+nb_field2)/2
973 
974  t = mpi_wtime()
975  CALL dfftw_plan_many_dft_c2r(fftw_plan_multi_c2r, fft_dim, dim, howmany, cu, &
976  onembed, ostride, odist, ru, inembed, istride, idist, fftw_estimate)
977  !write(*,*) ' FFT_PAR_CROSS_PROD: fftw_plan_multi_c2r', fftw_plan_multi_c2r
978  CALL dfftw_execute(fftw_plan_multi_c2r)
979 
980  ! clean up
981  CALL dfftw_destroy_plan(fftw_plan_multi_c2r)
982 
983  IF (nb_field1 == 6 .AND. nb_field2 == 2) THEN
984  IF (pb==1) THEN
985  prod_ru(:,1,:) = ru(:,4,:)*ru(:,1,:)
986  prod_ru(:,2,:) = ru(:,4,:)*ru(:,2,:)
987  prod_ru(:,3,:) = ru(:,4,:)*ru(:,3,:)
988  ELSE IF (pb==2) THEN
989  prod_ru(:,1,:) = ru(:,1,:)/ru(:,4,:)
990  prod_ru(:,2,:) = ru(:,2,:)/ru(:,4,:)
991  prod_ru(:,3,:) = ru(:,3,:)/ru(:,4,:)
992  ELSE
993  CALL error_petsc('error in problem type while calling FFT_SCALAR_VECT_DCL ')
994  END IF
995  ELSE
996  CALL error_petsc('error in dimension of INPUT variables while calling FFT_SCALAR_VECT_DCL ')
997  END IF
998 
999  howmany = bloc_size*nb_field1/2
1000 
1001  CALL dfftw_plan_many_dft_r2c(fftw_plan_multi_r2c, fft_dim, dim, howmany, prod_ru, &
1002  inembed, istride, idist, prod_cu, onembed, ostride, odist, fftw_estimate)
1003  !write(*,*) ' FFT_PAR_CROSS_PROD: fftw_plan_multi_r2c', fftw_plan_multi_r2c
1004  CALL dfftw_execute(fftw_plan_multi_r2c)
1005  ! clean up
1006  CALL dfftw_destroy_plan(fftw_plan_multi_r2c)
1007 
1008 
1009 
1010  !JLG, FEB 4, 2011
1011 !!$ prod_cu = prod_cu/N_r !Scaling
1012  prod_cu = prod_cu*(1.d0/n_r_pad) !Scaling
1013  !JLG, FEB 4, 2011
1014  IF (PRESENT(temps)) temps(2) = temps(2) + mpi_wtime() -t
1015 
1016  !Now we need to redistribute the Fourier coefficients on each processor
1017 
1018  t = mpi_wtime()
1019  combined_prod_cu(:,:,1)=prod_cu(1,:,:)
1020  DO n=2, m_max
1021  !combined_prod_cu(:,:,n)=prod_cu(n,:,:)
1022  combined_prod_cu(:,:,n)=2*conjg(prod_cu(n,:,:))
1023  END DO
1024 
1025  IF (PRESENT(temps)) temps(3) = temps(3) + mpi_wtime() -t
1026 
1027  longueur_tranche=bloc_size*m_max_c*nb_field1
1028 
1029  t = mpi_wtime()
1030  mpid=mpi_double_precision
1031  CALL mpi_alltoall (combined_prod_cu,longueur_tranche,mpid, dist_prod_cu,longueur_tranche, &
1032  mpid,communicator,code)
1033 
1034  IF (PRESENT(temps)) temps(1) = temps(1) + mpi_wtime() -t
1035  ! dimensions:
1036  t = mpi_wtime()
1037 
1038  DO i = 1, m_max_c
1039  DO nb = 1, nb_procs
1040  shiftc = (nb-1)*bloc_size
1041  shiftl = (nb-1)*m_max_c
1042  intermediate = dist_prod_cu(:,:,shiftl+i)
1043  DO n = 1, bloc_size
1044  IF (n+shiftc > np ) cycle
1045  DO i_field = 1, nb_field1/2
1046  v_out(n+shiftc, i_field*2-1, i) = REAL (intermediate(i_field,n),KIND=8)
1047  v_out(n+shiftc, i_field*2 , i) = aimag(intermediate(i_field,n))
1048  END DO
1049  END DO
1050  END DO
1051  END DO
1052  IF (PRESENT(temps)) temps(3) = temps(3) + mpi_wtime() -t
1053 
1054  END SUBROUTINE fft_scalar_vect_dcl
1055 
1056  SUBROUTINE fft_scalar_vect_no_overshoot(communicator, scalar_bounds, V1_in, V2_in, V_out, pb, nb_procs, &
1057  bloc_size, m_max_pad, temps)
1058  !This a de-aliased version of the code, FEB 4, 2011, JLG
1059  USE my_util
1060  IMPLICIT NONE
1061  include 'fftw3.f'
1062  ! Format: V_1in(1:np,1:6,1:m_max_c)
1063  ! INPUT ARE COSINE AND SINE COEFFICIENTS
1064  ! THEY ARE PUT IN COMPLEX FORMAT: c_0 = a_0 + i*0 and c_n = (a_n-i*b_n)/2
1065  REAL(KIND=8), DIMENSION(:,:,:), INTENT(IN) :: V1_in ! VECTOR
1066  REAL(KIND=8), DIMENSION(:,:,:), INTENT(IN) :: V2_in ! SCALAR
1067  REAL(KIND=8), DIMENSION(:), INTENT(IN) :: scalar_bounds
1068  REAL(KIND=8), DIMENSION(:,:,:), INTENT(OUT) :: V_out
1069  REAL(KIND=8), DIMENSION(:), OPTIONAL, INTENT(INOUT) :: temps
1070  INTEGER, INTENT(IN) :: bloc_size, m_max_pad, nb_procs
1071  INTEGER, INTENT(IN) :: pb
1072  INTEGER :: np, np_tot, nb_field1, nb_field2, m_max, m_max_c, MPID, N_r_pad
1073  INTEGER(KIND=8) :: fftw_plan_multi_c2r, fftw_plan_multi_r2c
1074  COMPLEX(KIND=8), DIMENSION(m_max_pad, (SIZE(V1_in,2)+SIZE(V2_in,2))/2, bloc_size) :: cu
1075  REAL(KIND=8), DIMENSION(2*m_max_pad-1,(SIZE(V1_in,2)+SIZE(V2_in,2))/2,bloc_size) :: ru
1076  COMPLEX(KIND=8), DIMENSION(m_max_pad,SIZE(V1_in,2)/2,bloc_size) :: prod_cu
1077  REAL(KIND=8), DIMENSION(2*m_max_pad-1,SIZE(V1_in,2)/2,bloc_size) :: prod_ru
1078  COMPLEX(KIND=8), DIMENSION(SIZE(V1_in,2)/2, bloc_size) :: intermediate
1079  REAL(KIND=8), DIMENSION(SIZE(V1_in,3),SIZE(V1_in,2)+SIZE(V2_in,2),bloc_size*nb_procs) :: dist_field
1080  REAL(KIND=8), DIMENSION(SIZE(V1_in,3),SIZE(V1_in,2)+SIZE(V2_in,2),bloc_size*nb_procs) :: combined_field
1081  COMPLEX(KIND=8), DIMENSION(SIZE(V1_in,2)/2,bloc_size,SIZE(V1_in,3)*nb_procs) :: combined_prod_cu
1082  COMPLEX(KIND=8), DIMENSION(SIZE(V1_in,2)/2,bloc_size,SIZE(V1_in,3)*nb_procs) :: dist_prod_cu
1083  INTEGER :: i_field
1084  INTEGER :: nb, nf, shiftc, shiftl, jindex, longueur_tranche, i, n, code
1085  REAL(KIND=8) :: t, scalar_min, scalar_max
1086  ! FFTW parameters
1087  INTEGER :: fft_dim, howmany, istride, ostride, idist, odist
1088  INTEGER, DIMENSION(1) :: dim, inembed, onembed
1089  ! Recall complexes must be rescaled
1090  ! End FFTW parameters
1091 !#include "petsc/finclude/petsc.h"
1092  mpi_comm :: communicator
1093 
1094  IF (PRESENT(temps)) temps = 0.d0
1095 
1096  np = SIZE(v1_in,1)
1097  nb_field1= SIZE(v1_in,2) ! Number of fields
1098  nb_field2= SIZE(v2_in,2) ! Number of fields
1099  m_max_c = SIZE(v1_in,3) ! Number of complex (cosines + sines) coefficients per point
1100  m_max = m_max_c*nb_procs! Number of comlex coefficients per point per processor
1101  n_r_pad=2*m_max_pad-1
1102  np_tot = nb_procs*bloc_size
1103 
1104  IF (mod(nb_field1,2)/=0 .OR. mod(nb_field2,2)/=0 .OR. m_max_c==0) THEN
1105  WRITE(*,*) ' BUG '
1106  stop
1107  END IF
1108 
1109  ! Bloc_size is the number of points that are handled by one processor
1110  ! once the Fourier modes are all collected
1111  ! Computation of bloc_size and np_tot
1112  ! fin de la repartition des points
1113 
1114  ! Packing all 3 complex components of both v1 and v2 input fields
1115  ! into dist_field, where the dimension indexing the nodal points varies the least rapidly,
1116  ! so that after distributing the data to the processes, each one will obtain a part
1117  ! on nodal points
1118  ! TRANSPOSE pr que la variable i associee aux modes soit la 1ere sur laquelle on va faire la FFT
1119  t = mpi_wtime()
1120 
1121  DO i = 1, m_max_c
1122  dist_field(i,1:nb_field1,1:np) = transpose(v1_in(:,:,i))
1123  dist_field(i,nb_field1+1:nb_field1+nb_field2,1:np) = transpose(v2_in(:,:,i))
1124  END DO
1125  IF (PRESENT(temps)) temps(3) = temps(3) + mpi_wtime() -t
1126 
1127  IF (np/=np_tot) dist_field(:,:,np+1:np_tot) = 1.d100
1128 
1129  longueur_tranche=bloc_size*m_max_c*(nb_field1+nb_field2)
1130 
1131  t = mpi_wtime()
1132  mpid=mpi_double_precision
1133  CALL mpi_alltoall (dist_field,longueur_tranche, mpid, combined_field, longueur_tranche, &
1134  mpid, communicator, code)
1135  IF (PRESENT(temps)) temps(1) = temps(1) + mpi_wtime() -t
1136 
1137  t = mpi_wtime()
1138  !JLG, FEB 4, 2011
1139  cu = 0.d0
1140  !JLG, FEB 4, 2011
1141  DO n = 1, bloc_size
1142  DO nb = 1, nb_procs
1143  shiftc = (nb-1)*bloc_size
1144  shiftl = (nb-1)*m_max_c
1145  jindex = n + shiftc
1146  DO nf = 1, (nb_field1+nb_field2)/2
1147  ! Put real and imaginary parts in a complex
1148  ! nf=1,2,3 => V1_in
1149  ! nf=4 => V2_in
1150  ! INPUT ARE COSINE AND SINE COEFFICIENTS
1151  ! THEY ARE PUT IN COMPLEX FORMAT: c_0 = a_0 + i*0 and c_n = (a_n-i*b_n)/2
1152  cu(shiftl+1:shiftl+m_max_c,nf,n) = 0.5d0*cmplx(combined_field(:,2*nf-1,jindex),&
1153  -combined_field(:,2*nf,jindex),kind=8)
1154  END DO
1155  END DO
1156  END DO
1157  cu(1,:,:) = 2*cmplx(REAL(cu(1,:,:),KIND=8),0.d0,KIND=8)
1158  !JLG, FEB 4, 2011
1159  !Padding is done by initialization of cu: cu = 0
1160  !This is eequivalent to cu(m_max+1:m_max_pad,:,:) = 0.d0
1161  !JLG, FEB 4, 2011
1162 
1163  IF (PRESENT(temps)) temps(3) = temps(3) + mpi_wtime() -t
1164 
1165  ! Set the parameters for dfftw
1166  fft_dim=1; istride=1; ostride=1;
1167  !JLG, FEB 4, 2011
1168 !!$ idist=N_r; inembed(1)=N_r; DIM(1)=N_r
1169 !!$ odist=m_max; onembed(1)=m_max
1170  idist=n_r_pad; inembed(1)=n_r_pad; dim(1)=n_r_pad
1171  odist=m_max_pad; onembed(1)=m_max_pad
1172  !JLG, FEB 4, 2011
1173 
1174  howmany=bloc_size*(nb_field1+nb_field2)/2
1175 
1176  t = mpi_wtime()
1177  CALL dfftw_plan_many_dft_c2r(fftw_plan_multi_c2r, fft_dim, dim, howmany, cu, &
1178  onembed, ostride, odist, ru, inembed, istride, idist, fftw_estimate)
1179  !write(*,*) ' FFT_PAR_CROSS_PROD: fftw_plan_multi_c2r', fftw_plan_multi_c2r
1180  CALL dfftw_execute(fftw_plan_multi_c2r)
1181 
1182  ! clean up
1183  CALL dfftw_destroy_plan(fftw_plan_multi_c2r)
1184 
1185  IF (nb_field1 == 6 .AND. nb_field2 == 2) THEN
1186  scalar_min = minval(scalar_bounds)
1187  scalar_max = maxval(scalar_bounds)
1188  IF (pb==1) THEN
1189  prod_ru(:,1,:) = max(scalar_min,min(ru(:,4,:),scalar_max))*ru(:,1,:)
1190  prod_ru(:,2,:) = max(scalar_min,min(ru(:,4,:),scalar_max))*ru(:,2,:)
1191  prod_ru(:,3,:) = max(scalar_min,min(ru(:,4,:),scalar_max))*ru(:,3,:)
1192  ELSE IF (pb==2) THEN
1193  prod_ru(:,1,:) = ru(:,1,:)/max(scalar_min,min(ru(:,4,:),scalar_max))
1194  prod_ru(:,2,:) = ru(:,2,:)/max(scalar_min,min(ru(:,4,:),scalar_max))
1195  prod_ru(:,3,:) = ru(:,3,:)/max(scalar_min,min(ru(:,4,:),scalar_max))
1196  ELSE
1197  CALL error_petsc('error in problem type while calling FFT_SCALAR_VECT_DCL ')
1198  END IF
1199  ELSE
1200  CALL error_petsc('error in dimension of INPUT variables while calling FFT_SCALAR_VECT_DCL ')
1201  END IF
1202 
1203  howmany = bloc_size*nb_field1/2
1204 
1205  CALL dfftw_plan_many_dft_r2c(fftw_plan_multi_r2c, fft_dim, dim, howmany, prod_ru, &
1206  inembed, istride, idist, prod_cu, onembed, ostride, odist, fftw_estimate)
1207  !write(*,*) ' FFT_PAR_CROSS_PROD: fftw_plan_multi_r2c', fftw_plan_multi_r2c
1208  CALL dfftw_execute(fftw_plan_multi_r2c)
1209  ! clean up
1210  CALL dfftw_destroy_plan(fftw_plan_multi_r2c)
1211 
1212 
1213 
1214  !JLG, FEB 4, 2011
1215 !!$ prod_cu = prod_cu/N_r !Scaling
1216  prod_cu = prod_cu*(1.d0/n_r_pad) !Scaling
1217  !JLG, FEB 4, 2011
1218  IF (PRESENT(temps)) temps(2) = temps(2) + mpi_wtime() -t
1219 
1220  !Now we need to redistribute the Fourier coefficients on each processor
1221 
1222  t = mpi_wtime()
1223  combined_prod_cu(:,:,1)=prod_cu(1,:,:)
1224  DO n=2, m_max
1225  !combined_prod_cu(:,:,n)=prod_cu(n,:,:)
1226  combined_prod_cu(:,:,n)=2*conjg(prod_cu(n,:,:))
1227  END DO
1228 
1229  IF (PRESENT(temps)) temps(3) = temps(3) + mpi_wtime() -t
1230 
1231  longueur_tranche=bloc_size*m_max_c*nb_field1
1232 
1233  t = mpi_wtime()
1234  mpid=mpi_double_precision
1235  CALL mpi_alltoall (combined_prod_cu,longueur_tranche,mpid, dist_prod_cu,longueur_tranche, &
1236  mpid,communicator,code)
1237 
1238  IF (PRESENT(temps)) temps(1) = temps(1) + mpi_wtime() -t
1239  ! dimensions:
1240  t = mpi_wtime()
1241 
1242  DO i = 1, m_max_c
1243  DO nb = 1, nb_procs
1244  shiftc = (nb-1)*bloc_size
1245  shiftl = (nb-1)*m_max_c
1246  intermediate = dist_prod_cu(:,:,shiftl+i)
1247  DO n = 1, bloc_size
1248  IF (n+shiftc > np ) cycle
1249  DO i_field = 1, nb_field1/2
1250  v_out(n+shiftc, i_field*2-1, i) = REAL (intermediate(i_field,n),KIND=8)
1251  v_out(n+shiftc, i_field*2 , i) = aimag(intermediate(i_field,n))
1252  END DO
1253  END DO
1254  END DO
1255  END DO
1256  IF (PRESENT(temps)) temps(3) = temps(3) + mpi_wtime() -t
1257 
1258  END SUBROUTINE fft_scalar_vect_no_overshoot
1259 
1260  SUBROUTINE fft_max_vel_dcl(communicator, V1_in, V_out, nb_procs, bloc_size, m_max_pad)
1261  !This a de-aliased version of the code, FEB 4, 2011, JLG
1262  USE my_util
1263  IMPLICIT NONE
1264  include 'fftw3.f'
1265  ! Format: V_1in(1:np,1:6,1:m_max_c)
1266  ! INPUT ARE COSINE AND SINE COEFFICIENTS
1267  ! THEY ARE PUT IN COMPLEX FORMAT: c_0 = a_0 + i*0 and c_n = (a_n-i*b_n)/2
1268  REAL(KIND=8), DIMENSION(:,:,:), INTENT(IN) :: V1_in ! VECTOR
1269  REAL(KIND=8), INTENT(OUT) :: V_out
1270  INTEGER, INTENT(IN) :: bloc_size, m_max_pad, nb_procs
1271  INTEGER :: np, np_tot, nb_field1, m_max, m_max_c, MPID, N_r_pad
1272  INTEGER(KIND=8) :: fftw_plan_multi_c2r
1273  COMPLEX(KIND=8), DIMENSION(m_max_pad, SIZE(V1_in,2)/2, bloc_size) :: cu
1274  REAL(KIND=8), DIMENSION(2*m_max_pad-1, SIZE(V1_in,2)/2,bloc_size) :: ru
1275  REAL(KIND=8), DIMENSION(SIZE(V1_in,3),SIZE(V1_in,2),bloc_size*nb_procs) :: dist_field, combined_field
1276  INTEGER :: nb, nf, shiftc, shiftl, jindex, longueur_tranche, i, n, code
1277  REAL(KIND=8), DIMENSION(2*m_max_pad-1, bloc_size) :: norm_vel_loc
1278  REAL(KIND=8) :: max_velocity
1279  ! FFTW parameters
1280  INTEGER :: fft_dim, howmany, istride, ostride, idist, odist
1281  INTEGER, DIMENSION(1) :: dim, inembed, onembed
1282  ! Recall complexes must be rescaled
1283  ! End FFTW parameters
1284 !#include "petsc/finclude/petsc.h"
1285  mpi_comm :: communicator
1286 
1287  np = SIZE(v1_in,1)
1288  nb_field1 = SIZE(v1_in,2) ! Number of fields
1289  m_max_c = SIZE(v1_in,3) ! Number of complex (cosines + sines) coefficients per point
1290  m_max = m_max_c*nb_procs! Number of comlex coefficients per point per processor
1291  n_r_pad = 2*m_max_pad-1
1292  np_tot = nb_procs*bloc_size
1293 
1294  IF (mod(nb_field1,2)/=0 .OR. m_max_c==0) THEN
1295  WRITE(*,*) ' BUG '
1296  stop
1297  END IF
1298 
1299  ! Bloc_size is the number of points that are handled by one processor
1300  ! once the Fourier modes are all collected
1301  ! Computation of bloc_size and np_tot
1302  ! fin de la repartition des points
1303 
1304  ! Packing all 3 complex components of both v1 and v2 input fields
1305  ! into dist_field, where the dimension indexing the nodal points varies the least rapidly,
1306  ! so that after distributing the data to the processes, each one will obtain a part
1307  ! on nodal points
1308  ! TRANSPOSE pr que la variable i associee aux modes soit la 1ere sur laquelle on va faire la FFT
1309 
1310  DO i = 1, m_max_c
1311  dist_field(i,1:nb_field1,1:np) = transpose(v1_in(:,:,i))
1312  END DO
1313 
1314  IF (np/=np_tot) dist_field(:,:,np+1:np_tot) = 0.d0
1315 
1316  longueur_tranche=bloc_size*m_max_c*nb_field1
1317 
1318  mpid=mpi_double_precision
1319  CALL mpi_alltoall (dist_field,longueur_tranche, mpid, combined_field, longueur_tranche, &
1320  mpid, communicator, code)
1321 
1322  !JLG, FEB 4, 2011
1323  cu = 0.d0
1324  !JLG, FEB 4, 2011
1325  DO n = 1, bloc_size
1326  DO nb = 1, nb_procs
1327  shiftc = (nb-1)*bloc_size
1328  shiftl = (nb-1)*m_max_c
1329  jindex = n + shiftc
1330  DO nf = 1, nb_field1/2
1331  ! Put real and imaginary parts in a complex
1332  ! nf=1,2,3 => V1_in
1333  ! nf=4 => V2_in
1334  ! INPUT ARE COSINE AND SINE COEFFICIENTS
1335  ! THEY ARE PUT IN COMPLEX FORMAT: c_0 = a_0 + i*0 and c_n = (a_n-i*b_n)/2
1336  cu(shiftl+1:shiftl+m_max_c,nf,n) = 0.5d0*cmplx(combined_field(:,2*nf-1,jindex),&
1337  -combined_field(:,2*nf,jindex),kind=8)
1338  END DO
1339  END DO
1340  END DO
1341  cu(1,:,:) = 2*cmplx(REAL(cu(1,:,:),KIND=8),0.d0,KIND=8)
1342  !JLG, FEB 4, 2011
1343  !Padding is done by initialization of cu: cu = 0
1344  !This is eequivalent to cu(m_max+1:m_max_pad,:,:) = 0.d0
1345  !JLG, FEB 4, 2011
1346 
1347  ! Set the parameters for dfftw
1348  fft_dim=1; istride=1; ostride=1;
1349  !JLG, FEB 4, 2011
1350 !!$ idist=N_r; inembed(1)=N_r; DIM(1)=N_r
1351 !!$ odist=m_max; onembed(1)=m_max
1352  idist=n_r_pad; inembed(1)=n_r_pad; dim(1)=n_r_pad
1353  odist=m_max_pad; onembed(1)=m_max_pad
1354  !JLG, FEB 4, 2011
1355 
1356  howmany=bloc_size*nb_field1/2
1357 
1358  CALL dfftw_plan_many_dft_c2r(fftw_plan_multi_c2r, fft_dim, dim, howmany, cu, &
1359  onembed, ostride, odist, ru, inembed, istride, idist, fftw_estimate)
1360  !write(*,*) ' FFT_PAR_CROSS_PROD: fftw_plan_multi_c2r', fftw_plan_multi_c2r
1361  CALL dfftw_execute(fftw_plan_multi_c2r)
1362 
1363  ! clean up
1364  CALL dfftw_destroy_plan(fftw_plan_multi_c2r)
1365 
1366  IF (nb_field1==6) THEN
1367  norm_vel_loc(:,:) = sqrt(ru(:,1,:)**2 + ru(:,2,:)**2 + ru(:,3,:)**2)
1368  ELSE
1369  norm_vel_loc(:,:) = sqrt(ru(:,1,:)**2)
1370  END IF
1371  max_velocity = maxval(norm_vel_loc)
1372  CALL mpi_allreduce(max_velocity, v_out, 1, mpi_double_precision, &
1373  mpi_max, communicator, code)
1374 
1375  END SUBROUTINE fft_max_vel_dcl
1376 
1377  SUBROUTINE fft_tensor_dcl(communicator,V1_in, V2_in, V_out, nb_procs, bloc_size, m_max_pad, temps, opt_tension)
1378  !This a de-aliased version of the code, FEB 4, 2011, JLG
1379  USE my_util
1380  IMPLICIT NONE
1381  include 'fftw3.f'
1382  ! Format: V_1in(1:np,1:6,1:m_max_c)
1383  ! INPUT ARE COSINE AND SINE COEFFICIENTS
1384  ! THEY ARE PUT IN COMPLEX FORMAT: c_0 = a_0 + i*0 and c_n = (a_n-i*b_n)/2
1385  REAL(KIND=8), DIMENSION(:,:,:), INTENT(IN) :: V1_in, V2_in
1386  REAL(KIND=8), DIMENSION(:,:,:,:), INTENT(OUT) :: V_out
1387  REAL(KIND=8), DIMENSION(:), OPTIONAL, INTENT(INOUT) :: temps
1388  LOGICAL, OPTIONAL, INTENT(IN) :: opt_tension
1389  INTEGER, INTENT(IN) :: bloc_size, m_max_pad, nb_procs
1390  INTEGER :: np, np_tot, nb_field, m_max, m_max_c, MPID, N_r_pad
1391  INTEGER(KIND=8) :: fftw_plan_multi_c2r, fftw_plan_multi_r2c
1392  COMPLEX(KIND=8), DIMENSION(m_max_pad, SIZE(V1_in,2), bloc_size) :: cu
1393  REAL(KIND=8), DIMENSION(2*m_max_pad-1,SIZE(V1_in,2),bloc_size) :: ru
1394  COMPLEX(KIND=8), DIMENSION(m_max_pad,SIZE(V1_in,2)/2,bloc_size) :: prod_cu1, prod_cu2, prod_cu3
1395  REAL(KIND=8), DIMENSION(2*m_max_pad-1,SIZE(V1_in,2)/2,bloc_size) :: prod_ru1, prod_ru2, prod_ru3
1396  REAL(KIND=8), DIMENSION(SIZE(V1_in,3),2*SIZE(V1_in,2),bloc_size*nb_procs) :: dist_field
1397  REAL(KIND=8), DIMENSION(SIZE(V1_in,3),2*SIZE(V1_in,2),bloc_size*nb_procs) :: combined_field
1398  COMPLEX(KIND=8), DIMENSION(SIZE(V1_in,2)/2,bloc_size,SIZE(V1_in,3)*nb_procs) :: combined_prod_cu1
1399  COMPLEX(KIND=8), DIMENSION(SIZE(V1_in,2)/2,bloc_size,SIZE(V1_in,3)*nb_procs) :: combined_prod_cu2
1400  COMPLEX(KIND=8), DIMENSION(SIZE(V1_in,2)/2,bloc_size,SIZE(V1_in,3)*nb_procs) :: combined_prod_cu3
1401  COMPLEX(KIND=8), DIMENSION(3,SIZE(V1_in,2)/2,bloc_size,SIZE(V1_in,3)*nb_procs) :: combined_prod_cu_tot
1402  COMPLEX(KIND=8), DIMENSION(3,SIZE(V1_in,2)/2,bloc_size,SIZE(V1_in,3)*nb_procs) :: dist_prod_cu_tot
1403  COMPLEX(KIND=8), DIMENSION(3,SIZE(V1_in,2)/2,bloc_size) :: intermediate_tot
1404  REAL(KIND=8), DIMENSION(2*m_max_pad-1,bloc_size) :: norm_grad_tension
1405  INTEGER :: i_field
1406  INTEGER :: nb, nf, shiftc, shiftl, jindex, longueur_tranche, i, n, code
1407  REAL(KIND=8) :: t
1408  ! FFTW parameters
1409  INTEGER :: fft_dim, howmany, istride, ostride, idist, odist
1410  INTEGER, DIMENSION(1) :: dim, inembed, onembed
1411  ! Recall complexes must be rescaled
1412  ! End FFTW parameters
1413 !#include "petsc/finclude/petsc.h"
1414  mpi_comm :: communicator
1415 
1416  IF (PRESENT(temps)) temps = 0.d0
1417 
1418  np = SIZE(v1_in,1)
1419  nb_field= SIZE(v1_in,2) ! Number of fields
1420  m_max_c = SIZE(v1_in,3) ! Number of complex (cosines + sines) coefficients per point
1421  m_max = m_max_c*nb_procs! Number of comlex coefficients per point per processor
1422  n_r_pad=2*m_max_pad-1
1423  np_tot = nb_procs*bloc_size
1424 
1425  IF (mod(nb_field,2)/=0 .OR. m_max_c==0) THEN
1426  WRITE(*,*) ' BUG '
1427  stop
1428  END IF
1429 
1430  ! Bloc_size is the number of points that are handled by one processor
1431  ! once the Fourier modes are all collected
1432  ! Computation of bloc_size and np_tot
1433  ! fin de la repartition des points
1434 
1435  ! Packing all 3 complex components of both v1 and v2 input fields
1436  ! into dist_field, where the dimension indexing the nodal points varies the least rapidly,
1437  ! so that after distributing the data to the processes, each one will obtain a part
1438  ! on nodal points
1439  ! TRANSPOSE pr que la variable i associee aux modes soit la 1ere sur laquelle on va faire la FFT
1440  t = mpi_wtime()
1441 
1442  DO i = 1, m_max_c
1443  dist_field(i,1:nb_field,1:np) = transpose(v1_in(:,:,i))
1444  dist_field(i,nb_field+1:2*nb_field,1:np) = transpose(v2_in(:,:,i))
1445  END DO
1446  IF (PRESENT(temps)) temps(3) = temps(3) + mpi_wtime() -t
1447 
1448  IF (np/=np_tot) dist_field(:,:,np+1:np_tot) = 1.d100
1449 
1450  longueur_tranche=bloc_size*m_max_c*nb_field*2
1451 
1452  t = mpi_wtime()
1453  mpid=mpi_double_precision
1454  CALL mpi_alltoall (dist_field,longueur_tranche, mpid, combined_field, longueur_tranche, &
1455  mpid, communicator, code)
1456  IF (PRESENT(temps)) temps(1) = temps(1) + mpi_wtime() -t
1457 
1458  t = mpi_wtime()
1459  !JLG, FEB 4, 2011
1460  cu = 0.d0
1461  !JLG, FEB 4, 2011
1462  DO n = 1, bloc_size
1463  DO nb = 1, nb_procs
1464  shiftc = (nb-1)*bloc_size
1465  shiftl = (nb-1)*m_max_c
1466  jindex = n + shiftc
1467  DO nf = 1, nb_field
1468  ! Put real and imaginary parts in a complex
1469  ! nf=1,2,3 => V1_in
1470  ! nf=4,5,6 => V2_in
1471  ! INPUT ARE COSINE AND SINE COEFFICIENTS
1472  ! THEY ARE PUT IN COMPLEX FORMAT: c_0 = a_0 + i*0 and c_n = (a_n-i*b_n)/2
1473  cu(shiftl+1:shiftl+m_max_c,nf,n) = 0.5d0*cmplx(combined_field(:,2*nf-1,jindex),&
1474  -combined_field(:,2*nf,jindex),kind=8)
1475  END DO
1476  END DO
1477  END DO
1478  cu(1,:,:) = 2*cmplx(REAL(cu(1,:,:),KIND=8),0.d0,KIND=8)
1479  !JLG, FEB 4, 2011
1480  !Padding is done by initialization of cu: cu = 0
1481  !This is eequivalent to cu(m_max+1:m_max_pad,:,:) = 0.d0
1482  !JLG, FEB 4, 2011
1483 
1484  IF (PRESENT(temps)) temps(3) = temps(3) + mpi_wtime() -t
1485 
1486  ! Set the parameters for dfftw
1487  fft_dim=1; istride=1; ostride=1;
1488  !JLG, FEB 4, 2011
1489 !!$ idist=N_r; inembed(1)=N_r; DIM(1)=N_r
1490 !!$ odist=m_max; onembed(1)=m_max
1491  idist=n_r_pad; inembed(1)=n_r_pad; dim(1)=n_r_pad
1492  odist=m_max_pad; onembed(1)=m_max_pad
1493  !JLG, FEB 4, 2011
1494 
1495  howmany=bloc_size*nb_field
1496 
1497 
1498  t = mpi_wtime()
1499  CALL dfftw_plan_many_dft_c2r(fftw_plan_multi_c2r, fft_dim, dim, howmany, cu, &
1500  onembed, ostride, odist, ru, inembed, istride, idist, fftw_estimate)
1501  !write(*,*) ' FFT_PAR_CROSS_PROD: fftw_plan_multi_c2r', fftw_plan_multi_c2r
1502  CALL dfftw_execute(fftw_plan_multi_c2r)
1503 
1504  ! clean up
1505  CALL dfftw_destroy_plan(fftw_plan_multi_c2r)
1506 
1507 
1508  ! TENSOR PRODDUCT
1509  IF(.NOT.PRESENT(opt_tension)) THEN
1510  IF (nb_field==6) THEN
1511  prod_ru1(:,1,:) = ru(:,1,:)*ru(:,4,:)
1512  prod_ru1(:,2,:) = ru(:,1,:)*ru(:,5,:)
1513  prod_ru1(:,3,:) = ru(:,1,:)*ru(:,6,:)
1514 
1515  prod_ru2(:,1,:) = ru(:,2,:)*ru(:,4,:)
1516  prod_ru2(:,2,:) = ru(:,2,:)*ru(:,5,:)
1517  prod_ru2(:,3,:) = ru(:,2,:)*ru(:,6,:)
1518 
1519  prod_ru3(:,1,:) = ru(:,3,:)*ru(:,4,:)
1520  prod_ru3(:,2,:) = ru(:,3,:)*ru(:,5,:)
1521  prod_ru3(:,3,:) = ru(:,3,:)*ru(:,6,:)
1522  END IF
1523  ELSE IF (opt_tension) THEN
1524  IF (nb_field==6) THEN
1525  norm_grad_tension = sqrt(ru(:,4,:)**2 + ru(:,5,:)**2 + ru(:,6,:)**2)
1526  prod_ru1(:,1,:) = ru(:,1,:)*ru(:,4,:)/(norm_grad_tension + 1.d-14) &
1527  - norm_grad_tension
1528  prod_ru1(:,2,:) = ru(:,1,:)*ru(:,5,:)/(norm_grad_tension + 1.d-14)
1529  prod_ru1(:,3,:) = ru(:,1,:)*ru(:,6,:)/(norm_grad_tension + 1.d-14)
1530 
1531  prod_ru2(:,1,:) = ru(:,2,:)*ru(:,4,:)/(norm_grad_tension + 1.d-14)
1532  prod_ru2(:,2,:) = ru(:,2,:)*ru(:,5,:)/(norm_grad_tension + 1.d-14) &
1533  - norm_grad_tension
1534  prod_ru2(:,3,:) = ru(:,2,:)*ru(:,6,:)/(norm_grad_tension + 1.d-14)
1535 
1536  prod_ru3(:,1,:) = ru(:,3,:)*ru(:,4,:)/(norm_grad_tension + 1.d-14)
1537  prod_ru3(:,2,:) = ru(:,3,:)*ru(:,5,:)/(norm_grad_tension + 1.d-14)
1538  prod_ru3(:,3,:) = ru(:,3,:)*ru(:,6,:)/(norm_grad_tension + 1.d-14) &
1539  - norm_grad_tension
1540  END IF
1541  ELSE
1542  CALL error_petsc('BUG in FFT_TENSOR_DCL : opt_tension should be true if used')
1543  END IF
1544  ! TENSOR PRODUCT
1545 
1546  ! Set the parameters for dfftw
1547  fft_dim=1; istride=1; ostride=1;
1548  !JLG, FEB 4, 2011
1549 !!$ idist=N_r; inembed(1)=N_r; DIM(1)=N_r
1550 !!$ odist=m_max; onembed(1)=m_max
1551  idist=n_r_pad; inembed(1)=n_r_pad; dim(1)=n_r_pad
1552  odist=m_max_pad; onembed(1)=m_max_pad
1553  !JLG, FEB 4, 2011
1554 
1555  howmany=bloc_size*nb_field/2
1556 
1557  CALL dfftw_plan_many_dft_r2c(fftw_plan_multi_r2c, fft_dim, dim, howmany, prod_ru1, &
1558  inembed, istride, idist, prod_cu1, onembed, ostride, odist, fftw_estimate)
1559  !write(*,*) ' FFT_PAR_CROSS_PROD: fftw_plan_multi_r2c', fftw_plan_multi_r2c
1560  CALL dfftw_execute(fftw_plan_multi_r2c)
1561  ! clean up
1562  CALL dfftw_destroy_plan(fftw_plan_multi_r2c)
1563 
1564  CALL dfftw_plan_many_dft_r2c(fftw_plan_multi_r2c, fft_dim, dim, howmany, prod_ru2, &
1565  inembed, istride, idist, prod_cu2, onembed, ostride, odist, fftw_estimate)
1566  !write(*,*) ' FFT_PAR_CROSS_PROD: fftw_plan_multi_r2c', fftw_plan_multi_r2c
1567  CALL dfftw_execute(fftw_plan_multi_r2c)
1568  ! clean up
1569  CALL dfftw_destroy_plan(fftw_plan_multi_r2c)
1570 
1571  CALL dfftw_plan_many_dft_r2c(fftw_plan_multi_r2c, fft_dim, dim, howmany, prod_ru3, &
1572  inembed, istride, idist, prod_cu3, onembed, ostride, odist, fftw_estimate)
1573  !write(*,*) ' FFT_PAR_CROSS_PROD: fftw_plan_multi_r2c', fftw_plan_multi_r2c
1574  CALL dfftw_execute(fftw_plan_multi_r2c)
1575  ! clean up
1576  CALL dfftw_destroy_plan(fftw_plan_multi_r2c)
1577 
1578  !JLG, FEB 4, 2011
1579 !!$ prod_cu = prod_cu/N_r !Scaling
1580 !!$ prod_cu = prod_cu/N_r_pad !Scaling
1581 
1582  prod_cu1 = prod_cu1*(1.d0/n_r_pad) !Scaling
1583  prod_cu2 = prod_cu2*(1.d0/n_r_pad) !Scaling
1584  prod_cu3 = prod_cu3*(1.d0/n_r_pad) !Scaling
1585 
1586  !JLG, FEB 4, 2011
1587  IF (PRESENT(temps)) temps(2) = temps(2) + mpi_wtime() -t
1588 
1589  !Now we need to redistribute the Fourier coefficients on each processor
1590  t = mpi_wtime()
1591  combined_prod_cu1(:,:,1)=prod_cu1(1,:,:)
1592  combined_prod_cu2(:,:,1)=prod_cu2(1,:,:)
1593  combined_prod_cu3(:,:,1)=prod_cu3(1,:,:)
1594  DO n=2, m_max
1595  !combined_prod_cu(:,:,n)=prod_cu(n,:,:)
1596  combined_prod_cu1(:,:,n)=2*conjg(prod_cu1(n,:,:))
1597  combined_prod_cu2(:,:,n)=2*conjg(prod_cu2(n,:,:))
1598  combined_prod_cu3(:,:,n)=2*conjg(prod_cu3(n,:,:))
1599  END DO
1600 
1601  combined_prod_cu_tot(1,:,:,:) = combined_prod_cu1(:,:,:)
1602  combined_prod_cu_tot(2,:,:,:) = combined_prod_cu2(:,:,:)
1603  combined_prod_cu_tot(3,:,:,:) = combined_prod_cu3(:,:,:)
1604 
1605  IF (PRESENT(temps)) temps(3) = temps(3) + mpi_wtime() -t
1606 
1607  t = mpi_wtime()
1608  longueur_tranche=bloc_size*m_max_c*nb_field*3
1609  mpid=mpi_double_precision
1610  CALL mpi_alltoall (combined_prod_cu_tot,longueur_tranche,mpid, dist_prod_cu_tot,longueur_tranche, &
1611  mpid,communicator,code)
1612  IF (PRESENT(temps)) temps(1) = temps(1) + mpi_wtime() -t
1613  ! dimensions:
1614  t = mpi_wtime()
1615  DO i = 1, m_max_c
1616  DO nb = 1, nb_procs
1617  shiftc = (nb-1)*bloc_size
1618  shiftl = (nb-1)*m_max_c
1619  intermediate_tot = dist_prod_cu_tot(:,:,:,shiftl+i)
1620  DO n = 1, bloc_size
1621  IF (n+shiftc > np ) cycle
1622  DO i_field = 1, nb_field/2
1623  v_out(:, n+shiftc, i_field*2-1, i) = REAL (intermediate_tot(:,i_field,n),KIND=8)
1624  v_out(:, n+shiftc, i_field*2 , i) = aimag(intermediate_tot(:,i_field,n))
1625  END DO
1626  END DO
1627  END DO
1628  END DO
1629  IF (PRESENT(temps)) temps(3) = temps(3) + mpi_wtime() -t
1630  END SUBROUTINE fft_tensor_dcl
1631 
1632  SUBROUTINE fft_par_var_eta_prod_t_dcl(communicator, eta, H_mesh, &
1633  c_in, c_out, nb_procs, bloc_size, m_max_pad, time, temps)
1634  !FFT (FFT(-1) V1 . FFT(-1) V2) = c_out
1635  !This a de-aliased version of the code, FEB 4, 2011, JLG
1636  USE my_util
1637  USE def_type_mesh
1638  IMPLICIT NONE
1639  include 'fftw3.f'
1640  TYPE(mesh_type) :: H_mesh
1641  INTERFACE
1642  FUNCTION eta(H_mesh,angles,nb_angles,nb,ne,time) RESULT(vv)
1643  USE def_type_mesh
1644  USE input_data
1645  USE my_util
1646  IMPLICIT NONE
1647  TYPE(mesh_type), INTENT(IN) :: H_mesh
1648  REAL(KIND=8), DIMENSION(:), INTENT(IN) :: angles
1649  INTEGER, INTENT(IN) :: nb_angles
1650  INTEGER, INTENT(IN) :: nb, ne
1651  REAL(KIND=8), INTENT(IN) :: time
1652  REAL(KIND=8), DIMENSION(nb_angles,ne-nb+1) :: vv
1653  END FUNCTION eta
1654  END INTERFACE
1655  INTEGER, INTENT(IN) :: nb_procs, bloc_size, m_max_pad
1656  REAL(KIND=8), DIMENSION(2*m_max_pad-1) :: angles
1657  REAL(KIND=8), DIMENSION(2*m_max_pad-1,bloc_size) :: eta_n
1658  REAL(KIND=8) :: pi
1659  INTEGER :: rank_F
1660  REAL(KIND=8), DIMENSION(:,:,:), INTENT(IN) :: c_in
1661  REAL(KIND=8), DIMENSION(:,:,:), INTENT(OUT) :: c_out
1662  REAL(KIND=8), DIMENSION(:), OPTIONAL, INTENT(INOUT) :: temps
1663  REAL(KIND=8) :: time
1664  COMPLEX(KIND=8), DIMENSION(m_max_pad, SIZE(c_in,2)/2, bloc_size) :: cu
1665  REAL(KIND=8), DIMENSION(2*m_max_pad-1, SIZE(c_in,2)/2, bloc_size) :: ru
1666  COMPLEX(KIND=8), DIMENSION(m_max_pad, SIZE(c_in,2)/2, bloc_size) :: prod_cu
1667  REAL(KIND=8), DIMENSION(2*m_max_pad-1, SIZE(c_in,2)/2,bloc_size) :: prod_ru
1668  REAL(KIND=8), DIMENSION(SIZE(c_in,3),SIZE(c_in,2), bloc_size*nb_procs) :: dist_field, combined_field
1669  COMPLEX(KIND=8), DIMENSION(SIZE(c_in,2)/2,bloc_size,SIZE(c_in,3)*nb_procs):: dist_prod_cu
1670  COMPLEX(KIND=8), DIMENSION(SIZE(c_in,2)/2,bloc_size,SIZE(c_in,3)*nb_procs):: combined_prod_cu
1671  COMPLEX(KIND=8), DIMENSION(SIZE(c_in,2)/2,bloc_size) :: intermediate
1672  INTEGER :: np, np_tot, m_max, m_max_c, MPID, N_r_pad, nb_field, i_field
1673  INTEGER(KIND=8) :: fftw_plan_multi_c2r, fftw_plan_multi_r2c
1674  INTEGER :: nb, ne, nf, shiftc, shiftl, jindex, longueur_tranche, i, n, code
1675  REAL(KIND=8) :: t
1676  ! FFTW parameters
1677  INTEGER :: fft_dim, howmany, istride, ostride, idist, odist
1678  INTEGER, DIMENSION(1) :: dim, inembed, onembed
1679  ! Recall complexes must be rescaled
1680  ! End FFTW parameters
1681 !#include "petsc/finclude/petsc.h"
1682  petscerrorcode :: ierr
1683  mpi_comm :: communicator
1684 
1685  IF (PRESENT(temps)) temps = 0.d0
1686 
1687  np = SIZE(c_in,1)
1688  nb_field = SIZE(c_in,2)
1689  m_max_c = SIZE(c_in,3) ! Number of complex (cosines + sines) coefficients per point
1690  m_max = m_max_c*nb_procs! Number of comlex coefficients per point per processor
1691  np_tot = nb_procs*bloc_size
1692  n_r_pad = 2*m_max_pad-1
1693 
1694  IF (m_max_c==0) THEN
1695  WRITE(*,*) ' BUG '
1696  stop
1697  END IF
1698 
1699  pi = acos(-1.d0)
1700  DO n = 1, n_r_pad
1701  angles(n) = 2*pi*(n-1)/n_r_pad
1702  END DO
1703 
1704  ! Packing all 3 complex components of both v1 and v2 input fields
1705  ! into dist_field, where the dimension indexing the nodal points varies the least rapidly,
1706  ! so that after distributing the data to the processes, each one will obtain a part
1707  ! on nodal points
1708  ! TRANSPOSE pr que la variable i associee aux modes soit la 1ere sur laquelle on va faire la FFT
1709  t = mpi_wtime()
1710  DO i = 1, m_max_c
1711  dist_field(i,:,1:np) = transpose(c_in(:,:,i))
1712  END DO
1713  IF (PRESENT(temps)) temps(3) = temps(3) + mpi_wtime() -t
1714 
1715  IF (np/=np_tot) dist_field(:,:,np+1:np_tot) = 1.d100
1716 
1717  longueur_tranche=bloc_size*m_max_c*nb_field
1718 
1719  t = mpi_wtime()
1720  mpid=mpi_double_precision
1721  CALL mpi_alltoall (dist_field,longueur_tranche, mpid, combined_field, longueur_tranche, &
1722  mpid, communicator, code)
1723  IF (PRESENT(temps)) temps(1) = temps(1) + mpi_wtime() -t
1724 
1725  t = mpi_wtime()
1726  !JLG, FEB 4, 2011
1727  cu = 0.d0
1728  !JLG, FEB 4, 2011
1729  DO n = 1, bloc_size
1730  DO nb = 1, nb_procs
1731  shiftc = (nb-1)*bloc_size
1732  shiftl = (nb-1)*m_max_c
1733  jindex = n + shiftc
1734  DO nf = 1, nb_field/2
1735  ! Put real and imaginary parts in a complex
1736  ! INPUT ARE COSINE AND SINE COEFFICIENTS
1737  ! THEY ARE PUT IN COMPLEX FORMAT: c_0 = a_0 + i*0 and c_n = (a_n-i*b_n)/2
1738  cu(shiftl+1:shiftl+m_max_c,nf,n) = 0.5d0*cmplx(combined_field(:,2*nf-1,jindex),&
1739  -combined_field(:,2*nf,jindex),kind=8)
1740  END DO
1741  END DO
1742  END DO
1743  cu(1,:,:) = 2*cmplx(REAL(cu(1,:,:),KIND=8),0.d0,KIND=8)
1744  !JLG, FEB 4, 2011
1745  !Padding is done by initialization of cu: cu = 0
1746  !This is eequivalent to cu(m_max+1:m_max_pad,:,:) = 0.d0
1747  !JLG, FEB 4, 2011
1748 
1749  IF (PRESENT(temps)) temps(3) = temps(3) + mpi_wtime() -t
1750 
1751  ! Set the parameters for dfftw
1752  fft_dim=1; istride=1; ostride=1;
1753  !JLG, FEB 4, 2011
1754 !!$ idist=N_r; inembed(1)=N_r; DIM(1)=N_r
1755 !!$ odist=m_max; onembed(1)=m_max
1756  idist=n_r_pad; inembed(1)=n_r_pad; dim(1)=n_r_pad
1757  odist=m_max_pad; onembed(1)=m_max_pad
1758  !JLG, FEB 4, 2011
1759 
1760  howmany=bloc_size*nb_field/2
1761 
1762  t = mpi_wtime()
1763  CALL dfftw_plan_many_dft_c2r(fftw_plan_multi_c2r, fft_dim, dim, howmany, cu, &
1764  onembed, ostride, odist, ru, inembed, istride, idist, fftw_estimate)
1765  CALL dfftw_execute(fftw_plan_multi_c2r)
1766 
1767  ! clean up
1768  CALL dfftw_destroy_plan(fftw_plan_multi_c2r)
1769 
1770 !!!!!!!!!!!!!!!!!!!!!!!!!! PRODUCT with eta !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
1771  ! Check if the proc is the last one to know if you need to add 1.d0 to eta
1772  CALL mpi_comm_rank(communicator, rank_f, ierr)
1773  IF (rank_f==nb_procs-1) THEN
1774  IF (np-rank_f*bloc_size.GE.1) THEN
1775  nb = rank_f*bloc_size+1
1776  ne = np
1777  eta_n(:,1:np-rank_f*bloc_size) = eta(h_mesh,angles,n_r_pad,nb,ne,time)
1778  eta_n(:,np-rank_f*bloc_size+1:bloc_size) = 1.d0
1779  ELSE
1780  eta_n=1.d0
1781  END IF
1782  ELSE
1783  nb = rank_f*bloc_size+1
1784  ne = rank_f*bloc_size+bloc_size
1785  eta_n(:,:) = eta(h_mesh,angles,n_r_pad,nb,ne,time)
1786  END IF
1787 
1788  IF (nb_field==2) THEN
1789  prod_ru(:,1,:) = eta_n*ru(:,1,:)
1790  ELSE IF (nb_field==6) THEN
1791  prod_ru(:,1,:) = eta_n*ru(:,1,:)
1792  prod_ru(:,2,:) = eta_n*ru(:,2,:)
1793  prod_ru(:,3,:) = eta_n*ru(:,3,:)
1794  ELSE
1795  CALL error_petsc('error in nb_field of c_in while calling FFT_PAR_VAR_ETA_PROD_T_DCL')
1796  END IF
1797 !!!!!!!!!!!!!!!!!!!!!!!!!! PRODUCT with eta !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
1798 
1799  howmany = bloc_size*nb_field/2
1800 
1801  CALL dfftw_plan_many_dft_r2c(fftw_plan_multi_r2c, fft_dim, dim, howmany, prod_ru, &
1802  inembed, istride, idist, prod_cu, onembed, ostride, odist, fftw_estimate)
1803  CALL dfftw_execute(fftw_plan_multi_r2c)
1804 
1805  ! clean up
1806  CALL dfftw_destroy_plan(fftw_plan_multi_r2c)
1807 
1808  !JLG, FEB 4, 2011
1809 !!$ prod_cu = prod_cu/N_r !Scaling
1810  prod_cu = prod_cu*(1.d0/n_r_pad) !Scaling
1811  !JLG, FEB 4, 2011
1812  IF (PRESENT(temps)) temps(2) = temps(2) + mpi_wtime() -t
1813 
1814  !Now we need to redistribute the Fourier coefficients on each processor
1815  t = mpi_wtime()
1816  combined_prod_cu(:,:,1)=prod_cu(1,:,:)
1817  DO n=2, m_max
1818  combined_prod_cu(:,:,n)=2*conjg(prod_cu(n,:,:))
1819  END DO
1820 
1821  IF (PRESENT(temps)) temps(3) = temps(3) + mpi_wtime() -t
1822 
1823  t = mpi_wtime()
1824  longueur_tranche=bloc_size*m_max_c*nb_field
1825  mpid=mpi_double_precision
1826  CALL mpi_alltoall (combined_prod_cu,longueur_tranche,mpid, dist_prod_cu,longueur_tranche, &
1827  mpid,communicator,code)
1828  IF (PRESENT(temps)) temps(1) = temps(1) + mpi_wtime() -t
1829 
1830  t = mpi_wtime()
1831  DO i = 1, m_max_c
1832  DO nb = 1, nb_procs
1833  shiftc = (nb-1)*bloc_size
1834  shiftl = (nb-1)*m_max_c
1835  intermediate = dist_prod_cu(:,:,shiftl+i)
1836  DO n = 1, bloc_size
1837  IF (n+shiftc > np ) cycle
1838  DO i_field = 1, nb_field/2
1839  c_out(n+shiftc, 2*i_field-1, i) = REAL (intermediate(i_field,n),KIND=8)
1840  c_out(n+shiftc, 2*i_field, i) = aimag(intermediate(i_field,n))
1841  END DO
1842  END DO
1843  END DO
1844  END DO
1845  IF (PRESENT(temps)) temps(3) = temps(3) + mpi_wtime() -t
1846 
1847  END SUBROUTINE fft_par_var_eta_prod_t_dcl
1848 
1849  SUBROUTINE fft_par_var_eta_prod_gauss_dcl(communicator, eta, H_mesh, &
1850  c_in, c_out, nb_procs, bloc_size, m_max_pad, rr_gauss, time, temps)
1851  !FFT (FFT(-1) V1 . FFT(-1) V2) = c_out
1852  !This a de-aliased version of the code, FEB 4, 2011, JLG
1853  USE my_util
1854  USE def_type_mesh
1855  IMPLICIT NONE
1856  include 'fftw3.f'
1857  TYPE(mesh_type) :: H_mesh
1858  INTERFACE
1859  FUNCTION eta(H_mesh,rr_gauss,angles,nb_angles,nb,ne,time) RESULT(vv)
1860  USE def_type_mesh
1861  USE input_data
1862  USE my_util
1863  IMPLICIT NONE
1864  TYPE(mesh_type) :: H_mesh
1865  REAL(KIND=8), DIMENSION(:,:), INTENT(IN) :: rr_gauss
1866  REAL(KIND=8), DIMENSION(:), INTENT(IN) :: angles
1867  INTEGER, INTENT(IN) :: nb_angles
1868  INTEGER, INTENT(IN) :: nb, ne
1869  REAL(KIND=8), INTENT(IN) :: time
1870  REAL(KIND=8), DIMENSION(nb_angles,ne-nb+1) :: vv
1871  END FUNCTION eta
1872  END INTERFACE
1873  INTEGER, INTENT(IN) :: nb_procs, bloc_size, m_max_pad
1874  REAL(KIND=8), DIMENSION(2*m_max_pad-1) :: angles
1875  REAL(KIND=8), DIMENSION(2*m_max_pad-1,bloc_size) :: eta_n
1876  REAL(KIND=8) :: pi
1877  INTEGER :: rank_F
1878  REAL(KIND=8) :: time
1879  REAL(KIND=8), DIMENSION(:,:,:), INTENT(IN) :: c_in
1880  REAL(KIND=8), DIMENSION(:,:,:), INTENT(OUT) :: c_out
1881  REAL(KIND=8), DIMENSION(:,:), INTENT(IN) :: rr_gauss
1882  REAL(KIND=8), DIMENSION(:), OPTIONAL, INTENT(INOUT) :: temps
1883  COMPLEX(KIND=8), DIMENSION(m_max_pad, SIZE(c_in,2)/2, bloc_size) :: cu
1884  REAL(KIND=8), DIMENSION(2*m_max_pad-1, SIZE(c_in,2)/2, bloc_size) :: ru
1885  COMPLEX(KIND=8), DIMENSION(m_max_pad, SIZE(c_in,2)/2, bloc_size) :: prod_cu
1886  REAL(KIND=8), DIMENSION(2*m_max_pad-1, SIZE(c_in,2)/2,bloc_size) :: prod_ru
1887  REAL(KIND=8), DIMENSION(SIZE(c_in,3),SIZE(c_in,2), bloc_size*nb_procs) :: dist_field, combined_field
1888  COMPLEX(KIND=8), DIMENSION(SIZE(c_in,2)/2,bloc_size,SIZE(c_in,3)*nb_procs):: dist_prod_cu
1889  COMPLEX(KIND=8), DIMENSION(SIZE(c_in,2)/2,bloc_size,SIZE(c_in,3)*nb_procs):: combined_prod_cu
1890  COMPLEX(KIND=8), DIMENSION(SIZE(c_in,2)/2,bloc_size) :: intermediate
1891  INTEGER :: np, np_tot, m_max, m_max_c, MPID, N_r_pad, nb_field, i_field
1892  INTEGER(KIND=8) :: fftw_plan_multi_c2r, fftw_plan_multi_r2c
1893  INTEGER :: nb, ne, nf, shiftc, shiftl, jindex, longueur_tranche, i, n, code
1894  REAL(KIND=8) :: t
1895  ! FFTW parameters
1896  INTEGER :: fft_dim, howmany, istride, ostride, idist, odist
1897  INTEGER, DIMENSION(1) :: dim, inembed, onembed
1898  ! Recall complexes must be rescaled
1899  ! End FFTW parameters
1900 !#include "petsc/finclude/petsc.h"
1901  petscerrorcode :: ierr
1902  mpi_comm :: communicator
1903 
1904  IF (PRESENT(temps)) temps = 0.d0
1905 
1906  np = SIZE(c_in,1)
1907  nb_field = SIZE(c_in,2)
1908  m_max_c = SIZE(c_in,3) ! Number of complex (cosines + sines) coefficients per point
1909  m_max = m_max_c*nb_procs! Number of comlex coefficients per point per processor
1910  np_tot = nb_procs*bloc_size
1911  n_r_pad = 2*m_max_pad-1
1912 
1913  IF (m_max_c==0) THEN
1914  WRITE(*,*) ' BUG '
1915  stop
1916  END IF
1917 
1918  pi = acos(-1.d0)
1919  DO n = 1, n_r_pad
1920  angles(n) = 2*pi*(n-1)/n_r_pad
1921  END DO
1922 
1923  ! Packing all 3 complex components of both v1 and v2 input fields
1924  ! into dist_field, where the dimension indexing the nodal points varies the least rapidly,
1925  ! so that after distributing the data to the processes, each one will obtain a part
1926  ! on nodal points
1927  ! TRANSPOSE pr que la variable i associee aux modes soit la 1ere sur laquelle on va faire la FFT
1928  t = mpi_wtime()
1929  DO i = 1, m_max_c
1930  dist_field(i,:,1:np) = transpose(c_in(:,:,i))
1931  END DO
1932  IF (PRESENT(temps)) temps(3) = temps(3) + mpi_wtime() -t
1933 
1934  IF (np/=np_tot) dist_field(:,:,np+1:np_tot) = 1.d100
1935 
1936  longueur_tranche=bloc_size*m_max_c*nb_field
1937 
1938  t = mpi_wtime()
1939  mpid=mpi_double_precision
1940  CALL mpi_alltoall (dist_field,longueur_tranche, mpid, combined_field, longueur_tranche, &
1941  mpid, communicator, code)
1942  IF (PRESENT(temps)) temps(1) = temps(1) + mpi_wtime() -t
1943 
1944  t = mpi_wtime()
1945  !JLG, FEB 4, 2011
1946  cu = 0.d0
1947  !JLG, FEB 4, 2011
1948  DO n = 1, bloc_size
1949  DO nb = 1, nb_procs
1950  shiftc = (nb-1)*bloc_size
1951  shiftl = (nb-1)*m_max_c
1952  jindex = n + shiftc
1953  DO nf = 1, nb_field/2
1954  ! Put real and imaginary parts in a complex
1955  ! INPUT ARE COSINE AND SINE COEFFICIENTS
1956  ! THEY ARE PUT IN COMPLEX FORMAT: c_0 = a_0 + i*0 and c_n = (a_n-i*b_n)/2
1957  cu(shiftl+1:shiftl+m_max_c,nf,n) = 0.5d0*cmplx(combined_field(:,2*nf-1,jindex),&
1958  -combined_field(:,2*nf,jindex),kind=8)
1959  END DO
1960  END DO
1961  END DO
1962  cu(1,:,:) = 2*cmplx(REAL(cu(1,:,:),KIND=8),0.d0,KIND=8)
1963  !JLG, FEB 4, 2011
1964  !Padding is done by initialization of cu: cu = 0
1965  !This is eequivalent to cu(m_max+1:m_max_pad,:,:) = 0.d0
1966  !JLG, FEB 4, 2011
1967 
1968  IF (PRESENT(temps)) temps(3) = temps(3) + mpi_wtime() -t
1969 
1970  ! Set the parameters for dfftw
1971  fft_dim=1; istride=1; ostride=1;
1972  !JLG, FEB 4, 2011
1973 !!$ idist=N_r; inembed(1)=N_r; DIM(1)=N_r
1974 !!$ odist=m_max; onembed(1)=m_max
1975  idist=n_r_pad; inembed(1)=n_r_pad; dim(1)=n_r_pad
1976  odist=m_max_pad; onembed(1)=m_max_pad
1977  !JLG, FEB 4, 2011
1978 
1979  howmany=bloc_size*nb_field/2
1980 
1981  t = mpi_wtime()
1982  CALL dfftw_plan_many_dft_c2r(fftw_plan_multi_c2r, fft_dim, dim, howmany, cu, &
1983  onembed, ostride, odist, ru, inembed, istride, idist, fftw_estimate)
1984  CALL dfftw_execute(fftw_plan_multi_c2r)
1985 
1986  ! clean up
1987  CALL dfftw_destroy_plan(fftw_plan_multi_c2r)
1988 
1989 !!!!!!!!!!!!!!!!!!!!!!!!!! PRODUCT with eta !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
1990 ! Check if the proc is the last one to know if you need to add 1.d0 to eta
1991  CALL mpi_comm_rank(communicator, rank_f, ierr)
1992  IF (rank_f==nb_procs-1) THEN
1993  IF (np-rank_f*bloc_size.GE.1) THEN
1994  nb = rank_f*bloc_size+1
1995  ne = np
1996  eta_n(:,1:np-rank_f*bloc_size) = eta(h_mesh,rr_gauss(:,nb:ne),angles,n_r_pad,nb,ne,time)
1997  eta_n(:,np-rank_f*bloc_size+1:bloc_size) = 1.d0
1998  ELSE
1999  eta_n=1.d0
2000  END IF
2001  ELSE
2002  nb = rank_f*bloc_size+1
2003  ne = rank_f*bloc_size+bloc_size
2004  eta_n(:,:) = eta(h_mesh,rr_gauss(:,nb:ne),angles,n_r_pad,nb,ne,time)
2005  END IF
2006 
2007  IF (nb_field==2) THEN
2008  prod_ru(:,1,:) = eta_n*ru(:,1,:)
2009  ELSE IF (nb_field==6) THEN
2010  prod_ru(:,1,:) = eta_n*ru(:,1,:)
2011  prod_ru(:,2,:) = eta_n*ru(:,2,:)
2012  prod_ru(:,3,:) = eta_n*ru(:,3,:)
2013  ELSE
2014  CALL error_petsc('error in nb_field of c_in while calling FFT_PAR_VAR_ETA_PROD_GAUSS_DCL')
2015  END IF
2016 !!!!!!!!!!!!!!!!!!!!!!!!!! PRODUCT with eta !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
2017 
2018  howmany = bloc_size*nb_field/2
2019 
2020  CALL dfftw_plan_many_dft_r2c(fftw_plan_multi_r2c, fft_dim, dim, howmany, prod_ru, &
2021  inembed, istride, idist, prod_cu, onembed, ostride, odist, fftw_estimate)
2022  CALL dfftw_execute(fftw_plan_multi_r2c)
2023 
2024  ! clean up
2025  CALL dfftw_destroy_plan(fftw_plan_multi_r2c)
2026 
2027  !JLG, FEB 4, 2011
2028 !!$ prod_cu = prod_cu/N_r !Scaling
2029  prod_cu = prod_cu*(1.d0/n_r_pad) !Scaling
2030  !JLG, FEB 4, 2011
2031  IF (PRESENT(temps)) temps(2) = temps(2) + mpi_wtime() -t
2032 
2033  !Now we need to redistribute the Fourier coefficients on each processor
2034  t = mpi_wtime()
2035  combined_prod_cu(:,:,1)=prod_cu(1,:,:)
2036  DO n=2, m_max
2037  combined_prod_cu(:,:,n)=2*conjg(prod_cu(n,:,:))
2038  END DO
2039 
2040  IF (PRESENT(temps)) temps(3) = temps(3) + mpi_wtime() -t
2041 
2042  t = mpi_wtime()
2043  longueur_tranche=bloc_size*m_max_c*nb_field
2044  mpid=mpi_double_precision
2045  CALL mpi_alltoall (combined_prod_cu,longueur_tranche,mpid, dist_prod_cu,longueur_tranche, &
2046  mpid,communicator,code)
2047  IF (PRESENT(temps)) temps(1) = temps(1) + mpi_wtime() -t
2048 
2049  t = mpi_wtime()
2050  DO i = 1, m_max_c
2051  DO nb = 1, nb_procs
2052  shiftc = (nb-1)*bloc_size
2053  shiftl = (nb-1)*m_max_c
2054  intermediate = dist_prod_cu(:,:,shiftl+i)
2055  DO n = 1, bloc_size
2056  IF (n+shiftc > np ) cycle
2057  DO i_field = 1, nb_field/2
2058  c_out(n+shiftc, 2*i_field-1, i) = REAL (intermediate(i_field,n),KIND=8)
2059  c_out(n+shiftc, 2*i_field, i) = aimag(intermediate(i_field,n))
2060  END DO
2061  END DO
2062  END DO
2063  END DO
2064  IF (PRESENT(temps)) temps(3) = temps(3) + mpi_wtime() -t
2065 
2066  END SUBROUTINE fft_par_var_eta_prod_gauss_dcl
2067 
2068  FUNCTION regul(phi,eps) RESULT(r)
2069  IMPLICIT NONE
2070  REAL(KIND=8), INTENT(IN) :: phi, eps
2071  REAL(KIND=8) :: x, r
2072  x = phi - 0.5d0
2073  IF (x .LE. -eps) THEN
2074  r = 0.d0
2075  ELSE IF (x .LE. eps) THEN
2076  r = (1+ x*(x**2 - 3*eps**2)/(-2*eps**3))/2
2077  ELSE
2078  r = 1.d0
2079  END IF
2080  END FUNCTION regul
2081 
2082  FUNCTION regul_tab(phi,eps) RESULT(r)
2083  IMPLICIT NONE
2084  REAL(KIND=8), DIMENSION(:), INTENT(IN) :: phi
2085  REAL(KIND=8), INTENT(IN) :: eps
2086  REAL(KIND=8) :: x
2087  REAL(KIND=8), DIMENSION(SIZE(phi)) :: r
2088  INTEGER :: n
2089  DO n = 1, SIZE(phi)
2090  x = phi(n) - 0.5d0
2091  IF (x .LE. -eps) THEN
2092  r(n) = 0.d0
2093  ELSE IF (x .LE. eps) THEN
2094  r(n) = (1.d0 + x*(x**2 - 3*eps**2)/(-2*eps**3))/2
2095  ELSE
2096  r(n) = 1.d0
2097  END IF
2098  END DO
2099  END FUNCTION regul_tab
2100 
2101  SUBROUTINE fft_check_interface(communicator, V1_in, nb_fluids, interface_ok, &
2102  nb_procs, bloc_size, m_max_pad, temps)
2103  !This a de-aliased version of the code, FEB 4, 2011, JLG
2104  USE my_util
2105  IMPLICIT NONE
2106  include 'fftw3.f'
2107  ! Format: V_1in(1:np,1:6,1:m_max_c)
2108  ! INPUT ARE COSINE AND SINE COEFFICIENTS
2109  ! THEY ARE PUT IN COMPLEX FORMAT: c_0 = a_0 + i*0 and c_n = (a_n-i*b_n)/2
2110  REAL(KIND=8), DIMENSION(:,:,:,:),INTENT(IN) :: V1_in
2111  INTEGER, INTENT(IN) :: nb_fluids
2112  INTEGER, INTENT(OUT) :: interface_ok
2113  REAL(KIND=8), DIMENSION(:), OPTIONAL, INTENT(INOUT) :: temps
2114  INTEGER, INTENT(IN) :: bloc_size, m_max_pad, nb_procs
2115  INTEGER :: np, np_tot, nb_field, m_max, m_max_c, MPID, N_r_pad
2116  INTEGER(KIND=8) :: fftw_plan_multi_c2r
2117  COMPLEX(KIND=8), DIMENSION(m_max_pad, bloc_size) :: cu
2118  REAL(KIND=8), DIMENSION(2*m_max_pad-1, bloc_size) :: ru
2119  REAL(KIND=8), DIMENSION(SIZE(V1_in,4),SIZE(V1_in,3),bloc_size*nb_procs) :: dist_field, combined_field
2120  REAL(KIND=8), DIMENSION(nb_fluids-1,2*m_max_pad-1, bloc_size) :: V1_real_loc
2121  INTEGER :: n1, n2
2122  INTEGER :: nb, shiftc, shiftl, jindex, longueur_tranche, i, n, code, interface_nb
2123  REAL(KIND=8) :: t
2124  ! FFTW parameters
2125  INTEGER :: fft_dim, howmany, istride, ostride, idist, odist
2126  INTEGER, DIMENSION(1) :: dim, inembed, onembed
2127  ! Recall complexes must be rescaled
2128  ! End FFTW parameters
2129 !#include "petsc/finclude/petsc.h"
2130  mpi_comm :: communicator
2131 
2132  IF (PRESENT(temps)) temps = 0.d0
2133 
2134  np = SIZE(v1_in,2)
2135  nb_field= SIZE(v1_in,3) ! Number of fields
2136  m_max_c = SIZE(v1_in,4) ! Number of complex (cosines + sines) coefficients per point
2137  m_max = m_max_c*nb_procs! Number of comlex coefficients per point per processor
2138  n_r_pad=2*m_max_pad-1
2139  np_tot = nb_procs*bloc_size
2140 
2141  IF (mod(nb_field,2)/=0 .OR. m_max_c==0) THEN
2142  WRITE(*,*) ' BUG '
2143  stop
2144  END IF
2145 
2146  ! Bloc_size is the number of points that are handled by one processor
2147  ! once the Fourier modes are all collected
2148  ! Computation of bloc_size and np_tot
2149  ! fin de la repartition des points
2150 
2151  ! Packing all 3 complex components of both v1 and v2 input fields
2152  ! into dist_field, where the dimension indexing the nodal points varies the least rapidly,
2153  ! so that after distributing the data to the processes, each one will obtain a part
2154  ! on nodal points
2155  ! TRANSPOSE pr que la variable i associee aux modes soit la 1ere sur laquelle on va faire la FFT
2156  t = mpi_wtime()
2157 
2158  DO interface_nb = 1, nb_fluids-1
2159  dist_field = 0.d0
2160  DO i = 1, m_max_c
2161  dist_field(i,1:nb_field,1:np) = transpose(v1_in(interface_nb,:,:,i))
2162  END DO
2163  IF (PRESENT(temps)) temps(3) = temps(3) + mpi_wtime() -t
2164 
2165  !IF (np/=np_tot) dist_field(:,:,np+1:np_tot) = 1.d100
2166  !TEST LC
2167  IF (np/=np_tot) dist_field(:,:,np+1:np_tot) = 0.d0
2168 
2169  longueur_tranche=bloc_size*m_max_c*nb_field
2170 
2171  t = mpi_wtime()
2172  mpid=mpi_double_precision
2173  CALL mpi_alltoall (dist_field,longueur_tranche, mpid, combined_field, longueur_tranche, &
2174  mpid, communicator, code)
2175 
2176  IF (PRESENT(temps)) temps(1) = temps(1) + mpi_wtime() -t
2177 
2178  t = mpi_wtime()
2179  !JLG, FEB 4, 2011
2180  cu = 0.d0
2181  !JLG, FEB 4, 2011
2182  DO n = 1, bloc_size
2183  DO nb = 1, nb_procs
2184  shiftc = (nb-1)*bloc_size
2185  shiftl = (nb-1)*m_max_c
2186  jindex = n + shiftc
2187  ! Put real and imaginary parts in a complex
2188  ! nf=1,2,3 => V1_in
2189  ! nf=4,5,6 => V2_in
2190  ! INPUT ARE COSINE AND SINE COEFFICIENTS
2191  ! THEY ARE PUT IN COMPLEX FORMAT: c_0 = a_0 + i*0 and c_n = (a_n-i*b_n)/2
2192  cu(shiftl+1:shiftl+m_max_c,n) = 0.5d0*cmplx(combined_field(:,1,jindex),&
2193  -combined_field(:,2,jindex),kind=8)
2194  END DO
2195  END DO
2196  cu(1,:) = 2*cmplx(REAL(cu(1,:),KIND=8),0.d0,KIND=8)
2197 
2198 
2199  IF (PRESENT(temps)) temps(3) = temps(3) + mpi_wtime() -t
2200 
2201  ! Set the parameters for dfftw
2202  fft_dim=1; istride=1; ostride=1;
2203  !JLG, FEB 4, 2011
2204 !!$ idist=N_r; inembed(1)=N_r; DIM(1)=N_r
2205 !!$ odist=m_max; onembed(1)=m_max
2206  idist=n_r_pad; inembed(1)=n_r_pad; dim(1)=n_r_pad
2207  odist=m_max_pad; onembed(1)=m_max_pad
2208  !JLG, FEB 4, 2011
2209 
2210  howmany=bloc_size*nb_field/2
2211 
2212  t = mpi_wtime()
2213  CALL dfftw_plan_many_dft_c2r(fftw_plan_multi_c2r, fft_dim, dim, howmany, cu, &
2214  onembed, ostride, odist, ru, inembed, istride, idist, fftw_estimate)
2215  !write(*,*) ' FFT_PAR_CROSS_PROD: fftw_plan_multi_c2r', fftw_plan_multi_c2r
2216  CALL dfftw_execute(fftw_plan_multi_c2r)
2217 
2218  ! clean up
2219  CALL dfftw_destroy_plan(fftw_plan_multi_c2r)
2220  v1_real_loc(interface_nb,:,:)=ru
2221  END DO
2222 
2223 
2224  ! CHECK IF INTERFACES CROSS EACH OTHER
2225  ! no crossing > interface_ok=1 / crossing > interface_ok=0
2226  interface_ok = 1
2227  IF (nb_field==2) THEN
2228  IF (nb_fluids==2) THEN
2229  RETURN
2230  ELSE
2231  DO interface_nb = 1, nb_fluids-2
2232  DO n1 = 1, 2*m_max_pad-1
2233  DO n2 = 1, bloc_size
2234  IF((0.1d0.LE.v1_real_loc(interface_nb,n1,n2)).AND. &
2235  (v1_real_loc(interface_nb,n1,n2).LE.0.9d0)) THEN
2236  IF(v1_real_loc(interface_nb,n1,n2).LT.v1_real_loc(interface_nb+1,n1,n2)) THEN
2237  interface_ok = 0
2238  RETURN
2239  END IF
2240  END IF
2241  END DO
2242  END DO
2243  END DO
2244  END IF
2245  ELSE
2246  CALL error_petsc('BUG in FFT_CHECK_INTERFACE: pb with V1_in dimensions')
2247  END IF
2248  ! CHECK IF INTERFACE CROSS EACH OTHER
2249 
2250  END SUBROUTINE fft_check_interface
2251 
2252  SUBROUTINE fft_compute_entropy_visc(communicator, communicator_S, V1_in, V2_in, V3_in, V4_in, V5_in, &
2253  hloc_gauss, v1_out, v2_out, v3_out, &
2254  nb_procs, bloc_size, m_max_pad, residual_normalization, l_g, temps)
2255  !FFT (FFT(-1) V1 . FFT(-1) V2) = c_out
2256  !This a de-aliased version of the code, FEB 4, 2011, JLG
2257  USE my_util
2258  USE input_data
2259  IMPLICIT NONE
2260  include 'fftw3.f'
2261  ! Format: V_1in(1:np,1:6,1:m_max_c)
2262  ! INPUT ARE COSINE AND SINE COEFFICIENTS
2263  ! THEY ARE PUT IN COMPLEX FORMAT: c_0 = a_0 + i*0 and c_n = (a_n-i*b_n)/2
2264  REAL(KIND=8), DIMENSION(:,:,:), INTENT(IN) :: V1_in, V2_in, V3_in, V4_in, V5_in
2265  REAL(KIND=8), DIMENSION(:), INTENT(IN) :: hloc_gauss
2266  REAL(KIND=8), DIMENSION(:,:,:), INTENT(OUT) :: V1_out, V2_out, V3_out
2267  REAL(KIND=8), INTENT(IN) :: residual_normalization
2268  INTEGER, INTENT(IN) :: nb_procs, bloc_size, m_max_pad, l_G
2269  REAL(KIND=8), DIMENSION(:), OPTIONAL, INTENT(INOUT) :: temps
2270  COMPLEX(KIND=8), DIMENSION(m_max_pad, SIZE(V1_in,2)*5/2, bloc_size) :: cu
2271  REAL(KIND=8), DIMENSION(2*m_max_pad-1,SIZE(V1_in,2)*5/2,bloc_size) :: ru
2272  REAL(KIND=8), DIMENSION(2*m_max_pad-1,bloc_size) :: visco_entro
2273  REAL(KIND=8), DIMENSION(2*m_max_pad-1,SIZE(V1_in,2)/2,bloc_size) :: prod_ru_1, prod_ru_2, prod_ru_3
2274  COMPLEX(KIND=8), DIMENSION(m_max_pad,SIZE(V1_in,2)/2,bloc_size) :: prod_cu_1, prod_cu_2, prod_cu_3
2275  COMPLEX(KIND=8), DIMENSION(SIZE(V1_in,2)/2,bloc_size) :: intermediate_1
2276  COMPLEX(KIND=8), DIMENSION(SIZE(V1_in,2)/2,bloc_size) :: intermediate_2
2277  COMPLEX(KIND=8), DIMENSION(SIZE(V1_in,2)/2,bloc_size) :: intermediate_3
2278  REAL(KIND=8), DIMENSION(2*m_max_pad-1,bloc_size) :: norm_vel
2279  REAL(KIND=8), DIMENSION(2*m_max_pad-1, bloc_size/l_G) :: norm_vel_int
2280  REAL(KIND=8), DIMENSION(SIZE(V1_in,3),5*SIZE(V1_in,2),bloc_size*nb_procs) :: dist_field
2281  REAL(KIND=8), DIMENSION(SIZE(V1_in,3),5*SIZE(V1_in,2),bloc_size*nb_procs) :: combined_field
2282  COMPLEX(KIND=8), DIMENSION(SIZE(V1_in,2)/2,bloc_size,SIZE(V1_in,3)*nb_procs) :: combined_prod_cu_1
2283  COMPLEX(KIND=8), DIMENSION(SIZE(V1_in,2)/2,bloc_size,SIZE(V1_in,3)*nb_procs) :: combined_prod_cu_2
2284  COMPLEX(KIND=8), DIMENSION(SIZE(V1_in,2)/2,bloc_size,SIZE(V1_in,3)*nb_procs) :: combined_prod_cu_3
2285  COMPLEX(KIND=8), DIMENSION(SIZE(V1_in,2)/2,bloc_size,SIZE(V1_in,3)*nb_procs) :: dist_prod_cu_1
2286  COMPLEX(KIND=8), DIMENSION(SIZE(V1_in,2)/2,bloc_size,SIZE(V1_in,3)*nb_procs) :: dist_prod_cu_2
2287  COMPLEX(KIND=8), DIMENSION(SIZE(V1_in,2)/2,bloc_size,SIZE(V1_in,3)*nb_procs) :: dist_prod_cu_3
2288  REAL(KIND=8), DIMENSION(bloc_size*nb_procs) :: hloc_gauss_tot
2289  INTEGER :: np, np_tot, nb_field, m_max, m_max_c, MPID, N_r_pad
2290  INTEGER(KIND=8) :: fftw_plan_multi_c2r, fftw_plan_multi_r2c
2291  INTEGER :: nb, nf, shiftc, shiftl, jindex, longueur_tranche, i, n, code, rank, l, i_field
2292  REAL(KIND=8) :: t, hh, x
2293  REAL(KIND=8) :: max_norm_vel_loc, max_norm_vel_loc_F, max_norm_vel_tot
2294  ! FFTW parameters
2295  INTEGER :: fft_dim, howmany, istride, ostride, idist, odist
2296  INTEGER, DIMENSION(1) :: dim, inembed, onembed
2297  ! Recall complexes must be rescaled
2298  ! End FFTW parameters
2299 !#include "petsc/finclude/petsc.h"
2300  mpi_comm :: communicator
2301  mpi_comm :: communicator_s
2302  CALL mpi_comm_rank(communicator, rank, code)
2303 
2304  IF (PRESENT(temps)) temps = 0.d0
2305 
2306  np = SIZE(v1_in,1)
2307  nb_field= SIZE(v1_in,2) ! Number of fields
2308  m_max_c = SIZE(v1_in,3) ! Number of complex (cosines + sines) coefficients per point
2309  m_max = m_max_c*nb_procs! Number of comlex coefficients per point per processor
2310  np_tot = nb_procs*bloc_size
2311  n_r_pad=2*m_max_pad-1
2312 
2313  IF (mod(nb_field,2)/=0 .OR. m_max_c==0) THEN
2314  WRITE(*,*) ' BUG '
2315  stop
2316  END IF
2317 
2318  ! Packing all 3 complex components of both v1 and v2 input fields
2319  ! into dist_field, where the dimension indexing the nodal points varies the least rapidly,
2320  ! so that after distributing the data to the processes, each one will obtain a part
2321  ! on nodal points
2322  ! TRANSPOSE pr que la variable i associee aux modes soit la 1ere sur laquelle on va faire la FFT
2323  t = mpi_wtime()
2324 
2325  DO i = 1, m_max_c
2326  dist_field(i, 1:nb_field, 1:np) = transpose(v1_in(:,:,i))
2327  dist_field(i, nb_field+1:2*nb_field,1:np) = transpose(v2_in(:,:,i))
2328  dist_field(i,2*nb_field+1:3*nb_field,1:np) = transpose(v3_in(:,:,i))
2329  dist_field(i,3*nb_field+1:4*nb_field,1:np) = transpose(v4_in(:,:,i))
2330  dist_field(i,4*nb_field+1:5*nb_field,1:np) = transpose(v5_in(:,:,i))
2331  END DO
2332  IF (PRESENT(temps)) temps(3) = temps(3) + mpi_wtime() -t
2333 
2334  !IF (np/=np_tot) dist_field(:,:,np+1:np_tot) = 1.d100
2335  !LC 2016/02/16 (we do a max on vel later)
2336  IF (np/=np_tot) dist_field(:,:,np+1:np_tot) = 0.d0
2337  !LC 2016/02/16
2338 
2339  hloc_gauss_tot(1:np) = hloc_gauss
2340  IF (np/=np_tot) hloc_gauss_tot(np+1:np_tot) = 1.d100
2341 
2342  longueur_tranche=bloc_size*m_max_c*nb_field*5
2343 
2344  t = mpi_wtime()
2345  mpid=mpi_double_precision
2346  CALL mpi_alltoall (dist_field,longueur_tranche, mpid, combined_field, longueur_tranche, &
2347  mpid, communicator, code)
2348  IF (PRESENT(temps)) temps(1) = temps(1) + mpi_wtime() -t
2349 
2350  t = mpi_wtime()
2351  !JLG, FEB 4, 2011
2352  cu = 0.d0
2353  !JLG, FEB 4, 2011
2354  DO n = 1, bloc_size
2355  DO nb = 1, nb_procs
2356  shiftc = (nb-1)*bloc_size
2357  shiftl = (nb-1)*m_max_c
2358  jindex = n + shiftc
2359  DO nf = 1, nb_field*5/2
2360  ! Put real and imaginary parts in a complex
2361  ! nf=1,2,3 => V1_in
2362  ! nf=4,5,6 => V2_in
2363  ! INPUT ARE COSINE AND SINE COEFFICIENTS
2364  ! THEY ARE PUT IN COMPLEX FORMAT: c_0 = a_0 + i*0 and c_n = (a_n-i*b_n)/2
2365  cu(shiftl+1:shiftl+m_max_c,nf,n) = 0.5d0*cmplx(combined_field(:,2*nf-1,jindex),&
2366  -combined_field(:,2*nf,jindex),kind=8)
2367  END DO
2368  END DO
2369  END DO
2370  cu(1,:,:) = 2*cmplx(REAL(cu(1,:,:),KIND=8),0.d0,KIND=8)
2371  !JLG, FEB 4, 2011
2372  !Padding is done by initialization of cu: cu = 0
2373  !This is eequivalent to cu(m_max+1:m_max_pad,:,:) = 0.d0
2374  !JLG, FEB 4, 2011
2375 
2376  IF (PRESENT(temps)) temps(3) = temps(3) + mpi_wtime() -t
2377 
2378  ! Set the parameters for dfftw
2379  fft_dim=1; istride=1; ostride=1;
2380  !JLG, FEB 4, 2011
2381 !!$ idist=N_r; inembed(1)=N_r; DIM(1)=N_r
2382 !!$ odist=m_max; onembed(1)=m_max
2383  idist=n_r_pad; inembed(1)=n_r_pad; dim(1)=n_r_pad
2384  odist=m_max_pad; onembed(1)=m_max_pad
2385  !JLG, FEB 4, 2011
2386 
2387  howmany=bloc_size*nb_field*5/2
2388 
2389  t = mpi_wtime()
2390  CALL dfftw_plan_many_dft_c2r(fftw_plan_multi_c2r, fft_dim, dim, howmany, cu, &
2391  onembed, ostride, odist, ru, inembed, istride, idist, fftw_estimate)
2392  !write(*,*) ' FFT_PAR_DOT_PROD: fftw_plan_multi_c2r', fftw_plan_multi_c2r
2393  CALL dfftw_execute(fftw_plan_multi_c2r)
2394 
2395  ! clean up
2396  CALL dfftw_destroy_plan(fftw_plan_multi_c2r)
2397 
2398  !===ru(:,1:3,:) is velocity
2399  !===ru(:,4:6,:) is res_ns
2400  !===ru(:,7:9,:) is first line of velocity gradient
2401  !===ru(:,10:12,:) is second line of velocity gradient
2402  !===ru(:,10:12,:) is third line of velocity gradient
2403 
2404  !===Compute local velocity
2405  norm_vel(:,:) = sqrt(ru(:,1,:)**2 + ru(:,2,:)**2 + ru(:,3,:)**2)
2406  DO i = 1, 2*m_max_pad - 1
2407  DO l = 1, bloc_size/l_g
2408  x = maxval(norm_vel(i,(l-1)*l_g+1:l*l_g))
2409  norm_vel_int(i,l) = x
2410  END DO
2411  END DO
2412  IF (2*m_max_pad - 1 .GE. 3) THEN
2413  DO l = 1, bloc_size/l_g
2414  DO i = 2, 2*m_max_pad - 2
2415  norm_vel(i,(l-1)*l_g+1:l*l_g) = maxval(norm_vel_int(i-1:i+1,l))
2416  END DO
2417  norm_vel(1,(l-1)*l_g+1:l*l_g) = max(norm_vel_int(1,l),norm_vel_int(2,l),norm_vel_int(2*m_max_pad - 1,l))
2418  norm_vel(2*m_max_pad - 1,(l-1)*l_g+1:l*l_g) = &
2419  max(norm_vel_int(2*m_max_pad - 2,l),norm_vel_int(2*m_max_pad - 1,l),norm_vel_int(1,l))
2420  END DO
2421  ELSE
2422  DO l = 1, bloc_size/l_g
2423  norm_vel(1,(l-1)*l_g+1:l*l_g) = norm_vel_int(1,l)
2424  END DO
2425  END IF
2426  !===End compute local velocity
2427 
2428  !==Compute maximum of velocity
2429  max_norm_vel_loc=maxval(norm_vel)
2430  CALL mpi_allreduce(max_norm_vel_loc,max_norm_vel_loc_f,1,mpi_double_precision, mpi_max, communicator, code)
2431  CALL mpi_allreduce(max_norm_vel_loc_f,max_norm_vel_tot,1,mpi_double_precision, mpi_max, communicator_s, code)
2432  !==End compute maximum of velocity
2433 
2434  !===Compute entropy viscosity
2435  DO n = 1, bloc_size
2436  hh = hloc_gauss_tot(n+rank*bloc_size)
2437  visco_entro(:,n) = -inputs%LES_coeff1 + min(inputs%LES_coeff4*norm_vel(:,n), &
2438  inputs%LES_coeff2*hh*abs(ru(:,1,n)*ru(:,4,n) + ru(:,2,n)*ru(:,5,n) + ru(:,3,n)*ru(:,6,n))&
2439  /(residual_normalization+1.d-14))
2440  END DO
2441  !===End compute entropy viscosity
2442 
2443  !===Compute visco_entro times gradient(velocity)
2444  prod_ru_1(:,1,:) = visco_entro*ru(:,7,:)
2445  prod_ru_1(:,2,:) = visco_entro*ru(:,8,:)
2446  prod_ru_1(:,3,:) = visco_entro*ru(:,9,:)
2447 
2448  prod_ru_2(:,1,:) = visco_entro*ru(:,10,:)
2449  prod_ru_2(:,2,:) = visco_entro*ru(:,11,:)
2450  prod_ru_2(:,3,:) = visco_entro*ru(:,12,:)
2451 
2452  prod_ru_3(:,1,:) = visco_entro*ru(:,13,:)
2453  prod_ru_3(:,2,:) = visco_entro*ru(:,14,:)
2454  prod_ru_3(:,3,:) = visco_entro*ru(:,15,:)
2455  !===End compute visco_entro times gradient(velocity)
2456 
2457  howmany = bloc_size*nb_field/2
2458  CALL dfftw_plan_many_dft_r2c(fftw_plan_multi_r2c, fft_dim, dim, howmany, prod_ru_1, &
2459  inembed, istride, idist, prod_cu_1, onembed, ostride, odist, fftw_estimate)
2460  !write(*,*) ' FFT_PAR_DOT_PROD: fftw_plan_multi_r2c', fftw_plan_multi_r2c
2461  CALL dfftw_execute(fftw_plan_multi_r2c)
2462  ! clean up
2463  CALL dfftw_destroy_plan(fftw_plan_multi_r2c)
2464 
2465  CALL dfftw_plan_many_dft_r2c(fftw_plan_multi_r2c, fft_dim, dim, howmany, prod_ru_2, &
2466  inembed, istride, idist, prod_cu_2, onembed, ostride, odist, fftw_estimate)
2467  !write(*,*) ' FFT_PAR_DOT_PROD: fftw_plan_multi_r2c', fftw_plan_multi_r2c
2468  CALL dfftw_execute(fftw_plan_multi_r2c)
2469  ! clean up
2470  CALL dfftw_destroy_plan(fftw_plan_multi_r2c)
2471 
2472  CALL dfftw_plan_many_dft_r2c(fftw_plan_multi_r2c, fft_dim, dim, howmany, prod_ru_3, &
2473  inembed, istride, idist, prod_cu_3, onembed, ostride, odist, fftw_estimate)
2474  !write(*,*) ' FFT_PAR_DOT_PROD: fftw_plan_multi_r2c', fftw_plan_multi_r2c
2475  CALL dfftw_execute(fftw_plan_multi_r2c)
2476  ! clean up
2477  CALL dfftw_destroy_plan(fftw_plan_multi_r2c)
2478 
2479  prod_cu_1 = prod_cu_1*(1.d0/n_r_pad) !Scaling
2480  prod_cu_2 = prod_cu_2*(1.d0/n_r_pad) !Scaling
2481  prod_cu_3 = prod_cu_3*(1.d0/n_r_pad) !Scaling
2482 
2483  IF (PRESENT(temps)) temps(2) = temps(2) + mpi_wtime() -t
2484 
2485  !Now we need to redistribute the Fourier coefficients on each processor
2486  t = mpi_wtime()
2487  combined_prod_cu_1(:,:,1)=prod_cu_1(1,:,:)
2488  combined_prod_cu_2(:,:,1)=prod_cu_2(1,:,:)
2489  combined_prod_cu_3(:,:,1)=prod_cu_3(1,:,:)
2490 
2491  DO n=2, m_max
2492  combined_prod_cu_1(:,:,n)=2*conjg(prod_cu_1(n,:,:))
2493  combined_prod_cu_2(:,:,n)=2*conjg(prod_cu_2(n,:,:))
2494  combined_prod_cu_3(:,:,n)=2*conjg(prod_cu_3(n,:,:))
2495  END DO
2496 
2497  IF (PRESENT(temps)) temps(3) = temps(3) + mpi_wtime() -t
2498 
2499  t = mpi_wtime()
2500  longueur_tranche=bloc_size*m_max_c*nb_field
2501  mpid=mpi_double_precision
2502  CALL mpi_alltoall (combined_prod_cu_1,longueur_tranche,mpid, dist_prod_cu_1,longueur_tranche, &
2503  mpid,communicator,code)
2504  CALL mpi_alltoall (combined_prod_cu_2,longueur_tranche,mpid, dist_prod_cu_2,longueur_tranche, &
2505  mpid,communicator,code)
2506  CALL mpi_alltoall (combined_prod_cu_3,longueur_tranche,mpid, dist_prod_cu_3,longueur_tranche, &
2507  mpid,communicator,code)
2508  IF (PRESENT(temps)) temps(1) = temps(1) + mpi_wtime() -t
2509 
2510  t = mpi_wtime()
2511  DO i = 1, m_max_c
2512  DO nb = 1, nb_procs
2513  shiftc = (nb-1)*bloc_size
2514  shiftl = (nb-1)*m_max_c
2515  intermediate_1 = dist_prod_cu_1(:,:,shiftl+i)
2516  intermediate_2 = dist_prod_cu_2(:,:,shiftl+i)
2517  intermediate_3 = dist_prod_cu_3(:,:,shiftl+i)
2518  DO n = 1, bloc_size
2519  IF (n+shiftc > np ) cycle
2520  DO i_field = 1, nb_field/2
2521  v1_out(n+shiftc, i_field*2-1, i) = REAL (intermediate_1(i_field,n),KIND=8)
2522  v1_out(n+shiftc, i_field*2 , i) = aimag(intermediate_1(i_field,n))
2523  v2_out(n+shiftc, i_field*2-1, i) = REAL (intermediate_2(i_field,n),KIND=8)
2524  v2_out(n+shiftc, i_field*2 , i) = aimag(intermediate_2(i_field,n))
2525  v3_out(n+shiftc, i_field*2-1, i) = REAL (intermediate_3(i_field,n),KIND=8)
2526  v3_out(n+shiftc, i_field*2 , i) = aimag(intermediate_3(i_field,n))
2527  END DO
2528  END DO
2529  END DO
2530  END DO
2531  IF (PRESENT(temps)) temps(3) = temps(3) + mpi_wtime() -t
2532 
2533  END SUBROUTINE fft_compute_entropy_visc
2534 
2535  SUBROUTINE fft_compute_entropy_visc_mom(communicator,communicator_S, V1_in, V2_in, V3_in, c1_in, hloc_gauss, &
2536  c1_real_out, nb_procs, bloc_size, m_max_pad, l_g, opt_c2_real_out, temps)
2537  !FFT (FFT(-1) V1 . FFT(-1) V2) = c_out
2538  !This a de-aliased version of the code, FEB 4, 2011, JLG
2539  USE my_util
2540  USE input_data
2541  IMPLICIT NONE
2542  include 'fftw3.f'
2543  ! Format: V_1in(1:np,1:6,1:m_max_c)
2544  ! INPUT ARE COSINE AND SINE COEFFICIENTS
2545  ! THEY ARE PUT IN COMPLEX FORMAT: c_0 = a_0 + i*0 and c_n = (a_n-i*b_n)/2
2546  REAL(KIND=8), DIMENSION(:,:,:), INTENT(IN) :: V1_in, V2_in, V3_in
2547  REAL(KIND=8), DIMENSION(:,:,:), INTENT(IN) :: c1_in
2548  REAL(KIND=8), DIMENSION(:), INTENT(IN) :: hloc_gauss
2549  REAL(KIND=8), DIMENSION(:,:), INTENT(OUT) :: c1_real_out
2550  INTEGER, INTENT(IN) :: nb_procs, bloc_size, m_max_pad, l_G
2551  REAL(KIND=8), DIMENSION(:,:), OPTIONAL, INTENT(OUT) :: opt_c2_real_out
2552  REAL(KIND=8), DIMENSION(:), OPTIONAL, INTENT(INOUT) :: temps
2553  REAL(KIND=8), DIMENSION(SIZE(V1_in,3),3*SIZE(V1_in,2)+SIZE(c1_in,2),bloc_size*nb_procs) :: dist_field
2554  REAL(KIND=8), DIMENSION(SIZE(V1_in,3),3*SIZE(V1_in,2)+SIZE(c1_in,2),bloc_size*nb_procs) :: combined_field
2555  COMPLEX(KIND=8), DIMENSION(m_max_pad, (3*SIZE(V1_in,2)+SIZE(c1_in,2))/2, bloc_size) :: cu
2556  REAL(KIND=8), DIMENSION(2*m_max_pad-1,(3*SIZE(V1_in,2)+SIZE(c1_in,2))/2,bloc_size) :: ru
2557  REAL(KIND=8), DIMENSION(2*m_max_pad-1,bloc_size) :: norm_vel, norm_mom
2558  REAL(KIND=8), DIMENSION(2*m_max_pad-1, bloc_size/l_G) :: norm_vel_int, norm_mom_int
2559  REAL(KIND=8), DIMENSION(bloc_size*nb_procs) :: hloc_gauss_tot
2560  REAL(KIND=8), DIMENSION(2*m_max_pad-1,bloc_size) :: visco_entro
2561  INTEGER :: np, np_tot, nb_field, nb_field2, m_max, m_max_c, MPID, N_r_pad
2562  INTEGER(KIND=8) :: fftw_plan_multi_c2r
2563  INTEGER :: nb, nf, shiftc, shiftl, jindex, longueur_tranche, i, n, code, rank, l
2564 ! REAL(KIND=8) :: t, hh, x, max_vel_loc, max_vel_F, max_vel_tot
2565 ! CN 21/01/2018
2566  REAL(KIND=8) :: t, hh, x, max_vel_loc, max_vel_F, max_vel_tot, max_mom_loc, max_mom_F, max_mom_tot, max_v_m
2567  ! FFTW parameters
2568  INTEGER :: fft_dim, howmany, istride, ostride, idist, odist
2569  INTEGER, DIMENSION(1) :: dim, inembed, onembed
2570  ! Recall complexes must be rescaled
2571  ! End FFTW parameters
2572 !#include "petsc/finclude/petsc.h"
2573  mpi_comm :: communicator
2574  mpi_comm :: communicator_s
2575 
2576  CALL mpi_comm_rank(communicator, rank, code)
2577 
2578  IF (PRESENT(temps)) temps = 0.d0
2579 
2580  np = SIZE(v1_in,1)
2581  nb_field = SIZE(v1_in,2) ! Number of fields for vector
2582  nb_field2 = SIZE(c1_in,2) ! Number of fields for scalar
2583  m_max_c = SIZE(v1_in,3) ! Number of complex (cosines + sines) coefficients per point
2584  m_max = m_max_c*nb_procs! Number of comlex coefficients per point per processor
2585  np_tot = nb_procs*bloc_size
2586  n_r_pad = 2*m_max_pad-1
2587 
2588  IF (mod(nb_field,2)/=0 .OR. m_max_c==0) THEN
2589  WRITE(*,*) ' BUG '
2590  stop
2591  END IF
2592 
2593  ! Packing all 3 complex components of both v1 and v2 input fields
2594  ! into dist_field, where the dimension indexing the nodal points varies the least rapidly,
2595  ! so that after distributing the data to the processes, each one will obtain a part
2596  ! on nodal points
2597  ! TRANSPOSE pr que la variable i associee aux modes soit la 1ere sur laquelle on va faire la FFT
2598  t = mpi_wtime()
2599 
2600  DO i = 1, m_max_c
2601  dist_field(i, 1:nb_field, 1:np) = transpose(v1_in(:,:,i))
2602  dist_field(i, nb_field+1:2*nb_field,1:np) = transpose(v2_in(:,:,i))
2603  dist_field(i,2*nb_field+1:3*nb_field,1:np) = transpose(v3_in(:,:,i))
2604  dist_field(i,3*nb_field+1:3*nb_field+nb_field2,1:np) = transpose(c1_in(:,:,i))
2605  END DO
2606  IF (PRESENT(temps)) temps(3) = temps(3) + mpi_wtime() -t
2607 
2608  IF (np/=np_tot) dist_field(:,:,np+1:np_tot) = 0.d0
2609 
2610  hloc_gauss_tot(1:np) = hloc_gauss
2611  IF (np/=np_tot) hloc_gauss_tot(np+1:np_tot) = 1.d100
2612 
2613  longueur_tranche=bloc_size*m_max_c*(nb_field*3+nb_field2)
2614 
2615  t = mpi_wtime()
2616  mpid=mpi_double_precision
2617  CALL mpi_alltoall (dist_field,longueur_tranche, mpid, combined_field, longueur_tranche, &
2618  mpid, communicator, code)
2619  IF (PRESENT(temps)) temps(1) = temps(1) + mpi_wtime() -t
2620 
2621  t = mpi_wtime()
2622  !JLG, FEB 4, 2011
2623  cu = 0.d0
2624  !JLG, FEB 4, 2011
2625  DO n = 1, bloc_size
2626  DO nb = 1, nb_procs
2627  shiftc = (nb-1)*bloc_size
2628  shiftl = (nb-1)*m_max_c
2629  jindex = n + shiftc
2630  DO nf = 1, (3*nb_field+nb_field2)/2
2631  ! Put real and imaginary parts in a complex
2632  ! nf=1,2,3 => V1_in
2633  ! nf=4,5,6 => V2_in
2634  ! INPUT ARE COSINE AND SINE COEFFICIENTS
2635  ! THEY ARE PUT IN COMPLEX FORMAT: c_0 = a_0 + i*0 and c_n = (a_n-i*b_n)/2
2636  cu(shiftl+1:shiftl+m_max_c,nf,n) = 0.5d0*cmplx(combined_field(:,2*nf-1,jindex),&
2637  -combined_field(:,2*nf,jindex),kind=8)
2638  END DO
2639  END DO
2640  END DO
2641  cu(1,:,:) = 2*cmplx(REAL(cu(1,:,:),KIND=8),0.d0,KIND=8)
2642  !JLG, FEB 4, 2011
2643  !Padding is done by initialization of cu: cu = 0
2644  !This is eequivalent to cu(m_max+1:m_max_pad,:,:) = 0.d0
2645  !JLG, FEB 4, 2011
2646 
2647  IF (PRESENT(temps)) temps(3) = temps(3) + mpi_wtime() -t
2648 
2649  ! Set the parameters for dfftw
2650  fft_dim=1; istride=1; ostride=1;
2651  !JLG, FEB 4, 2011
2652  idist=n_r_pad; inembed(1)=n_r_pad; dim(1)=n_r_pad
2653  odist=m_max_pad; onembed(1)=m_max_pad
2654  !JLG, FEB 4, 2011
2655 
2656  howmany=bloc_size*(nb_field*3+nb_field2)/2
2657 
2658  t = mpi_wtime()
2659  CALL dfftw_plan_many_dft_c2r(fftw_plan_multi_c2r, fft_dim, dim, howmany, cu, &
2660  onembed, ostride, odist, ru, inembed, istride, idist, fftw_estimate)
2661  !write(*,*) ' FFT_PAR_DOT_PROD: fftw_plan_multi_c2r', fftw_plan_multi_c2r
2662  CALL dfftw_execute(fftw_plan_multi_c2r)
2663 
2664  ! clean up
2665  CALL dfftw_destroy_plan(fftw_plan_multi_c2r)
2666 
2667  !===ru(:,1:3,:) is velocity
2668  !===ru(:,4:6,:) is momentum
2669  !===ru(:,7:9,:) is res_ns
2670  !===ru(:,10,:) is res_mass
2671 
2672  !===Compute norm_vel and norm_mom
2673  norm_vel(:,:) = sqrt(ru(:,1,:)**2 + ru(:,2,:)**2 + ru(:,3,:)**2)
2674  norm_mom(:,:) = sqrt(ru(:,4,:)**2 + ru(:,5,:)**2 + ru(:,6,:)**2)
2675  DO i = 1, 2*m_max_pad - 1
2676  DO l = 1, bloc_size/l_g
2677  x = maxval(norm_vel(i,(l-1)*l_g+1:l*l_g))
2678  norm_vel_int(i,l) = x
2679  x = maxval(norm_mom(i,(l-1)*l_g+1:l*l_g))
2680  norm_mom_int(i,l) = x
2681  END DO
2682  END DO
2683  IF (2*m_max_pad - 1 .GE. 3) THEN
2684  DO l = 1, bloc_size/l_g
2685  DO i = 2, 2*m_max_pad - 2
2686  norm_vel(i,(l-1)*l_g+1:l*l_g) = maxval(norm_vel_int(i-1:i+1,l))
2687  norm_mom(i,(l-1)*l_g+1:l*l_g) = maxval(norm_mom_int(i-1:i+1,l))
2688  END DO
2689  norm_vel(1,(l-1)*l_g+1:l*l_g) = max(norm_vel_int(1,l),norm_vel_int(2,l),norm_vel_int(2*m_max_pad - 1,l))
2690  norm_vel(2*m_max_pad - 1,(l-1)*l_g+1:l*l_g) = &
2691  max(norm_vel_int(2*m_max_pad - 2,l),norm_vel_int(2*m_max_pad - 1,l),norm_vel_int(1,l))
2692  norm_mom(1,(l-1)*l_g+1:l*l_g) = max(norm_mom_int(1,l),norm_mom_int(2,l),norm_mom_int(2*m_max_pad - 1,l))
2693  norm_mom(2*m_max_pad - 1,(l-1)*l_g+1:l*l_g) = &
2694  max(norm_mom_int(2*m_max_pad - 2,l),norm_mom_int(2*m_max_pad - 1,l),norm_mom_int(1,l))
2695  END DO
2696  ELSE
2697  DO l = 1, bloc_size/l_g
2698  norm_vel(1,(l-1)*l_g+1:l*l_g) = norm_vel_int(1,l)
2699  norm_mom(1,(l-1)*l_g+1:l*l_g) = norm_mom_int(1,l)
2700  END DO
2701  END IF
2702  !===End compute norm_vel and norm_mom
2703 
2704  !===Compute MAX(norm_vel) on domain
2705  max_vel_loc=maxval(norm_vel)
2706  CALL mpi_allreduce(max_vel_loc,max_vel_f, 1, mpi_double_precision, &
2707  mpi_max, communicator, code)
2708  CALL mpi_allreduce(max_vel_f, max_vel_tot, 1, mpi_double_precision, &
2709  mpi_max, communicator_s, code)
2710 ! !===End compute MAX(norm_vel) on domain
2711 ! CN 21/01/2018
2712  max_mom_loc=maxval(norm_mom)
2713  CALL mpi_allreduce(max_mom_loc,max_mom_f, 1, mpi_double_precision, &
2714  mpi_max, communicator, code)
2715  CALL mpi_allreduce(max_mom_f, max_mom_tot, 1, mpi_double_precision, &
2716  mpi_max, communicator_s, code)
2717  max_v_m = max_vel_tot*max_mom_tot
2718  !===End compute MAX(norm_vel), MAX(norm_mom) on domain
2719 
2720  !===Compute entropy viscosities for momentum and level set equations
2721  DO n = 1, bloc_size
2722  hh = hloc_gauss_tot(n+rank*bloc_size)
2723 
2724  visco_entro(:,n) = inputs%LES_coeff2*hh* &
2725  max(abs(ru(:,1,n)*ru(:,7,n) + ru(:,2,n)*ru(:,8,n) + ru(:,3,n)*ru(:,9,n)), &
2726  abs(ru(:,10,n)*(ru(:,1,n)**2 + ru(:,2,n)**2 + ru(:,3,n)**2)))
2727  END DO
2728 
2729  !JLG Sept 21 2016
2730  !===Average entropy viscosity
2731  IF (2*m_max_pad - 1 .GE. 3) THEN
2732  DO l = 1, bloc_size/l_g
2733  DO i = 2, 2*m_max_pad - 2
2734  norm_vel_int(i,l) = maxval(visco_entro(i-1:i+1,(l-1)*l_g+1:l*l_g))
2735  END DO
2736  norm_vel_int(1,l) = max(maxval(visco_entro(1,(l-1)*l_g+1:l*l_g)),maxval(visco_entro(2,(l-1)*l_g+1:l*l_g)),&
2737  maxval(visco_entro(2*m_max_pad - 1,(l-1)*l_g+1:l*l_g)))
2738  norm_vel_int(2*m_max_pad - 1,l) = max(maxval(visco_entro(2*m_max_pad - 2,(l-1)*l_g+1:l*l_g)),&
2739  maxval(visco_entro(2*m_max_pad - 1,(l-1)*l_g+1:l*l_g)),maxval(visco_entro(1,(l-1)*l_g+1:l*l_g)))
2740  END DO
2741  ELSE
2742  DO l = 1, bloc_size/l_g
2743  norm_vel_int(1,l) = maxval(visco_entro(1,(l-1)*l_g+1:l*l_g))
2744  END DO
2745  END IF
2746  DO i = 1, 2*m_max_pad - 1
2747  DO l = 1, bloc_size/l_g
2748  visco_entro(i,(l-1)*l_g+1:l*l_g)=norm_vel_int(i,l)
2749  END DO
2750  END DO
2751  !===End Average entropy viscosity
2752  !End JLG Sept 21 2016
2753 
2754  DO n = 1, bloc_size
2755  IF (l_g==3) THEN !===Level set viscosity
2756  c1_real_out(:,n) = min(4*inputs%LES_coeff4*max_vel_tot, &
2757  16*visco_entro(:,n)/(norm_vel(:,n)*norm_mom(:,n)+1.d-30))
2758  ELSE !===Momentum viscosity
2759  c1_real_out(:,n) = min(inputs%LES_coeff4*norm_vel(:,n), &
2760  visco_entro(:,n)/(norm_vel(:,n)*norm_mom(:,n)+1.d-30))
2761  IF (PRESENT(opt_c2_real_out)) THEN
2762  opt_c2_real_out(:,n) = min(inputs%LES_coeff4*max_vel_tot, &
2763  visco_entro(:,n)/(norm_vel(:,n)*norm_mom(:,n)+1.d-30))
2764  END IF
2765  END IF
2766  END DO
2767  !===End compute entropy viscosities for momentum and level set equations
2768 
2769  IF (PRESENT(temps)) temps(3) = temps(3) + mpi_wtime() -t
2770 
2771  END SUBROUTINE fft_compute_entropy_visc_mom
2772 
2773  SUBROUTINE fft_compute_diffu_mom(communicator, V1_in, V2_in, V3_in, &
2774  v1_out, v2_out, nb_procs, bloc_size, m_max_pad, temps)
2775  !This a de-aliased version of the code, FEB 4, 2011, JLG
2776  USE my_util
2777  USE input_data
2778  IMPLICIT NONE
2779  include 'fftw3.f'
2780  ! Format: V_1in(1:np,1:6,1:m_max_c)
2781  ! INPUT ARE COSINE AND SINE COEFFICIENTS
2782  ! THEY ARE PUT IN COMPLEX FORMAT: c_0 = a_0 + i*0 and c_n = (a_n-i*b_n)/2
2783  REAL(KIND=8), DIMENSION(:,:,:), INTENT(IN) :: V1_in, V2_in !vectors
2784  REAL(KIND=8), DIMENSION(:,:,:), INTENT(IN) :: V3_in !scalar
2785  REAL(KIND=8), DIMENSION(:,:,:), INTENT(OUT) :: V1_out
2786  REAL(KIND=8), DIMENSION(:,:,:), INTENT(OUT) :: V2_out
2787  REAL(KIND=8), DIMENSION(:), OPTIONAL, INTENT(INOUT) :: temps
2788  INTEGER, INTENT(IN) :: bloc_size, m_max_pad, nb_procs
2789  INTEGER :: np, np_tot, nb_field1, nb_field2, nb_field3
2790  INTEGER :: m_max, m_max_c, MPID, N_r_pad
2791  INTEGER(KIND=8) :: fftw_plan_multi_c2r, fftw_plan_multi_r2c
2792  REAL(KIND=8), DIMENSION(SIZE(V1_in,3),2*SIZE(V1_in,2)+SIZE(V3_in,2), bloc_size*nb_procs) :: dist_field
2793  REAL(KIND=8), DIMENSION(SIZE(V1_in,3),2*SIZE(V1_in,2)+SIZE(V3_in,2), bloc_size*nb_procs) :: combined_field
2794  COMPLEX(KIND=8), DIMENSION(m_max_pad, (2*SIZE(V1_in,2)+SIZE(V3_in,2))/2, bloc_size) :: cu
2795  REAL(KIND=8), DIMENSION(2*m_max_pad-1,(2*SIZE(V1_in,2)+SIZE(V3_in,2))/2,bloc_size) :: ru
2796  REAL(KIND=8), DIMENSION(2*m_max_pad-1,SIZE(V1_in,2)/2,bloc_size) :: prod_ru_1, prod_ru_2
2797  COMPLEX(KIND=8), DIMENSION(m_max_pad,SIZE(V1_in,2)/2,bloc_size) :: prod_cu_1, prod_cu_2
2798  COMPLEX(KIND=8), DIMENSION(SIZE(V1_in,2)/2,bloc_size) :: intermediate_1, intermediate_2
2799  COMPLEX(KIND=8), DIMENSION(SIZE(V1_in,2)/2,bloc_size,SIZE(V1_in,3)*nb_procs) :: combined_prod_cu_1
2800  COMPLEX(KIND=8), DIMENSION(SIZE(V1_in,2)/2,bloc_size,SIZE(V1_in,3)*nb_procs) :: combined_prod_cu_2
2801  COMPLEX(KIND=8), DIMENSION(SIZE(V1_in,2)/2,bloc_size,SIZE(V1_in,3)*nb_procs) :: dist_prod_cu_1
2802  COMPLEX(KIND=8), DIMENSION(SIZE(V1_in,2)/2,bloc_size,SIZE(V1_in,3)*nb_procs) :: dist_prod_cu_2
2803  INTEGER :: i_field, rank
2804  INTEGER :: nb, nf, shiftc, shiftl, jindex, longueur_tranche, i, n, code
2805  REAL(KIND=8) :: t
2806  ! FFTW parameters
2807  INTEGER :: fft_dim, howmany, istride, ostride, idist, odist
2808  INTEGER, DIMENSION(1) :: dim, inembed, onembed
2809  ! Recall complexes must be rescaled
2810  ! End FFTW parameters
2811 !#include "petsc/finclude/petsc.h"
2812  mpi_comm :: communicator
2813  petscerrorcode :: ierr
2814 
2815  IF (PRESENT(temps)) temps = 0.d0
2816 
2817  CALL mpi_comm_rank(communicator, rank, ierr)
2818 
2819  np = SIZE(v1_in,1)
2820  nb_field1= SIZE(v1_in,2) ! Number of fields for vector
2821  nb_field2= SIZE(v2_in,2) ! Number of fields
2822  nb_field3= SIZE(v3_in,2) ! Number of fields
2823  m_max_c = SIZE(v1_in,3) ! Number of complex (cosines + sines) coefficients per point
2824  m_max = m_max_c*nb_procs! Number of comlex coefficients per point per processor
2825  n_r_pad = 2*m_max_pad-1
2826  np_tot = nb_procs*bloc_size
2827 
2828  IF (mod(nb_field1,2)/=0 .OR. mod(nb_field2,2)/=0 .OR. m_max_c==0) THEN
2829  WRITE(*,*) ' BUG '
2830  stop
2831  END IF
2832 
2833  ! Bloc_size is the number of points that are handled by one processor
2834  ! once the Fourier modes are all collected
2835  ! Computation of bloc_size and np_tot
2836  ! fin de la repartition des points
2837 
2838  ! Packing all 3 complex components of both v1 and v2 input fields
2839  ! into dist_field, where the dimension indexing the nodal points varies the least rapidly,
2840  ! so that after distributing the data to the processes, each one will obtain a part
2841  ! on nodal points
2842  ! TRANSPOSE pr que la variable i associee aux modes soit la 1ere sur laquelle on va faire la FFT
2843  t = mpi_wtime()
2844 
2845  DO i = 1, m_max_c
2846  dist_field(i,1:nb_field1,1:np) = transpose(v1_in(:,:,i))
2847  dist_field(i,nb_field1+1:2*nb_field1,1:np) = transpose(v2_in(:,:,i))
2848  dist_field(i,2*nb_field1+1:2*nb_field1+nb_field3,1:np) = transpose(v3_in(:,:,i))
2849  END DO
2850  IF (PRESENT(temps)) temps(3) = temps(3) + mpi_wtime() -t
2851 
2852  IF (np/=np_tot) dist_field(:,:,np+1:np_tot) = 0.d0
2853 
2854  longueur_tranche=bloc_size*m_max_c*(nb_field1+nb_field2+nb_field3)
2855 
2856  t = mpi_wtime()
2857  mpid=mpi_double_precision
2858  CALL mpi_alltoall (dist_field,longueur_tranche, mpid, combined_field, longueur_tranche, &
2859  mpid, communicator, code)
2860  IF (PRESENT(temps)) temps(1) = temps(1) + mpi_wtime() -t
2861 
2862  t = mpi_wtime()
2863  !JLG, FEB 4, 2011
2864  cu = 0.d0
2865  !JLG, FEB 4, 2011
2866  DO n = 1, bloc_size
2867  DO nb = 1, nb_procs
2868  shiftc = (nb-1)*bloc_size
2869  shiftl = (nb-1)*m_max_c
2870  jindex = n + shiftc
2871  DO nf = 1, (nb_field1+nb_field2+nb_field3)/2
2872  ! Put real and imaginary parts in a complex
2873  ! nf=1,2,3 => V1_in
2874  ! nf=4 => V2_in
2875  ! INPUT ARE COSINE AND SINE COEFFICIENTS
2876  ! THEY ARE PUT IN COMPLEX FORMAT: c_0 = a_0 + i*0 and c_n = (a_n-i*b_n)/2
2877  cu(shiftl+1:shiftl+m_max_c,nf,n) = 0.5d0*cmplx(combined_field(:,2*nf-1,jindex),&
2878  -combined_field(:,2*nf,jindex),kind=8)
2879  END DO
2880  END DO
2881  END DO
2882  cu(1,:,:) = 2*cmplx(REAL(cu(1,:,:),KIND=8),0.d0,KIND=8)
2883  !JLG, FEB 4, 2011
2884  !Padding is done by initialization of cu: cu = 0
2885  !This is eequivalent to cu(m_max+1:m_max_pad,:,:) = 0.d0
2886  !JLG, FEB 4, 2011
2887 
2888  IF (PRESENT(temps)) temps(3) = temps(3) + mpi_wtime() -t
2889 
2890  ! Set the parameters for dfftw
2891  fft_dim=1; istride=1; ostride=1;
2892  !JLG, FEB 4, 2011
2893  !idist=N_r; inembed(1)=N_r; DIM(1)=N_r
2894  !odist=m_max; onembed(1)=m_max
2895  idist=n_r_pad; inembed(1)=n_r_pad; dim(1)=n_r_pad
2896  odist=m_max_pad; onembed(1)=m_max_pad
2897  !JLG, FEB 4, 2011
2898 
2899  howmany=bloc_size*(nb_field1+nb_field2+nb_field3)/2
2900 
2901  t = mpi_wtime()
2902  CALL dfftw_plan_many_dft_c2r(fftw_plan_multi_c2r, fft_dim, dim, howmany, cu, &
2903  onembed, ostride, odist, ru, inembed, istride, idist, fftw_estimate)
2904  !write(*,*) ' FFT_PAR_CROSS_PROD: fftw_plan_multi_c2r', fftw_plan_multi_c2r
2905  CALL dfftw_execute(fftw_plan_multi_c2r)
2906 
2907  ! clean up
2908  CALL dfftw_destroy_plan(fftw_plan_multi_c2r)
2909 
2910  !===ru(:,1:3,:) is first line of strain rate tensor
2911  !===ru(:,4:6,:) is 2nd line/column 2 and 3 of strain rate tensor + 3rd line/3rd column
2912  !===ru(:,7,:) is dynamical viscosity
2913 
2914  IF (nb_field1==6 .AND. nb_field2==6 .AND. nb_field3==2) THEN
2915  !===Compute visc_dyn times strain rate tensor
2916  prod_ru_1(:,1,:) = ru(:,7,:)*ru(:,1,:)
2917  prod_ru_1(:,2,:) = ru(:,7,:)*ru(:,2,:)
2918  prod_ru_1(:,3,:) = ru(:,7,:)*ru(:,3,:)
2919 
2920  prod_ru_2(:,1,:) = ru(:,7,:)*ru(:,4,:)
2921  prod_ru_2(:,2,:) = ru(:,7,:)*ru(:,5,:)
2922  prod_ru_2(:,3,:) = ru(:,7,:)*ru(:,6,:)
2923  !===End Compute visc_dyn times strain rate tensor
2924  ELSE
2925  CALL error_petsc('error on inputs while calling FFT_COMPUTE_DIFFU_MOM')
2926  END IF
2927 
2928  howmany = bloc_size*nb_field1/2 !vector
2929  CALL dfftw_plan_many_dft_r2c(fftw_plan_multi_r2c, fft_dim, dim, howmany, prod_ru_1, &
2930  inembed, istride, idist, prod_cu_1, onembed, ostride, odist, fftw_estimate)
2931  CALL dfftw_execute(fftw_plan_multi_r2c)
2932  ! clean up
2933  CALL dfftw_destroy_plan(fftw_plan_multi_r2c)
2934 
2935  howmany = bloc_size*nb_field1/2 !vector
2936  CALL dfftw_plan_many_dft_r2c(fftw_plan_multi_r2c, fft_dim, dim, howmany, prod_ru_2, &
2937  inembed, istride, idist, prod_cu_2, onembed, ostride, odist, fftw_estimate)
2938  CALL dfftw_execute(fftw_plan_multi_r2c)
2939  ! clean up
2940  CALL dfftw_destroy_plan(fftw_plan_multi_r2c)
2941 
2942  !JLG, FEB 4, 2011
2943  !prod_cu = prod_cu/N_r !Scaling
2944  prod_cu_1 = prod_cu_1*(1.d0/n_r_pad) !Scaling
2945  prod_cu_2 = prod_cu_2*(1.d0/n_r_pad) !Scaling
2946  !JLG, FEB 4, 2011
2947  IF (PRESENT(temps)) temps(2) = temps(2) + mpi_wtime() -t
2948 
2949  !Now we need to redistribute the Fourier coefficients on each processor
2950 
2951  t = mpi_wtime()
2952  combined_prod_cu_1(:,:,1)=prod_cu_1(1,:,:)
2953  combined_prod_cu_2(:,:,1)=prod_cu_2(1,:,:)
2954  DO n=2, m_max
2955  combined_prod_cu_1(:,:,n)=2*conjg(prod_cu_1(n,:,:))
2956  combined_prod_cu_2(:,:,n)=2*conjg(prod_cu_2(n,:,:))
2957  END DO
2958 
2959  IF (PRESENT(temps)) temps(3) = temps(3) + mpi_wtime() -t
2960 
2961  t = mpi_wtime()
2962  longueur_tranche=bloc_size*m_max_c*nb_field1 !vectors
2963  mpid=mpi_double_precision
2964  CALL mpi_alltoall (combined_prod_cu_1,longueur_tranche,mpid, dist_prod_cu_1,longueur_tranche, &
2965  mpid,communicator,code)
2966  CALL mpi_alltoall (combined_prod_cu_2,longueur_tranche,mpid, dist_prod_cu_2,longueur_tranche, &
2967  mpid,communicator,code)
2968 
2969  IF (PRESENT(temps)) temps(1) = temps(1) + mpi_wtime() -t
2970 
2971  t = mpi_wtime()
2972  DO i = 1, m_max_c
2973  DO nb = 1, nb_procs
2974  shiftc = (nb-1)*bloc_size
2975  shiftl = (nb-1)*m_max_c
2976  intermediate_1 = dist_prod_cu_1(:,:,shiftl+i)
2977  intermediate_2 = dist_prod_cu_2(:,:,shiftl+i)
2978  DO n = 1, bloc_size
2979  IF (n+shiftc > np ) cycle
2980  DO i_field = 1, nb_field1/2
2981  v1_out(n+shiftc, i_field*2-1, i) = REAL (intermediate_1(i_field,n),KIND=8)
2982  v1_out(n+shiftc, i_field*2 , i) = aimag(intermediate_1(i_field,n))
2983  v2_out(n+shiftc, i_field*2-1, i) = REAL (intermediate_2(i_field,n),KIND=8)
2984  v2_out(n+shiftc, i_field*2 , i) = aimag(intermediate_2(i_field,n))
2985  END DO
2986  END DO
2987  END DO
2988  END DO
2989 
2990  IF (PRESENT(temps)) temps(3) = temps(3) + mpi_wtime() -t
2991 
2992  END SUBROUTINE fft_compute_diffu_mom
2993 
2994  SUBROUTINE fft_par_compr_entro_visc_dcl(communicator, V1_in, V2_in, c1_in, c_in_real, hloc_gauss, &
2995  coeff1_in_level, v_out, c_out, nb_procs, bloc_size, m_max_pad, temps)
2996  !This a de-aliased version of the code, FEB 4, 2011, JLG
2997  USE my_util
2998  USE input_data
2999  IMPLICIT NONE
3000  include 'fftw3.f'
3001  ! Format: V_1in(1:np,1:6,1:m_max_c)
3002  ! INPUT ARE COSINE AND SINE COEFFICIENTS
3003  ! THEY ARE PUT IN COMPLEX FORMAT: c_0 = a_0 + i*0 and c_n = (a_n-i*b_n)/2
3004  REAL(KIND=8), DIMENSION(:,:,:), INTENT(IN) :: V1_in, V2_in !vector
3005  REAL(KIND=8), DIMENSION(:,:,:), INTENT(IN) :: c1_in !scalar
3006  REAL(KIND=8), DIMENSION(:,:), INTENT(IN) :: c_in_real
3007  REAL(KIND=8), DIMENSION(:), INTENT(IN) :: hloc_gauss
3008  REAL(KIND=8), INTENT(IN) :: coeff1_in_level
3009  REAL(KIND=8), DIMENSION(:,:,:), INTENT(OUT) :: V_out
3010  REAL(KIND=8), DIMENSION(:,:,:), INTENT(OUT) :: c_out
3011  REAL(KIND=8), DIMENSION(:), OPTIONAL, INTENT(INOUT) :: temps
3012  INTEGER, INTENT(IN) :: bloc_size, m_max_pad, nb_procs
3013  INTEGER :: np, np_tot, nb_field1, nb_field2, nb_field3
3014  INTEGER :: m_max, m_max_c, MPID, N_r_pad
3015  INTEGER(KIND=8) :: fftw_plan_multi_c2r, fftw_plan_multi_r2c
3016  REAL(KIND=8), DIMENSION(SIZE(V1_in,3),2*SIZE(V1_in,2)+SIZE(c1_in,2),bloc_size*nb_procs) :: dist_field
3017  REAL(KIND=8), DIMENSION(SIZE(V1_in,3),2*SIZE(V1_in,2)+SIZE(c1_in,2),bloc_size*nb_procs) :: combined_field
3018  COMPLEX(KIND=8), DIMENSION(m_max_pad, (2*SIZE(V1_in,2)+SIZE(c1_in,2))/2, bloc_size) :: cu
3019  REAL(KIND=8), DIMENSION(2*m_max_pad-1,(2*SIZE(V1_in,2)+SIZE(c1_in,2))/2,bloc_size) :: ru
3020  REAL(KIND=8), DIMENSION(2*m_max_pad-1, bloc_size) :: norm_grad_phi
3021  REAL(KIND=8), DIMENSION(bloc_size*nb_procs) :: hloc_gauss_tot
3022  REAL(KIND=8), DIMENSION(2*m_max_pad-1, bloc_size) :: visc_comp, visc_entro
3023  COMPLEX(KIND=8), DIMENSION(m_max_pad,SIZE(V1_in,2)/2,bloc_size) :: prod_cu
3024  REAL(KIND=8), DIMENSION(2*m_max_pad-1,SIZE(V1_in,2)/2,bloc_size) :: prod_ru
3025  COMPLEX(KIND=8), DIMENSION(SIZE(V1_in,2)/2, bloc_size) :: intermediate
3026  COMPLEX(KIND=8), DIMENSION(SIZE(V1_in,2)/2,bloc_size,SIZE(V1_in,3)*nb_procs) :: combined_prod_cu
3027  COMPLEX(KIND=8), DIMENSION(SIZE(V1_in,2)/2,bloc_size,SIZE(V1_in,3)*nb_procs) :: dist_prod_cu
3028  COMPLEX(KIND=8), DIMENSION(m_max_pad,bloc_size) :: prod_cu2
3029  REAL(KIND=8), DIMENSION(2*m_max_pad-1,bloc_size) :: prod_ru2
3030  COMPLEX(KIND=8), DIMENSION(bloc_size,SIZE(V1_in,3)*nb_procs) :: combined_prod_cu2
3031  COMPLEX(KIND=8), DIMENSION(bloc_size,SIZE(V1_in,3)*nb_procs) :: dist_prod_cu2
3032  COMPLEX(KIND=8), DIMENSION(bloc_size) :: intermediate2
3033  INTEGER :: i_field
3034  INTEGER :: nb, nf, shiftc, shiftl, jindex, longueur_tranche, i, n, code
3035  REAL(KIND=8) :: t
3036  INTEGER :: rank
3037  ! FFTW parameters
3038  INTEGER :: fft_dim, howmany, istride, ostride, idist, odist
3039  INTEGER, DIMENSION(1) :: dim, inembed, onembed
3040  ! Recall complexes must be rescaled
3041  ! End FFTW parameters
3042 !#include "petsc/finclude/petsc.h"
3043  mpi_comm :: communicator
3044 
3045  CALL mpi_comm_rank(communicator, rank, code)
3046 
3047  IF (PRESENT(temps)) temps = 0.d0
3048 
3049  np = SIZE(v1_in,1)
3050  nb_field1= SIZE(v1_in,2) ! Number of fields
3051  nb_field2= SIZE(v2_in,2) ! Number of fields
3052  nb_field3= SIZE(c1_in,2) ! Number of fields
3053  m_max_c = SIZE(v1_in,3) ! Number of complex (cosines + sines) coefficients per point
3054  m_max = m_max_c*nb_procs! Number of comlex coefficients per point per processor
3055  n_r_pad = 2*m_max_pad-1
3056  np_tot = nb_procs*bloc_size
3057 
3058  IF (mod(nb_field1,2)/=0 .OR. mod(nb_field2,2)/=0 .OR. m_max_c==0) THEN
3059  WRITE(*,*) ' BUG '
3060  stop
3061  END IF
3062 
3063  ! Bloc_size is the number of points that are handled by one processor
3064  ! once the Fourier modes are all collected
3065  ! Computation of bloc_size and np_tot
3066  ! fin de la repartition des points
3067 
3068  ! Packing all 3 complex components of both v1 and v2 input fields
3069  ! into dist_field, where the dimension indexing the nodal points varies the least rapidly,
3070  ! so that after distributing the data to the processes, each one will obtain a part
3071  ! on nodal points
3072  ! TRANSPOSE pr que la variable i associee aux modes soit la 1ere sur laquelle on va faire la FFT
3073  t = mpi_wtime()
3074 
3075  DO i = 1, m_max_c
3076  dist_field(i,1:nb_field1,1:np) = transpose(v1_in(:,:,i))
3077  dist_field(i,nb_field1+1:2*nb_field1,1:np) = transpose(v2_in(:,:,i))
3078  dist_field(i,2*nb_field1+1:2*nb_field1+nb_field3,1:np) = transpose(c1_in(:,:,i))
3079  END DO
3080  IF (PRESENT(temps)) temps(3) = temps(3) + mpi_wtime() -t
3081 
3082  IF (np/=np_tot) dist_field(:,:,np+1:np_tot) = 1.d100
3083 
3084  hloc_gauss_tot(1:np) = hloc_gauss
3085  IF (np/=np_tot) hloc_gauss_tot(np+1:np_tot) = 1.d100
3086 
3087  longueur_tranche=bloc_size*m_max_c*(nb_field1+nb_field2+nb_field3)
3088 
3089  t = mpi_wtime()
3090  mpid=mpi_double_precision
3091  CALL mpi_alltoall (dist_field,longueur_tranche, mpid, combined_field, longueur_tranche, &
3092  mpid, communicator, code)
3093  IF (PRESENT(temps)) temps(1) = temps(1) + mpi_wtime() -t
3094 
3095  t = mpi_wtime()
3096  !JLG, FEB 4, 2011
3097  cu = 0.d0
3098  !JLG, FEB 4, 2011
3099  DO n = 1, bloc_size
3100  DO nb = 1, nb_procs
3101  shiftc = (nb-1)*bloc_size
3102  shiftl = (nb-1)*m_max_c
3103  jindex = n + shiftc
3104  DO nf = 1, (nb_field1+nb_field2+nb_field3)/2
3105  ! Put real and imaginary parts in a complex
3106  ! nf=1,2,3 => V1_in
3107  ! nf=4 => V2_in
3108  ! INPUT ARE COSINE AND SINE COEFFICIENTS
3109  ! THEY ARE PUT IN COMPLEX FORMAT: c_0 = a_0 + i*0 and c_n = (a_n-i*b_n)/2
3110  cu(shiftl+1:shiftl+m_max_c,nf,n) = 0.5d0*cmplx(combined_field(:,2*nf-1,jindex),&
3111  -combined_field(:,2*nf,jindex),kind=8)
3112  END DO
3113  END DO
3114  END DO
3115  cu(1,:,:) = 2*cmplx(REAL(cu(1,:,:),KIND=8),0.d0,KIND=8)
3116  !JLG, FEB 4, 2011
3117  !Padding is done by initialization of cu: cu = 0
3118  !This is eequivalent to cu(m_max+1:m_max_pad,:,:) = 0.d0
3119  !JLG, FEB 4, 2011
3120 
3121  IF (PRESENT(temps)) temps(3) = temps(3) + mpi_wtime() -t
3122 
3123  ! Set the parameters for dfftw
3124  fft_dim=1; istride=1; ostride=1;
3125  !JLG, FEB 4, 2011
3126  !idist=N_r; inembed(1)=N_r; DIM(1)=N_r
3127  !odist=m_max; onembed(1)=m_max
3128  idist=n_r_pad; inembed(1)=n_r_pad; dim(1)=n_r_pad
3129  odist=m_max_pad; onembed(1)=m_max_pad
3130  !JLG, FEB 4, 2011
3131 
3132  howmany=bloc_size*(nb_field1+nb_field2+nb_field3)/2
3133 
3134  t = mpi_wtime()
3135  CALL dfftw_plan_many_dft_c2r(fftw_plan_multi_c2r, fft_dim, dim, howmany, cu, &
3136  onembed, ostride, odist, ru, inembed, istride, idist, fftw_estimate)
3137  !write(*,*) ' FFT_PAR_CROSS_PROD: fftw_plan_multi_c2r', fftw_plan_multi_c2r
3138  CALL dfftw_execute(fftw_plan_multi_c2r)
3139 
3140  ! clean up
3141  CALL dfftw_destroy_plan(fftw_plan_multi_c2r)
3142 
3143  IF (nb_field1 == 6 .AND. nb_field2 == 6 .AND. nb_field3 == 2) THEN
3144  !===ru(:,1:3,:) = h*Grad(phi)
3145  !===ru(:,4:6,:) = h*Grad(phi_reg)
3146  !===ru(:,7,:) = phi
3147  !===c_in_real = visc_entro(:,:)
3148 
3149  !===Compute MAX(0,phi*(1-phi))
3150  prod_ru2(:,:) = max(0.d0, ru(:,7,:)*(1.d0 - ru(:,7,:)))
3151 
3152  !===Compute |Grad(phi_reg)|
3153  norm_grad_phi(:,:) = sqrt(ru(:,4,:)**2 + ru(:,5,:)**2 + ru(:,6,:)**2) + 1.d-14
3154 
3155  !===Compute visc_comp
3156  DO n = 1, bloc_size
3157  visc_entro(:,n) = -coeff1_in_level + c_in_real(:,n)
3158  visc_comp(:,n)= c_in_real(:,n)*inputs%level_set_comp_factor*prod_ru2(:,n)/norm_grad_phi(:,n)
3159  END DO
3160 
3161  !===Compute (coeff1_in_level-visc_entro)*Grad(phi) + visc_comp*Grad(phi_reg)
3162  prod_ru(:,1,:) = -visc_entro*ru(:,1,:) + visc_comp*ru(:,4,:)
3163  prod_ru(:,2,:) = -visc_entro*ru(:,2,:) + visc_comp*ru(:,5,:)
3164  prod_ru(:,3,:) = -visc_entro*ru(:,3,:) + visc_comp*ru(:,6,:)
3165 !TEST COUPEZ
3166 ! !===Compute MAX(0,phi*(1-phi))
3167 ! prod_ru2(:,:) = MAX(0.d0, ru(:,7,:)*(1.d0 - ru(:,7,:)))
3168 !
3169 ! !===Compute visc_comp
3170 ! DO n = 1, bloc_size
3171 ! visc_entro(:,n) = -coeff1_in_level + c_in_real(:,n)
3172 ! END DO
3173 !
3174 ! !===Compute (coeff1_in_level-visc_entro)*(h*Grad(phi))
3175 ! prod_ru(:,1,:) = -visc_entro*ru(:,1,:)
3176 ! prod_ru(:,2,:) = -visc_entro*ru(:,2,:)
3177 ! prod_ru(:,3,:) = -visc_entro*ru(:,3,:)
3178 !TEST COUPEZ
3179  ELSE
3180  CALL error_petsc('error in problem type while calling FFT_PAR_COMPR_ENTRO_VISC_DCL')
3181  END IF
3182 
3183  howmany = bloc_size*nb_field1/2
3184  CALL dfftw_plan_many_dft_r2c(fftw_plan_multi_r2c, fft_dim, dim, howmany, prod_ru, &
3185  inembed, istride, idist, prod_cu, onembed, ostride, odist, fftw_estimate)
3186  CALL dfftw_execute(fftw_plan_multi_r2c)
3187  ! clean up
3188  CALL dfftw_destroy_plan(fftw_plan_multi_r2c)
3189 
3190  howmany = bloc_size*1
3191  CALL dfftw_plan_many_dft_r2c(fftw_plan_multi_r2c, fft_dim, dim, howmany, prod_ru2, &
3192  inembed, istride, idist, prod_cu2, onembed, ostride, odist, fftw_estimate)
3193  CALL dfftw_execute(fftw_plan_multi_r2c)
3194  ! clean up
3195  CALL dfftw_destroy_plan(fftw_plan_multi_r2c)
3196 
3197  !JLG, FEB 4, 2011
3198  !prod_cu = prod_cu/N_r !Scaling
3199  prod_cu = prod_cu*(1.d0/n_r_pad) !Scaling
3200  prod_cu2 = prod_cu2*(1.d0/n_r_pad) !Scaling
3201  !JLG, FEB 4, 2011
3202  IF (PRESENT(temps)) temps(2) = temps(2) + mpi_wtime() -t
3203 
3204  !Now we need to redistribute the Fourier coefficients on each processor
3205 
3206  t = mpi_wtime()
3207  combined_prod_cu(:,:,1)=prod_cu(1,:,:)
3208  combined_prod_cu2(:,1)=prod_cu2(1,:)
3209  DO n=2, m_max
3210  combined_prod_cu(:,:,n)=2*conjg(prod_cu(n,:,:))
3211  combined_prod_cu2(:,n)=2*conjg(prod_cu2(n,:))
3212  END DO
3213 
3214  IF (PRESENT(temps)) temps(3) = temps(3) + mpi_wtime() -t
3215 
3216  t = mpi_wtime()
3217  longueur_tranche=bloc_size*m_max_c*nb_field1 !vector
3218  mpid=mpi_double_precision
3219  CALL mpi_alltoall (combined_prod_cu,longueur_tranche,mpid, dist_prod_cu,longueur_tranche, &
3220  mpid,communicator,code)
3221 
3222  longueur_tranche=bloc_size*m_max_c*2 !scalar
3223  mpid=mpi_double_precision
3224  CALL mpi_alltoall (combined_prod_cu2,longueur_tranche,mpid, dist_prod_cu2,longueur_tranche, &
3225  mpid,communicator,code)
3226 
3227  IF (PRESENT(temps)) temps(1) = temps(1) + mpi_wtime() -t
3228  ! dimensions:
3229  t = mpi_wtime()
3230 
3231  DO i = 1, m_max_c
3232  DO nb = 1, nb_procs
3233  shiftc = (nb-1)*bloc_size
3234  shiftl = (nb-1)*m_max_c
3235  intermediate = dist_prod_cu(:,:,shiftl+i)
3236  intermediate2 = dist_prod_cu2(:,shiftl+i)
3237  DO n = 1, bloc_size
3238  IF (n+shiftc > np ) cycle
3239  DO i_field = 1, nb_field1/2
3240  v_out(n+shiftc, i_field*2-1, i) = REAL (intermediate(i_field,n),KIND=8)
3241  v_out(n+shiftc, i_field*2 , i) = aimag(intermediate(i_field,n))
3242  END DO
3243  c_out(n+shiftc, 1, i) = REAL (intermediate2(n),KIND=8)
3244  c_out(n+shiftc, 2, i) = aimag(intermediate2(n))
3245  END DO
3246  END DO
3247  END DO
3248 
3249  IF (PRESENT(temps)) temps(3) = temps(3) + mpi_wtime() -t
3250 
3251  END SUBROUTINE fft_par_compr_entro_visc_dcl
3252 
3253  SUBROUTINE fft_compute_entropy_visc_grad_mom(communicator, V1_in, V2_in, V3_in, c_in_real, &
3254  v_out, nb_procs, bloc_size, m_max_pad, temps)
3255  !FFT (FFT(-1) V1 . FFT(-1) V2) = c_out
3256  !This a de-aliased version of the code, FEB 4, 2011, JLG
3257  USE my_util
3258  USE input_data
3259  IMPLICIT NONE
3260  include 'fftw3.f'
3261  ! Format: V_1in(1:np,1:6,1:m_max_c)
3262  ! INPUT ARE COSINE AND SINE COEFFICIENTS
3263  ! THEY ARE PUT IN COMPLEX FORMAT: c_0 = a_0 + i*0 and c_n = (a_n-i*b_n)/2
3264  REAL(KIND=8), DIMENSION(:,:,:), INTENT(IN) :: V1_in, V2_in, V3_in !Gradient of momentum
3265  REAL(KIND=8), DIMENSION(:,:), INTENT(IN) :: c_in_real !entropy viscosity
3266  REAL(KIND=8), DIMENSION(:,:,:,:),INTENT(OUT) :: V_out
3267  INTEGER, INTENT(IN) :: nb_procs, bloc_size, m_max_pad
3268  REAL(KIND=8), DIMENSION(:), OPTIONAL, INTENT(INOUT) :: temps
3269  COMPLEX(KIND=8), DIMENSION(m_max_pad, SIZE(V1_in,2)*3/2, bloc_size) :: cu
3270  REAL(KIND=8), DIMENSION(2*m_max_pad-1,SIZE(V1_in,2)*3/2,bloc_size) :: ru
3271  REAL(KIND=8), DIMENSION(2*m_max_pad-1,bloc_size) :: visco_entro
3272  REAL(KIND=8), DIMENSION(2*m_max_pad-1,SIZE(V1_in,2)/2,bloc_size) :: prod_ru_1, prod_ru_2, prod_ru_3
3273  COMPLEX(KIND=8), DIMENSION(m_max_pad,SIZE(V1_in,2)/2,bloc_size) :: prod_cu_1, prod_cu_2, prod_cu_3
3274 
3275  COMPLEX(KIND=8), DIMENSION(SIZE(V1_in,2)/2,bloc_size) :: intermediate_1, intermediate_2, intermediate_3
3276  REAL(KIND=8), DIMENSION(SIZE(V1_in,3),3*SIZE(V1_in,2),bloc_size*nb_procs) :: dist_field, combined_field
3277  COMPLEX(KIND=8), DIMENSION(SIZE(V1_in,2)/2,bloc_size,SIZE(V1_in,3)*nb_procs) :: combined_prod_cu_1
3278  COMPLEX(KIND=8), DIMENSION(SIZE(V1_in,2)/2,bloc_size,SIZE(V1_in,3)*nb_procs) :: combined_prod_cu_2
3279  COMPLEX(KIND=8), DIMENSION(SIZE(V1_in,2)/2,bloc_size,SIZE(V1_in,3)*nb_procs) :: combined_prod_cu_3
3280  COMPLEX(KIND=8), DIMENSION(SIZE(V1_in,2)/2,bloc_size,SIZE(V1_in,3)*nb_procs) :: dist_prod_cu_1
3281  COMPLEX(KIND=8), DIMENSION(SIZE(V1_in,2)/2,bloc_size,SIZE(V1_in,3)*nb_procs) :: dist_prod_cu_2
3282  COMPLEX(KIND=8), DIMENSION(SIZE(V1_in,2)/2,bloc_size,SIZE(V1_in,3)*nb_procs) :: dist_prod_cu_3
3283  INTEGER :: np, np_tot, nb_field, m_max, m_max_c, MPID, N_r_pad
3284  INTEGER(KIND=8) :: fftw_plan_multi_c2r, fftw_plan_multi_r2c
3285  INTEGER :: nb, nf, shiftc, shiftl, jindex, longueur_tranche, i, n, code, rank, i_field
3286  REAL(KIND=8) :: t
3287  ! FFTW parameters
3288  INTEGER :: fft_dim, howmany, istride, ostride, idist, odist
3289  INTEGER, DIMENSION(1) :: dim, inembed, onembed
3290  ! Recall complexes must be rescaled
3291  ! End FFTW parameters
3292 !#include "petsc/finclude/petsc.h"
3293  mpi_comm :: communicator
3294  CALL mpi_comm_rank(communicator, rank, code)
3295 
3296  IF (PRESENT(temps)) temps = 0.d0
3297 
3298  np = SIZE(v1_in,1)
3299  nb_field= SIZE(v1_in,2) ! Number of fields
3300  m_max_c = SIZE(v1_in,3) ! Number of complex (cosines + sines) coefficients per point
3301  m_max = m_max_c*nb_procs! Number of comlex coefficients per point per processor
3302  np_tot = nb_procs*bloc_size
3303  n_r_pad=2*m_max_pad-1
3304 
3305  IF (mod(nb_field,2)/=0 .OR. m_max_c==0) THEN
3306  WRITE(*,*) ' BUG '
3307  stop
3308  END IF
3309 
3310  ! Packing all 3 complex components of both v1 and v2 input fields
3311  ! into dist_field, where the dimension indexing the nodal points varies the least rapidly,
3312  ! so that after distributing the data to the processes, each one will obtain a part
3313  ! on nodal points
3314  ! TRANSPOSE pr que la variable i associee aux modes soit la 1ere sur laquelle on va faire la FFT
3315  t = mpi_wtime()
3316 
3317  DO i = 1, m_max_c
3318  dist_field(i, 1:nb_field, 1:np) = transpose(v1_in(:,:,i))
3319  dist_field(i, nb_field+1:2*nb_field,1:np) = transpose(v2_in(:,:,i))
3320  dist_field(i,2*nb_field+1:3*nb_field,1:np) = transpose(v3_in(:,:,i))
3321  END DO
3322  IF (PRESENT(temps)) temps(3) = temps(3) + mpi_wtime() -t
3323 
3324  IF (np/=np_tot) dist_field(:,:,np+1:np_tot) = 1.d100
3325 
3326  longueur_tranche=bloc_size*m_max_c*nb_field*3
3327 
3328  t = mpi_wtime()
3329  mpid=mpi_double_precision
3330  CALL mpi_alltoall (dist_field,longueur_tranche, mpid, combined_field, longueur_tranche, &
3331  mpid, communicator, code)
3332  IF (PRESENT(temps)) temps(1) = temps(1) + mpi_wtime() -t
3333 
3334  t = mpi_wtime()
3335  !JLG, FEB 4, 2011
3336  cu = 0.d0
3337  !JLG, FEB 4, 2011
3338  DO n = 1, bloc_size
3339  DO nb = 1, nb_procs
3340  shiftc = (nb-1)*bloc_size
3341  shiftl = (nb-1)*m_max_c
3342  jindex = n + shiftc
3343  DO nf = 1, nb_field*3/2
3344  ! Put real and imaginary parts in a complex
3345  ! nf=1,2,3 => V1_in
3346  ! nf=4,5,6 => V2_in
3347  ! INPUT ARE COSINE AND SINE COEFFICIENTS
3348  ! THEY ARE PUT IN COMPLEX FORMAT: c_0 = a_0 + i*0 and c_n = (a_n-i*b_n)/2
3349  cu(shiftl+1:shiftl+m_max_c,nf,n) = 0.5d0*cmplx(combined_field(:,2*nf-1,jindex),&
3350  -combined_field(:,2*nf,jindex),kind=8)
3351  END DO
3352  END DO
3353  END DO
3354  cu(1,:,:) = 2*cmplx(REAL(cu(1,:,:),KIND=8),0.d0,KIND=8)
3355  !JLG, FEB 4, 2011
3356  !Padding is done by initialization of cu: cu = 0
3357  !This is eequivalent to cu(m_max+1:m_max_pad,:,:) = 0.d0
3358  !JLG, FEB 4, 2011
3359 
3360  IF (PRESENT(temps)) temps(3) = temps(3) + mpi_wtime() -t
3361 
3362  ! Set the parameters for dfftw
3363  fft_dim=1; istride=1; ostride=1;
3364  !JLG, FEB 4, 2011
3365 !!$ idist=N_r; inembed(1)=N_r; DIM(1)=N_r
3366 !!$ odist=m_max; onembed(1)=m_max
3367  idist=n_r_pad; inembed(1)=n_r_pad; dim(1)=n_r_pad
3368  odist=m_max_pad; onembed(1)=m_max_pad
3369  !JLG, FEB 4, 2011
3370 
3371  howmany=bloc_size*nb_field*3/2
3372 
3373  t = mpi_wtime()
3374  CALL dfftw_plan_many_dft_c2r(fftw_plan_multi_c2r, fft_dim, dim, howmany, cu, &
3375  onembed, ostride, odist, ru, inembed, istride, idist, fftw_estimate)
3376  !write(*,*) ' FFT_PAR_DOT_PROD: fftw_plan_multi_c2r', fftw_plan_multi_c2r
3377  CALL dfftw_execute(fftw_plan_multi_c2r)
3378  ! clean up
3379  CALL dfftw_destroy_plan(fftw_plan_multi_c2r)
3380 
3381  !===ru(:,1:3,:) is first line of momentum gradient
3382  !===ru(:,4:6,:) is second line of momentum gradient
3383  !===ru(:,7:9,:) is third line of momentum gradient
3384 
3385  !===Compute -LES_coeff1_mom + entropy viscosity
3386  DO n = 1, bloc_size
3387  visco_entro(:,n) = -inputs%LES_coeff1_mom + c_in_real(:,n)
3388  END DO
3389  !===End compute -LES_coeff1_mom + entropy viscosity
3390 
3391  !===Compute (-LES_coeff1_mom + visco_entro) times gradient(momentum)
3392  prod_ru_1(:,1,:) = visco_entro*ru(:,1,:)
3393  prod_ru_1(:,2,:) = visco_entro*ru(:,2,:)
3394  prod_ru_1(:,3,:) = visco_entro*ru(:,3,:)
3395 
3396  prod_ru_2(:,1,:) = visco_entro*ru(:,4,:)
3397  prod_ru_2(:,2,:) = visco_entro*ru(:,5,:)
3398  prod_ru_2(:,3,:) = visco_entro*ru(:,6,:)
3399 
3400  prod_ru_3(:,1,:) = visco_entro*ru(:,7,:)
3401  prod_ru_3(:,2,:) = visco_entro*ru(:,8,:)
3402  prod_ru_3(:,3,:) = visco_entro*ru(:,9,:)
3403  !===End compute (-LES_coeff1_mom + visco_entro) times gradient(momentum)
3404 
3405  howmany = bloc_size*nb_field/2
3406  CALL dfftw_plan_many_dft_r2c(fftw_plan_multi_r2c, fft_dim, dim, howmany, prod_ru_1, &
3407  inembed, istride, idist, prod_cu_1, onembed, ostride, odist, fftw_estimate)
3408  !write(*,*) ' FFT_PAR_DOT_PROD: fftw_plan_multi_r2c', fftw_plan_multi_r2c
3409  CALL dfftw_execute(fftw_plan_multi_r2c)
3410  ! clean up
3411  CALL dfftw_destroy_plan(fftw_plan_multi_r2c)
3412 
3413  CALL dfftw_plan_many_dft_r2c(fftw_plan_multi_r2c, fft_dim, dim, howmany, prod_ru_2, &
3414  inembed, istride, idist, prod_cu_2, onembed, ostride, odist, fftw_estimate)
3415  !write(*,*) ' FFT_PAR_DOT_PROD: fftw_plan_multi_r2c', fftw_plan_multi_r2c
3416  CALL dfftw_execute(fftw_plan_multi_r2c)
3417  ! clean up
3418  CALL dfftw_destroy_plan(fftw_plan_multi_r2c)
3419 
3420  CALL dfftw_plan_many_dft_r2c(fftw_plan_multi_r2c, fft_dim, dim, howmany, prod_ru_3, &
3421  inembed, istride, idist, prod_cu_3, onembed, ostride, odist, fftw_estimate)
3422  !write(*,*) ' FFT_PAR_DOT_PROD: fftw_plan_multi_r2c', fftw_plan_multi_r2c
3423  CALL dfftw_execute(fftw_plan_multi_r2c)
3424  ! clean up
3425  CALL dfftw_destroy_plan(fftw_plan_multi_r2c)
3426 
3427  prod_cu_1 = prod_cu_1*(1.d0/n_r_pad) !Scaling
3428  prod_cu_2 = prod_cu_2*(1.d0/n_r_pad) !Scaling
3429  prod_cu_3 = prod_cu_3*(1.d0/n_r_pad) !Scaling
3430 
3431  IF (PRESENT(temps)) temps(2) = temps(2) + mpi_wtime() -t
3432 
3433  !Now we need to redistribute the Fourier coefficients on each processor
3434  t = mpi_wtime()
3435  combined_prod_cu_1(:,:,1)=prod_cu_1(1,:,:)
3436  combined_prod_cu_2(:,:,1)=prod_cu_2(1,:,:)
3437  combined_prod_cu_3(:,:,1)=prod_cu_3(1,:,:)
3438 
3439  DO n=2, m_max
3440  combined_prod_cu_1(:,:,n)=2*conjg(prod_cu_1(n,:,:))
3441  combined_prod_cu_2(:,:,n)=2*conjg(prod_cu_2(n,:,:))
3442  combined_prod_cu_3(:,:,n)=2*conjg(prod_cu_3(n,:,:))
3443  END DO
3444 
3445  IF (PRESENT(temps)) temps(3) = temps(3) + mpi_wtime() -t
3446 
3447  t = mpi_wtime()
3448  longueur_tranche=bloc_size*m_max_c*nb_field
3449  mpid=mpi_double_precision
3450  CALL mpi_alltoall (combined_prod_cu_1,longueur_tranche,mpid, dist_prod_cu_1,longueur_tranche, &
3451  mpid,communicator,code)
3452  CALL mpi_alltoall (combined_prod_cu_2,longueur_tranche,mpid, dist_prod_cu_2,longueur_tranche, &
3453  mpid,communicator,code)
3454  CALL mpi_alltoall (combined_prod_cu_3,longueur_tranche,mpid, dist_prod_cu_3,longueur_tranche, &
3455  mpid,communicator,code)
3456  IF (PRESENT(temps)) temps(1) = temps(1) + mpi_wtime() -t
3457 
3458  t = mpi_wtime()
3459  DO i = 1, m_max_c
3460  DO nb = 1, nb_procs
3461  shiftc = (nb-1)*bloc_size
3462  shiftl = (nb-1)*m_max_c
3463  intermediate_1 = dist_prod_cu_1(:,:,shiftl+i)
3464  intermediate_2 = dist_prod_cu_2(:,:,shiftl+i)
3465  intermediate_3 = dist_prod_cu_3(:,:,shiftl+i)
3466  DO n = 1, bloc_size
3467  IF (n+shiftc > np ) cycle
3468  DO i_field = 1, nb_field/2
3469  v_out(1,n+shiftc, i_field*2-1, i) = REAL (intermediate_1(i_field,n),KIND=8)
3470  v_out(1,n+shiftc, i_field*2 , i) = aimag(intermediate_1(i_field,n))
3471  v_out(2,n+shiftc, i_field*2-1, i) = REAL (intermediate_2(i_field,n),KIND=8)
3472  v_out(2,n+shiftc, i_field*2 , i) = aimag(intermediate_2(i_field,n))
3473  v_out(3,n+shiftc, i_field*2-1, i) = REAL (intermediate_3(i_field,n),KIND=8)
3474  v_out(3,n+shiftc, i_field*2 , i) = aimag(intermediate_3(i_field,n))
3475  END DO
3476  END DO
3477  END DO
3478  END DO
3479  IF (PRESENT(temps)) temps(3) = temps(3) + mpi_wtime() -t
3480 
3481  END SUBROUTINE fft_compute_entropy_visc_grad_mom
3482 
3483  SUBROUTINE fft_no_overshoot_level_set(communicator, c1_inout, nb_procs, bloc_size, m_max_pad, temps)
3484  !FFT (FFT(-1) V1 . FFT(-1) V2) = c_out
3485  !This a de-aliased version of the code, FEB 4, 2011, JLG
3486  USE my_util
3487  USE input_data
3488  IMPLICIT NONE
3489  include 'fftw3.f'
3490  ! Format: V_1in(1:np,1:6,1:m_max_c)
3491  ! INPUT ARE COSINE AND SINE COEFFICIENTS
3492  ! THEY ARE PUT IN COMPLEX FORMAT: c_0 = a_0 + i*0 and c_n = (a_n-i*b_n)/2
3493  REAL(KIND=8), DIMENSION(:,:,:), INTENT(INOUT) :: c1_inout
3494  INTEGER, INTENT(IN) :: nb_procs, bloc_size, m_max_pad
3495  REAL(KIND=8), DIMENSION(:), OPTIONAL, INTENT(INOUT) :: temps
3496  REAL(KIND=8), DIMENSION(SIZE(c1_inout,3),SIZE(c1_inout,2),bloc_size*nb_procs) :: dist_field
3497  REAL(KIND=8), DIMENSION(SIZE(c1_inout,3),SIZE(c1_inout,2),bloc_size*nb_procs) :: combined_field
3498  COMPLEX(KIND=8), DIMENSION(m_max_pad, SIZE(c1_inout,2)/2, bloc_size) :: cu
3499  REAL(KIND=8), DIMENSION(2*m_max_pad-1,SIZE(c1_inout,2)/2,bloc_size) :: ru
3500  REAL(KIND=8), DIMENSION(2*m_max_pad-1,bloc_size) :: prod_ru_1
3501  COMPLEX(KIND=8), DIMENSION(m_max_pad,bloc_size) :: prod_cu_1
3502  COMPLEX(KIND=8), DIMENSION(bloc_size,SIZE(c1_inout,3)*nb_procs) :: combined_prod_cu_1
3503  COMPLEX(KIND=8), DIMENSION(bloc_size,SIZE(c1_inout,3)*nb_procs) :: dist_prod_cu_1
3504  COMPLEX(KIND=8), DIMENSION(bloc_size) :: intermediate_1
3505  INTEGER :: np, np_tot, nb_field, m_max, m_max_c, MPID, N_r_pad
3506  INTEGER(KIND=8) :: fftw_plan_multi_c2r, fftw_plan_multi_r2c
3507  INTEGER :: nb, nf, shiftc, shiftl, jindex, longueur_tranche, i, n, code, rank
3508  REAL(KIND=8) :: t
3509  ! FFTW parameters
3510  INTEGER :: fft_dim, howmany, istride, ostride, idist, odist
3511  INTEGER, DIMENSION(1) :: dim, inembed, onembed
3512  ! Recall complexes must be rescaled
3513  ! End FFTW parameters
3514 !#include "petsc/finclude/petsc.h"
3515  mpi_comm :: communicator
3516 
3517  CALL mpi_comm_rank(communicator, rank, code)
3518 
3519  IF (PRESENT(temps)) temps = 0.d0
3520 
3521  np = SIZE(c1_inout,1)
3522  nb_field = SIZE(c1_inout,2) ! Number of fields for scalar
3523  m_max_c = SIZE(c1_inout,3) ! Number of complex (cosines + sines) coefficients per point
3524  m_max = m_max_c*nb_procs! Number of comlex coefficients per point per processor
3525  np_tot = nb_procs*bloc_size
3526  n_r_pad = 2*m_max_pad-1
3527 
3528  IF (mod(nb_field,2)/=0 .OR. m_max_c==0) THEN
3529  WRITE(*,*) ' BUG '
3530  stop
3531  END IF
3532 
3533  ! Packing all 3 complex components of both v1 and v2 input fields
3534  ! into dist_field, where the dimension indexing the nodal points varies the least rapidly,
3535  ! so that after distributing the data to the processes, each one will obtain a part
3536  ! on nodal points
3537  ! TRANSPOSE pr que la variable i associee aux modes soit la 1ere sur laquelle on va faire la FFT
3538  t = mpi_wtime()
3539 
3540  DO i = 1, m_max_c
3541  dist_field(i, 1:nb_field, 1:np) = transpose(c1_inout(:,:,i))
3542  END DO
3543  IF (PRESENT(temps)) temps(3) = temps(3) + mpi_wtime() -t
3544 
3545  IF (np/=np_tot) dist_field(:,:,np+1:np_tot) = 0.d0
3546 
3547  longueur_tranche=bloc_size*m_max_c*nb_field
3548 
3549  t = mpi_wtime()
3550  mpid=mpi_double_precision
3551  CALL mpi_alltoall (dist_field,longueur_tranche, mpid, combined_field, longueur_tranche, &
3552  mpid, communicator, code)
3553  IF (PRESENT(temps)) temps(1) = temps(1) + mpi_wtime() -t
3554 
3555  t = mpi_wtime()
3556  !JLG, FEB 4, 2011
3557  cu = 0.d0
3558  !JLG, FEB 4, 2011
3559  DO n = 1, bloc_size
3560  DO nb = 1, nb_procs
3561  shiftc = (nb-1)*bloc_size
3562  shiftl = (nb-1)*m_max_c
3563  jindex = n + shiftc
3564  DO nf = 1, nb_field/2
3565  ! Put real and imaginary parts in a complex
3566  ! nf=1,2,3 => V1_in
3567  ! nf=4,5,6 => V2_in
3568  ! INPUT ARE COSINE AND SINE COEFFICIENTS
3569  ! THEY ARE PUT IN COMPLEX FORMAT: c_0 = a_0 + i*0 and c_n = (a_n-i*b_n)/2
3570  cu(shiftl+1:shiftl+m_max_c,nf,n) = 0.5d0*cmplx(combined_field(:,2*nf-1,jindex),&
3571  -combined_field(:,2*nf,jindex),kind=8)
3572  END DO
3573  END DO
3574  END DO
3575  cu(1,:,:) = 2*cmplx(REAL(cu(1,:,:),KIND=8),0.d0,KIND=8)
3576  !JLG, FEB 4, 2011
3577  !Padding is done by initialization of cu: cu = 0
3578  !This is eequivalent to cu(m_max+1:m_max_pad,:,:) = 0.d0
3579  !JLG, FEB 4, 2011
3580 
3581  IF (PRESENT(temps)) temps(3) = temps(3) + mpi_wtime() -t
3582 
3583  ! Set the parameters for dfftw
3584  fft_dim=1; istride=1; ostride=1;
3585  !JLG, FEB 4, 2011
3586 !!$ idist=N_r; inembed(1)=N_r; DIM(1)=N_r
3587 !!$ odist=m_max; onembed(1)=m_max
3588  idist=n_r_pad; inembed(1)=n_r_pad; dim(1)=n_r_pad
3589  odist=m_max_pad; onembed(1)=m_max_pad
3590  !JLG, FEB 4, 2011
3591 
3592  howmany=bloc_size*nb_field/2
3593 
3594  t = mpi_wtime()
3595  CALL dfftw_plan_many_dft_c2r(fftw_plan_multi_c2r, fft_dim, dim, howmany, cu, &
3596  onembed, ostride, odist, ru, inembed, istride, idist, fftw_estimate)
3597  !write(*,*) ' FFT_PAR_DOT_PROD: fftw_plan_multi_c2r', fftw_plan_multi_c2r
3598  CALL dfftw_execute(fftw_plan_multi_c2r)
3599 
3600  ! clean up
3601  CALL dfftw_destroy_plan(fftw_plan_multi_c2r)
3602 
3603  !===Compute MAX(0, MIN(1, c1_inout))
3604  DO n = 1, bloc_size
3605  prod_ru_1(:,n) = max(0.d0, min(1.d0, ru(:,1,n)))
3606  END DO
3607  !===End compute MAX(0, MIN(1, c1_inout))
3608 
3609  howmany = bloc_size*1
3610  CALL dfftw_plan_many_dft_r2c(fftw_plan_multi_r2c, fft_dim, dim, howmany, prod_ru_1, &
3611  inembed, istride, idist, prod_cu_1, onembed, ostride, odist, fftw_estimate)
3612  !write(*,*) ' FFT_PAR_DOT_PROD: fftw_plan_multi_r2c', fftw_plan_multi_r2c
3613  CALL dfftw_execute(fftw_plan_multi_r2c)
3614  ! clean up
3615  CALL dfftw_destroy_plan(fftw_plan_multi_r2c)
3616 
3617  prod_cu_1 = prod_cu_1*(1.d0/n_r_pad) !Scaling
3618 
3619  IF (PRESENT(temps)) temps(2) = temps(2) + mpi_wtime() -t
3620 
3621  !Now we need to redistribute the Fourier coefficients on each processor
3622  t = mpi_wtime()
3623  combined_prod_cu_1(:,1)=prod_cu_1(1,:)
3624 
3625  DO n=2, m_max
3626  combined_prod_cu_1(:,n)=2*conjg(prod_cu_1(n,:))
3627  END DO
3628 
3629  IF (PRESENT(temps)) temps(3) = temps(3) + mpi_wtime() -t
3630 
3631  t = mpi_wtime()
3632  longueur_tranche=bloc_size*m_max_c*2
3633  mpid=mpi_double_precision
3634  CALL mpi_alltoall (combined_prod_cu_1,longueur_tranche,mpid, dist_prod_cu_1,longueur_tranche, &
3635  mpid,communicator,code)
3636  IF (PRESENT(temps)) temps(1) = temps(1) + mpi_wtime() -t
3637 
3638  t = mpi_wtime()
3639  DO i = 1, m_max_c
3640  DO nb = 1, nb_procs
3641  shiftc = (nb-1)*bloc_size
3642  shiftl = (nb-1)*m_max_c
3643  intermediate_1 = dist_prod_cu_1(:,shiftl+i)
3644  DO n = 1, bloc_size
3645  IF (n+shiftc > np ) cycle
3646  c1_inout(n+shiftc, 1, i) = REAL (intermediate_1(n),KIND=8)
3647  c1_inout(n+shiftc, 2, i) = aimag(intermediate_1(n))
3648  END DO
3649  END DO
3650  END DO
3651  IF (PRESENT(temps)) temps(3) = temps(3) + mpi_wtime() -t
3652 
3653  END SUBROUTINE fft_no_overshoot_level_set
3654 
3655  SUBROUTINE fft_compression_level_set_dcl(communicator_F,communicator_S, V1_in, V2_in, c_in, c_out, &
3656  hloc_gauss, l_g, nb_procs, bloc_size, m_max_pad, temps)
3658 !JLG 09/ 2016
3659 USE user_data
3660 !JLG 09/ 2016
3661  !FFT (FFT(-1) V1 . FFT(-1) V2) = c_out
3662  !This a de-aliased version of the code, FEB 4, 2011, JLG
3663  IMPLICIT NONE
3664  include 'fftw3.f'
3665  ! Format: V_1in(1:np,1:6,1:m_max_c)
3666  ! INPUT ARE COSINE AND SINE COEFFICIENTS
3667  ! THEY ARE PUT IN COMPLEX FORMAT: c_0 = a_0 + i*0 and c_n = (a_n-i*b_n)/2
3668  REAL(KIND=8), DIMENSION(:,:,:), INTENT(IN) :: V1_in, V2_in
3669  REAL(KIND=8), DIMENSION(:,:,:), INTENT(IN) :: c_in
3670  REAL(KIND=8), DIMENSION(:,:,:), INTENT(OUT) :: c_out
3671  REAL(KIND=8), DIMENSION(:), INTENT(IN) :: hloc_gauss
3672  INTEGER, INTENT(IN) :: l_G
3673  REAL(KIND=8), DIMENSION(:), OPTIONAL, INTENT(INOUT) :: temps
3674  INTEGER, INTENT(IN) :: nb_procs, bloc_size, m_max_pad
3675  COMPLEX(KIND=8), DIMENSION(m_max_pad, SIZE(V1_in,2)+SIZE(c_in,2)/2, bloc_size) :: cu
3676  REAL(KIND=8), DIMENSION(2*m_max_pad-1,SIZE(V1_in,2)+SIZE(c_in,2)/2,bloc_size) :: ru
3677  COMPLEX(KIND=8), DIMENSION(m_max_pad,bloc_size) :: prod_cu
3678  REAL(KIND=8), DIMENSION(2*m_max_pad-1,bloc_size) :: prod_ru
3679  COMPLEX(KIND=8), DIMENSION(bloc_size) :: intermediate
3680  REAL(KIND=8), DIMENSION(SIZE(V1_in,3),2*SIZE(V1_in,2)+SIZE(c_in,2),bloc_size*nb_procs) :: dist_field
3681  REAL(KIND=8), DIMENSION(SIZE(V1_in,3),2*SIZE(V1_in,2)+SIZE(c_in,2),bloc_size*nb_procs) :: combined_field
3682  COMPLEX(KIND=8), DIMENSION(bloc_size,SIZE(V1_in,3)*nb_procs) :: combined_prod_cu
3683  COMPLEX(KIND=8), DIMENSION(bloc_size,SIZE(V1_in,3)*nb_procs) :: dist_prod_cu
3684  REAL(KIND=8), DIMENSION(2*m_max_pad-1,bloc_size) :: norm_vel
3685  REAL(KIND=8), DIMENSION(2*m_max_pad-1,bloc_size/l_G) :: norm_vel_int
3686  REAL(KIND=8), DIMENSION(2*m_max_pad-1,bloc_size) :: norm_grad_phi
3687  REAL(KIND=8), DIMENSION(bloc_size*nb_procs) :: hloc_gauss_tot
3688  REAL(KIND=8) :: hh, x, max_norm_vel_loc, max_norm_vel_loc_F, max_norm_vel_tot
3689  INTEGER :: rank, l
3690  INTEGER :: np, np_tot, nb_field, nb_field2, m_max, m_max_c, MPID, N_r_pad
3691  INTEGER(KIND=8) :: fftw_plan_multi_c2r, fftw_plan_multi_r2c
3692  INTEGER :: nb, nf, shiftc, shiftl, jindex, longueur_tranche, i, n, code
3693  REAL(KIND=8) :: t
3694  ! FFTW parameters
3695  INTEGER :: fft_dim, howmany, istride, ostride, idist, odist
3696  INTEGER, DIMENSION(1) :: dim, inembed, onembed
3697  ! Recall complexes must be rescaled
3698  ! End FFTW parameters
3699 !#include "petsc/finclude/petsc.h"
3700  mpi_comm :: communicator_f
3701  mpi_comm :: communicator_s
3702 
3703  IF (PRESENT(temps)) temps = 0.d0
3704 
3705  CALL mpi_comm_rank(communicator_f, rank, code)
3706 
3707  np = SIZE(v1_in,1)
3708  nb_field = SIZE(v1_in,2) ! Number of fields
3709  nb_field2= SIZE(c_in,2) ! Number of fields
3710  m_max_c = SIZE(v1_in,3) ! Number of complex (cosines + sines) coefficients per point
3711  m_max = m_max_c*nb_procs! Number of comlex coefficients per point per processor
3712  np_tot = nb_procs*bloc_size
3713  n_r_pad = 2*m_max_pad-1
3714 
3715  IF (mod(nb_field,2)/=0 .OR. mod(nb_field2,2)/=0 .OR. m_max_c==0) THEN
3716  WRITE(*,*) ' BUG '
3717  stop
3718  END IF
3719 
3720  ! Packing all 3 complex components of both v1 and v2 input fields
3721  ! into dist_field, where the dimension indexing the nodal points varies the least rapidly,
3722  ! so that after distributing the data to the processes, each one will obtain a part
3723  ! on nodal points
3724  ! TRANSPOSE pr que la variable i associee aux modes soit la 1ere sur laquelle on va faire la FFT
3725  t = mpi_wtime()
3726 
3727  DO i = 1, m_max_c
3728  dist_field(i,1:nb_field,1:np) = transpose(v1_in(:,:,i))
3729  dist_field(i,nb_field+1:2*nb_field,1:np) = transpose(v2_in(:,:,i))
3730  dist_field(i,2*nb_field+1:2*nb_field+nb_field2,1:np) = transpose(c_in(:,:,i))
3731  END DO
3732  IF (PRESENT(temps)) temps(3) = temps(3) + mpi_wtime() -t
3733 
3734  IF (np/=np_tot) dist_field(:,:,np+1:np_tot) = 0.d0
3735 
3736  hloc_gauss_tot(1:np) = hloc_gauss
3737  IF (np/=np_tot) hloc_gauss_tot(np+1:np_tot) = 1.d100
3738 
3739  longueur_tranche=bloc_size*m_max_c*(nb_field*2+nb_field2)
3740 
3741  t = mpi_wtime()
3742  mpid=mpi_double_precision
3743  CALL mpi_alltoall (dist_field,longueur_tranche, mpid, combined_field, longueur_tranche, &
3744  mpid, communicator_f, code)
3745  IF (PRESENT(temps)) temps(1) = temps(1) + mpi_wtime() -t
3746 
3747  t = mpi_wtime()
3748  !JLG, FEB 4, 2011
3749  cu = 0.d0
3750  !JLG, FEB 4, 2011
3751  DO n = 1, bloc_size
3752  DO nb = 1, nb_procs
3753  shiftc = (nb-1)*bloc_size
3754  shiftl = (nb-1)*m_max_c
3755  jindex = n + shiftc
3756  DO nf = 1, (nb_field+nb_field2/2)
3757  ! Put real and imaginary parts in a complex
3758  ! nf=1,2,3 => V1_in
3759  ! nf=4,5,6 => V2_in
3760  ! INPUT ARE COSINE AND SINE COEFFICIENTS
3761  ! THEY ARE PUT IN COMPLEX FORMAT: c_0 = a_0 + i*0 and c_n = (a_n-i*b_n)/2
3762  cu(shiftl+1:shiftl+m_max_c,nf,n) = 0.5d0*cmplx(combined_field(:,2*nf-1,jindex),&
3763  -combined_field(:,2*nf,jindex),kind=8)
3764  END DO
3765  END DO
3766  END DO
3767  cu(1,:,:) = 2*cmplx(REAL(cu(1,:,:),KIND=8),0.d0,KIND=8)
3768  !JLG, FEB 4, 2011
3769  !Padding is done by initialization of cu: cu = 0
3770  !This is eequivalent to cu(m_max+1:m_max_pad,:,:) = 0.d0
3771  !JLG, FEB 4, 2011
3772 
3773  IF (PRESENT(temps)) temps(3) = temps(3) + mpi_wtime() -t
3774 
3775  ! Set the parameters for dfftw
3776  fft_dim=1; istride=1; ostride=1;
3777  !JLG, FEB 4, 2011
3778 !!$ idist=N_r; inembed(1)=N_r; DIM(1)=N_r
3779 !!$ odist=m_max; onembed(1)=m_max
3780  idist=n_r_pad; inembed(1)=n_r_pad; dim(1)=n_r_pad
3781  odist=m_max_pad; onembed(1)=m_max_pad
3782  !JLG, FEB 4, 2011
3783 
3784  howmany=bloc_size*(nb_field+nb_field2/2)
3785 
3786 
3787  t = mpi_wtime()
3788  CALL dfftw_plan_many_dft_c2r(fftw_plan_multi_c2r, fft_dim, dim, howmany, cu, &
3789  onembed, ostride, odist, ru, inembed, istride, idist, fftw_estimate)
3790  !write(*,*) ' FFT_PAR_DOT_PROD: fftw_plan_multi_c2r', fftw_plan_multi_c2r
3791  CALL dfftw_execute(fftw_plan_multi_c2r)
3792  ! clean up
3793  CALL dfftw_destroy_plan(fftw_plan_multi_c2r)
3794 
3795  !===ru(:,1:3,:) is gradient(level_set)
3796  !===ru(:,4:6,:) is velocity
3797  !===ru(:,7,:) is level set
3798 
3799  !===Compute local velocity
3800  norm_vel(:,:) = sqrt(ru(:,4,:)**2 + ru(:,5,:)**2 + ru(:,6,:)**2)
3801  DO i = 1, 2*m_max_pad - 1
3802  DO l = 1, bloc_size/l_g
3803  x = maxval(norm_vel(i,(l-1)*l_g+1:l*l_g))
3804  norm_vel_int(i,l) = x
3805  END DO
3806  END DO
3807  IF (2*m_max_pad - 1 .GE. 3) THEN
3808  DO l = 1, bloc_size/l_g
3809  DO i = 2, 2*m_max_pad - 2
3810  norm_vel(i,(l-1)*l_g+1:l*l_g) = maxval(norm_vel_int(i-1:i+1,l))
3811  END DO
3812  norm_vel(1,(l-1)*l_g+1:l*l_g) = max(norm_vel_int(1,l),norm_vel_int(2,l),norm_vel_int(2*m_max_pad - 1,l))
3813  norm_vel(2*m_max_pad - 1,(l-1)*l_g+1:l*l_g) = &
3814  max(norm_vel_int(2*m_max_pad - 2,l),norm_vel_int(2*m_max_pad - 1,l),norm_vel_int(1,l))
3815  END DO
3816  ELSE
3817  DO l = 1, bloc_size/l_g
3818  norm_vel(1,(l-1)*l_g+1:l*l_g) = norm_vel_int(1,l)
3819  END DO
3820  END IF
3821  !===End compute local velocity
3822 
3823  !==Compute maximum of velocity
3824  max_norm_vel_loc=maxval(norm_vel)
3825  CALL mpi_allreduce(max_norm_vel_loc,max_norm_vel_loc_f,1,mpi_double_precision, mpi_max, communicator_f, code)
3826  CALL mpi_allreduce(max_norm_vel_loc_f,max_norm_vel_tot,1,mpi_double_precision, mpi_max, communicator_s, code)
3827  !==End compute maximum of velocity
3828 
3829  !===Compute |Grad(level_set)|
3830  norm_grad_phi(:,:) = sqrt(ru(:,1,:)**2 + ru(:,2,:)**2 + ru(:,3,:)**2)
3831 
3832  !===Compute compression term
3833 !JLG Sept 30 2016
3834  hh = inputs%h_min_distance
3835 !JLG 09/2016
3836 ! hh = user%ep
3837 !JLG 09/2016
3838  !hh = 2.d0*h_min !does not work well with small reg.
3839  max_norm_vel_tot=min(1.d0,max_norm_vel_tot)
3840  DO n = 1, bloc_size
3841 !TEST JLG LC 2016
3842  prod_ru(:,n) = inputs%level_set_comp_factor*4*inputs%LES_coeff4* &
3843  max_norm_vel_tot*(2.d0*regul_tab(ru(:,7,n),0.01d0)-1.d0)* &
3844  (max((1.d0 - (2*ru(:,7,n)-1.d0)**2),0.d0)/(2*hh)-norm_grad_phi(:,n))
3845 !TEST JLG LC 2016
3846  END DO
3847  !=End compute compression term
3848 
3849  howmany = bloc_size*1
3850  CALL dfftw_plan_many_dft_r2c(fftw_plan_multi_r2c, fft_dim, dim, howmany, prod_ru, &
3851  inembed, istride, idist, prod_cu, onembed, ostride, odist, fftw_estimate)
3852  !write(*,*) ' FFT_PAR_DOT_PROD: fftw_plan_multi_r2c', fftw_plan_multi_r2c
3853  CALL dfftw_execute(fftw_plan_multi_r2c)
3854  ! clean up
3855  CALL dfftw_destroy_plan(fftw_plan_multi_r2c)
3856 
3857  !JLG, FEB 4, 2011
3858 !!$ prod_cu = prod_cu/N_r !Scaling
3859  prod_cu = prod_cu*(1.d0/n_r_pad) !Scaling
3860  !JLG, FEB 4, 2011
3861  IF (PRESENT(temps)) temps(2) = temps(2) + mpi_wtime() -t
3862 
3863  !Now we need to redistribute the Fourier coefficients on each processor
3864  t = mpi_wtime()
3865  combined_prod_cu(:,1)=prod_cu(1,:)
3866  DO n=2, m_max
3867  combined_prod_cu(:,n)=2*conjg(prod_cu(n,:))
3868  END DO
3869 
3870  IF (PRESENT(temps)) temps(3) = temps(3) + mpi_wtime() -t
3871 
3872  t = mpi_wtime()
3873  longueur_tranche=bloc_size*m_max_c*2
3874  mpid=mpi_double_precision
3875  CALL mpi_alltoall (combined_prod_cu,longueur_tranche,mpid, dist_prod_cu,longueur_tranche, &
3876  mpid,communicator_f,code)
3877  IF (PRESENT(temps)) temps(1) = temps(1) + mpi_wtime() -t
3878 
3879  t = mpi_wtime()
3880  DO i = 1, m_max_c
3881  DO nb = 1, nb_procs
3882  shiftc = (nb-1)*bloc_size
3883  shiftl = (nb-1)*m_max_c
3884  intermediate = dist_prod_cu(:,shiftl+i)
3885  DO n = 1, bloc_size
3886  IF (n+shiftc > np ) cycle
3887  c_out(n+shiftc, 1, i) = REAL (intermediate(n),KIND=8)
3888  c_out(n+shiftc, 2 , i) = aimag(intermediate(n))
3889  END DO
3890  END DO
3891  END DO
3892  IF (PRESENT(temps)) temps(3) = temps(3) + mpi_wtime() -t
3893 
3894  END SUBROUTINE fft_compression_level_set_dcl
3895 
3896  SUBROUTINE fft_par_entro_visc_dcl(communicator, V1_in, c1_in, c_in_real, hloc_gauss, &
3897  coeff1_in_level, v_out, c_out, nb_procs, bloc_size, m_max_pad, temps)
3898  !This a de-aliased version of the code, FEB 4, 2011, JLG
3899  USE my_util
3900  USE input_data
3901  IMPLICIT NONE
3902  include 'fftw3.f'
3903  ! Format: V_1in(1:np,1:6,1:m_max_c)
3904  ! INPUT ARE COSINE AND SINE COEFFICIENTS
3905  ! THEY ARE PUT IN COMPLEX FORMAT: c_0 = a_0 + i*0 and c_n = (a_n-i*b_n)/2
3906  REAL(KIND=8), DIMENSION(:,:,:), INTENT(IN) :: V1_in !vector
3907  REAL(KIND=8), DIMENSION(:,:,:), INTENT(IN) :: c1_in !scalar
3908  REAL(KIND=8), DIMENSION(:,:), INTENT(IN) :: c_in_real
3909  REAL(KIND=8), DIMENSION(:), INTENT(IN) :: hloc_gauss
3910  REAL(KIND=8), INTENT(IN) :: coeff1_in_level
3911  REAL(KIND=8), DIMENSION(:,:,:), INTENT(OUT) :: V_out
3912  REAL(KIND=8), DIMENSION(:,:,:), INTENT(OUT) :: c_out
3913  REAL(KIND=8), DIMENSION(:), OPTIONAL, INTENT(INOUT) :: temps
3914  INTEGER, INTENT(IN) :: bloc_size, m_max_pad, nb_procs
3915  INTEGER :: np, np_tot, nb_field1, nb_field2
3916  INTEGER :: m_max, m_max_c, MPID, N_r_pad
3917  INTEGER(KIND=8) :: fftw_plan_multi_c2r, fftw_plan_multi_r2c
3918 
3919  REAL(KIND=8), DIMENSION(SIZE(V1_in,3),SIZE(V1_in,2)+SIZE(c1_in,2),bloc_size*nb_procs) :: dist_field
3920  REAL(KIND=8), DIMENSION(SIZE(V1_in,3),SIZE(V1_in,2)+SIZE(c1_in,2),bloc_size*nb_procs) :: combined_field
3921  COMPLEX(KIND=8), DIMENSION(m_max_pad, (SIZE(V1_in,2)+SIZE(c1_in,2))/2, bloc_size) :: cu
3922  REAL(KIND=8), DIMENSION(2*m_max_pad-1,(SIZE(V1_in,2)+SIZE(c1_in,2))/2,bloc_size) :: ru
3923  REAL(KIND=8), DIMENSION(bloc_size*nb_procs) :: hloc_gauss_tot
3924  REAL(KIND=8), DIMENSION(2*m_max_pad-1, bloc_size) :: visc_entro
3925  COMPLEX(KIND=8), DIMENSION(m_max_pad,SIZE(V1_in,2)/2,bloc_size) :: prod_cu
3926  REAL(KIND=8), DIMENSION(2*m_max_pad-1,SIZE(V1_in,2)/2,bloc_size) :: prod_ru
3927  COMPLEX(KIND=8), DIMENSION(SIZE(V1_in,2)/2, bloc_size) :: intermediate
3928  COMPLEX(KIND=8), DIMENSION(SIZE(V1_in,2)/2,bloc_size,SIZE(V1_in,3)*nb_procs) :: combined_prod_cu
3929  COMPLEX(KIND=8), DIMENSION(SIZE(V1_in,2)/2,bloc_size,SIZE(V1_in,3)*nb_procs) :: dist_prod_cu
3930  COMPLEX(KIND=8), DIMENSION(m_max_pad,bloc_size) :: prod_cu2
3931  REAL(KIND=8), DIMENSION(2*m_max_pad-1,bloc_size) :: prod_ru2
3932  COMPLEX(KIND=8), DIMENSION(bloc_size,SIZE(V1_in,3)*nb_procs) :: combined_prod_cu2
3933  COMPLEX(KIND=8), DIMENSION(bloc_size,SIZE(V1_in,3)*nb_procs) :: dist_prod_cu2
3934  COMPLEX(KIND=8), DIMENSION(bloc_size) :: intermediate2
3935  INTEGER :: i_field
3936  INTEGER :: nb, nf, shiftc, shiftl, jindex, longueur_tranche, i, n, code
3937  REAL(KIND=8) :: t
3938  INTEGER :: rank
3939 
3940  ! FFTW parameters
3941  INTEGER :: fft_dim, howmany, istride, ostride, idist, odist
3942  INTEGER, DIMENSION(1) :: dim, inembed, onembed
3943  ! Recall complexes must be rescaled
3944  ! End FFTW parameters
3945 !#include "petsc/finclude/petsc.h"
3946  mpi_comm :: communicator
3947 
3948  CALL mpi_comm_rank(communicator, rank, code)
3949 
3950  IF (PRESENT(temps)) temps = 0.d0
3951 
3952  np = SIZE(v1_in,1)
3953  nb_field1= SIZE(v1_in,2) ! Number of fields
3954  nb_field2= SIZE(c1_in,2) ! Number of fields
3955  m_max_c = SIZE(v1_in,3) ! Number of complex (cosines + sines) coefficients per point
3956  m_max = m_max_c*nb_procs! Number of comlex coefficients per point per processor
3957  n_r_pad = 2*m_max_pad-1
3958  np_tot = nb_procs*bloc_size
3959 
3960  IF (mod(nb_field1,2)/=0 .OR. mod(nb_field2,2)/=0 .OR. m_max_c==0) THEN
3961  WRITE(*,*) ' BUG '
3962  stop
3963  END IF
3964 
3965  ! Bloc_size is the number of points that are handled by one processor
3966  ! once the Fourier modes are all collected
3967  ! Computation of bloc_size and np_tot
3968  ! fin de la repartition des points
3969 
3970  ! Packing all 3 complex components of both v1 and v2 input fields
3971  ! into dist_field, where the dimension indexing the nodal points varies the least rapidly,
3972  ! so that after distributing the data to the processes, each one will obtain a part
3973  ! on nodal points
3974  ! TRANSPOSE pr que la variable i associee aux modes soit la 1ere sur laquelle on va faire la FFT
3975  t = mpi_wtime()
3976 
3977  DO i = 1, m_max_c
3978  dist_field(i,1:nb_field1,1:np) = transpose(v1_in(:,:,i))
3979  dist_field(i,nb_field1+1:nb_field1+nb_field2,1:np) = transpose(c1_in(:,:,i))
3980  END DO
3981  IF (PRESENT(temps)) temps(3) = temps(3) + mpi_wtime() -t
3982 
3983  IF (np/=np_tot) dist_field(:,:,np+1:np_tot) = 1.d100
3984 
3985  hloc_gauss_tot(1:np) = hloc_gauss
3986  IF (np/=np_tot) hloc_gauss_tot(np+1:np_tot) = 1.d100
3987 
3988  longueur_tranche=bloc_size*m_max_c*(nb_field1+nb_field2)
3989 
3990  t = mpi_wtime()
3991  mpid=mpi_double_precision
3992  CALL mpi_alltoall (dist_field,longueur_tranche, mpid, combined_field, longueur_tranche, &
3993  mpid, communicator, code)
3994  IF (PRESENT(temps)) temps(1) = temps(1) + mpi_wtime() -t
3995 
3996  t = mpi_wtime()
3997  !JLG, FEB 4, 2011
3998  cu = 0.d0
3999  !JLG, FEB 4, 2011
4000  DO n = 1, bloc_size
4001  DO nb = 1, nb_procs
4002  shiftc = (nb-1)*bloc_size
4003  shiftl = (nb-1)*m_max_c
4004  jindex = n + shiftc
4005  DO nf = 1, (nb_field1+nb_field2)/2
4006  ! Put real and imaginary parts in a complex
4007  ! nf=1,2,3 => V1_in
4008  ! nf=4 => V2_in
4009  ! INPUT ARE COSINE AND SINE COEFFICIENTS
4010  ! THEY ARE PUT IN COMPLEX FORMAT: c_0 = a_0 + i*0 and c_n = (a_n-i*b_n)/2
4011  cu(shiftl+1:shiftl+m_max_c,nf,n) = 0.5d0*cmplx(combined_field(:,2*nf-1,jindex),&
4012  -combined_field(:,2*nf,jindex),kind=8)
4013  END DO
4014  END DO
4015  END DO
4016  cu(1,:,:) = 2*cmplx(REAL(cu(1,:,:),KIND=8),0.d0,KIND=8)
4017  !JLG, FEB 4, 2011
4018  !Padding is done by initialization of cu: cu = 0
4019  !This is eequivalent to cu(m_max+1:m_max_pad,:,:) = 0.d0
4020  !JLG, FEB 4, 2011
4021 
4022  IF (PRESENT(temps)) temps(3) = temps(3) + mpi_wtime() -t
4023 
4024  ! Set the parameters for dfftw
4025  fft_dim=1; istride=1; ostride=1;
4026  !JLG, FEB 4, 2011
4027  !idist=N_r; inembed(1)=N_r; DIM(1)=N_r
4028  !odist=m_max; onembed(1)=m_max
4029  idist=n_r_pad; inembed(1)=n_r_pad; dim(1)=n_r_pad
4030  odist=m_max_pad; onembed(1)=m_max_pad
4031  !JLG, FEB 4, 2011
4032 
4033  howmany=bloc_size*(nb_field1+nb_field2)/2
4034 
4035  t = mpi_wtime()
4036  CALL dfftw_plan_many_dft_c2r(fftw_plan_multi_c2r, fft_dim, dim, howmany, cu, &
4037  onembed, ostride, odist, ru, inembed, istride, idist, fftw_estimate)
4038  !write(*,*) ' FFT_PAR_CROSS_PROD: fftw_plan_multi_c2r', fftw_plan_multi_c2r
4039  CALL dfftw_execute(fftw_plan_multi_c2r)
4040 
4041  ! clean up
4042  CALL dfftw_destroy_plan(fftw_plan_multi_c2r)
4043 
4044  IF (nb_field1 == 6 .AND. nb_field2 == 2) THEN
4045  !===ru(:,1:3,:) = h*Grad(phi)
4046  !===ru(:,4,:) = phi
4047  !===c_in_real = visc_entro(:,:)
4048 
4049  !===Compute MAX(0,phi*(1-phi))
4050  prod_ru2(:,:) = max(0.d0, ru(:,4,:)*(1.d0 - ru(:,4,:)))
4051 
4052  !===Compute visc_entro
4053  DO n = 1, bloc_size
4054  visc_entro(:,n) = -coeff1_in_level + c_in_real(:,n)
4055  END DO
4056 
4057  !===Compute (coeff1_in_level-visc_entro)*(h*Grad(phi))
4058  prod_ru(:,1,:) = -visc_entro*ru(:,1,:)
4059  prod_ru(:,2,:) = -visc_entro*ru(:,2,:)
4060  prod_ru(:,3,:) = -visc_entro*ru(:,3,:)
4061  ELSE
4062  CALL error_petsc('error in problem type while calling FFT_PAR_ENTRO_VISC_DCL')
4063  END IF
4064 
4065  howmany = bloc_size*nb_field1/2
4066  CALL dfftw_plan_many_dft_r2c(fftw_plan_multi_r2c, fft_dim, dim, howmany, prod_ru, &
4067  inembed, istride, idist, prod_cu, onembed, ostride, odist, fftw_estimate)
4068  CALL dfftw_execute(fftw_plan_multi_r2c)
4069  ! clean up
4070  CALL dfftw_destroy_plan(fftw_plan_multi_r2c)
4071 
4072  howmany = bloc_size*1
4073  CALL dfftw_plan_many_dft_r2c(fftw_plan_multi_r2c, fft_dim, dim, howmany, prod_ru2, &
4074  inembed, istride, idist, prod_cu2, onembed, ostride, odist, fftw_estimate)
4075  CALL dfftw_execute(fftw_plan_multi_r2c)
4076  ! clean up
4077  CALL dfftw_destroy_plan(fftw_plan_multi_r2c)
4078 
4079  !JLG, FEB 4, 2011
4080  !prod_cu = prod_cu/N_r !Scaling
4081  prod_cu = prod_cu*(1.d0/n_r_pad) !Scaling
4082  prod_cu2 = prod_cu2*(1.d0/n_r_pad) !Scaling
4083  !JLG, FEB 4, 2011
4084  IF (PRESENT(temps)) temps(2) = temps(2) + mpi_wtime() -t
4085 
4086  !Now we need to redistribute the Fourier coefficients on each processor
4087 
4088  t = mpi_wtime()
4089  combined_prod_cu(:,:,1)=prod_cu(1,:,:)
4090  combined_prod_cu2(:,1)=prod_cu2(1,:)
4091  DO n=2, m_max
4092  combined_prod_cu(:,:,n)=2*conjg(prod_cu(n,:,:))
4093  combined_prod_cu2(:,n)=2*conjg(prod_cu2(n,:))
4094  END DO
4095 
4096  IF (PRESENT(temps)) temps(3) = temps(3) + mpi_wtime() -t
4097 
4098  t = mpi_wtime()
4099  longueur_tranche=bloc_size*m_max_c*nb_field1 !vector
4100  mpid=mpi_double_precision
4101  CALL mpi_alltoall (combined_prod_cu,longueur_tranche,mpid, dist_prod_cu,longueur_tranche, &
4102  mpid,communicator,code)
4103 
4104  longueur_tranche=bloc_size*m_max_c*2 !scalar
4105  mpid=mpi_double_precision
4106  CALL mpi_alltoall (combined_prod_cu2,longueur_tranche,mpid, dist_prod_cu2,longueur_tranche, &
4107  mpid,communicator,code)
4108 
4109  IF (PRESENT(temps)) temps(1) = temps(1) + mpi_wtime() -t
4110  ! dimensions:
4111  t = mpi_wtime()
4112 
4113  DO i = 1, m_max_c
4114  DO nb = 1, nb_procs
4115  shiftc = (nb-1)*bloc_size
4116  shiftl = (nb-1)*m_max_c
4117  intermediate = dist_prod_cu(:,:,shiftl+i)
4118  intermediate2 = dist_prod_cu2(:,shiftl+i)
4119  DO n = 1, bloc_size
4120  IF (n+shiftc > np ) cycle
4121  DO i_field = 1, nb_field1/2
4122  v_out(n+shiftc, i_field*2-1, i) = REAL (intermediate(i_field,n),KIND=8)
4123  v_out(n+shiftc, i_field*2 , i) = aimag(intermediate(i_field,n))
4124  END DO
4125  c_out(n+shiftc, 1, i) = REAL (intermediate2(n),KIND=8)
4126  c_out(n+shiftc, 2, i) = aimag(intermediate2(n))
4127  END DO
4128  END DO
4129  END DO
4130 
4131  IF (PRESENT(temps)) temps(3) = temps(3) + mpi_wtime() -t
4132 
4133  END SUBROUTINE fft_par_entro_visc_dcl
4134 
4135  SUBROUTINE fft_par_scal_funct(communicator, c1_inout, funct, nb_procs, bloc_size, m_max_pad, temps)
4136  ! MODIFICATION: based on FFT_NO_OVERSHOOT_LEVEL_SET
4137  ! used to compute a scalar field which is a function of another one
4138  ! uses an user defined function of the condlim
4139  USE my_util
4140  USE input_data
4141  USE boundary
4142  IMPLICIT NONE
4143  include 'fftw3.f'
4144  ! Format: V_1in(1:np,1:6,1:m_max_c)
4145  ! INPUT ARE COSINE AND SINE COEFFICIENTS
4146  ! THEY ARE PUT IN COMPLEX FORMAT: c_0 = a_0 + i*0 and c_n = (a_n-i*b_n)/2
4147  REAL(KIND=8), DIMENSION(:,:,:), INTENT(INOUT) :: c1_inout
4148  INTERFACE
4149  FUNCTION funct(x) RESULT(vv)
4150  IMPLICIT NONE
4151  REAL(KIND=8) :: x
4152  REAL(KIND=8) :: vv
4153  END FUNCTION funct
4154  END INTERFACE
4155  INTEGER, INTENT(IN) :: nb_procs, bloc_size, m_max_pad
4156  REAL(KIND=8), DIMENSION(:), OPTIONAL, INTENT(INOUT) :: temps
4157  REAL(KIND=8), DIMENSION(SIZE(c1_inout,3),SIZE(c1_inout,2),bloc_size*nb_procs) :: dist_field
4158  REAL(KIND=8), DIMENSION(SIZE(c1_inout,3),SIZE(c1_inout,2),bloc_size*nb_procs) :: combined_field
4159  COMPLEX(KIND=8), DIMENSION(m_max_pad, SIZE(c1_inout,2)/2, bloc_size) :: cu
4160  REAL(KIND=8), DIMENSION(2*m_max_pad-1,SIZE(c1_inout,2)/2,bloc_size) :: ru
4161  REAL(KIND=8), DIMENSION(2*m_max_pad-1,bloc_size) :: prod_ru_1
4162  COMPLEX(KIND=8), DIMENSION(m_max_pad,bloc_size) :: prod_cu_1
4163  COMPLEX(KIND=8), DIMENSION(bloc_size,SIZE(c1_inout,3)*nb_procs) :: combined_prod_cu_1
4164  COMPLEX(KIND=8), DIMENSION(bloc_size,SIZE(c1_inout,3)*nb_procs) :: dist_prod_cu_1
4165  COMPLEX(KIND=8), DIMENSION(bloc_size) :: intermediate_1
4166  INTEGER :: np, np_tot, nb_field, m_max, m_max_c, MPID, N_r_pad
4167  INTEGER(KIND=8) :: fftw_plan_multi_c2r, fftw_plan_multi_r2c
4168  INTEGER :: nb, nf, shiftc, shiftl, jindex, longueur_tranche, i, n, code, rank
4169  REAL(KIND=8) :: t
4170  ! FFTW parameters
4171  INTEGER :: fft_dim, howmany, istride, ostride, idist, odist
4172  INTEGER, DIMENSION(1) :: dim, inembed, onembed
4173  ! Recall complexes must be rescaled
4174  ! End FFTW parameters
4175 !#include "petsc/finclude/petsc.h"
4176  mpi_comm :: communicator
4177 
4178  CALL mpi_comm_rank(communicator, rank, code)
4179 
4180  IF (PRESENT(temps)) temps = 0.d0
4181 
4182  np = SIZE(c1_inout,1)
4183  nb_field = SIZE(c1_inout,2) ! Number of fields for scalar
4184  m_max_c = SIZE(c1_inout,3) ! Number of complex (cosines + sines) coefficients per point
4185  m_max = m_max_c*nb_procs! Number of comlex coefficients per point per processor
4186  np_tot = nb_procs*bloc_size
4187  n_r_pad = 2*m_max_pad-1
4188 
4189  IF (mod(nb_field,2)/=0 .OR. m_max_c==0) THEN
4190  WRITE(*,*) ' BUG '
4191  stop
4192  END IF
4193 
4194  ! Packing all 3 complex components of both v1 and v2 input fields
4195  ! into dist_field, where the dimension indexing the nodal points varies the least rapidly,
4196  ! so that after distributing the data to the processes, each one will obtain a part
4197  ! on nodal points
4198  ! TRANSPOSE pr que la variable i associee aux modes soit la 1ere sur laquelle on va faire la FFT
4199  t = mpi_wtime()
4200 
4201  DO i = 1, m_max_c
4202  dist_field(i, 1:nb_field, 1:np) = transpose(c1_inout(:,:,i))
4203  END DO
4204  IF (PRESENT(temps)) temps(3) = temps(3) + mpi_wtime() -t
4205 
4206  IF (np/=np_tot) dist_field(:,:,np+1:np_tot) = 0.d0
4207 
4208  longueur_tranche=bloc_size*m_max_c*nb_field
4209 
4210  t = mpi_wtime()
4211  mpid=mpi_double_precision
4212  CALL mpi_alltoall (dist_field,longueur_tranche, mpid, combined_field, longueur_tranche, &
4213  mpid, communicator, code)
4214  IF (PRESENT(temps)) temps(1) = temps(1) + mpi_wtime() -t
4215 
4216  t = mpi_wtime()
4217  cu = 0.d0
4218  DO n = 1, bloc_size
4219  DO nb = 1, nb_procs
4220  shiftc = (nb-1)*bloc_size
4221  shiftl = (nb-1)*m_max_c
4222  jindex = n + shiftc
4223  DO nf = 1, nb_field/2
4224  ! Put real and imaginary parts in a complex
4225  ! nf=1,2,3 => V1_in
4226  ! nf=4,5,6 => V2_in
4227  ! INPUT ARE COSINE AND SINE COEFFICIENTS
4228  ! THEY ARE PUT IN COMPLEX FORMAT: c_0 = a_0 + i*0 and c_n = (a_n-i*b_n)/2
4229  cu(shiftl+1:shiftl+m_max_c,nf,n) = 0.5d0*cmplx(combined_field(:,2*nf-1,jindex),&
4230  -combined_field(:,2*nf,jindex),kind=8)
4231  END DO
4232  END DO
4233  END DO
4234  cu(1,:,:) = 2*cmplx(REAL(cu(1,:,:),KIND=8),0.d0,KIND=8)
4235  !Padding is done by initialization of cu: cu = 0
4236  !This is eequivalent to cu(m_max+1:m_max_pad,:,:) = 0.d0
4237 
4238  IF (PRESENT(temps)) temps(3) = temps(3) + mpi_wtime() -t
4239 
4240  ! Set the parameters for dfftw
4241  fft_dim=1; istride=1; ostride=1;
4242  idist=n_r_pad; inembed(1)=n_r_pad; dim(1)=n_r_pad
4243  odist=m_max_pad; onembed(1)=m_max_pad
4244 
4245  howmany=bloc_size*nb_field/2
4246 
4247  t = mpi_wtime()
4248  CALL dfftw_plan_many_dft_c2r(fftw_plan_multi_c2r, fft_dim, dim, howmany, cu, &
4249  onembed, ostride, odist, ru, inembed, istride, idist, fftw_estimate)
4250  CALL dfftw_execute(fftw_plan_multi_c2r)
4251 
4252  ! clean up
4253  CALL dfftw_destroy_plan(fftw_plan_multi_c2r)
4254 
4255  !===Computation in the physical space
4256  DO i = 1, 2*m_max_pad-1
4257  DO n = 1, bloc_size
4258  prod_ru_1(i,n) = funct(ru(i,1,n))
4259  END DO
4260  END DO
4261  !===End computation in the physical space
4262 
4263  howmany = bloc_size*1
4264  CALL dfftw_plan_many_dft_r2c(fftw_plan_multi_r2c, fft_dim, dim, howmany, prod_ru_1, &
4265  inembed, istride, idist, prod_cu_1, onembed, ostride, odist, fftw_estimate)
4266  CALL dfftw_execute(fftw_plan_multi_r2c)
4267  ! clean up
4268  CALL dfftw_destroy_plan(fftw_plan_multi_r2c)
4269 
4270  prod_cu_1 = prod_cu_1*(1.d0/n_r_pad) !Scaling
4271 
4272  IF (PRESENT(temps)) temps(2) = temps(2) + mpi_wtime() -t
4273 
4274  !Now we need to redistribute the Fourier coefficients on each processor
4275  t = mpi_wtime()
4276  combined_prod_cu_1(:,1)=prod_cu_1(1,:)
4277 
4278  DO n=2, m_max
4279  combined_prod_cu_1(:,n)=2*conjg(prod_cu_1(n,:))
4280  END DO
4281 
4282  IF (PRESENT(temps)) temps(3) = temps(3) + mpi_wtime() -t
4283 
4284  t = mpi_wtime()
4285  longueur_tranche=bloc_size*m_max_c*2
4286  mpid=mpi_double_precision
4287  CALL mpi_alltoall (combined_prod_cu_1,longueur_tranche,mpid, dist_prod_cu_1,longueur_tranche, &
4288  mpid,communicator,code)
4289  IF (PRESENT(temps)) temps(1) = temps(1) + mpi_wtime() -t
4290 
4291  t = mpi_wtime()
4292  DO i = 1, m_max_c
4293  DO nb = 1, nb_procs
4294  shiftc = (nb-1)*bloc_size
4295  shiftl = (nb-1)*m_max_c
4296  intermediate_1 = dist_prod_cu_1(:,shiftl+i)
4297  DO n = 1, bloc_size
4298  IF (n+shiftc > np ) cycle
4299  c1_inout(n+shiftc, 1, i) = REAL (intermediate_1(n),KIND=8)
4300  c1_inout(n+shiftc, 2, i) = aimag(intermediate_1(n))
4301  END DO
4302  END DO
4303  END DO
4304  IF (PRESENT(temps)) temps(3) = temps(3) + mpi_wtime() -t
4305 
4306  END SUBROUTINE fft_par_scal_funct
4307 
4308 END MODULE sft_parallele
subroutine, public fft_compute_entropy_visc(communicator, communicator_S, V1_in, V2_in, V3_in, V4_in, V5_in, hloc_gauss, V1_out, V2_out, V3_out, nb_procs, bloc_size, m_max_pad, residual_normalization, l_G, temps)
subroutine, public fft_par_compr_entro_visc_dcl(communicator, V1_in, V2_in, c1_in, c_in_real, hloc_gauss, coeff1_in_level, V_out, c_out, nb_procs, bloc_size, m_max_pad, temps)
subroutine, public fft_max_vel_dcl(communicator, V1_in, V_out, nb_procs, bloc_size, m_max_pad)
integer, public l_g
subroutine, public fft_par_var_eta_prod_t_dcl(communicator, eta, H_mesh, c_in, c_out, nb_procs, bloc_size, m_max_pad, time, temps)
subroutine, public fft_tensor_dcl(communicator, V1_in, V2_in, V_out, nb_procs, bloc_size, m_max_pad, temps, opt_tension)
subroutine, public fft_heaviside_dcl(communicator, V1_in, values, V_out, nb_procs, bloc_size, m_max_pad, coeff_tanh, temps)
subroutine, public fft_compute_entropy_visc_grad_mom(communicator, V1_in, V2_in, V3_in, c_in_real, V_out, nb_procs, bloc_size, m_max_pad, temps)
subroutine, public fft_scalar_vect_no_overshoot(communicator, scalar_bounds, V1_in, V2_in, V_out, pb, nb_procs, bloc_size, m_max_pad, temps)
subroutine, public fft_par_scal_funct(communicator, c1_inout, funct, nb_procs, bloc_size, m_max_pad, temps)
subroutine, public fft_no_overshoot_level_set(communicator, c1_inout, nb_procs, bloc_size, m_max_pad, temps)
subroutine, public fft_par_var_eta_prod_gauss_dcl(communicator, eta, H_mesh, c_in, c_out, nb_procs, bloc_size, m_max_pad, rr_gauss, time, temps)
subroutine error_petsc(string)
Definition: my_util.f90:16
type(my_data), public inputs
subroutine, public fft_par_cross_prod_dcl(communicator, V1_in, V2_in, V_out, nb_procs, bloc_size, m_max_pad, temps)
subroutine, public fft_check_interface(communicator, V1_in, nb_fluids, interface_ok, nb_procs, bloc_size, m_max_pad, temps)
subroutine, public fft_compute_diffu_mom(communicator, V1_in, V2_in, V3_in, V1_out, V2_out, nb_procs, bloc_size, m_max_pad, temps)
subroutine, public fft_par_prod_dcl(communicator, c1_in, c2_in, c_out, nb_procs, bloc_size, m_max_pad, temps)
real(kind=8) function, public regul(phi, eps)
subroutine, public fft_compute_entropy_visc_mom(communicator, communicator_S, V1_in, V2_in, V3_in, c1_in, hloc_gauss, c1_real_out, nb_procs, bloc_size, m_max_pad, l_G, opt_c2_real_out, temps)
subroutine, public fft_compression_level_set_dcl(communicator_F, communicator_S, V1_in, V2_in, c_in, c_out, hloc_gauss, l_G, nb_procs, bloc_size, m_max_pad, temps)
subroutine, public fft_scalar_vect_dcl(communicator, V1_in, V2_in, V_out, pb, nb_procs, bloc_size, m_max_pad, temps)
subroutine, public fft_par_entro_visc_dcl(communicator, V1_in, c1_in, c_in_real, hloc_gauss, coeff1_in_level, V_out, c_out, nb_procs, bloc_size, m_max_pad, temps)
real(kind=8) function, dimension(size(phi)) regul_tab(phi, eps)
subroutine, public fft_par_dot_prod_dcl(communicator, V1_in, V2_in, c_out, nb_procs, bloc_size, m_max_pad, temps)
subroutine, public fft_par_real(communicator, V1_in, V_out, opt_nb_plane)