! scddns -- Copyright 2022 Paolo Luchini and Maurizio Quadrio
! http://CPLcode.net/Applications/FluidMechanics/Spectral-CD-DNS

!( Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:

The above copyright notice and this permission notice shall be included in all
copies or substantial portions of the Software.

THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
SOFTWARE. !)

USE complex
USE rbmat
! USE rtchecks

! Read discretization and simulation parameters from initial field
FILE data=OPENRO[IF COMMANDLINE.HI>0 THEN COMMANDLINE(1) ELSE "scddns0.dat"]
INTEGER CONSTANT nx,ny,nz
REAL CONSTANT alpha0,beta0,htcoeff,zmin=0,zmax=2,t_max,dt_save
REAL Re, nu, meanpx=0, meanpy=0, meanflowx=0, meanflowy=0, deltat, time=0
STRING Vfield
READ BY NAME FROM data nx,ny,nz,alpha0,beta0,htcoeff,Re; nu=1/Re
READ BY NAME FROM data deltat, t_max, dt_save
LOOP UNTIL EOF(data) OR READ BY NAME FROM data Vfield
  IF NOT READ BY NAME FROM data meanpx OR meanflowx OR meanpy OR meanflowy THEN
    READ FROM data
  END IF
REPEAT
! Define wall-normal mesh
! Alternative Options for uniform or channel-type with two walls and finer spacing near the walls
REAL z(-1..nz+1)
!DO z(i)=zmin+(zmax-zmin)*i/nz FOR ALL i ! Uniform mesh
DO z(i)=zmin+0.5*(zmax-zmin)*{tanh[htcoeff*(2*i/nz-1)]/tanh(htcoeff)+1} FOR ALL i ! Channel with two walls

! USE serialconvolutions
USE mpiconvolutions
IF have00 THEN WRITE BY NAME nx,ny,nz,2*PI/alpha0,2*PI/beta0,Re
IF have00 THEN WRITE BY NAME deltat,t_max,dt_save

! Generate random initial field in case none is provided 
LOOP FOR ALL ix EXCEPT ix=0
  DO WITH V(ix,iy,iz)
    eps=0.002; u=~+eps*EXP[RAND()*2*PI*I]; v=~+eps*EXP[RAND()*2*PI*I]; w=~+eps*EXP[RAND()*2*PI*I]
  FOR ALL iy AND iz=1 TO nz-1
REPEAT
IF have00 THEN DO V(0,0,iz).u.REAL=1.2*z(iz)^(1/7); V(0,0,nz-iz).u.REAL=V(0,0,iz).u.REAL FOR iz=0 TO nz DIV 2

! Read the initial velocity field if present
IF Vfield#NULL STRING THEN
  INTEGER oldnx=nx, oldny=ny, oldnz=nz
  STRING oldVfield
  IF Vfield#"" THEN
    data = OPENRO(Vfield)
    WRITE "Reading from "Vfield
    READ FROM data "nx="oldnx,"ny="oldny,"nz="oldnz
    LOOP UNTIL READ BY NAME FROM data "Vfield="oldVfield
      READ FROM data
    REPEAT
  END IF
  POINTER TO STORED STRUCTURE[CHAR skip[1..POSITION(data)]; ARRAY(0..oldnx,-oldny..oldny,-1..oldnz+1) OF VELOCITY oldV] image=data
  LOOP FOR ix=MAX(V.LO1,image.oldV.LO1) TO MIN(V.HI1,image.oldV.HI1) AND iy=MAX(V.LO2,image.oldV.LO2) TO MIN(V.HI2,image.oldV.HI2)
    DO V(ix,iy,iz)=image.oldV[ix,iy,-1+(oldnz+2)*(iz+1)DIV(nz+2)] FOR ALL iz
  REPEAT LOOP
END IF
CLOSE data

STRUCTURE[ARRAY(-2..2) OF REAL d0,d1,d2,d4] derivatives(1..nz-1)
ARRAY(-2..2) OF REAL d040,d140,d14m1, d04n,d14n,d14np1
MODULE setup_derivatives
  REAL M(0..4,0..4),t(0..4)
  LOOP FOR iz=1 TO nz-1 WITH derivatives(iz)
    DO M(i,j)=(z(iz-2+j)-z(iz))**(4-i) FOR ALL i,j; LUdecomp M
    t=0; t(0)=24
    d4(-2+(*))=M\t
    DO M(i,j)=(5-i)*(6-i)*(7-i)*(8-i)*(z(iz-2+j)-z(iz))**(4-i) FOR ALL i,j; LUdecomp M
    DO t(i)=SUM {d4(j)*(z(iz+j)-z(iz))**(8-i)} FOR ALL j FOR ALL i
    d0(-2+(*))=M\t
    DO M(i,j)=(z(iz-2+j)-z(iz))**(4-i) FOR ALL i,j; LUdecomp M
    t=0; DO t(i)=SUM d0(j)*(4-i)*(3-i)*(z(iz+j)-z(iz))**(2-i) FOR ALL j FOR i=0 TO 2
    d2(-2+(*))=M\t
    t=0; DO t(i)=SUM d0(j)*(4-i)*(z(iz+j)-z(iz))**(3-i) FOR ALL j FOR i=0 TO 3
    d1(-2+(*))=M\t
  REPEAT
  DO M(i,j)=(z(-1+j)-z(0))**(4-i) FOR ALL i,j; LUdecomp M
  t=0; t(3)=1; d140(-2+(*))=M\t
  DO M(i,j)=(z(-1+j)-z(-1))**(4-i) FOR ALL i,j; LUdecomp M
  t=0; t(3)=1; d14m1(-2+(*))=M\t
  d04n=0; d04n(1)=1; d040=0; d040(-1)=1
  DO M(i,j)=(z(nz-3+j)-z(nz))**(4-i) FOR ALL i,j; LUdecomp M
  t=0; t(3)=1; d14n(-2+(*))=M\t
  DO M(i,j)=(z(nz-3+j)-z(nz+1))**(4-i) FOR ALL i,j; LUdecomp M
  t=0; t(3)=1; d14np1(-2+(*))=M\t
END setup_derivatives

INLINE REAL FUNCTION D0(REAL f(*)) = d0(-2)*f(-2)+d0(-1)*f(-1)+d0(0)*f(0)+d0(1)*f(1)+d0(2)*f(2)
INLINE REAL FUNCTION D1(REAL f(*)) = d1(-2)*f(-2)+d1(-1)*f(-1)+d1(0)*f(0)+d1(1)*f(1)+d1(2)*f(2)
INLINE REAL FUNCTION D2(REAL f(*)) = d2(-2)*f(-2)+d2(-1)*f(-1)+d2(0)*f(0)+d2(1)*f(1)+d2(2)*f(2)
INLINE REAL FUNCTION D4(REAL f(*)) = d4(-2)*f(-2)+d4(-1)*f(-1)+d4(0)*f(0)+d4(1)*f(1)+d4(2)*f(2)
INLINE COMPLEX FUNCTION D0(COMPLEX f(*))=D0(f.REAL)+I*D0(f.IMAG)
INLINE COMPLEX FUNCTION D1(COMPLEX f(*))=D1(f.REAL)+I*D1(f.IMAG)
INLINE COMPLEX FUNCTION D2(COMPLEX f(*))=D2(f.REAL)+I*D2(f.IMAG)
INLINE COMPLEX FUNCTION D4(COMPLEX f(*))=D4(f.REAL)+I*D4(f.IMAG)

maxtimelevels=1
RHSTYPE=ARRAY(V.LO1..V.HI1,V.LO2..V.HI2) OF STRUCTURE(COMPLEX zeta,D2w)
ARRAY(-2..0) OF POINTER TO RHSTYPE newrhs
DO newrhs(i) = NEW RHSTYPE FOR ALL i
ARRAY(maxtimelevels,1..nz-1) OF RHSTYPE oldrhs=0

INLINE FUNCTION OS(INTEGER iz,i)=nu*[d4(i)-2*k2*d2(i)+k2*k2*d0(i)]
INLINE FUNCTION SQ(INTEGER iz,i)=nu*[d2(i)-k2*d0(i)]
SUBROUTINE buildrhs[SUBROUTINE(COMPLEX rhs^,old^(*),unknown,implicit_part,explicit_part) timescheme]
  VVstart=-3
  convolutions(-1)
  LOOP FOR iz=-2 TO nz-1
    IF iz+2-VVstart>VV.HI3 THEN
      VV(*,*,-2..1)=VV[*,*,nprocy+(-2..1)]
      VVstart=iz
      convolutions(iz+2)
    END IF
    EXCEPT iz<1
    WITH derivatives(iz) LOOP FOR ALL ix,iy
      ialpha=I*alpha0*ix; ibeta=I*beta0*iy
      k2=(alpha0*ix)**2+(beta0*iy)**2
      WITH VV[ix,iy,iz-VVstart+(-2..2)], V[ix,iy,iz+(-2..2)]:
      rhsu=-ialpha*D0(uu)-ibeta*D0(uv)-D1(uw)
      rhsv=-ialpha*D0(uv)-ibeta*D0(vv)-D1(vw)
      rhsw=-ialpha*D0(uw)-ibeta*D0(vw)-D1(ww)
      D2wimpl = SUM OS(iz,i)*w(i) FOR i=-2 TO 2
      timescheme{newrhs(0,ix,iy).D2w,oldrhs(*,iz,ix,iy).D2w,D2(w)-k2*D0(w),
                 D2wimpl,
                 ialpha*[ialpha*D1(uu)+ibeta*D1(uv)+D2(uw)]+
                 ibeta*[ialpha*D1(uv)+ibeta*D1(vv)+D2(vw)]-k2*rhsw}
      IF ix=0 AND iy=0 THEN
        ! u media conservata in zeta.REAL e v media in zeta.IMAG
        timescheme{newrhs(0,0,0).zeta,oldrhs(*,iz,0,0).zeta,D0(u.REAL)+D0(v.REAL)*I,
                   nu*D2(u.REAL)+nu*D2(v.REAL)*I,
                   rhsu.REAL+meanpx+[rhsv.REAL+meanpy]*I}
      ELSE
        zetaimpl=SUM SQ(iz,i)*[ibeta*u(i)-ialpha*v(i)] FOR i=-2 TO 2
        timescheme{newrhs(0,ix,iy).zeta,oldrhs(*,iz,ix,iy).zeta,ibeta*D0(u)-ialpha*D0(v),
          zetaimpl,
          ibeta*rhsu-ialpha*rhsv}
      END IF
      V(ix,iy,iz-2).u=newrhs(-2,ix,iy).zeta; V(ix,iy,iz-2).w=newrhs(-2,ix,iy).D2w
    REPEAT LOOP
    temp=newrhs(-2); newrhs(-2)=newrhs(-1); newrhs(-1)=newrhs(0) ;newrhs(0)=temp
  REPEAT LOOP
  DO V(ix,iy,nz+i).u=newrhs(i,ix,iy).zeta; V(ix,iy,nz+i).w=newrhs(i,ix,iy).D2w FOR ALL ix,iy AND i=-2 TO -1
END buildrhs

SUBROUTINE bcLUdecomp[POINTER TO ARRAY(-1..nz+1,-2..2) OF REAL A]
  A(0,-1..HI)=~-A(0,-2)/A(-1,-2)*A(-1,-1..HI)
  A(1,-1..HI)=~-A(1,-2)/A(-1,-2)*A(-1,-1..HI)
  A(1,0..HI)=~-A(1,-1)/A(0,-1)*A(0,0..HI)
  A[2,-1+(0..HI)]=~-A(2,-2)/A(0,-1)*A(0,0..HI)
  A(nz,LO..1)=~-A(nz,2)/A(nz+1,2)*A(nz+1,LO..1)
  A(nz-1,LO..1)=~-A(nz-1,2)/A(nz+1,2)*A(nz+1,LO..1)
  A(nz-1,LO..0)=~-A(nz-1,1)/A(nz,1)*A(nz,LO..0)
  A[nz-2,1+(LO..0)]=~-A(nz-2,2)/A(nz,1)*A(nz,LO..0)
  LUdecomp A(1..nz-1)
END bcLUdecomp

SUBROUTINE bcLeftDiv[POINTER TO ARRAY(*) OF REAL x; ARRAY(-1..nz+1,-2..2) OF REAL A]
  x(0)=~-A(0,-2)/A(-1,-2)*x(-1)
  x(1)=~-A(1,-2)/A(-1,-2)*x(-1)
  x(1)=~-A(1,-1)/A(0,-1)*x(0)
  x(2)=~-A(2,-2)/A(0,-1)*x(0)
  x(nz)=~-A(nz,2)/A(nz+1,2)*x(nz+1)
  x(nz-1)=~-A(nz-1,2)/A(nz+1,2)*x(nz+1)
  x(nz-1)=~-A(nz-1,1)/A(nz,1)*x(nz)
  x(nz-2)=~-A(nz-2,2)/A(nz,1)*x(nz)
  x(1..nz-1)=A(1..nz-1)\~
  x(0)={~-A(0,0..2)*x[1+(0..2)]}/A(0,-1)
  x(-1)={~-A(-1,-1..2)*x[1+(-1..2)]}/A(-1,-2)
  x(nz)={~-A(nz,-2..0)*x[nz-1+(-2..0)]}/A(nz,1)
  x(nz+1)={~-A(nz+1,-2..1)*x[nz-1+(-2..1)]}/A(nz+1,2)
END bcLeftDiv

ARRAY(-1..nz+1,-2..2) OF REAL D0mat, zetamat, D2wmat
D0mat(1..nz-1)=derivatives.d0
D0mat(-1)=0; D0mat(-1,-2)=1; D0mat(0)=0; D0mat(0,-1)=1
D0mat(nz)=0; D0mat(nz,1)=1; D0mat(nz+1)=0; D0mat(nz+1,2)=1
bcLUdecomp D0mat

REAL FUNCTION zintegr(REAL f(*))
  RESULT=0
  LOOP FOR iz=0 TO nz BY 2 
    zp1=z(iz+1)-z(iz); zm1=z(iz-1)-z(iz) 
    a1=-1/3*zm1+1/6*zp1+1/6*zp1*zp1/zm1
    a3=+1/3*zp1-1/6*zm1-1/6*zm1*zm1/zp1
    a2=zp1-zm1-a1-a3
    RESULT=~+a1*f(iz-1) + a2*f(iz) + a3*f(iz+1)    
  REPEAT
END zintegr

SUBROUTINE deriv(ARRAY(*) OF REAL f0,f1^)
  f1(0)=SUM d140(i)*f0(1+i) FOR i=-2 TO 2
  f1(-1)=SUM d14m1(i)*f0(1+i) FOR i=-2 TO 2
  f1(nz)=SUM d14n(i)*f0(nz-1+i) FOR i=-2 TO 2
  f1(nz+1)=SUM d14np1(i)*f0(nz-1+i) FOR i=-2 TO 2
  DO WITH derivatives(i) f1(i)=D1(f0(i+(*))) FOR i=1 TO nz-1
  bcLeftDiv(f1,D0mat)
END deriv

SUBROUTINE linsolve(REAL lambda)
  LOOP FOR ALL ix,iy
    ialpha=I*alpha0*ix; ibeta=I*beta0*iy
    k2=(alpha0*ix)**2+(beta0*iy)**2
    LOOP FOR iz=1 TO nz-1 AND ALL i WITH derivatives(iz)
      D2wmat(iz,i)=lambda*[d2(i)-k2*d0(i)]-OS(iz,i)
      zetamat(iz,i)=lambda*d0(i)-SQ(iz,i) 
    REPEAT
    D2wmat(0)=d040; D2wmat(-1)=d140
    D2wmat(nz)=d04n; D2wmat(nz+1)=d14n
    zetamat(0)=d040; zetamat(-1)=derivatives(1).d4
    zetamat(nz)=d04n; zetamat(nz+1)=derivatives(nz-1).d4
    bcLUdecomp D2wmat; bcLUdecomp zetamat
    WITH V(ix,iy,*):
    w(0)=0; w(-1)=0; w(nz)=0; w(nz+1)=0
    bcLeftDiv(w.REAL,D2wmat); bcLeftDiv(w.IMAG,D2wmat)
    u(0)=0; u(-1)=0; u(nz)=0; u(nz+1)=0
    bcLeftDiv(u.REAL,zetamat); bcLeftDiv(u.IMAG,zetamat)
    IF ix=0 AND iy=0 THEN
      v=u.IMAG; u.IMAG=0
      IF ABS(meanflowx)>1E-10 OR ABS(meanflowy)>1E-10 THEN
        REAL ucor(-1..nz+1); DO ucor(iz)=1 FOR iz=1 TO nz-1
        ucor(0)=0; ucor(-1)=0; ucor(nz)=0; ucor(nz+1)=0
        bcLeftDiv(ucor,zetamat)
        IF ABS(meanflowx)>1E-10 THEN u.REAL=~+(meanflowx-zintegr(u.REAL))/zintegr(ucor)*ucor
        IF ABS(meanflowy)>1E-10 THEN v.REAL=~+(meanflowy-zintegr(v.REAL))/zintegr(ucor)*ucor
      END IF
    ELSE
      deriv(w.REAL,v.REAL); deriv(w.IMAG,v.IMAG)
      DO temp=(ialpha*v(iz)-ibeta*u(iz))/k2
        v(iz)=(ibeta*v(iz)+ialpha*u(iz))/k2 
        u(iz)=temp
      FOR iz=-1 TO nz+1
    END IF
  REPEAT
END linsolve

SUBROUTINE RK1_rai(COMPLEX rhs^,old^(*),unkn,impl,expl)
  rhs=120/32/deltat*unkn+impl+2*expl
  old(1)=expl
END RK1_rai
CONSTANT REAL RK1_rai_coeff=120/32

SUBROUTINE RK2_rai(COMPLEX rhs^,old^(*),unkn,impl,expl)
  rhs=120/(8*deltat)*unkn+impl+50/8*expl-34/8*old(1)
  old(1)=expl
END RK2_rai
CONSTANT REAL RK2_rai_coeff=120/8

SUBROUTINE RK3_rai(COMPLEX rhs^,old^(*),unkn,impl,expl)
  rhs=120/(20*deltat)*unkn+impl+90/20*expl-50/20*old(1)
END RK3_rai
CONSTANT REAL RK3_rai_coeff=120/20          


! Main time-integration loop
INTEGER count=0
LOOP timeloop WHILE time < t_max-deltat/2
  ! run three Runge-Kutta substeps
  buildrhs(RK1_rai); linsolve(RK1_rai_coeff/deltat)
  time=~+2/RK1_rai_coeff*deltat
  buildrhs(RK2_rai); linsolve(RK2_rai_coeff/deltat)
  time=~+2/RK2_rai_coeff*deltat
  buildrhs(RK3_rai); linsolve(RK3_rai_coeff/deltat)
  time=~+2/RK3_rai_coeff*deltat
  
  IF FLOOR[(time+deltat/2)/dt_save] > FLOOR[(time-deltat/2)/dt_save] THEN
    ! Save the velocity file for later post-processing and/or restart
    INC count
    LOOP FOR i=0 TO totnproc-1
      ! this is here to keep different processes from simultaneous writing;
      ! whether simultaneous writing is allowed depends on the filesystem.
      IF i>0 THEN MPI_Barrier(MPI_COMM_WORLD)
      IF myiproc=i THEN
        field = OPEN("scddns"count".dat")
        WRITE TO field "nx="nx,"ny="ny,"nz="nz,"alpha0="alpha0,"beta0="beta0,"htcoeff="htcoeff,"Re="1/nu
        WRITE BY NAME TO field deltat, t_max, dt_save
        WRITE BY NAME TO field meanpx,meanflowx,meanpy,meanflowy
        WRITE TO field "Vfield="
        POINTER TO STORED STRUCTURE[CHAR skip[1..POSITION(field)]; ARRAY(0..nx,-ny..ny,-1..nz+1) OF VELOCITY saveV] image=field
        image.saveV(V.LO1..V.HI1,V.LO2..V.HI2)=V
        CLOSE field
      END IF
    REPEAT
  END IF
  ! Minimalistic runtime output to screen: 
  ! friction at the two walls, pressure gradient and bulk velocity
  IF have00 THEN WRITE time,nu*SUM d140(i)*V(0,0,1+i).u.REAL FOR i=-2 TO 2, 
  -nu*SUM d14n(i)*V(0,0,nz-1+i).u.REAL FOR i=-2 TO 2,
  meanpx, zintegr(V(0,0,*).u.REAL)/2
REPEAT timeloop