USE parallel
INTEGER iproc,nproc
IF COMMANDLINE.HI=0 THEN iproc=1; nproc=1 ELSE
  iproc=atoi(COMMANDLINE(1)); nproc=atoi(COMMANDLINE(2))
END IF
baseport=IPPORT_USERRESERVED+257
FILE prev,next
IF iproc<nproc THEN
  next=TCPSERVER(baseport+iproc)
  setvbuf(next,NULL,_IOFBF,8192)
END IF
IF iproc>1 THEN
  prev=TCPCLIENT(COMMANDLINE(3),baseport+iproc-1)
  setvbuf(prev,NULL,_IOFBF,8192)
END IF
first==(prev=NULL FILE); last==(next=NULL FILE); has_terminal==last
nsmpmax=8
REAL Reynolds,shift

INTEGER FUNCTION nxl(INTEGER nx)=CEILING[(iproc-1)∗(nx-1)/nproc]-1
INTEGER FUNCTION nxh(INTEGER nx)=CEILING[iproc∗(nx-1)/nproc]+1

USE symbolic
domsize=1

VARS=STRUCTURED ARRAY(p,u,v,w) OF REAL
VFIELD=ARRAY(∗,∗,∗) OF VARS

XPDIR=4; XNDIR=8

SUBROUTINE updatenbrs(VFIELD f^; INTEGER parity)
IF iproc BITAND 1 =0 OR f.LENGTH<=4 THEN
  IF NOT first THEN READ BINARY FROM prev f.u(LO)
  IF parity>1 THEN
    IF NOT first AND parity BITAND XPDIR=0 THEN READ BINARY FROM prev f(LO+1)
    IF NOT first AND parity BITAND XPDIR#0 THEN READ BINARY FROM prev f(LO+1).p,f(LO+1).v,f(LO+1).w
  ELSE
    IF NOT first THEN READ BINARY FROM prev f.v(LO+1),f.w(LO+1)
    IF NOT first THEN DO READ BINARY FROM prev f.p(LO+1,iy,iz),f.u(LO+1,iy,iz)
      FOR iy=1 TO f.HI2-1 AND iz=2-[(f.LO1+1+iy+parity) BITAND 1] TO f.HI3-1 BY 2
  END IF
  IF NOT last THEN WRITE BINARY TO next f.u(HI-2)
  IF parity>1 THEN
    IF NOT last AND parity BITAND XNDIR=0 THEN WRITE BINARY TO next f(HI-1); FLUSH next
    IF NOT last AND parity BITAND XNDIR#0 THEN WRITE BINARY TO next f(HI-1).p,f(HI-1).v,f(HI-1).w; FLUSH next
    IF NOT last AND parity BITAND XNDIR#0 THEN READ BINARY FROM next f(HI-1).u
    IF NOT last THEN READ BINARY FROM next f(HI)
    IF NOT first AND parity BITAND XPDIR#0 THEN WRITE BINARY TO prev f(LO+1).u
    IF NOT first THEN WRITE BINARY TO prev f(LO+2); FLUSH prev
  ELSE
    IF NOT last THEN WRITE BINARY TO next f.v(HI-1),f.w(HI-1)
    IF NOT last THEN DO WRITE BINARY TO next f.p(HI-1,iy,iz),f.u(HI-1,iy,iz)
      FOR iy=1 TO f.HI2-1 AND iz=2-[(f.HI1-1+iy+parity) BITAND 1] TO f.HI3-1 BY 2; FLUSH next
    IF NOT last THEN DO READ BINARY FROM next f.u(HI-1,iy,iz),f.p(HI,iy,iz)
      FOR iy=1 TO f.HI2-1 AND iz=2-[(f.HI1+iy+parity) BITAND 1] TO f.HI3-1 BY 2
    IF NOT last THEN READ BINARY FROM next f(HI,∗,∗,1..3)
    IF NOT first THEN DO WRITE BINARY TO prev f.u(LO+1,iy,iz),f.p(LO+2,iy,iz)
      FOR iy=1 TO f.HI2-1 AND iz=2-[(f.LO1+iy+parity) BITAND 1] TO f.HI3-1 BY 2
    IF NOT first THEN WRITE BINARY TO prev f(LO+2,∗,∗,1..3); FLUSH prev
  END IF
ELSE
  IF NOT last THEN WRITE BINARY TO next f.u(HI-2)
  IF parity>1 THEN
    IF NOT last AND parity BITAND XNDIR=0 THEN WRITE BINARY TO next f(HI-1); FLUSH next
    IF NOT last AND parity BITAND XNDIR#0 THEN WRITE BINARY TO next f(HI-1).p,f(HI-1).v,f(HI-1).w; FLUSH next
  ELSE
    IF NOT last THEN WRITE BINARY TO next f.v(HI-1),f.w(HI-1)
    IF NOT last THEN DO WRITE BINARY TO next f.p(HI-1,iy,iz),f.u(HI-1,iy,iz)
      FOR iy=1 TO f.HI2-1 AND iz=2-[(f.HI1-1+iy+parity) BITAND 1] TO f.HI3-1 BY 2; FLUSH next
  END IF
  IF NOT first THEN READ BINARY FROM prev f.u(LO)
  IF parity>1 THEN
    IF NOT first AND parity BITAND XPDIR=0 THEN READ BINARY FROM prev f(LO+1)
    IF NOT first AND parity BITAND XPDIR#0 THEN READ BINARY FROM prev f(LO+1).p,f(LO+1).v,f(LO+1).w
    IF NOT first AND parity BITAND XPDIR#0 THEN WRITE BINARY TO prev f(LO+1).u
    IF NOT first THEN WRITE BINARY TO prev f(LO+2); FLUSH prev
    IF NOT last AND  parity BITAND XNDIR#0 THEN READ BINARY FROM next f(HI-1).u
    IF NOT last THEN READ BINARY FROM next f(HI)
  ELSE
    IF NOT first THEN READ BINARY FROM prev f.v(LO+1),f.w(LO+1)
    IF NOT first THEN DO READ BINARY FROM prev f.p(LO+1,iy,iz),f.u(LO+1,iy,iz)
      FOR iy=1 TO f.HI2-1 AND iz=2-[(f.LO1+1+iy+parity) BITAND 1] TO f.HI3-1 BY 2
    IF NOT first THEN DO WRITE BINARY TO prev f.u(LO+1,iy,iz),f.p(LO+2,iy,iz)
      FOR iy=1 TO f.HI2-1 AND iz=2-[(f.LO1+iy+parity) BITAND 1] TO f.HI3-1 BY 2
    IF NOT first THEN WRITE BINARY TO prev f(LO+2,∗,∗,1..3); FLUSH prev
    IF NOT last THEN DO READ BINARY FROM next f.u(HI-1,iy,iz),f.p(HI,iy,iz)
      FOR iy=1 TO f.HI2-1 AND iz=2-[(f.HI1+iy+parity) BITAND 1] TO f.HI3-1 BY 2
    IF NOT last THEN READ BINARY FROM next f(HI,∗,∗,1..3)
  END IF
END IF
END updatenbrs

SUBROUTINE accumulate(REAL acc^; INTEGER nx,ny,nz)
  REAL r
  IF NOT first THEN READ BINARY FROM prev r; acc=~+r
  IF last THEN acc=acc/nx/ny/nz ELSE
    WRITE BINARY TO next acc; FLUSH next; READ BINARY FROM next acc
  END IF  
  IF NOT first THEN WRITE BINARY TO prev acc; FLUSH prev
END accumulate

REAL FUNCTION scp(VFIELD f1,f2)
  IF f1.HI-1<f1.LO+2 THEN RESULT=0 ELSE
  !  RESULT=f1(LO+2..HI-1,LO+2..HI-1,LO+2..HI-1,1..3)|f2(LO+2..HI-1,LO+2..HI-1,LO+2..HI-1,1..3)
    nsmp=MIN(nsmpmax,CEILING(f1.HI2/8))
    SHARED REAL sum(0..nsmp-1)=0
    IF f1.HI-1>=f1.LO+2 THEN PARALLEL LOOP FOR ismp=0 TO nsmp-1
      nyl=ROUND[ismp/nsmp∗(f1.HI2-1)]+1; nyh=ROUND[(ismp+1)/nsmp∗(f1.HI2-1)]
      sum(ismp)=f1(LO+2..HI-1,nyl..nyh,LO+2..HI-1)|f2(LO+2..HI-1,nyl..nyh,LO+2..HI-1)
    REPEAT
    RESULT=SUM sum(ismp) FOR ALL ismp
  END IF
  accumulate(RESULT,f1.HI1,f1.HI2,f1.HI3)
END scp

REAL FUNCTION abs(VFIELD ff)=SQRT(scp(ff,ff))

REAL FUNCTION abs(ARRAY(∗,∗,∗) OF REAL ff) WITH ff
  RESULT=NORM(ff(LO+2..HI-1,∗,∗))
  accumulate(RESULT,ff.HI1,ff.HI2,ff.HI3)
  RESULT=SQRT(RESULT)
END abs

relax2=1
cnt==[u(-1,0,0)-u(0,0,0)+v(0,-1,0)-v(0,0,0)+w(0,0,-1)-w(0,0,0)]/dx
! #define limiter(V) 1-V
#define limiter(V) IF V>2 THEN 0 ELSE IF V<-2 THEN -2∗V ELSE 1-V∗(1-V/4)
! #define limiter(V) IF V>1 THEN 0 ELSE IF V<-1 THEN -2∗V ELSE 1-V
! #define limiter(V) IF V>0 THEN 1 ELSE 1-2∗V
!(
thr==0.5∗shift/udxx
#define limiter(V) IF V>1+thr THEN -thr ELSE IF V<-1-thr THEN -2∗V-thr ELSE 1-V
!)
upu==Rdxq∗[b.u(0,0,0)+b.u(1,0,0)]
umu==-Rdxq∗[b.u(-1,0,0)+b.u(0,0,0)]
vpu==Rdxq∗[b.v(0,0,0)+b.v(1,0,0)]
vmu==-Rdxq∗[b.v(0,-1,0)+b.v(1,-1,0)]
wpu==Rdxq∗[b.w(0,0,0)+b.w(1,0,0)]
wmu==-Rdxq∗[b.w(0,0,-1)+b.w(1,0,-1)]
cupu==limiter(upu)∗udxx
cumu==limiter(umu)∗udxx
cvpu==limiter(vpu)∗udxx
cvmu==limiter(vmu)∗udxx
cwpu==limiter(wpu)∗udxx
cwmu==limiter(wmu)∗udxx
c0u==shift+cupu+cumu+cvpu+cvmu+cwpu+cwmu
c0uabs==shift+relax2∗[ABS(cupu)+ABS(cumu)+ABS(cvpu)+ABS(cvmu)+ABS(cwpu)+ABS(cwmu)]
uupw==cupu∗u(1,0,0)+cumu∗u(-1,0,0)+cvpu∗u(0,1,0)+cvmu∗u(0,-1,0)+cwpu∗u(0,0,1)+cwmu∗u(0,0,-1)-c0u∗u(0,0,0)-[p(1,0,0)-p(0,0,0)]/dx
upv==Rdxq∗[b.u(0,0,0)+b.u(0,1,0)]
umv==-Rdxq∗[b.u(-1,0,0)+b.u(-1,1,0)]
vpv==Rdxq∗[b.v(0,0,0)+b.v(0,1,0)]
vmv==-Rdxq∗[b.v(0,-1,0)+b.v(0,0,0)]
wpv==Rdxq∗[b.w(0,0,0)+b.w(0,1,0)]
wmv==-Rdxq∗[b.w(0,0,-1)+b.w(0,1,-1)]
cupv==limiter(upv)∗udxx
cumv==limiter(umv)∗udxx
cvpv==limiter(vpv)∗udxx
cvmv==limiter(vmv)∗udxx
cwpv==limiter(wpv)∗udxx
cwmv==limiter(wmv)∗udxx
c0v==shift+cupv+cumv+cvpv+cvmv+cwpv+cwmv
c0vabs==shift+relax2∗[ABS(cupv)+ABS(cumv)+ABS(cvpv)+ABS(cvmv)+ABS(cwpv)+ABS(cwmv)]
vupw==cupv∗v(1,0,0)+cumv∗v(-1,0,0)+cvpv∗v(0,1,0)+cvmv∗v(0,-1,0)+cwpv∗v(0,0,1)+cwmv∗v(0,0,-1)-c0v∗v(0,0,0)-[p(0,1,0)-p(0,0,0)]/dx
upw==Rdxq∗[b.u(0,0,0)+b.u(0,0,1)]
umw==-Rdxq∗[b.u(-1,0,0)+b.u(-1,0,1)]
vpw==Rdxq∗[b.v(0,0,0)+b.v(0,0,1)]
vmw==-Rdxq∗[b.v(0,-1,0)+b.v(0,-1,1)]
wpw==Rdxq∗[b.w(0,0,0)+b.w(0,0,1)]
wmw==-Rdxq∗[b.w(0,0,-1)+b.w(0,0,0)]
cupw==limiter(upw)∗udxx
cumw==limiter(umw)∗udxx
cvpw==limiter(vpw)∗udxx
cvmw==limiter(vmw)∗udxx
cwpw==limiter(wpw)∗udxx
cwmw==limiter(wmw)∗udxx
c0w==shift+cupw+cumw+cvpw+cvmw+cwpw+cwmw
c0wabs==shift+relax2∗[ABS(cupw)+ABS(cumw)+ABS(cvpw)+ABS(cvmw)+ABS(cwpw)+ABS(cwmw)]
wupw==cupw∗w(1,0,0)+cumw∗w(-1,0,0)+cvpw∗w(0,1,0)+cvmw∗w(0,-1,0)+cwpw∗w(0,0,1)+cwmw∗w(0,0,-1)-c0w∗w(0,0,0)-[p(0,0,1)-p(0,0,0)]/dx

ucd=={(1-Rdxq∗[u(0,0,0)+u(1,0,0)])∗[u(1,0,0)-u(0,0,0)]+(1+Rdxq∗[u(-1,0,0)+u(0,0,0)])∗[u(-1,0,0)-u(0,0,0)]+
  (1-Rdxq∗[v(0,0,0)+v(1,0,0)])∗[u(0,1,0)-u(0,0,0)]+(1+Rdxq∗[v(0,-1,0)+v(1,-1,0)])∗[u(0,-1,0)-u(0,0,0)]+
  (1-Rdxq∗[w(0,0,0)+w(1,0,0)])∗[u(0,0,1)-u(0,0,0)]+(1+Rdxq∗[w(0,0,-1)+w(1,0,-1)])∗[u(0,0,-1)-u(0,0,0)]}∗udxx-[p(1,0,0)-p(0,0,0)]/dx
vcd=={(1-Rdxq∗[u(0,0,0)+u(0,1,0)])∗[v(1,0,0)-v(0,0,0)]+(1+Rdxq∗[u(-1,0,0)+u(-1,1,0)])∗[v(-1,0,0)-v(0,0,0)]+
  (1-Rdxq∗[v(0,0,0)+v(0,1,0)])∗[v(0,1,0)-v(0,0,0)]+(1+Rdxq∗[v(0,-1,0)+v(0,0,0)])∗[v(0,-1,0)-v(0,0,0)]+
  (1-Rdxq∗[w(0,0,0)+w(0,1,0)])∗[v(0,0,1)-v(0,0,0)]+(1+Rdxq∗[w(0,0,-1)+w(0,1,-1)])∗[v(0,0,-1)-v(0,0,0)]}∗udxx-[p(0,1,0)-p(0,0,0)]/dx
wcd=={(1-Rdxq∗[u(0,0,0)+u(0,0,1)])∗[w(1,0,0)-w(0,0,0)]+(1+Rdxq∗[u(-1,0,0)+u(-1,0,1)])∗[w(-1,0,0)-w(0,0,0)]+
  (1-Rdxq∗[v(0,0,0)+v(0,0,1)])∗[w(0,1,0)-w(0,0,0)]+(1+Rdxq∗[v(0,-1,0)+v(0,-1,1)])∗[w(0,-1,0)-w(0,0,0)]+
  (1-Rdxq∗[w(0,0,0)+w(0,0,1)])∗[w(0,0,1)-w(0,0,0)]+(1+Rdxq∗[w(0,0,-1)+w(0,0,0)])∗[w(0,0,-1)-w(0,0,0)]}∗udxx-[p(0,0,1)-p(0,0,0)]/dx
!(
ucd=={u(1,0,0)+u(-1,0,0)+u(0,1,0)+u(0,-1,0)+u(0,0,1)+u(0,0,-1)-6∗u(0,0,0)-
  Rdxq∗([u(1,0,0)+u(0,0,0)]∗[u(1,0,0)+u(0,0,0)]-[u(-1,0,0)+u(0,0,0)]∗[u(-1,0,0)+u(0,0,0)]+[u(0,1,0)+u(0,0,0)]∗[v(0,0,0)+v(1,0,0)]-[u(0,-1,0)+u(0,0,0)]∗[v(0,-1,0)+v(1,-1,0)]+
        [u(0,0,1)+u(0,0,0)]∗[w(0,0,0)+w(1,0,0)]-[u(0,0,-1)+u(0,0,0)]∗[w(0,0,-1)+w(1,0,-1)])}∗udxx-[p(1,0,0)-p(0,0,0)]/dx
vcd=={v(1,0,0)+v(-1,0,0)+v(0,1,0)+v(0,-1,0)+v(0,0,1)+v(0,0,-1)-6∗v(0,0,0)-
  Rdxq∗([v(1,0,0)+v(0,0,0)]∗[u(0,0,0)+u(0,1,0)]-[v(-1,0,0)+v(0,0,0)]∗[u(-1,0,0)+u(-1,1,0)]+[v(0,1,0)+v(0,0,0)]∗[v(0,1,0)+v(0,0,0)]-[v(0,-1,0)+v(0,0,0)]∗[v(0,-1,0)+v(0,0,0)]+
        [v(0,0,1)+v(0,0,0)]∗[w(0,0,0)+w(0,1,0)]-[v(0,0,-1)+v(0,0,0)]∗[w(0,0,-1)+w(0,1,-1)])}∗udxx-[p(0,1,0)-p(0,0,0)]/dx
wcd=={w(1,0,0)+w(-1,0,0)+w(0,1,0)+w(0,-1,0)+w(0,0,1)+w(0,0,-1)-6∗w(0,0,0)-
  Rdxq∗([w(1,0,0)+w(0,0,0)]∗[u(0,0,0)+u(0,0,1)]-[w(-1,0,0)+w(0,0,0)]∗[u(-1,0,0)+u(-1,0,1)]+[w(0,1,0)+w(0,0,0)]∗[v(0,0,0)+v(0,0,1)]-[w(0,-1,0)+w(0,0,0)]∗[v(0,-1,0)+v(0,-1,1)]+
        [w(0,0,0)+w(0,0,1)]∗[w(0,0,0)+w(0,0,1)]-[w(0,0,0)+w(0,0,-1)]∗[w(0,0,0)+w(0,0,-1)])}∗udxx-[p(0,0,1)-p(0,0,0)]/dx
!)
BOOLEAN FUNCTION InBody(REAL x,y,z)=NO

SUBROUTINE bc(VFIELD field^,tn; REAL val) WITH field
  IF first THEN u(0,∗,∗)=0
  IF last THEN u(HI-1..HI,∗,∗)=0
  u(∗,0,∗)=0; DO u(ix,HI,iz)=val FOR ALL ix,iz
  u(∗,∗,0)=0; u(∗,∗,HI)=0
  IF first THEN v(0,∗,∗)=0
  IF last THEN v(HI,∗,∗)=0
  v(∗,0,∗)=0; v(∗,HI-1..HI,∗)=0
  v(∗,∗,0)=0; v(∗,∗,HI)=0
  IF first THEN w(0,∗,∗)=0
  IF last THEN w(HI,∗,∗)=0
  w(∗,0,∗)=0; w(∗,HI,∗)=0
  w(∗,∗,0)=0; w(∗,∗,HI-1..HI)=0
  IF first THEN DO p(0,iy,iz)=p(1,iy,iz) FOR ALL iy,iz
  IF last THEN DO p(HI,iy,iz)=p(HI-1,iy,iz) FOR ALL iy,iz
  DO p(ix,0,iz)=p(ix,1,iz); p(ix,HI,iz)=p(ix,HI-1,iz) FOR ALL ix,iz
  DO p(ix,iy,0)=p(ix,iy,1); p(ix,iy,HI)=p(ix,iy,HI-1) FOR ALL ix,iy
END bc

SUBROUTINE cdresids(VFIELD field^,tn^)
  tn=0
  dx=domsize/field.HI2; udxx=1/dx/dx; Rdxq=Reynolds∗dx/4
  nsmp=MIN(nsmpmax,CEILING(field.HI2/8))
  PARALLEL LOOP FOR ismp=0 TO nsmp-1
  LOOP FOR ix=MAX(1,field.LO1+1) TO field.HI1-1 AND iy=1+ROUND[ismp/nsmp∗(field.HI2-1)] TO ROUND[(ismp+1)/nsmp∗(field.HI2-1)] AND iz=1 TO field.HI3-1
    WITH field(ix+∗,iy+∗,iz+∗)
    IF NOT last OR ix+1#field.HI1 THEN tn.u(ix,iy,iz)=ucd
    IF ix#field.LO1+1 AND iy+1#field.HI2 THEN tn.v(ix,iy,iz)=vcd
    IF ix#field.LO1+1 AND iz+1#field.HI3 THEN tn.w(ix,iy,iz)=wcd
    tn.p(ix,iy,iz)=cnt
  REPEAT; REPEAT
END cdresids

SUBROUTINE lincdresids(VFIELD base^,field^,tn^)
  dx=domsize/field.HI2; udxx=1/dx/dx; Rdxq=Reynolds∗dx/4
!(
  TYPEOF(tn) tn1,tn2
  f=1E-7
  cdresids(base,tn1)
  base=base+f∗field
  cdresids(base,tn2)
  base=base-f∗field
  tn=(tn2-tn1)/f-~
!)  
  nsmp=MIN(nsmpmax,CEILING(field.HI2/8))
  PARALLEL LOOP FOR ismp=0 TO nsmp-1
  LOOP FOR ix=MAX(1,field.LO1+1) TO field.HI1-1 AND iy=1+ROUND[ismp/nsmp∗(field.HI2-1)] TO ROUND[(ismp+1)/nsmp∗(field.HI2-1)] AND iz=1 TO field.HI3-1
    b=^base(ix+∗,iy+∗,iz+∗); f=^field(ix+∗,iy+∗,iz+∗)
    INLINE FUNCTION p(ix,iy,iz)=b.p(ix,iy,iz)+eps∗f.p(ix,iy,iz)
    INLINE FUNCTION u(ix,iy,iz)=b.u(ix,iy,iz)+eps∗f.u(ix,iy,iz)
    INLINE FUNCTION v(ix,iy,iz)=b.v(ix,iy,iz)+eps∗f.v(ix,iy,iz)
    INLINE FUNCTION w(ix,iy,iz)=b.w(ix,iy,iz)+eps∗f.w(ix,iy,iz)
    eps=0
    tn.p(ix,iy,iz)=D(cnt,eps)
    IF NOT last OR ix+1#field.HI1 THEN tn.u(ix,iy,iz)=D(ucd,eps)
    IF ix#field.LO1+1 AND iy+1#field.HI2 THEN tn.v(ix,iy,iz)=D(vcd,eps)
    IF ix#field.LO1+1 AND iz+1#field.HI3 THEN tn.w(ix,iy,iz)=D(wcd,eps)
  REPEAT; REPEAT
END lincdresids

SUBROUTINE upwresids(VFIELD base^,field^,tn^)
  dx=domsize/field.HI2; udxx=1/dx/dx; Rdxq=Reynolds∗dx/4
  LOOP FOR ix=MAX(1,field.LO1+1) TO field.HI1-1 AND iy=1 TO field.HI2-1 AND iz=1 TO field.HI3-1
    b=^base(ix+∗,iy+∗,iz+∗); WITH field(ix+∗,iy+∗,iz+∗)
    tn.p(ix,iy,iz)=cnt
    IF NOT last OR ix+1#field.HI1 THEN tn.u(ix,iy,iz)=uupw+shift∗u(0,0,0)
    IF ix#field.LO1+1 AND iy+1#field.HI2 THEN tn.v(ix,iy,iz)=vupw+shift∗v(0,0,0)
    IF ix#field.LO1+1 AND iz+1#field.HI3 THEN tn.w(ix,iy,iz)=wupw+shift∗w(0,0,0)
  REPEAT
END upwresids

SUBROUTINE smooth(VFIELD base^,field^,tn; INTEGER nx)
  dx=domsize/field.HI2; udxx=1/dx/dx; Rdxq=Reynolds∗dx/4
  relax=0.35; rel1=1
  nsmp=MIN(nsmpmax,CEILING(field.HI2/8))
  PARALLEL LOOP FOR ismp=0 TO nsmp-1
  LOOP FOR 3 TIMES AND parity=0 TO 1
    TYPEOF(field(∗,MAX(0,ROUND[ismp/nsmp∗(field.HI2-1)]-1)..ROUND[(ismp+1)/nsmp∗(field.HI2-1)]+1)) jac=field(∗,MAX(0,ROUND[ismp/nsmp∗(field.HI2-1)]-1)..ROUND[(ismp+1)/nsmp∗(field.HI2-1)]+1); SYNC(ismp,nsmp)
!    POINTER TO VFIELD jac=^field
    LOOP FOR ix=field.LO1+2 TO field.HI1-1 AND iy=1+ROUND[ismp/nsmp∗(field.HI2-1)] TO ROUND[(ismp+1)/nsmp∗(field.HI2-1)] AND iz=2-[(ix+iy+parity) BITAND 1] TO field.HI3-1 BY 2 EXCEPT InBody(ix∗dx,iy∗dx,iz∗dx)
!    LOOP FOR ix=field.LO1+2 TO field.HI1-1 AND iy=1+ismp TO field.HI2-1 BY nsmp AND iz=2-[(ix+iy+parity) BITAND 1] TO field.HI3-1 BY 2 EXCEPT InBody(ix∗dx,iy∗dx,iz∗dx)
     REAL dtu(-1..0),ut(-1..0),dtv(-1..0),vt(-1..0),dtw(-1..0),wt(-1..0)
     IF ix-1=0 OR InBody[(ix-1)∗dx,iy∗dx,iz∗dx]
     THEN ut(-1)=jac.u(ix-1,iy,iz); dtu(-1)=0 ELSE
       b=^base(ix-1+∗,iy+∗,iz+∗)
       WITH jac(ix-1+∗,iy+∗,iz+∗)
       dtu(-1)=1/c0uabs
       ut(-1)=u(0,0,0)+[uupw-tn.u(ix-1,iy,iz)]∗relax∗dtu(-1)
     END IF
     IF ix+1=nx OR InBody[(ix+1)∗dx,iy∗dx,iz∗dx]
     THEN ut(0)=jac.u(ix,iy,iz); dtu(0)=0 ELSE
       b=^base(ix+∗,iy+∗,iz+∗)
       WITH jac(ix+∗,iy+∗,iz+∗)
       dtu(0)=1/c0uabs
       ut(0)=u(0,0,0)+[uupw-tn.u(ix,iy,iz)]∗relax∗dtu(0)
     END IF
     IF iy-1=0 OR InBody[ix∗dx,(iy-1)∗dx,iz∗dx]
     THEN vt(-1)=jac.v(ix,iy-1,iz); dtv(-1)=0 ELSE
       b=^base(ix+∗,iy-1+∗,iz+∗)
       WITH jac(ix+∗,iy-1+∗,iz+∗)
       dtv(-1)=1/c0vabs
       vt(-1)=v(0,0,0)+[vupw-tn.v(ix,iy-1,iz)]∗relax∗dtv(-1)
     END IF
     IF iy+1=field.HI2 OR InBody[ix∗dx,(iy+1)∗dx,iz∗dx]
     THEN vt(0)=jac.v(ix,iy,iz); dtv(0)=0 ELSE
       b=^base(ix+∗,iy+∗,iz+∗)
       WITH jac(ix+∗,iy+∗,iz+∗)
       dtv(0)=1/c0vabs
       vt(0)=v(0,0,0)+[vupw-tn.v(ix,iy,iz)]∗relax∗dtv(0)
     END IF
     IF iz-1=0 OR InBody[ix∗dx,iy∗dx,(iz-1)∗dx]
     THEN wt(-1)=jac.w(ix,iy,iz-1); dtw(-1)=0 ELSE
       b=^base(ix+∗,iy+∗,iz-1+∗)
       WITH jac(ix+∗,iy+∗,iz-1+∗)
       dtw(-1)=1/c0wabs
       wt(-1)=w(0,0,0)+[wupw-tn.w(ix,iy,iz-1)]∗relax∗dtw(-1)
     END IF
     IF iz+1=field.HI3 OR InBody[ix∗dx,iy∗dx,(iz+1)∗dx]
     THEN wt(0)=jac.w(ix,iy,iz); dtw(0)=0 ELSE
       b=^base(ix+∗,iy+∗,iz+∗)
       WITH jac(ix+∗,iy+∗,iz+∗)
       dtw(0)=1/c0wabs
       wt(0)=w(0,0,0)+[wupw-tn.w(ix,iy,iz)]∗relax∗dtw(0)
     END IF
     WITH field(ix+∗,iy+∗,iz+∗)
     cont=[ut(-1)-ut(0)+vt(-1)-vt(0)+wt(-1)-wt(0)-dx∗tn.p(ix,iy,iz)]/[dtu(-1)+dtu(0)+dtv(-1)+dtv(0)+dtw(-1)+dtw(0)]
     IF ix<nx-1 THEN u(0,0,0)=(1-rel1)∗~+rel1∗[ut(0)+dtu(0)∗cont]
     IF ix>1 THEN u(-1,0,0)=(1-rel1)∗~+rel1∗[ut(-1)-dtu(-1)∗cont]
     IF iy<field.HI2-1 THEN v(0,0,0)=(1-rel1)∗~+rel1∗[vt(0)+dtv(0)∗cont]
     IF iy>field.LO2+1 THEN v(0,-1,0)=(1-rel1)∗~+rel1∗[vt(-1)-dtv(-1)∗cont]
     IF iz<field.HI3-1 THEN w(0,0,0)=(1-rel1)∗~+rel1∗[wt(0)+dtw(0)∗cont]
     IF iz>field.LO3+1 THEN w(0,0,-1)=(1-rel1)∗~+rel1∗[wt(-1)-dtw(-1)∗cont]
     p(0,0,0)=~+cont∗dx
    REPEAT
    SYNC(ismp,nsmp)
    IF ismp=nsmp-1 THEN updatenbrs(field,parity)
    SYNC(ismp,nsmp)
  REPEAT
  REPEAT
END smooth

SUBROUTINE oversmooth(VFIELD base^,field^,tn; INTEGER nx)
DO
  f0=field
  smooth(base,field,tn,nx)
  f1=field
  smooth(base,field,tn,nx)
  field=field+0.5∗f0-0.5∗f1
FOR 2 TIMES
END oversmooth

#define uxav0(iy,iz) (0.75∗u(0,iy,iz)+0.25∗u(-1,iy,iz))
#define uxav1(iy,iz) (0.25∗u(0,iy,iz)+0.75∗u(-1,iy,iz))
#define vyav0(ix,iz) (0.75∗v(ix,0,iz)+0.25∗v(ix,-1,iz))
#define vyav1(ix,iz) (0.25∗v(ix,0,iz)+0.75∗v(ix,-1,iz))
#define wzav0(ix,iy) (0.75∗w(ix,iy,0)+0.25∗w(ix,iy,-1))
#define wzav1(ix,iy) (0.25∗w(ix,iy,0)+0.75∗w(ix,iy,-1))
SUBROUTINE interp(VFIELD c,f^) WITH c
  f.u(0,0,0)=~+uxav0(0,0)
  f.u(-1,0,0)=~+uxav1(0,0)
  f.u(0,-1,0)=~+0.5∗[uxav0(0,0)+uxav0(-1,0)]
  f.u(-1,-1,0)=~+0.5∗[uxav1(0,0)+uxav1(-1,0)]
  f.u(0,0,-1)=~+0.5∗[uxav0(0,0)+uxav0(0,-1)]
  f.u(-1,0,-1)=~+0.5∗[uxav1(0,0)+uxav1(0,-1)]
  f.u(0,-1,-1)=~+0.25∗[uxav0(0,0)+uxav0(0,-1)+uxav0(-1,0)+uxav0(-1,-1)]
  f.u(-1,-1,-1)=~+0.25∗[uxav1(0,0)+uxav1(0,-1)+uxav1(-1,0)+uxav1(-1,-1)]
  f.v(0,0,0)=~+vyav0(0,0)
  f.v(0,-1,0)=~+vyav1(0,0)
  f.v(-1,0,0)=~+0.5∗[vyav0(0,0)+vyav0(-1,0)]
  f.v(-1,-1,0)=~+0.5∗[vyav1(0,0)+vyav1(-1,0)]
  f.v(0,0,-1)=~+0.5∗[vyav0(0,0)+vyav0(0,-1)]
  f.v(0,-1,-1)=~+0.5∗[vyav1(0,0)+vyav1(0,-1)]
  f.v(-1,0,-1)=~+0.25∗[vyav0(0,0)+vyav0(0,-1)+vyav0(-1,0)+vyav0(-1,-1)]
  f.v(-1,-1,-1)=~+0.25∗[vyav1(0,0)+vyav1(0,-1)+vyav1(-1,0)+vyav1(-1,-1)]
  f.w(0,0,0)=~+wzav0(0,0)
  f.w(0,0,-1)=~+wzav1(0,0)
  f.w(-1,0,0)=~+0.5∗[wzav0(0,0)+wzav0(-1,0)]
  f.w(-1,0,-1)=~+0.5∗[wzav1(0,0)+wzav1(-1,0)]
  f.w(0,-1,0)=~+0.5∗[wzav0(0,0)+wzav0(0,-1)]
  f.w(0,-1,-1)=~+0.5∗[wzav1(0,0)+wzav1(0,-1)]
  f.w(-1,-1,0)=~+0.25∗[wzav0(0,0)+wzav0(0,-1)+wzav0(-1,0)+wzav0(-1,-1)]
  f.w(-1,-1,-1)=~+0.25∗[wzav1(0,0)+wzav1(0,-1)+wzav1(-1,0)+wzav1(-1,-1)]
  f.p(0,0,0)=~+p(0,0,0)
  f.p(-1,0,0)=~+0.5∗[p(0,0,0)+p(-1,0,0)]
  f.p(0,-1,0)=~+0.5∗[p(0,0,0)+p(0,-1,0)]
  f.p(-1,-1,0)=~+0.25∗[p(0,0,0)+p(-1,0,0)+p(0,-1,0)+p(-1,-1,0)]
  f.p(0,0,-1)=~+0.5∗[p(0,0,0)+p(0,0,-1)]
  f.p(-1,0,-1)=~+0.25∗[p(0,0,0)+p(-1,0,0)+p(0,0,-1)+p(-1,0,-1)]
  f.p(0,-1,-1)=~+0.25∗[p(0,0,0)+p(0,-1,0)+p(0,0,-1)+p(0,-1,-1)]
  f.p(-1,-1,-1)=~+0.125∗[p(0,0,0)+p(-1,0,0)+p(0,-1,0)+p(-1,-1,0)+
                         p(0,0,-1)+p(-1,0,-1)+p(0,-1,-1)+p(-1,-1,-1)]
END interp

SUBROUTINE restrict(VFIELD f,c^) WITH f
  c.u(0,0,0)=~+0.125∗[uxav0(0,0)+0.5∗uxav0(-1,0)+0.5∗uxav0(0,-1)+0.25∗uxav0(-1,-1)]
  c.u(-1,0,0)=~+0.125∗[uxav1(0,0)+0.5∗uxav1(-1,0)+0.5∗uxav1(0,-1)+0.25∗uxav1(-1,-1)]
  c.u(0,-1,0)=~+0.125∗[0.5∗uxav0(-1,0)+0.25∗uxav0(-1,-1)]
  c.u(-1,-1,0)=~+0.125∗[0.5∗uxav1(-1,0)+0.25∗uxav1(-1,-1)]
  c.u(0,0,-1)=~+0.125∗[0.5∗uxav0(0,-1)+0.25∗uxav0(-1,-1)]
  c.u(-1,0,-1)=~+0.125∗[0.5∗uxav1(0,-1)+0.25∗uxav1(-1,-1)]
  c.u(0,-1,-1)=~+0.125∗0.25∗uxav0(-1,-1)
  c.u(-1,-1,-1)=~+0.125∗0.25∗uxav1(-1,-1)
  c.v(0,0,0)=~+0.125∗[vyav0(0,0)+0.5∗vyav0(-1,0)+0.5∗vyav0(0,-1)+0.25∗vyav0(-1,-1)]
  c.v(0,-1,0)=~+0.125∗[vyav1(0,0)+0.5∗vyav1(-1,0)+0.5∗vyav1(0,-1)+0.25∗vyav1(-1,-1)]
  c.v(-1,0,0)=~+0.125∗[0.5∗vyav0(-1,0)+0.25∗vyav0(-1,-1)]
  c.v(-1,-1,0)=~+0.125∗[0.5∗vyav1(-1,0)+0.25∗vyav1(-1,-1)]
  c.v(0,0,-1)=~+0.125∗[0.5∗vyav0(0,-1)+0.25∗vyav0(-1,-1)]
  c.v(0,-1,-1)=~+0.125∗[0.5∗vyav1(0,-1)+0.25∗vyav1(-1,-1)]
  c.v(-1,0,-1)=~+0.125∗0.25∗vyav0(-1,-1)
  c.v(-1,-1,-1)=~+0.125∗0.25∗vyav1(-1,-1)
  c.w(0,0,0)=~+0.125∗[wzav0(0,0)+0.5∗wzav0(-1,0)+0.5∗wzav0(0,-1)+0.25∗wzav0(-1,-1)]
  c.w(0,0,-1)=~+0.125∗[wzav1(0,0)+0.5∗wzav1(-1,0)+0.5∗wzav1(0,-1)+0.25∗wzav1(-1,-1)]
  c.w(-1,0,0)=~+0.125∗[0.5∗wzav0(-1,0)+0.25∗wzav0(-1,-1)]
  c.w(-1,0,-1)=~+0.125∗[0.5∗wzav1(-1,0)+0.25∗wzav1(-1,-1)]
  c.w(0,-1,0)=~+0.125∗[0.5∗wzav0(0,-1)+0.25∗wzav0(-1,-1)]
  c.w(0,-1,-1)=~+0.125∗[0.5∗wzav1(0,-1)+0.25∗wzav1(-1,-1)]
  c.w(-1,-1,0)=~+0.125∗0.25∗wzav0(-1,-1)
  c.w(-1,-1,-1)=~+0.125∗0.25∗wzav1(-1,-1)
  c.p(0,0,0)=~+0.125∗[p(0,0,0)+0.5∗p(-1,0,0)+0.5∗p(0,-1,0)+0.25∗p(-1,-1,0)+
                         0.5∗p(0,0,-1)+0.25∗p(-1,0,-1)+0.25∗p(0,-1,-1)+0.125∗p(-1,-1,-1)]
  c.p(-1,0,0)=~+0.125∗[0.5∗p(-1,0,0)+0.25∗p(-1,-1,0)+0.25∗p(-1,0,-1)+0.125∗p(-1,-1,-1)]
  c.p(0,-1,0)=~+0.125∗[0.5∗p(0,-1,0)+0.25∗p(-1,-1,0)+0.25∗p(0,-1,-1)+0.125∗p(-1,-1,-1)]
  c.p(-1,-1,0)=~+0.125∗[0.25∗p(-1,-1,0)+0.125∗p(-1,-1,-1)]
  c.p(0,0,-1)=~+0.125∗[0.5∗p(0,0,-1)+0.25∗p(-1,0,-1)+0.25∗p(0,-1,-1)+0.125∗p(-1,-1,-1)]
  c.p(-1,0,-1)=~+0.125∗[0.25∗p(-1,0,-1)+0.125∗p(-1,-1,-1)]
  c.p(0,-1,-1)=~+0.125∗[0.25∗p(0,-1,-1)+0.125∗p(-1,-1,-1)]
  c.p(-1,-1,-1)=~+0.125∗0.125∗p(-1,-1,-1)
END restrict

SUBROUTINE restrictloop(VFIELD fvar^,cvar^)
 nsmp=MIN(nsmpmax,CEILING(fvar.HI2/8))
 PARALLEL LOOP FOR ismp=0 TO nsmp-1
  LOOP FOR ixc=MAX[cvar.LO1+1,fvar.LO1 DIV 2 +1] TO MIN[cvar.HI1,fvar.HI1 DIV 2] AND iyc=1+ROUND[ismp/nsmp∗cvar.HI2] TO ROUND[(ismp+1)/nsmp∗cvar.HI2] AND izc=1 TO cvar.HI3
    restrict(fvar(2∗ixc+(-1..0),2∗iyc+(-1..0),2∗izc+(-1..0)),cvar(ixc+(-1..0),iyc+(-1..0),izc+(-1..0)))
  REPEAT
  IF fvar.HI1<2∗cvar.HI1 THEN LOOP FOR iyc=1+ROUND[ismp/nsmp∗cvar.HI2] TO ROUND[(ismp+1)/nsmp∗cvar.HI2] AND izc=cvar.LO3+1 TO cvar.HI3
    VARS fext(-1..0,-1..0,-1..0)=0; fext(-1)=fvar(HI,2∗iyc+(-1..0),2∗izc+(-1..0))
    restrict(fext,cvar(HI+(-1..0),iyc+(-1..0),izc+(-1..0)))
  REPEAT
 REPEAT
END restrictloop

SUBROUTINE mg(VFIELD fbase^,fvar^,ftn^; INTEGER nxf)
IF nxf MOD 2 = 0 AND fvar.HI2 MOD 2 = 0 AND fvar.HI3 MOD 2 = 0 AND
   nxf > 4 AND fvar.HI2 > 4 AND fvar.HI3 > 4 THEN
  smooth(fbase,fvar,ftn,nxf)
  bc(fvar,ftn,0)
  nxc=nxf DIV 2
  SHARED ARRAY[(fvar.LO1+1) DIV 2 - 1..(fvar.HI1+1) DIV 2, (fvar.LO2-1) DIV 2..(fvar.HI2+1) DIV 2, (fvar.LO3-1) DIV 2..(fvar.HI3+1) DIV 2] OF VARS cbase=0, cvar=0, ctn=0
  SHARED ARRAY(fvar.LO1..fvar.HI1,fvar.LO2..fvar.HI2,fvar.LO3..fvar.HI3) OF VARS frsd=0
  restrictloop(fbase,cbase)
  bc(cbase,ctn,1)
  updatenbrs(cbase,2+(fvar.LO1 BITAND 1)∗XPDIR+(fvar.HI1 BITAND 1)∗XNDIR)
  nsmp=MIN(nsmpmax,CEILING(fvar.HI2/8))
  PARALLEL LOOP FOR ismp=0 TO nsmp-1
  LOOP FOR ixf=fvar.LO1+1 TO fvar.HI1-1 AND iyf=1+ROUND[ismp/nsmp∗(fvar.HI2-1)] TO ROUND[(ismp+1)/nsmp∗(fvar.HI2-1)] AND izf=fvar.LO3+1 TO fvar.HI3-1
    dx=domsize/fvar.HI2; udxx=1/dx/dx; Rdxq=Reynolds∗dx/4
    b=^fbase(ixf+(-1..1),iyf+(-1..1),izf+(-1..1))
    WITH fvar(ixf+(-1..1),iyf+(-1..1),izf+(-1..1))
    IF ixf+1=nxf OR InBody[(ixf+1)∗dx,iyf∗dx,izf∗dx] THEN frsd(ixf,iyf,izf).u=0 ELSE
      frsd(ixf,iyf,izf).u=ftn.u(ixf,iyf,izf)-uupw
    END IF
    IF iyf+1=fvar.HI2 OR InBody[ixf∗dx,(iyf+1)∗dx,izf∗dx] THEN frsd(ixf,iyf,izf).v=0 ELSE
      frsd(ixf,iyf,izf).v=ftn.v(ixf,iyf,izf)-vupw
    END IF
    IF izf+1=fvar.HI3 OR InBody[ixf∗dx,iyf∗dx,(izf+1)∗dx] THEN frsd(ixf,iyf,izf).w=0 ELSE
      frsd(ixf,iyf,izf).w=ftn.w(ixf,iyf,izf)-wupw
    END IF
    frsd(ixf,iyf,izf).p=ftn.p(ixf,iyf,izf)-cnt
  REPEAT; REPEAT
  bc(frsd,frsd,0)
  updatenbrs(frsd,2+(fvar.LO1 BITAND 1)∗XPDIR+(fvar.HI1 BITAND 1)∗XNDIR)
  restrictloop(frsd,ctn)
  updatenbrs(ctn,2+(fvar.LO1 BITAND 1)∗XPDIR+(fvar.HI1 BITAND 1)∗XNDIR)
!(
  REAL mcorr=0; INTEGER count=0
  LOOP FOR ixc=LO+1 TO HI-1 AND iyc=LO+1 TO HI-1 AND izc=LO+1 TO HI-1
    EXCEPT InBody(ixc∗domsize/cvar.HI2,iyc∗domsize/cvar.HI2,izc∗domsize/cvar.HI2)
    mcorr=~+ctn(ixc,iyc,izc).p
    count=~+1
  REPEAT
  mcorr=mcorr/count
  DO ctn(ixc,iyc).p=~-mcorr FOR ixc=LO+1 TO HI-1 AND iyc=LO+1 TO HI-1 AND izc=LO+1 TO HI-1
!)
  DO mg(cbase,cvar,ctn,nxc) FOR 2 TIMES
  bc(cvar,ctn,0)
!  nsmp=MIN(nsmpmax,CEILING(fvar.HI2/8))
  PARALLEL LOOP FOR ismp=0 TO nsmp-1
  LOOP FOR ixc=MAX[cvar.LO1+1,fvar.LO1 DIV 2 +1] TO MIN[cvar.HI1,fvar.HI1 DIV 2] AND iyc=1+ROUND[ismp/nsmp∗cvar.HI2] TO ROUND[(ismp+1)/nsmp∗cvar.HI2] AND izc=cvar.LO3+1 TO cvar.HI3
    interp(cvar(ixc+(-1..0),iyc+(-1..0),izc+(-1..0)),fvar(2∗ixc+(-1..0),2∗iyc+(-1..0),2∗izc+(-1..0)))
  REPEAT; REPEAT
  bc(fvar,ftn,0)
  updatenbrs(fvar,2+(fvar.LO1 BITAND 1)∗XPDIR+(fvar.HI1 BITAND 1)∗XNDIR)
  smooth(fbase,fvar,ftn,nxf)
ELSE DO smooth(fbase,fvar,ftn,nxf) FOR 20 TIMES
END mg