! rbmatmod -- Copyright 2000 Paolo Luchini
! http://CPLcode.net/Applications/Numerical/rbmatmod/
!
! parallel implementation of banded LU decomposition
! performs the solution of a banded linear system by the same algorithm used
! in rbmat.cpl. Each node in sequence solves a portion of the system and passes
! boundary data to the next node.

!( Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:

The above copyright notice and this permission notice shall be included in all
copies or substantial portions of the Software.

THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
SOFTWARE. !)

SUBROUTINE LUdecompStep(POINTER TO ARRAY(∗,-2..2) OF REAL A)
IF last THEN A(HI)=0; A(HI-1)=0; A(HI-2,1..2)=0; A(HI-3,2)=0 ELSE
END IF
REAL piv
LOOP FOR i=A.HI1-A.HI2 DOWN TO A.LO1
LOOP FOR k=A.HI2 DOWN TO 1
POINTER INTO A(i,∗+k),A(i+k,∗) j=0
piv=A(i,j+k)
DO DEC j; A(i,j+k)=~-piv∗A(i+k,j) FOR -A.LO2 TIMES
REPEAT LOOP
piv=1/A(i,0); A(i,0)=piv
LOOP FOR j INTO A(i,A.LO2..-1): A(i,j)=~∗piv
REPEAT LOOP
IF first THEN A(LO,-2..-1)=0; A(LO+1,-2)=0 ELSE
WRITE BINARY TO prev A(LO),A(LO+1)
END IF
END LUdecompStep

SUBROUTINE LeftLUDivStep1(POINTER TO ARRAY(∗) OF REAL x; ARRAY(∗,-2..2) OF REAL A; ARRAY(∗) OF REAL t)
IF NOT last THEN READ BINARY FROM next x(HI-1),x(HI)
LOOP FOR i=A.HI1-A.HI2 DOWN TO A.LO1
POINTER INTO A(i,∗),x(i+∗) j=A.HI2
REAL sum=t(i)
LOOP FOR A.HI2 TIMES: sum=~-A(i,j)∗x(i+j); DEC j
x(i+j)=sum∗A(i,j)
REPEAT LOOP
IF NOT first THEN WRITE BINARY TO prev x(LO+2),x(LO+3)
END LeftLUDivStep1

INLINE SUBROUTINE FlushStep1 = IF NOT first THEN FLUSH prev

SUBROUTINE LeftLUDivStep2(POINTER TO ARRAY(∗) OF REAL x; ARRAY(∗,-2..2) OF REAL A)
IF NOT first THEN READ BINARY FROM prev x(LO),x(LO+1)
LOOP FOR i=A.LO1 TO A.HI1
POINTER INTO A(i,∗),x(i+∗) j=A.LO2
REAL sum=0
LOOP FOR -A.LO2 TIMES: sum=~+A(i,j)∗x(i+j); INC j
x(i+j)=~-sum
REPEAT LOOP
IF NOT last THEN WRITE BINARY TO next x(HI-3),x(HI-2)
END LeftLUDivStep2

INLINE SUBROUTINE FlushStep2 = IF NOT last THEN FLUSH next

SUBROUTINE RightLUDivStep1(ARRAY(∗) OF REAL x^,t; ARRAY(∗,-2..2) OF REAL A)
IF NOT last THEN READ BINARY FROM next x(HI-1),x(HI)
LOOP FOR i=A.HI1-A.HI2 DOWN TO A.LO1
REAL sum=t(i)
DO sum=~-A(i-j,j)∗x(i-j) FOR j=A.LO2 TO -1
x(i)=sum
REPEAT LOOP
IF NOT first THEN WRITE BINARY TO prev x(LO+2),x(LO+3)
END RightLUDivStep1

SUBROUTINE RightLUDivStep2(ARRAY(∗) OF REAL x^; ARRAY(∗,-2..2) OF REAL A)
IF NOT first THEN READ BINARY FROM prev x(LO),x(LO+1),x(LO+2),x(LO+3)
LOOP FOR i=A.LO1 TO A.HI1-A.HI2
POINTER INTO A(i,∗),x(i+∗) j=0
p=x(i+j)∗A(i,0)
x(i+j)=p
DO INC j; x(i+j)=~-A(i,j)∗p FOR A.HI2 TIMES
REPEAT LOOP
IF NOT last THEN
WRITE BINARY TO next x(HI-3),x(HI-2),x(HI-1),x(HI)
x(HI-1)=~∗A(HI-1,0)
x(HI)=(~-A(HI-1,1)∗x(HI-1))∗A(HI,0)
END IF
END RightLUDivStep2