*-----------------------------------------------------------------------
* mullin.f - multiple line-segments approximation of a data series
* by Andy Allinger, 2012-2016, released to the public domain
* This program may be used by any person for any purpose.
*-----------------------------------------------------------------------
      
*-----------------------------------------------------------------------
*   find slope from least-square accumulations
*-----------------------------------------------------------------------
      SUBROUTINE CALC_SLOPE (i, k, n, CW, CWX, CWY, CWX2, CWXY, slope)     
       IMPLICIT NONE
       INTEGER i, k, n
       REAL slope
       DOUBLE PRECISION CW, CWX, CWY, CWX2, CWXY
       DIMENSION CW(0:n), CWX(0:n), CWY(0:n), CWX2(0:n),
     &  CWXY(0:n), slope(n)

*          local variables
       INTEGER h
       DOUBLE PRECISION det, sw, swx, swy, swx2, swxy
       LOGICAL DALMEQ
       
*          begin 
       h = i - 1
       sw = CW(k) - CW(h)              ! differences of accumulated sums
       swx = CWX(k) - CWX(h)
       swy = CWY(k) - CWY(h)
       swx2 = CWX2(k) - CWX2(h)
       swxy = CWXY(k) - CWXY(h)
       det = swx2 * sw - swx * swx
       IF (DALMEQ(det, 0D0)) THEN
         PRINT *, 'at hik=', h,i,k, 'Warning: zero determinant'
       ELSE
         slope(i) = SNGL((sw * swxy - swx * swy) / det)
         IF (ISNAN(slope(i)))
     &     PRINT *, 'slope: nan encountered!! sw: ', sw, ' swx: ', swx,
     &      'swy: ', swy, ' swxy: ', swxy, ' det: ', det
       END IF
       RETURN
      END  ! of CALC_SLOPE
      

*-----------------------------------------------------------------------
*     probability slopes are distinct, by a 1-tailed t-test
*-----------------------------------------------------------------------
      SUBROUTINE CALC_P (i, j, k, n, CM, CM2, slope, p)
       IMPLICIT NONE
       INTEGER i, j, k, n
       REAL slope, p
       DOUBLE PRECISION CM, CM2
       DIMENSION CM(n), CM2(n), slope(n)
       
*                local variables
       INTEGER n1, n2, nu       
       REAL CDFT,                ! function from STARPAC
     &      EPS, rmachine,       ! macine precision
     &      t, var1, var2        ! t-statistic, variances
       DOUBLE PRECISION sm, sm2
       LOGICAL first 
       SAVE first, EPS
       DATA first / .TRUE. /
       
*              begin
       IF (first) THEN
         first = .FALSE.
         EPS = rmachine(1)
       END IF       
       
*                  n1, n2 @ variances of proposed segments
       n1 = j - i
       sm = CM(j) - CM(i)
       sm2 = CM2(j) - CM2(i)
       var1 = SNGL(sm2 - sm **2 / DBLE(n1))   ! n[1] @ s[1]^2
       n2 = k - j
       sm = CM(k) - CM(j)
       sm2 = CM2(k) - CM2(j)
       var2 = SNGL(sm2 - sm **2 / DBLE(n2))   ! n[2] @ s[2]^2
           
*             probability segments are distinct
       IF (var1 + var2 > 0.) THEN
         nu = n1 + n2 - 2
         IF (nu < 1) PRINT *, 'calc_p: nu ', nu, ' with ijk ', i, j, k
         t = ABS(slope(j) - slope(i))  
     &        / SQRT( ((var1 + var2) / FLOAT(nu)) 
     &                      * (1. / FLOAT(n1) + 1. / FLOAT(n2)) )
         p = CDFT(t, nu)       ! (im)probability of segments being same
       ELSE
         IF (ABS(slope(j) - slope(i)) > EPS) THEN
           p = 1.
         ELSE
           p = 0.
         END IF
       END IF
       RETURN
      END  ! of CALC_P


*-----------------------------------------------------------------------
* Departition intervals of the data with similar slopes.  
* Inspired by the HarmAn algorithm of Pardo & Birmingham
*                               I/O LIST
*_Name_______Type__________In/Out_____Description_______________________
*  W[n]       REAL          in         importance weights of data points
*  X[n]       REAL          in         data independent variable
*  Y[n]       REAL          in         data dependent variable
*  scis[n]    REAL          out        X coordinate of segment beginnings
*  slope[n]   REAL          out        slope of line segments
*  n          INTEGER       in         number of data points
*  v          INTEGER       out        number of approximating segments
*  ierr       INTEGER       out        error code 0=no errors
*-----------------------------------------------------------------------
      SUBROUTINE mullin (W, X, Y, scis, slope, n, v, ierr)
      
       IMPLICIT NONE
       INTEGER n, v, ierr 
       REAL W, X, Y                     ! input
       REAL scis, slope                 ! output
       DIMENSION W(n), X(n), Y(n), scis(n), slope(n)
       
*                 constants
       REAL pcrit                       ! cutoff 1-tail t-test
       PARAMETER (pcrit = 0.999999) 
       
*                 local variables
       INTEGER g, h, i, j, k, vv              ! counters

       REAL
     &  EPS, rmachine,                 ! machine precision
     &  p_h, p_i,                      ! p-values merging prev or next
     &  pmin,                          ! least p-value
     &  r,                             ! a ratio
     &  tau,                           ! error tolerance in HFTI
     &  wnew, xold, xnew, yold, ynew   ! working values of w, x, y

*           least-squares sums and accumulations
       DOUBLE PRECISION
     &  m,                                ! slope
     &  wx                                ! temp for least squares sums

       LOGICAL done, neg
     
*              allocatable arrays
       INTEGER, DIMENSION (:), ALLOCATABLE :: prev, next, IP
       REAL, DIMENSION (:), ALLOCATABLE :: cept, peach
       DOUBLE PRECISION, DIMENSION (:), ALLOCATABLE ::
     &      CM, CM2, CW, CWX, CWY, CWX2, CWXY
       REAL, DIMENSION (:,:), ALLOCATABLE :: A
     
*-----------------------------------------------------------------------
*           begin
*-----------------------------------------------------------------------
       ierr = 0
       EPS = rmachine(1)
       
       IF (n < 2) THEN                    ! not enough data
         ierr = 3
         RETURN
       END IF
 
*-----------------------------------------------------------------------
*          initial values
*-----------------------------------------------------------------------
       ALLOCATE (prev(n), next(n), peach(n), cept(n), CM(n), CM2(n), 
     &  CW(0:n), CWX(0:n), CWY(0:n), CWX2(0:n), CWXY(0:n), STAT=ierr)
       IF (ierr .NE. 0) RETURN
         
       CW(0) = 0.; CWX(0) = 0.; CWY(0) = 0.; CWX2(0) = 0.; CWXY(0) = 0.
       xnew = -1E9

       DO j = 1, n
         xold = xnew  
         wnew = W(j) 
         xnew = X(j)        
         ynew = Y(j)

*           check that input X is increasing
         IF (xnew - xold < EPS) THEN
           ierr = 1
           PRINT *, 'x not distinct or not sorted'
           RETURN
         END IF
         
*           accumulate weighted least squares sums         
         CW(j) = CW(j-1) + wnew
         wx = DPROD(wnew, xnew)
         CWX(j) = CWX(j-1) + wx
         CWY(j) = CWY(j-1) + DPROD(wnew, ynew)
         CWX2(j) = CWX2(j-1) + wx * xnew
         CWXY(j) = CWXY(j-1) + wx * ynew
         
          prev(j) = j - 1
          next(j) = j + 1
       END DO

*           accumulate standard deviation sums 
       CM(1) = 0. ; CM2(1) = 0.
       xnew = X(1) ; ynew = Y(1)
       DO j = 2, n
         xold = xnew ; xnew = X(j)
         yold = ynew ; ynew = Y(j)
         m = (ynew - yold) / (xnew - xold)
         CM(j) = CM(j-1) + m
         CM2(j) = CM2(j-1) + m **2
       END DO

*-----------------------------------------------------------------------
*        departition the line segments into longer, least-squares lines
*-----------------------------------------------------------------------
       v = n - 1    ! all partitions

*          merge pairs to reach nonzero variance
       DO j = 2, n-1, 2
         i = prev(j)
         k = next(j)
         CALL CALC_SLOPE (i, k, n, CW, CWX, CWY, CWX2, CWXY, slope)
         IF (ISNAN(slope(I))) PRINT *, 'mullin: Nan at begin phase!!'
         prev(k) = i
         next(i) = k
         v = v - 1       ! one fewer segment
       END DO  ! next j
       
       IF (BTEST(n-1,0)) THEN           ! required for even n
         i = n-1
         j = n
         CALL CALC_SLOPE (i, j, n, CW, CWX, CWY, CWX2, CWXY, slope)
         IF (ISNAN(slope(i))) PRINT *, 'mullin: NaN on even N!!!'
       END IF
       
*----------------------------------------------------------------------- 
*           evaluate other candidates to merge
*----------------------------------------------------------------------- 
      DO j = 3, n-1, 2
         i = prev(j)
         k = next(j)
         CALL CALC_P (i, j, k, n, CM, CM2, slope, peach(j))
       END DO
       
*----------------------------------------------------------------------- 
*         merge segments one at a time
*-----------------------------------------------------------------------
       done = .FALSE.
       DO WHILE (.NOT. (done .OR. v .EQ. 1))
         done = .TRUE.
         pmin = pcrit
         g = 0
         DO j = 2, n-1             ! find least p
           i = prev(j)
           IF (next(i) .NE. j) CYCLE       ! node already deleted
           
           IF (peach(j) < pmin) THEN
             pmin = peach(j)
             g = j
           END IF           
         END DO  ! next j
         
         IF (pmin < pcrit) THEN     ! merge slope
           done = .FALSE.
           i = prev(g)
           k = next(g)
           CALL CALC_SLOPE (i, k, n, CW, CWX, CWY, CWX2, CWXY, slope)
           IF (ISNAN(slope(i))) PRINT *, 'mullin:  NaN at merge!!!'
           prev(k) = i
           next(i) = k
           v = v - 1       ! one fewer segment           

*             consider merging new segment with previous
           IF (i > 1)
     &         CALL CALC_P (prev(i), i, k, n, CM, CM2, slope, peach(i))
           
*             consider merging new segment with next
           IF (k < n)
     &         CALL CALC_P (i, k, next(k), n, CM, CM2, slope, peach(k))
           
         END IF  ! p value less than threshhold?
       END DO  ! while merges still to make
       
*           arrays with bound depending on v
       ALLOCATE (IP(v+1), A(n,v+1), STAT=ierr)
       IF (ierr .NE. 0) RETURN
       
*-----------------------------------------------------------------------
  50   CONTINUE  ! least squares problem for piecewise line
*-----------------------------------------------------------------------
       DO j = 1, v+1
         DO i = 1, n
           A(i,j) = 0.
         END DO
       END DO
       
*                 the coefficient matrix
       j = 1
       h = 1
       k = next(h)
       DO i = 1, n
         IF (i .EQ. k .AND. i < n) THEN
           j = j + 1
           h = k
           k = next(k)
         END IF
         r = (X(i) - X(h)) / (X(k) - X(h))
         A(i,j) = 1. - r
         A(i,j+1) = r
       END DO
       
*                right-hand side
       DO i = 1, n
         cept(i) = Y(i)
       END DO
              
*    norm of A:  maximum of all column sums of magnitude
       tau = 0.
       DO j = 1, v+1
         r = 0.
         DO i = 1, n
           r = r + ABS(A(i,j))
         END DO
         tau = MAX(tau, r)
       END DO
       tau = tau * EPS
       
*           solve least-squares problem
       CALL HFTI (A, n, n, v+1, cept, n, 1, tau, k, 
     &               peach, scis, slope, IP)
       IF (k .EQ. 0) THEN
         PRINT *, 'hfti:  zero rank!'
         ierr = 4
         RETURN
       END IF
     
*               endpoint interpolation for slope
       i = 0
       j = 1
       DO WHILE (j .LE. n)
         i = i + 1
         scis(i) = X(j)
         j = next(j)
       END DO
       
       xnew = scis(1) ; ynew = cept(1)
       DO j = 1, v
         xold = xnew ; xnew = scis(j+1)
         yold = ynew ; ynew = cept(j+1)
         slope(j) = (ynew - yold) / (xnew - xold)
         IF (ISNAN(slope(j))) PRINT *, 'mullin: NaN at interp!!!'
       END DO
       
*-----------------------------------------------------------------------
*        require increasing slope in each segment
*-----------------------------------------------------------------------
       neg = .FALSE.
       vv = v
       h = 1
       DO k = 1, v
         IF (slope(k) .LE. 0.) THEN
           neg = .TRUE.
           PRINT *, 'negative slope at node ', k, ' of ', v
           IF (vv .EQ. 1) THEN 
             PRINT *, 'mullin:  Negative slope, unrecoverable error'
             ierr = 2
             RETURN
           ELSE IF (k .EQ. 1) THEN    ! merge with next segment
             j = next(next(1))
             prev(j) = 1
             next(1) = j
           ELSE IF (next(h) .EQ. n) THEN   ! merge with previous segment
             g = prev(h)             
             prev(n) = g
             next(g) = n
           ELSE   ! decide which segment to merge
             g = prev(h)
             i = next(h)
             j = next(i)
             CALL CALC_P (g, h, j, n, CM, CM2, slope, p_h)
             CALL CALC_P (g, i, j, n, CM, CM2, slope, p_i)
             
             IF (p_h < p_i) THEN ! merge with previous
               prev(i) = g
               next(g) = i
             ELSE                               ! merge with next
               prev(j) = h
               next(h) = j
             END IF
           END IF       ! j = {1, v} ?
           vv = vv - 1
         END IF    ! negative slope?
         IF (next(h) .GE. n) EXIT
         h = next(h)
       END DO    ! next node 
       v = vv
         
       IF (neg) GO TO 50   ! and solve the least-squares problem
       DEALLOCATE (prev, next, peach, cept, CM, CM2, CW, CWX, CWY, 
     &  CWX2, CWXY, IP, A, STAT=ierr)
       RETURN
         
      END ! of mullin
*++++++++++++++++++++++++ End of file mullin.f +++++++++++++++++++++++++
