*-----------------------------------------------------------------------
* cluster.f - data clustering for music analysis
* by Andy Allinger, 2011-2017, released to the public domain
* This program may be used by any person for any purpose.
*-----------------------------------------------------------------------

*-----------------------------------------------------------------------
* SMD - Subtract Mixed Data
*
*__Name______Type______In/Out_________Description_______________________
*  A         Real      In             Value
*  B         Real      In             Value
*  T         Real      In             Data type descriptor
*  SMD       Real      Out            Absolute difference
*-----------------------------------------------------------------------
      FUNCTION SMD (A, B, T)       
       IMPLICIT NONE
       REAL A, B, T, SMD
       
       REAL LIT, RMACHINE
       LOGICAL FIRST
       SAVE LIT, FIRST
       DATA FIRST / .TRUE. /
       IF (FIRST) THEN
         LIT = RMACHINE(2)
         FIRST = .FALSE.
       END IF
       
       IF (ABS(T) < LIT) THEN                   ! common math
         SMD = ABS(A - B)
       ELSE IF (T > 0.) THEN                        ! modulo arithmetic
         SMD = MOD( ABS(A - B), T )
         SMD = MIN(SMD, T - SMD)        ! choose the shorter path
       ELSE IF (T > -1.5) THEN             ! categorical difference
         IF (ABS(A - B) > 0.5) THEN                 
           SMD = 1.
         ELSE
           SMD = 0.
         END IF
       ELSE                             ! contra-categorical
         IF (ABS(A - B) > 0.5) THEN
           SMD = 0.
         ELSE
           SMD = 1.
         END IF
       END IF
       RETURN
      END ! of smd


*-----------------------------------------------------------------------
* DIST1 - the Manhattan distance between two points 
*         D = SUM (W[i] @ |(A[i]-B[i]) % T[i]|)  
*
*__Name_____Type_____In/Out___Description_______________________________
*  A[P]     Real     In       Column of a data matrix
*  B[P]     Real     In       Column of a data matrix
*  T[P]     Real     In       Modulo arithmetic divisors
*  W[P]     Real     In       Importance weights
*  P        Integer  In       Dimension of space
*  DIST1    Real     Out      Manhattan distance
*-----------------------------------------------------------------------
      FUNCTION DIST1 (A, B, T, W, p)
       IMPLICIT NONE
       INTEGER p
       REAL A, B, T, W, DIST1
       DIMENSION A(P), B(P), T(P), W(P)
                   
       INTEGER J
       REAL DIF, SMD

       DIST1 = 0.
       DO J = 1, P
         DIF = SMD(A(J), B(J), T(J))
         DIST1 = DIST1 + W(J) * DIF
       END DO
       RETURN
      END  ! of DIST1
      
      
*-----------------------------------------------------------------------
* DIST2 - the squared Euclidean distance between two points 
*         D = SUM (W[i] @ (A[i]-B[i]) % T[i]) ^2 
*__Name_____Type_____In/Out___Description_______________________________
*  A[P]     Real     In       Column of a data matrix
*  B[P]     Real     In       Column of a data matrix
*  T[P]     Real     In       Modulo arithmetic divisors
*  W[P]     Real     In       Importance weights
*  P        Integer  In       Dimension of space
*  DIST2    Real     Out      Squared Euclidean distance
*-----------------------------------------------------------------------
      FUNCTION DIST2(A, B, T, W, P)
       IMPLICIT NONE
       INTEGER P
       REAL A, B, T, W, DIST2
       DIMENSION A(P), B(P), T(P), W(P)

       INTEGER J
       REAL DIF, SMD

       DIST2 = 0.
       DO J = 1, P
         DIF = SMD(A(J), B(J), T(J))
         DIST2 = DIST2 + (W(J) * DIF) **2
       END DO
       RETURN
      END     ! of DIST2


*=======================================================================
* hclust - Hyperplane partitioning
* Divisive clustering based on partial sorting, a reduced form of
* Anthony Hoare's quick sort algorithm.
*
*_____Name_____________Type________I/O___Description____________________
*     X[PLIM,N]        REAL        In    data array
*     PLIM             INTEGER     In    row dimension of X
*     P                INTEGER     In    dimension of data
*     N                INTEGER     In    number of data points
*     T[P]             REAL        In    modulo period for distance
*     W[P]             REAL        In    importance weights
*     K                INTEGER     In    desired number of clusters
*     Z[N]             INTEGER     Out   what cluster each point is in
*     POP[K]           REAL        Out   cluster populations
*     IND[N]           INTEGER     Out   index array that sorts X into
*                                         output order from input order
*     BOUND[K]         INTEGER     None  work array
*     DFUNC            FUNCTION    In    function name of return type REAL
*                                         dfunc(A, B, T, W, p)
*                                         INTEGER p
*                                         REAL A[p], B[p], T[p], W[p]
*     FAULT            INTEGER     Out   exit code
*=======================================================================
      SUBROUTINE HCLUST
     &      (X,PLIM,P,N,T,W,K,Z,POP,IND,BOUND,DFUNC,FAULT)

       IMPLICIT NONE
       INTEGER PLIM, P, N, K, Z, IND, BOUND, FAULT
       REAL X, T, W, POP, DFUNC
       DIMENSION X(PLIM,N), T(P), W(P),
     &    Z(N), POP(K), IND(N), BOUND(K)

*          Local variables
       INTEGER HI, LO, IHI, ILO, i, L, LL, HAT
       INTEGER  BIGSIZE, BIGPART, PARTSIZE, ISWAP, RL, RM, RH
       REAL DHL, DHM, DML

*         initialize
       FAULT = 0
       DO i = 1, N
         IND(i) = i
       END DO
       DO L = 1, K
         BOUND(L) = N
       END DO
       POP(1) = FLOAT(N)
       
*          Trivial cases
       IF (K .EQ. 1) THEN
         DO i = 1, N
           Z(i) = 1
         END DO
         POP(1) = FLOAT(N)
         RETURN
       ELSE IF (K .EQ. N) THEN
         DO i = 1, N
           Z(i) = i
         END DO
         DO L = 1, K
           POP(L) = 1.
         END DO
         RETURN
       END IF

*=======================================================================
*       Partition a section of the array such that elements closer to the first
*       element are moved to the front, and elements closer to the last element
*       are moved to the rear.  Continue until enough clusters have been made.
*=======================================================================
       DO L = 1, K-1

         PARTSIZE = NINT(POP(1))          ! find largest partition
         BIGSIZE = PARTSIZE
         BIGPART = 1
         DO LL = 2, L
           PARTSIZE = NINT(POP(LL))
           IF (PARTSIZE > BIGSIZE) THEN
             BIGSIZE = PARTSIZE
             BIGPART = LL
           END IF
         END DO

         HI = BOUND(BIGPART)               ! find limits of partition
         LO = HI - BIGSIZE + 1
         IHI = HI - 1
         ILO = LO + 1
         IF (LO < 1) PRINT *, '!!lo=', lo, ' bigpart=', bigpart, bigsize
         IF (BIGSIZE .EQ. 2) GO TO 70

*          swap random pivot values to ends of partition (introduce chance)
         RH = HAT(HI-LO+1) -1 + LO
  20     RL = HAT(HI-LO+1) -1 + LO
         IF (RL .EQ. RH) GO TO 20

*           a third pivot value in case first two are near each other
  30     RM = HAT(HI-LO+1) -1 + LO
         IF (RM .EQ. RH .OR. RM .EQ. RL) GO TO 30
         DHL = DFUNC(X(1,IND(RH)), X(1,IND(RL)), T, W, P)
         DHM = DFUNC(X(1,IND(RH)), X(1,IND(RM)), T, W, P)
         DML = DFUNC(X(1,IND(RM)), X(1,IND(RL)), T, W, P)
         IF (DHM > DHL .OR. DML > DHL) THEN
           IF (DHM > DML) THEN
             RL = RM
           ELSE
             RH = RM
           END IF
         END IF

         ISWAP = IND(RH)
         IND(RH) = IND(HI)
         IND(HI) = ISWAP
         ISWAP = IND(RL)
         IND(RL) = IND(LO)
         IND(LO) = ISWAP

*=======================================================================
*            sort
*=======================================================================
  50     IF (ILO .LE. IHI) THEN       ! compare to lo pivot
           IF (DFUNC(X(1,IND(ILO)), X(1,IND(LO)), T, W, P) .LE.
     &    DFUNC(X(1,IND(ILO)), X(1,IND(HI)), T, W, P)) THEN
             ILO = ILO + 1
             GO TO 50
           END IF
         END IF

  60     IF (IHI .GE. ILO) THEN        ! compare to hi pivot
           IF (DFUNC(X(1,IND(IHI)), X(1,IND(HI)), T, W, P) .LE.
     &    DFUNC(X(1,IND(IHI)), X(1,IND(LO)), T, W, P)) THEN
             IHI = IHI - 1
             GO TO 60
           END IF
         END IF

         IF (ILO < IHI) THEN
           ISWAP = IND(ILO)
           IND(ILO) = IND(IHI)
           IND(IHI) = ISWAP
           ILO = ILO + 1
           IHI = IHI - 1
           IF (ILO .LE. IHI) GO TO 50
         END IF

*=======================================================================
  70   CONTINUE      ! Add this partition boundary to the bounds array
*=======================================================================
*             equal split if all distances were the same
         IF (ILO .EQ. HI) THEN
           IHI = (LO + HI) / 2
           ILO = IHI + 1
         END IF
         POP(BIGPART) = FLOAT(HI - IHI)        ! reduce population
         POP(L+1) = FLOAT(ILO - LO)            ! population in new partition
         BOUND(L+1) = IHI               ! append
       END DO     ! next partition

*=======================================================================
*  Recover memberships and populations from the index array
*=======================================================================
       CALL ISORT (BOUND, K)
       L = 1
       DO i = 1, N
         IF (i > BOUND(L)) L = L + 1
         Z(IND(i)) = L
       END DO

       i = 0
       DO L = 1, K
         POP(L) = FLOAT(BOUND(L) - i)
         i = BOUND(L)
       END DO
       RETURN
      END  ! of HCLUST



*-----------------------------------------------------------------------
* Passive sort of unsigned integers by an unstable radix algorithm
*
*___Name_____Type______In/Out___Description_____________________________
*   IX(N)    Integer   In       Input array
*   IND(N)   Integer   Out      Permutation vector
*   N        Integer   In       Length of array
*   M        Integer   In       Largest value in array
*-----------------------------------------------------------------------
      SUBROUTINE USORTU (IX, IND, N, M)
       IMPLICIT NONE
       INTEGER IX, IND, N, M
       DIMENSION IX(N), IND(N)

       INTEGER B, I, J, K, L, IL(0:31), IG(0:31), LG(0:31)  ! local variables
       INTEGER BITSZ                                        ! external function

*         Begin
       DO I = 1, N                ! initialize
         IND(I) = I
       END DO
       IF (N < 2) RETURN          ! validate

       J = BITSZ() - 1             ! greatest bit position
       B = J                       ! avoid compiler warning
       DO I = J, 0, -1
         B = I
         IF (BTEST(M, I)) GO TO 10
       END DO

  10   L = B                 ! most significant bit
       IL(L) = 1
       IG(L) = N
       LG(L) = -1

*            Sort
  20   I = IL(L)
       J = IG(L)
       IF (I .GE. J) GO TO 50    ! if empty or single, back up

*           Find a set bit at the front of the partition
  30   IF (I .LE. J) THEN
         IF (.NOT. BTEST(IX(IND(I)), L)) THEN
           I = I + 1
           GO TO 30
         END IF
       END IF

*           Find a clear bit at the rear of the partition
  40   IF (I .LE. J) THEN
         IF (BTEST(IX(IND(J)), L)) THEN
           J = J - 1
           GO TO 40
         END IF
       END IF

*             Swap these elements
       IF (I < J) THEN
         K = IND(I)
         IND(I) = IND(J)
         IND(J) = K
         I = I + 1
         J = J - 1
         GO TO 30
       END IF

*              Traverse tree
       IF (L .EQ. 0) GO TO 50  ! At the least significant bit, go back up
       IL(L-1) = IL(L)
       IG(L-1) = J
       LG(L) = -1
       L = L - 1                     ! Descend
       GO TO 20

  50   L = L + 1                     ! Back up
       IF (L > B) RETURN
       IF (LG(L) > 0) GO TO 50  ! Both subpartitions sorted; go back up
       IL(L-1) = IG(L-1) + 1         ! Do the greater sub-region
       IG(L-1) = IG(L)
       LG(L) = +1                    ! Mark as greater
       L = L - 1                     ! Descend
       GO TO 20
      END   ! of USORTU



*-----------------------------------------------------------------------
*    Let each center be the average of its points
*
*___Name____________Type______In/Out____Description_____________________
*   X(PLIM,N)       Real      In        Data points
*   PLIM            Integer   In        Row dimension of X
*   P               Integer   In        Variables each datum
*   N               Integer   In        # of data points
*   T(P)            Real      In        Periods
*   CAT(P)          Integer   In        # of categories
*   CATTOT          Integer   In        Sum of CAT()
*   C(PLIM,K)       Real      Out       Centers
*   K               Integer   In        Number of clusters
*   TAL(CATTOT,K)   Real      Out       Frequencies of observers
*   Z(N)            Integer   In        Cluster membership
*   POP(K)          Real      Out       Cluster population
*   ROBUST          Logical   In        Median instead of mean
*   WANTM           Logical   In        Compute modes of categoricals
*   LIVEC(K)        Integer   In        Skip clusters with livec .LE. 0
*   IPERM(N)        Integer   Neither   Workspace
*   XW(2@N)         Real      Neither   Workspace
*-----------------------------------------------------------------------
      SUBROUTINE CENTER (X, PLIM, P, N, T, CAT, CATTOT,
     &  C, K, TAL, Z, POP, ROBUST, WANTM, LIVEC, IPERM, XW)

       IMPLICIT NONE
       INTEGER PLIM, P, N, CAT, CATTOT, K, Z, LIVEC, IPERM
       REAL X, T, C, TAL, POP, XW
       LOGICAL ROBUST, WANTM
       DIMENSION X(PLIM,N), T(P), CAT(P), C(PLIM,K),
     &    TAL(CATTOT,K), Z(N), POP(K), LIVEC(K), IPERM(N), XW(2*N)

*        Local variables
       INTEGER H, HCUM, I, I1, I5, I9, J, L
       REAL LIT, DEV
       LOGICAL FIRST

*         External functions
       INTEGER HAT
       REAL RMACHINE

       SAVE LIT, FIRST
       DATA FIRST / .TRUE. /

*-----------------------------------------------------------------------
*           begin
*-----------------------------------------------------------------------
       IF (FIRST) THEN
         LIT = RMACHINE(2)
         FIRST = .FALSE.
       END IF

*       Count cluster populations.
       DO L = 1, K
         POP(L) = 0.
       END DO
       DO I = 1, N
         L = Z(I)
         POP(L) = POP(L) + 1.
       END DO

*       Sort implicitly by cluster ID
       CALL USORTU (Z, IPERM, N, K)

*        averages
       DO 30 J = 1, P
         IF (CAT(J) > 0 .AND. .NOT. WANTM) GO TO 30    ! don't need mode
         I9 = 0
         DO 20 L = 1, K
           I1 = I9 + 1
           I9 = I9 + NINT(POP(L))
           IF (LIVEC(L) .LE. 0 .OR. POP(L) < 0.5) GO TO 20   ! skip dead/empty
           H = 0
           DO I5 = I1, I9
             I = IPERM(I5)
             H = H + 1
             XW(H) = X(J,I)
           END DO
           IF (CAT(J) > 0) THEN
             CALL RMODE (XW, H, CAT(J), TAL, I)
             I = HAT(I)
             C(J,L) = TAL(I,1)         ! OK because i .LE. cattot always
           ELSE IF (T(J) > LIT) THEN        ! modulo arithmetic
             IF (ROBUST) THEN
               CALL CYMED (XW, H, T(J), XW(H+1), I, DEV)
             ELSE
               CALL CYMEAN (XW, H, T(J), XW(H+1), I, DEV)
             END IF
             I = HAT(I)
             C(J,L) = XW(H+I)
           ELSE IF (ROBUST) THEN           ! scalar data
             CALL MEDIAN (XW, H, C(J,L))
           ELSE                             ! arithmetic mean
             CALL MEAN (XW, H, C(J,L))
           END IF   ! robust ?
  20     CONTINUE
  30   CONTINUE
       IF (WANTM) RETURN     ! don't need to tally in this case

*          categorical and contra-categorical frequencies
       DO L = 1, K                ! zero
         DO H = 1, CATTOT
           TAL(H,L) = 0.
         END DO
       END DO

       HCUM = 0
       DO 50 J = 1, P
         IF (CAT(J) .EQ. 0) GO TO 50     ! skip others
         DO I = 1, N                              ! count
           H = HCUM + NINT(X(J,I))
           L = Z(I)
           TAL(H,L) = TAL(H,L) + 1.
         END DO
         HCUM = HCUM + CAT(J)
  50   END DO  ! next j
       RETURN
      END   ! of center


*-----------------------------------------------------------------------
* Let each center be one of its points
*
*___Name____________Type______In/Out____Description_____________________
*   X(PLIM,N)       Real      In        Data points
*   PLIM            Integer   In        Row dimension of X
*   P               Integer   In        Variables each datum
*   N               Integer   In        # of data points
*   C(PLIM,K)       Real      Out       Centers
*   K               Integer   In        Number of clusters
*   Z(N)            Integer   In        Cluster membership
*   POP(K)          Real      Out       Cluster population
*   IPERM(N)        Integer   Neither   Workspace
*-----------------------------------------------------------------------
      SUBROUTINE XOVER (X, PLIM, P, N, C, K, Z, POP, IPERM)
       IMPLICIT NONE
       INTEGER PLIM, P, N, K, Z, IPERM
       REAL X, C, POP
       DIMENSION X(PLIM,N), C(PLIM,K), Z(N), POP(K), IPERM(N)

*        Local variables
       INTEGER I, I5, I9, J, L, NPL

*         External functions
       INTEGER HAT

*-----------------------------------------------------------------------
*           begin
*-----------------------------------------------------------------------
*       Count cluster populations.
       DO L = 1, K
         POP(L) = 0.
       END DO
       DO I = 1, N
         L = Z(I)
         POP(L) = POP(L) + 1.
       END DO

!        PRINT *, 'xover: input k is: ', k
!        K = 0
!        DO I = 1, N
!          K = MAX(K, Z(I))
!        END DO
!        PRINT *, 'xover: maximum Z(i) is: ', K

*       Sort implicitly by cluster ID
       CALL USORTU (Z, IPERM, N, K)
       
!       PRINT *, 'xover: iperm is now: ', IPERM

*       Choose a random member
       I9 = 0
       DO 20 L = 1, K
         NPL = NINT(POP(L))
         I5 = I9 + HAT(NPL)
         I9 = I9 + NPL
         IF (NPL < 1) GO TO 20
         I = IPERM(I5)
!         PRINT *, 'i is ', i, ' and i5 is ', i5
         DO J = 1, P
           C(J,L) = X(J,I)
         END DO
  20   CONTINUE
       RETURN
      END  ! of XOVER


*-----------------------------------------------------------------------
* recs - re-estimate by cubic smoothing
*
*___Name________Type______I/O_______Description_________________________
*   X(PLIM,N)   Real      Both      Data points in R^p
*   PLIM        Integer   In        Row dimension of X
*   P           Integer   In        Measurements per point
*   N           Integer   In        Number of points
*   T(P)        Real      In        Period of modulo arithmetic
*   M           Integer   In        Number of sources
*   C(PLIM,K)   Real      In        Cluster centroids
*   K           Integer   In        # clusters
*   Z(N)        Integer   In        What cluster each point is in
*   IPERM(N)    Integer   Neither   Permutation vector
*   IDX(N)      Integer   Neither   File ID, each point
*   RW(N)       Real      In        W values for CFIT
*   RX(N)       Real      Work      X values for CFIT
*   RY(N)       Real      Neither   Y values for CFIT
*-----------------------------------------------------------------------
      SUBROUTINE RECS (X,PLIM,P,N,T,M,C,K,Z,IPERM,IDX,RW,RX,RY)
       IMPLICIT NONE
       INTEGER PLIM, P, N, M, K, Z, IPERM, IDX
       REAL X, T, C, RW, RX, RY
       DIMENSION X(PLIM,N), T(P), C(PLIM,K), Z(N), 
     $  IPERM(N), IDX(N), RW(N), RX(N), RY(N)

*          Constants
       REAL SMOOTH
       PARAMETER (SMOOTH = 0.05)  ! amount to adjust towards regression value

*            Local variables
      INTEGER
     $        H, HOLD,          ! file ID's
     $        I, I0, I2, I5,    ! count points
     $        IERR,             ! error code
     $        J,                ! count dimensions
     $        L,                ! cluster index
     $        O                 ! # objects in input file
     
       REAL A(0:3),            ! cubic coefficients
     $      R                  ! temp scalar

*             External functions
!!       REAL CBRT                    ! cube root
       LOGICAL ALMEQ                ! the almost-equal function

*-----------------------------------------------------------------------
*        Begin.
*-----------------------------------------------------------------------
       IERR = 0                    ! clear error codes
       DO I = 1, N                 ! source file (refer to BLEND in mixmidi.f)
         IDX(I) = NINT(X(3,I))
       END DO
       CALL USORTU (IDX, IPERM, N, M)  ! Sort implicitly
       
       DO 10 J = 1, P              ! for each dimension
         IF (.NOT. ALMEQ(T(J), 0.0)) GO TO 10
         DO I = 1, N                ! fill work arrays
           I5 = IPERM(I)
           RX(I) = X(J,I5)
           L = Z(I5)
           RY(I) = C(J,L)
         END DO

         H = IDX(IPERM(1))           ! which source file
         I0 = 1
         O = 0
         DO I = 1, N                 ! for each object
           I5 = IPERM(I)
           HOLD = H
           H = IDX(I5)
           IF (I .EQ. N) O = O + 1
           IF (H .NE. HOLD .OR. I .EQ. N) THEN      ! compute cubic
           
!!!!!!!!!!!!!!!!!!!! debug
!             PRINT *, 'data sent to cfit:'
!             DO I2 = I0, I0+O-1
!               PRINT *, RW(I2),  RX(I2), RY(I2)
!             END DO
!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
           
             CALL CFIT (RW(I0), RX(I0), RY(I0), O, A, IERR)
             IF (IERR .NE. 0) PRINT *, 'recs: cfit returns #', IERR
             DO I2 = I0, I0+O-1
               R = RX(I2)
               RY(I2) = ((A(3) * R + A(2)) * R + A(1)) * R + A(0)
             END DO  ! next i
             I0 = I
             O = 0
           END IF
           O = O + 1
         END DO  ! next i
         
*             Apply the estimate
   8     FORMAT ('file ', I3, ' changing X from: ', F14.6, $)
   9     FORMAT (' to: ', F14.6)
         DO I = 1, N
           I5 = IPERM(I)
!           PRINT 8, IDX(I5), X(J,I5)
            X(J,I5) = X(J,I5) + SMOOTH * (RY(I) - X(J,I5))
!           PRINT 9, X(J,I5)
         END DO         
  10   CONTINUE  ! next j
       RETURN
      END  ! of RECS


************************************************************************
* DSM - a dissimilarity function
*
*___Variable______Type______I/O_______Description_______________________
*   X(P)          Real      In        Data points
*   P             Integer   In        Dimension of the data
*   T(P)          Real      In        Period of modulo arithmetic
*   W(P)          Real      In        Importance weights
*   CAT(P)        Integer   In        # categories each variable
*   CATTOT        Integer   In        Total categories
*   C(P)          Real      Both      Center points of clusters
*   TAL(CATTOT)   Real      Both      Observer frequencies
*   POP           Real      In        # of points in each cluster
*   LIT           Real      In        Negliglible value
*   SELF          Real      In        Object is in this cluster?
*   ROBUST        Logical   In        Use medians
*   BEST          Real      In        Bail out if this value is exceeded
*   DSM           Real      Out       Dissimilarity
************************************************************************
      FUNCTION DSM (X,P,T,W,CAT,CATTOT,C,TAL,POP,LIT,SELF,ROBUST,BEST)
       IMPLICIT NONE
       INTEGER P, CAT, CATTOT
       REAL X, T, W, C, TAL, POP, LIT, SELF, BEST, DSM
       LOGICAL ROBUST
       DIMENSION X(P), T(P), W(P), CAT(P), C(P), TAL(CATTOT)

       INTEGER H, HCUM, J
       REAL DIF

************************************************************************
*           Begin.
************************************************************************
       DSM = 0.
       HCUM = 0
       DO J = 1, P
         IF (CAT(J) .EQ. 0) THEN
           IF (T(J) < LIT) THEN                ! scalar
             DIF = X(J) - C(J)
           ELSE                                ! cyclic
             DIF = MOD(ABS(X(J) - C(J)), T(J))
             DIF = MIN(DIF, T(J) - DIF)              ! choose the shorter path
           END IF
         ELSE
           H = HCUM + NINT(X(J))
           IF (T(J) > -2.5) THEN                  ! categorical
             DIF = (POP - TAL(H)) / POP
           ELSE                             ! contra-categorical
             DIF = (TAL(H) - SELF) / POP
           END IF
           HCUM = HCUM + CAT(J)
         END IF  ! cat > 0 ?
         IF (ROBUST) THEN
           DSM = DSM + ABS(W(J) * DIF)     ! Manhattan dissimilarity
         ELSE
           DSM = DSM + (W(J) * DIF) **2    ! Squared Euclidean dissimilarity
         END IF
       
         IF (DSM > 1E30) THEN 
           PRINT *, 'dsm: huge value.  X: ', X, ' p: ', P, ' T: ', T,
     $        ' W: ', W, ' cat: ', CAT, ' Cattot: ', CATTOT, ' tal:',
     $       TAL, ' pop:', POP, ' lit: ', LIT, ' self:', SELF,
     $       ' robust: ', ROBUST, ' best: ', BEST
         END IF  
       
         IF (DSM .GE. BEST) RETURN     ! bail out
       END DO  ! next j
       RETURN
      END  ! of DSM


************************************************************************
* theur - threshhold heuristic, the maximin dissimilarity
*
*___Name____________Type______I/O_______Description________________________
*   X(PLIM,N)       Real      In        Data points
*   PLIM            Integer   In        Row dimension of X
*   P               Integer   In        Dimension of the data
*   N               Integer   In        Number of points
*   T(P)            Real      In        Periods of cyclic data
*   W(P)            Real      In        Importance weights
*   CAT(P)          Integer   In        # categories each variable
*   CATTOT          Integer   In        Total categories
*   C(PLIM,K)       Real      Neither   Center points of clusters
*   K               Integer   In        Default number of clusters
*   TAL(CATTOT,K)   Real      Neither   Observer frequencies
*   Z(N)            Integer   Neither   Argument to CENTER
*   POP(K)          Real      Neither   Population each cluster
*   ROBUST          Logical   In        Use medians instead of means
*   LIVEC(K)        Integer   Neither   Argument to CENTER
*   IPERM(N)        Integer   Neither   Permutation vector
*   SHORTX(N)       Real      Neither   Dissimilarity to each point
*   WORK(2*N)       Real      Neither   Argument to CENTER
*   THRESH          Real      Out       Threshhold distance
************************************************************************
      SUBROUTINE THEUR (X, PLIM, P, N, T, W, CAT, CATTOT, C, K, TAL, Z, 
     $  POP, ROBUST, LIVEC, IPERM, SHORTX, WORK, THRESH)
       IMPLICIT NONE
       INTEGER PLIM, P, N, CAT, CATTOT, K, Z, LIVEC, IPERM
       REAL X, T, W, C, TAL, POP, SHORTX, WORK, THRESH
       LOGICAL ROBUST
       DIMENSION X(PLIM,N), T(P), W(P), CAT(P), C(PLIM,K), 
     $  TAL(CATTOT,K), Z(N), POP(K), LIVEC(K), IPERM(N), SHORTX(N), 
     $  WORK(2*N)

*          Local variables
       INTEGER H, HCUM, I, I0, I1, J, L
       REAL BIG, D, FAR, LIT, SELF

*          External functions
       REAL DSM, RMACHINE

       DATA I1 / 1 /  ! avoid compiler warning
       
************************************************************************
*            Begin.
************************************************************************
       LIT = RMACHINE(2)
       BIG = RMACHINE(3)
       DO I = 1, N
         SHORTX(I) = BIG
       END DO

*         First center is the overall average
       DO I = 1, N
         Z(I) = 1
       END DO
       LIVEC(1) = 1
       CALL CENTER (X, PLIM, P, N, T, CAT, CATTOT, C, 1, TAL, Z, POP, 
     $  ROBUST, .FALSE., LIVEC, IPERM, WORK)
       I0 = 0    ! remember what point was chosen

*        Choose each successive center as the point with the longest
*         minimum distance to an existing center
       DO L = 2, K
         FAR = 0.
         I0 = I1
         DO I = 1, N            ! compare each point to previous center
           IF (I .EQ. I0) THEN
             SELF = 1.
           ELSE
             SELF = 0.
           END IF
           D = DSM (X(1,I), P, T, W, CAT, CATTOT, C(1,L-1),
     $             TAL(1,L-1), POP(L-1), LIT, SELF, ROBUST, SHORTX(I))
           IF (D < SHORTX(I)) SHORTX(I) = D
           IF (SHORTX(I) > FAR) THEN           ! longest short distance
             FAR = SHORTX(I)
             I1 = I
           END IF
         END DO                      ! next data point
         
         DO H = 1, CATTOT
           TAL(H,L) = 0.
         END DO
         HCUM = 0.
         DO J = 1, P         ! assign center
           C(J,L) = X(J,I1)
           IF (CAT(J) > 0) THEN
             H = HCUM + NINT(X(J,I1))
             TAL(H,L) = 1.
             HCUM = HCUM + CAT(J)
           END IF
         END DO
         POP(L) = 1.
       END DO               ! next center to initialize

*   Threshhold distance is longest distance of any point to its center
       THRESH = 0.
       DO I = 1, N
         IF (I .EQ. I1) THEN
           SELF = 1.
         ELSE
           SELF = 0.
         END IF
         D = DSM (X(1,I), P, T, W, CAT, CATTOT, C(1,K), TAL(1,K),
     $     POP(K), LIT, SELF, ROBUST, SHORTX(I))
         IF (D < SHORTX(I)) SHORTX(I) = D
         IF (SHORTX(I) > THRESH) THRESH = SHORTX(I)
       END DO
       RETURN
      END  ! of THEUR


************************************************************************
* Initialize cluster centers based on distance
*
* Refer to:
*    "kmeans++: the advantages of careful seeding"
*    David Arthur and Sergei Vassilvitskii
*    Proceedings of the eighteenth annual ACM-SIAM Symposium
*      on Discrete Algorithms, 2007
*
*___Name____________Type______I/O_______Description_____________________
*   X(PLIM,N)       Real      In        Data points
*   PLIM            Integer   In        Row dimension of X
*   P               Integer   In        Dimension of the data
*   N               Integer   In        Number of points
*   T(P)            Real      In        Period of modulo arithmetic
*   W(P)            Real      In        Importance weights
*   CAT(P)          Integer   In        Categories each variable
*   CATTOT          Integer   In        Total categories
*   C(PLIM,K)       Real      Out       Center points of clusters
*   K               Integer   In        Number of clusters to make
*   TAL(CATTOT,K)   Real      Out       Category frequencies each cluster
*   POP(K)          Real      Out       Populations 
*   ROBUST          Logical   In        Use medians instead of means
*   SHORTX(N)       Real      Neither   Squared distance to each point
*   CUMD(N)         Real      Neither   Cumulative squared distance
*   THRESH          Real      Out       Threshhold distance
************************************************************************
      SUBROUTINE DINIT (X, PLIM, P, N, T, W, CAT, CATTOT, C, K, TAL, 
     $  POP, ROBUST, SHORTX, CUMD, THRESH)

       IMPLICIT NONE
       INTEGER PLIM, P, N, CAT, CATTOT, K
       REAL X, T, W, C, TAL, POP, SHORTX, CUMD, THRESH
       LOGICAL ROBUST
       DIMENSION X(PLIM,N), T(P), W(P), CAT(P), C(PLIM,K), 
     $  TAL(CATTOT,K), POP(K), SHORTX(N), CUMD(N)

*          Local variables
       INTEGER H, HCUM, I, I1, J, L
       REAL BIG, D, LIT, SELF, TOT, U

*          External functions
       INTEGER HAT
       REAL DSM, 
     $      RMACHINE,
     $      URAND

       LIT = RMACHINE(2)
       BIG = RMACHINE(3)

* Begin.  Choose each center with probability proportional to its
* least squared distance from any existing center.
       DO I = 1, N
         SHORTX(I) = BIG
       END DO

       I1 = HAT(N)  ! choose first center at random
       DO H = 1, CATTOT
         TAL(H,1) = 0.
       END DO
       HCUM = 0
       DO J = 1, P                         ! copy
         C(J,1) = X(J,I1)
         IF (CAT(J) > 0) THEN
           H = HCUM + NINT(X(J,I1))
           TAL(H,1) = 1.
           HCUM = HCUM + CAT(J)
         END IF
       END DO
       POP(1) = 1.

       DO L = 2, K
         TOT = 0.
         DO I = 1, N             ! compare each point to previous center
           IF (I .EQ. I1) THEN
             SELF = 1.
           ELSE
             SELF = 0.
           END IF
           D = DSM (X(1,I), P, T, W, CAT, CATTOT, C(1,L-1), TAL(1,L-1),
     $                POP(L-1), LIT, SELF, ROBUST, SHORTX(I))
     
            IF (D > 1E30) PRINT *, 'dinit: huge D!!!'
     
           IF (D < SHORTX(I)) SHORTX(I) = D
           TOT = TOT + SHORTX(I)
           CUMD(I) = TOT
         END DO                      ! next data point

         U = URAND() * TOT    ! uniform at random over cumulative distance
         CALL RBSGT (CUMD, N, U, I1)
         DO H = 1, CATTOT
           TAL(H,L) = 0.
         END DO
         HCUM = 0
         DO J = 1, P         ! assign center
           C(J,L) = X(J,I1)
           IF (CAT(J) > 0) THEN
             H = HCUM + NINT(X(J,I1))
             TAL(H,L) = 1.
             HCUM = HCUM + CAT(J)
           END IF
         END DO
         POP(L) = 1.
       END DO               ! next center to initialize

*   Threshhold distance is longest distance of any point to its center
       THRESH = 0.
       DO I = 1, N
         IF (I .EQ. I1) THEN
           SELF = 1.
         ELSE
           SELF = 0.
         END IF    
         D = DSM (X(1,I), P, T, W, CAT, CATTOT, C(1,K), TAL(1,K),
     $                POP(K), LIT, SELF, ROBUST, SHORTX(I))
         IF (D < SHORTX(I)) SHORTX(I) = D
         IF (SHORTX(I) > THRESH) THRESH = SHORTX(I)
       END DO
       
       RETURN
      END  ! of DINIT



***********************************************************************
* KCLUST - partition clustering for mixed data
* origin:  Hugo Steinhaus, 1956
*
*___Variable___________Type______I/O_______Description__________________
*   X(PLIM,N)          Real      In        Data points
*   PLIM               Integer   In        Row dimension of x
*   P                  Integer   In        Dimension of the data
*   N                  Integer   In        Number of points
*   T(P)               Real      In        Period of modulo arithmetic
*   W(P)               Real      In        Importance weights
*   CAT(P)             Integer   In        # categories each variable
*   CATTOT             Integer   In        Total categories
*   C(PLIM,KMAX)       Real      Both      Center points of clusters
*   KMIN               Integer   In        Fewest clusters to make
*   K                  Integer   Both      Number of clusters to make
*   KMAX               Integer   In        Most clusters to make
*   TAL(CATTOT,KMAX)   Real      Both      Observer frequencies
*   Z(N)               Integer   Both      What cluster a point is in
*   POP(KMAX)          Real      Both      # of points in each cluster
*   ROBUST             Logical   In        Use medians
*   THRESH             Real      In        Threshhold for forming new cluster
*   LIVEC(KMAX)        Integer   Neither   Workspace, 1 = live, 0 = dead
*   LIVEX(N)           Integer   Neither   Workspace, time-to-live each point
*   IPERM(N)           Integer   Neither   Permutation vector
*   SHORTX(N)          Real      Neither   Shortest distance to current center
*   WORK(2*N)          Real      Neither   Workspace for averages
*   U                  Real      Out       Residual
*   IFAULT             Integer   Out       Error code
*                                            0 = no errors
*                                            1 = too few clusters to permit 
*                                                 a cluster to be deleted
*                                            2 = too many iterations
*                                            3 = K out of bounds
*                                            4 = can't measure point
*                                            5 = can't insert new cluster
************************************************************************
      SUBROUTINE KCLUST (X,PLIM,P,N,T,W,CAT,CATTOT,C,KMIN,K,KMAX,
     & TAL,Z,POP,ROBUST,THRESH,LIVEC,LIVEX,IPERM,SHORTX,WORK,U,IFAULT)

       IMPLICIT NONE
       INTEGER PLIM, P, N, CAT, CATTOT, KMIN, K, KMAX, Z,
     &          LIVEC, LIVEX, IPERM, IFAULT
       REAL X, T, W, C, TAL, POP, THRESH, SHORTX, WORK, U
       LOGICAL ROBUST
       DIMENSION X(PLIM,N), T(P), W(P), CAT(P), C(PLIM,KMAX),
     &   TAL(CATTOT,KMAX), Z(N), POP(KMAX),
     &   LIVEC(KMAX), LIVEX(N), IPERM(N), SHORTX(N), WORK(2*N)

*               Constants
       INTEGER HS, ITER
       PARAMETER (HS = 13,        ! length of history hash buffer
     &            ITER =  1000)   ! maximum iterations

*                Local variables
       INTEGER
     &         G, H, HCUM, I, I1, IINS, J, L,   ! counters
     &         HL,                              ! length in bytes of Z[]
     &         K0,                              ! temp. value of K
     &         LOLD, LNEW                       ! present, new cluster ID

       INTEGER*4 HASH, HIST(HS)                 ! hashes of prior Z()

       REAL BEST, WORST,    ! Shortest, longest distance to a center
     &      BIG, LIT,       ! very large number, small #
     &      D,              ! a distance
     &      SELF            ! this cluster already contains this point

       LOGICAL CHANGE             ! whether any points have been reassigned

*              external functions
        INTEGER BITSZ                       ! replacement for BIT_SIZE
        REAL DSM, RMACHINE
        LOGICAL ALMEQ

        DATA IINS / 1 /    ! avoid compiler warning

************************************************************************
*           Begin
************************************************************************
       IFAULT = 0                          ! validate, initialize
       LIT = RMACHINE(2)
       BIG = RMACHINE(3)
       IF (K < 1 .OR. K > N) THEN
         IFAULT = 3
         PRINT *, 'kclust:  k= ', K, ' but n=', N
         RETURN
       END IF
       HL = N * BITSZ() / 8

*          initialize
       IF (K .EQ. 1) GO TO 90
       DO L = 1, K                  ! initialize live sets
         LIVEC(L) = 2
       END DO
       DO I = 1, N
         LIVEX(I) = 1
         SHORTX(I) = BIG
       END DO

************************************************************************
*                      Main loop
************************************************************************
       DO G = 1, ITER
         CHANGE = .FALSE.
         CALL SHUFFL (IPERM, N)

************************************************************************
*            Find nearest center for each point
************************************************************************
         WORST = 0.
         DO I1 = 1, N
           I = IPERM(I1)
           LOLD = Z(I)                ! old assignment
           LNEW = LOLD
           BEST = SHORTX(I)

           DO 30 L = 1, K
             IF (LIVEX(I) .LE. 0 .AND. LIVEC(L) .LE. 0) GO TO 30
             IF (L .EQ. LOLD) THEN
               SELF = 1.
             ELSE
               SELF = 0.
             END IF
             D = DSM (X(1,I), P, T, W, CAT, CATTOT, C(1,L),
     $                TAL(1,L), POP(L), LIT, SELF, ROBUST, BEST)

             IF (D < BEST) THEN          ! new nearest center
               BEST = D
               LNEW = L
             END IF
  30       CONTINUE        ! next L
           IF (ALMEQ(BEST, BIG)) THEN   ! incomparable point?
             IFAULT = 4
             RETURN
           END IF
           SHORTX(I) = BEST
           
           IF (BEST > WORST) THEN  ! Compare distance to new cluster threshhold
             IINS = I
             WORST = BEST
           END IF           

************************************************************************
*              Reassign point
************************************************************************
           IF (LOLD .NE. LNEW) THEN
             Z(I) = LNEW
             LIVEC(LOLD) = 2           ! recenter old cluster
             LIVEC(LNEW) = 2           ! recenter new cluster
             CHANGE = .TRUE.
           END IF
         END DO         ! next i1

************************************************************************
* If shortest distance exceeds threshhold, make a new cluster
************************************************************************
         IF (WORST > THRESH .AND. K < KMAX) THEN
           IF (K .GE. KMAX) THEN
             IFAULT = 5
             RETURN
           END IF
           K = K + 1
           LOLD = Z(IINS)
           LNEW = K
           DO J = 1, P
             C(J,LNEW) = X(J,IINS)
           END DO
           DO H = 1, CATTOT
             TAL(H,LNEW) = 0.
           END DO
           HCUM = 0
           DO J = 1, P
             IF (CAT(J) > 0) THEN
               H = HCUM + NINT(X(J,IINS))
               TAL(H,LNEW) = 1.
               HCUM = HCUM + CAT(J)
             END IF
           END DO
           Z(IINS) = LNEW
           LIVEC(LOLD) = 2
           LIVEC(LNEW) = 2
           CHANGE = .TRUE.
         END IF

         IF (.NOT. CHANGE) GO TO 90       ! algorithm halts

************************************************************************
*           Find cluster centers
************************************************************************
         DO L = 1, K                            ! decrement LIVEC
           LIVEC(L) = LIVEC(L) - 1
         END DO
         CALL CENTER (X, PLIM, P, N, T, CAT, CATTOT,
     &    C, K, TAL, Z, POP, ROBUST, .FALSE., LIVEC, IPERM, WORK)

************************************************************************
*          Delete empty clusters
************************************************************************
         K0 = K
         DO L = K0, 1, -1
           IF (POP(L) < 0.5) THEN
             IF (K .LE. KMIN) THEN
               IFAULT = 1
               RETURN
             END IF
             DO J = 1, P
               C(J,L) = C(J,K)
             END DO
             DO H = 1, CATTOT
               TAL(H,L) = TAL(H,K)
             END DO
             DO I = 1, N
               IF (Z(I) .EQ. K) Z(I) = L
              END DO
             POP(L) = POP(K)
             LIVEC(L) = LIVEC(K)
             POP(K) = 0.
             K = K - 1
           END IF ! empty cluster?
         END DO  ! next L

************************************************************************
*            Check if anything is changing
************************************************************************
         HASH = -2128831035           ! canonical arbitrary initialization
         CALL FNV32(Z, HL, HASH)      ! hash of membership vector
         DO J = 1, MIN(G-1, HS)
           IF (HASH .EQ. HIST(J)) GO TO 90      ! exit on loop criterion
         END DO
         J = MOD(G, HS)
         IF (J .EQ. 0) J = HS
         HIST(J) = HASH                          ! remember

************************************************************************
*          Determine live points
************************************************************************
        DO I = 1, N
          LIVEX(I) = 0
        END DO
        SELF = 1.
        DO I = 1, N
          L = Z(I)
          IF (LIVEC(L) > 0) THEN
            D = DSM (X(1,I), P, T, W, CAT, CATTOT, C(1,L),
     $               TAL(1,L), POP(L), LIT, SELF, ROBUST, BIG)
            IF (D > SHORTX(I)) LIVEX(I) = 1
            SHORTX(I) = D
          END IF
        END DO

       END DO                   ! next g
       IFAULT = 2               ! too many iterations

************************************************************************
*         Compute residual
************************************************************************
  90   U = 0.
       SELF = 1.
       DO I = 1, N
         L = Z(I)
         D = DSM (X(1,I), P, T, W, CAT, CATTOT, C(1,L),
     $               TAL(1,L), POP(L), LIT, SELF, ROBUST, BIG)
         SHORTX(I) = D
         U = U + D
       END DO  ! next i
       RETURN
      END  ! of kclust



*--------------------- --------------------------------------------------
* jclust - precedes kclust
*
*___Variable___________Type______I/O______Description___________________
*   X(PLIM,N)          Real      In       Data points in R^p
*   PLIM               Integer   In       Row dimension of X
*   P                  Integer   In       Measurements per point
*   N                  Integer   In       Number of points
*   T(P)               Real      In       Period of modulo arithmetic
*   W(P)               Real      In       Importance weights of variables
*   CAT(P)             Integer   In       Number of categories
*   CATTOT             Integer   In       The total of CAT(1:p)
*   M                  Integer   In       Number of sources
*   C(PLIM,KMAX)       Real      Out      Cluster centroids
*   KMIN               Integer   In       Fewest clusters to make
*   K                  Integer   In/Out   Default/actual clusters to make
*   KMAX               Integer   In       Most clusters to make
*   TAL(CATTOT,KMAX)   Real      Work     Observer frequencies
*   Z(N)               Integer   Out      What cluster each point is in
*   POP(KMAX)          Real      Out      Number of points in each cluster
*   OPT                Integer   In       Options
*   ALG                Integer   In       Select algorithm {0,1}
*   ZSAVE(N,CTRIES)    Integer   Work     Best Z
*   IPERM(N)           Integer   Work     Permutation vector
*   LIVEX(N)           Integer   Work     Live set membership, each point
*   LIVEC(KMAX)        Integer   Work     Live set membership, each cluster
*   SHORTX(N)          Real      Work     Distance to center, each point
*   XW(2*N)            Real      Work     Work array for averages
*   IERR               Integer   Out      Error code
*                                           0 = no errors
*                                           {1,2,3,4,5} = see KCLUST
*                                           6 = K out of bounds
*                                           7 N/A (see package CLUELA)
*                                           8 = bad array bounds
*                                           9 = no weights assigned
*                                          10 N/A (see package CLUELA)
*                                          11 = negative weights
*                                          12 = bad value in CAT[]
*                                          13 = tclust fails
*                                          67 = bad value for ALG
*-----------------------------------------------------------------------
      SUBROUTINE JCLUST (X, PLIM, P, N, T, W, CAT, CATTOT, M, 
     $  C, KMIN, K, KMAX, TAL, Z, POP, OPT, ALG, ZSAVE, IPERM,
     $  LIVEX, LIVEC, SHORTX, XW, IERR)

       IMPLICIT NONE
       INTEGER CTRIES
       PARAMETER (CTRIES = 10)    ! # repeats of k-means
       INTEGER PLIM, P, N, CAT, CATTOT, M, KMIN, K, KMAX, Z,
     $  OPT, ALG, ZSAVE, IPERM, LIVEX, LIVEC, IERR
       REAL X, T, W, C, TAL, POP, SHORTX, XW
       DIMENSION X(PLIM,N), T(P), W(P), CAT(P), C(PLIM,KMAX),
     $  TAL(CATTOT,KMAX), Z(N), POP(KMAX), ZSAVE(N,CTRIES), IPERM(N),
     $  LIVEX(N), LIVEC(KMAX), SHORTX(N), XW(2*N)

*-----------------------------------------------------------------------
*          Compiled-in options
*-----------------------------------------------------------------------
       INTEGER TTRIES
       LOGICAL CRISP, REEST
       PARAMETER (CRISP = .FALSE.,       ! use all-or-nothing centers
     $            REEST = .FALSE.,        ! change input array
     $            TTRIES = 5)

*-----------------------------------------------------------------------
*            local variables
*-----------------------------------------------------------------------
      INTEGER
     $        H, i,             ! count trials, count points
     $        HBEST,            ! best trial
     $        HOK,              ! successful trials
     $        J,                ! count dimensions
     $        JTIES,            ! ties in IMODE
     $        KBEST, KTRY,      ! possible value for k
     $        KARR(CTRIES),     ! results for k, each trial
     $        KAVE(CTRIES),     ! workspace for IMODE
     $        KPERM(CTRIES),    ! workspace for IMODE
     $        L                 ! count clusters

      REAL
     $     BIG,       ! very large number
     $     FM,              ! M as Real
     $     LIT,             ! very small number
     $     D,               ! a dissimilarity
     $     SELF,            ! argument for DSM
     $     THRESH, T0,      ! threshhold for splitting
     $     U,               ! residual from clustering routines
     $     UARR(CTRIES)     

       LOGICAL
     $        BRIEF,      ! skip clustering restarts
     $        HYP,        ! hyperplane initialization
     $        ROBUST      ! robust statistics

*             external functions
       INTEGER HAT              ! random integer 1...n
       REAL    DIST1, DIST2, 
     $         DSM,   ! distance functions
     $         RMACHINE             ! find machine constants
!!       LOGICAL SAFDIV
       EXTERNAL DIST1, DIST2

*-----------------------------------------------------------------------
*       begin executable statements
*-----------------------------------------------------------------------
       IERR = 0                       ! clear error codes
       LIT = RMACHINE(2)              ! very small number
       BIG = RMACHINE(3)              ! very large number
       FM = FLOAT(M)
       KBEST = K                      ! avoid compiler warning

*-----------------------------------------------------------------------
*            validate
*-----------------------------------------------------------------------
       IF (KMIN < 1 .OR. K < KMIN .OR. K > KMAX .OR. KMAX > N) THEN
         PRINT *, 'jclust!! kmin, k, kmax ', KMIN, K, KMAX, ' n=', N
         IERR = 6
         RETURN
       END IF

       IF (P < 1 .OR. N < 1) THEN
         PRINT *, 'jclust! p: ', P, ' n: ', N
         IERR = 8
         RETURN
       END IF
              
*        at least one element of W[] must be nonzero
       DO J = 1, P
         IF (W(J) > LIT) GO TO 10
       END DO
       IERR = 9
       RETURN

  10   DO J = 1, P
         IF (W(J) < 0.) THEN
           PRINT *, 'jclust!! negative weights! W: ', W
           IERR = 11
           RETURN
         END IF
       END DO

*    Ensure that categorical and contra-categorical data are positive integers
       DO i = 1, N
         DO J = 1, P
           IF (T(J) < -0.5) THEN
             IF (X(J,i) < 0.5 .OR. X(J,i) > FLOAT(CAT(J)) + 0.4999) THEN
               PRINT *, 'CAT(', J, ')=', CAT(J), 
     $          ' but X(', J, ',', I, ')=', X(J,i)
               IERR = 12
               RETURN
             END IF
           END IF
         END DO
       END DO       

*-----------------------------------------------------------------------
*           decode input
*-----------------------------------------------------------------------
       ROBUST = BTEST(OPT, 0)       ! medians instead of means
       BRIEF = BTEST(OPT, 1)        ! skip clustering restarts
       HYP = BTEST(OPT, 2)

*-----------------------------------------------------------------------
*        special case k = n
*-----------------------------------------------------------------------
       IF (KMIN .EQ. N) THEN
         DO i = 1, N
           DO J = 1, P
             C(J,i) = X(J,i)
           END DO
         END DO
         DO i = 1, N
           Z(i) = i
         END DO
         DO L = 1, K
           POP(L) = 1.
         END DO
         RETURN
       END IF

*-----------------------------------------------------------------------
*         special case k = 1
*-----------------------------------------------------------------------
       IF (KMAX .EQ. 1) THEN
         DO i = 1, N                  ! all memberships the same
           Z(i) = 1
         END DO
         POP(1) = N
         GO TO 90
       END IF

*-----------------------------------------------------------------------
*        choose what algorithm
*-----------------------------------------------------------------------
       IF (ALG .EQ. 0 .OR. ALG .EQ. 1) THEN
         CONTINUE
       ELSE
         PRINT *, 'jclust: bad value for alg!'
         IERR = 67
         RETURN
       END IF

*-----------------------------------------------------------------------
*         initialize k-means routines, CTRIES attempts, keep the best
*-----------------------------------------------------------------------
       HOK = 0
       DO H = 1, CTRIES            ! seek least residual
         IF (HYP) THEN
           IF (ROBUST) THEN
             CALL HCLUST (X,PLIM,P,N,T,W,K,Z,POP,IPERM,LIVEC,DIST1,IERR)
           ELSE
             CALL HCLUST (X,PLIM,P,N,T,W,K,Z,POP,IPERM,LIVEC,DIST2,IERR)
           END IF
           IF (IERR > 0) PRINT *, 'hclust: Trouble partitioning'

*            find initial cluster centers
           DO L = 1, K               ! all clusters live
             LIVEC(L) = 1
           END DO
           CALL CENTER (X, PLIM, P, N, T, CAT, CATTOT,
     $         C, K, TAL, Z, POP, ROBUST, .FALSE., LIVEC, IPERM, XW)
           THRESH = 0.
           SELF = 1.
           DO I = 1, N
             L = Z(I)
             D = DSM (X(1,I), P, T, W, CAT, CATTOT, C(1,L), TAL(1,L),
     $               POP(L), LIT, SELF, ROBUST, BIG)
             IF (D > THRESH) THRESH = D
           END DO
         ELSE
           IF (H .EQ. 1 .OR. REEST) THEN
             CALL THEUR (X, PLIM, P, N, T, W, CAT, CATTOT, C, K, TAL, Z,
     $             POP, ROBUST, LIVEC, IPERM, SHORTX, XW, THRESH)
!            PRINT *, 'theur returns!  thresh= ', THRESH
           END IF
     
           CALL DINIT (X, PLIM, P, N, T, W, CAT, CATTOT, C, K, TAL,
     $       POP, ROBUST, SHORTX, XW, T0)
!           PRINT *, 'dinit returns.  t0 = ', T0
           DO i = 1, N
             Z(i) = HAT(K)
           END DO
         END IF

!!!!!!!!!!!!!!!
!         PRINT *, 'calculated thresh as: ', THRESH
!                use thresh determined by DINIT ?
!!         IF (.NOT. HYP .AND. ALG .EQ. 0) THRESH = T0

*-----------------------------------------------------------------------
*           Cluster
*-----------------------------------------------------------------------
         KTRY = K
         U = BIG
         CALL KCLUST (X,PLIM,P,N,T,W,CAT,CATTOT,C,KMIN,KTRY,KMAX,
     $      TAL,Z,POP,ROBUST,THRESH,LIVEC,LIVEX,IPERM,SHORTX,XW,U,IERR)

!!!!!       debug:
!           PRINT *, 'returned from kclust.  h=', h, ' u=', u, ' k=',
!     $         KTRY, ' ierr=', IERR

         IF (BRIEF .AND. IERR .EQ. 0) THEN
           KBEST = KTRY
           GO TO 50      ! break from loop H
         END IF         

         IF (IERR .EQ. 0) THEN
           HOK = HOK + 1
           KARR(HOK) = KTRY
           UARR(HOK) = U
           DO i = 1, N
             ZSAVE(i,HOK) = Z(i)
           END DO
           
*-----------------------------------------------------------------------
* Adjust X towards an approximation of C as a function of X. This step is 
* intended to have the same effect as Dynamic Time Warping, since 
* data clustering by itself does not take advantage of information about 
* proximity of events in their original context.
*----------------------------------------------------------------------
           IF (REEST .AND. ALG .EQ. 0) THEN
             DO i = 1, N                      ! weight by inverse distance
               SHORTX(i) = FM / (FM + SHORTX(i))
             END DO
             CALL RECS (X, PLIM, P, N, T, M, C, KTRY, Z, IPERM, LIVEX,
     $                   SHORTX, XW(1), XW(N+1))
           END IF  ! re-estimate?

         ELSE
           CONTINUE
           PRINT *, 'kclust:  error#', IERR
         END IF  ! successful kclust run?
       END DO  ! next H

       IF (HOK .EQ. 0) RETURN  ! failure
       CALL IMODE (KARR, KAVE, KPERM, HOK, JTIES)  ! most common K
       U = BIG
       HBEST = 1

*         choose lowest residual from trials with modal KTRY
       IF (JTIES .EQ. 1) THEN
         KBEST = KAVE(1)
         DO H = 1, HOK
           IF (KARR(H) .EQ. KBEST .AND. UARR(H) < U) THEN
             HBEST = H
             U = UARR(H)
           END IF
         END DO

*         choose lowest residual from any trial
       ELSE
         DO H = 1, HOK
           IF (UARR(H) < U) THEN
             HBEST = H
             KBEST = KARR(H)
             U = UARR(H)
           END IF
         END DO
       END IF

*          restore best saved values
       PRINT *, 'choosing results of trial ', HBEST
       DO i = 1, N
         Z(i) = ZSAVE(i,HBEST)         
       END DO

*-----------------------------------------------------------------------
*           Come here when skipping restarts
*-----------------------------------------------------------------------
  50   IERR = 0
       K = KBEST
       IF (ALG .EQ. 0 .OR. K .EQ. 1 .OR. K .EQ. N) GO TO 90

*-----------------------------------------------------------------------
*              Mixture decomposition.
*-----------------------------------------------------------------------
       DO H = 1, TTRIES
         KTRY = K
         IF (REEST .AND. H < TTRIES) THEN
           J = 3
         ELSE
           J = 500
         END IF
         CALL TCLUST (X, PLIM, P, N, T, CAT, CATTOT, M, C, KMIN, KTRY,
     $  KMAX, TAL, Z, POP, ROBUST, IPERM, XW, XW(N+1), SHORTX, J, IERR)
         PRINT *, 'jclust iter ', H, ' tclust returns #', IERR
         IF (IERR .NE. 0) THEN
!!!         PRINT *, 'jclust: returning failure!!'
           IERR = 13
!!         PAUSE 'tclust has failed!'
!          RETURN     ! failure
         END IF

         IF (IERR .EQ. 0 .AND. REEST .AND. H < TTRIES) THEN
!           DO L = 1, KTRY           ! Crispify
!             LIVEC(L) = 1
!           END DO
!           CALL CENTER (X, PLIM, P, N, T, CAT, CATTOT, 
!     $          C, KTRY, TAL, Z, POP, ROBUST, .FALSE., LIVEC, IPERM, XW)

           SELF = 1.
           DO i = 1, N                      ! weight by inverse distance
             L = Z(i)
             D = DSM (X(1,I), P, T, W, CAT, CATTOT, C(1,L), TAL(1,L),
     $             POP(L), LIT, SELF, ROBUST, BIG)
             SHORTX(i) = FM / (FM + D)
           END DO
           CALL RECS (X, PLIM, P, N, T, M, C, KTRY, Z, IPERM, LIVEX,
     $                   SHORTX, XW(1), XW(N+1))
         ELSE IF (IERR .NE. 0) THEN
           CONTINUE
         ELSE
           GO TO 70
         END IF  ! re-estimate?
       END DO

  70   K = KTRY
       IF (CRISP) THEN
         CONTINUE
       ELSE
         RETURN   ! success
       END IF

*-----------------------------------------------------------------------
*         Count cluster populations, return centers
*-----------------------------------------------------------------------
  90   DO L = 1, K         ! all clusters live
         LIVEC(L) = 1
       END DO
       CALL CENTER (X, PLIM, P, N, T, CAT, CATTOT,
     $    C, K, TAL, Z, POP, ROBUST, .TRUE., LIVEC, IPERM, XW)
       RETURN
      END  ! of JCLUST
*++++++++++++++++++++++++ End of file cluster.f ++++++++++++++++++++++++