      PROGRAM PCA
C
C     Computes principal components of a set of spectra.
C     Written by TRM May 1993. Modified Feb 1998 to use NR routine
C     rather than NAG.
C
C     Arguments:
C
C       input  -- input molly file
C       output -- input molly file
C       ncpt   -- number of components to list (to a file called pca.lis)
C     
C     Operation:
C     
C     The program first removes the mean of all the spectra so that only
C     deviations from the mean are dealt with. It then scales the spectra
C     by the average of their variance, square rooted. If there are N spectra
C     of M points each (the arc scale and number of pixels are assumed to be
C     identical for all spectra), they can be imagined as N points in M
C     dimensional space. If these are projected onto a line in M-dimensional
C     space and the variance computed, it can be shown that the result is a
C     quadratic form x_i A_{ij} x_j where x is the M-dimensional vector you
C     projected onto. The eigenvectors of matrix A give spectral components of
C     maximal variation (if they have large eigen values).
C     
C     If you want to mask out regions of the spectrum then the best method is
C     to set them to a constant. This effectively projects points into fewer
C     dimensions. All such operations must be done in MOLLY beforehand.
C
C     The output file will consist of M spectra of M points each starting 
C     with the largest eigenvalue and proceeding downwards. 
C     
C     
      IMPLICIT NONE
      INTEGER MAXD, NSPIX, NPIX, NSARC, NARC
      INTEGER FCODE, NCHAR, NDOUB, NINTR, NREAL, MAXS
      INTEGER MXCHAR, MXDOUB, MXINTR, MXREAL, MXARC
      PARAMETER (MXCHAR=50,MXDOUB=50,MXINTR=50,MXREAL=50)
      INTEGER MXCPT
      PARAMETER (MXCPT=5)
      REAL CONT(MXCPT)
      CHARACTER*16 NMCHAR(MXCHAR)
      CHARACTER*16 NMDOUB(MXDOUB)
      CHARACTER*16 NMINTR(MXINTR)
      CHARACTER*16 NMREAL(MXREAL)
      CHARACTER*32 HDCHAR(MXCHAR)
      DOUBLE PRECISION HDDOUB(MXDOUB)
      INTEGER HDINTR(MXINTR)
      REAL HDREAL(MXREAL)
      PARAMETER (MAXD=1800,MAXS=800)
      REAL COUNTS(MAXD), ERRORS(MAXD), FLUX(MAXD,MAXS)
      REAL A(MAXD,MAXD), V(MAXD,MAXD), D(MAXD), E(MAXD), R(MAXD)
      PARAMETER (MXARC=20)
      DOUBLE PRECISION ARC(MXARC), AF(MXARC), SUM, SUM1
      CHARACTER*64 INPUT, OUTPUT, TEST
      CHARACTER*16 UNITS
      REAL    YPLOT(MAXD)
      REAL SCALE(MAXD), RATIO(MAXD), CFRAT, GET_ERF, GET_FLX
      INTEGER I, N, J, IFAIL, K, IARGC, NARG, NCPT, NROT
      EXTERNAL IARGC
C
      NARG = IARGC()
      IF(NARG.GT.3) THEN
         WRITE(*,*) 'Require only input and output names and'
         WRITE(*,*) 'number of components whose contribution'
         WRITE(*,*) 'is to be listed <',MXCPT
         GOTO 999
      ELSE IF(NARG.EQ.3) THEN
         CALL GETARG(1,INPUT)
         CALL GETARG(2,OUTPUT)
         CALL GETARG(3,TEST)
         READ(TEST,*) NCPT
        IF(NCPT.GT.MXCPT) THEN
           WRITE(*,*) 'Too many to be listed. Maximum number = ',MXCPT
           GOTO 999
        END IF
      ELSE IF(NARG.EQ.2) THEN
         CALL GETARG(1,INPUT)
         CALL GETARG(2,OUTPUT)
         WRITE(*,'(A,$)') 'Number of components to be listed: '
         READ(*,*) NCPT
      ELSE IF(NARG.EQ.1) THEN
         CALL GETARG(1,INPUT)
         WRITE(*,*) 'Enter principal component output file'
         READ(*,'(A)') OUTPUT
         WRITE(*,'(A,$)') 'Number of components to be listed: '
         READ(*,*) NCPT
      ELSE 
         WRITE(*,*) 'Enter spectrum input file'
         READ(*,'(A)') INPUT
         WRITE(*,*) 'Enter principal component output file'
         READ(*,'(A)') OUTPUT
         WRITE(*,'(A,$)') 'Number of components to be listed: '
         READ(*,*) NCPT
         IF(NCPT.GT.MXCPT) THEN
            WRITE(*,*) 'Sorry, maximum allowed = ',MXCPT
            GOTO 999
         END IF
      END IF
      OPEN(UNIT=47,FILE=INPUT,STATUS='OLD',FORM='UNFORMATTED')
      OPEN(UNIT=48,FILE=OUTPUT,STATUS='UNKNOWN',FORM='UNFORMATTED')
      IF(NCPT.GT.0) 
     &     OPEN(UNIT=49,FILE='pca.lis',STATUS='UNKNOWN')
      NSPIX = 0
      NSARC = 0
      N = 0
 100  CALL RTHEAD(47, FCODE, UNITS, NPIX, NARC, NCHAR, NDOUB, 
     &     NINTR, NREAL, MXCHAR, MXDOUB, MXINTR, MXREAL, NMCHAR,
     &     NMDOUB, NMINTR, NMREAL, HDCHAR, HDDOUB, HDINTR, 
     &     HDREAL, IFAIL)
      IF(IFAIL.GT.0 .OR. NPIX.EQ.0) GOTO 999
      IF(IFAIL.EQ.0) THEN
         IF(NSPIX.EQ.0) THEN
            NSPIX = NPIX
            NSARC = NARC
         ELSE IF(NPIX.NE.NSPIX .OR. NARC.NE.NSARC) THEN
            WRITE(*,*) 'Pixels: ',NPIX,' ',NSPIX
            WRITE(*,*) 'Arc coeff: ',NARC,' ',NSARC
            GOTO 999
         END IF
C
C     Read data
C
         N = N + 1
         CALL RTDATA(47, COUNTS, ERRORS, FLUX(1,N), ARC, 
     &        FCODE, NPIX, NARC, IFAIL)
         IF(IFAIL.NE.0) GOTO 999
         IF(N.EQ.1) THEN
            DO I = 1, NSPIX
               E(I) = 0.D0
               RATIO(I) = CFRAT(COUNTS(I),FLUX(I,N))
            END DO
            DO I = 1, ABS(NSARC)
               AF(I) = ARC(I)
            END DO
         END IF
         DO I = 1, NSPIX
            E(I) = E(I) + GET_ERF(COUNTS(I),ERRORS(I),FLUX(I,N))**2
         END DO
         DO I = 1, NSPIX
            FLUX(I,N) = GET_FLX(COUNTS(I), FLUX(I,N))
         END DO
         GOTO 100
      ELSE IF(IFAIL.LT.0) THEN
         WRITE(*,*) 'Read ',N,' spectra, ',NSPIX,' points each.'
         CLOSE(UNIT=47)
      END IF
C     
C     Remove mean value from points, develope a scaling
C     parameter for each point.
C     
      WRITE(*,*) 'Removing mean values and scaling'
      DO I = 1, NSPIX
         R(I) = 0.D0
      END DO
      DO J = 1, N
         DO I = 1, NSPIX
            R(I) = R(I) + FLUX(I,J)
         END DO
      END DO
      SUM1 = 0.D0
      DO I = 1, NSPIX
         R(I) = R(I)/REAL(N)
         SCALE(I) = SQRT(E(I)/REAL(N))
         SUM1 = SUM1 + (R(I)/SCALE(I))**2
      END DO
C     
C     SUM1 is a scaling factor to divide into errors at the end
C     as eigenvectors are normalised to unit length
C
      SUM1 = SQRT(SUM1)
      DO J = 1, N
         DO I = 1, NSPIX
            FLUX(I,J) = (FLUX(I,J) - R(I))/SCALE(I)
         END DO
      END DO
C     
C     Initialise matrix
C
      WRITE(*,*) 'Computing matrix'
      DO J = 1, NSPIX
         DO I = 1, NSPIX
            A(I,J) = 0.
         END DO
      END DO
C     
C     Add in data to matrix (do half, then reflect)
C
      DO J = 1, NSPIX
         DO I = J, NSPIX
            DO K = 1, N
               A(I,J) = A(I,J) + FLUX(I,K)*FLUX(J,K)
            END DO
         END DO
      END DO  
      DO J = 1, NSPIX-1
         DO I = J+1, NSPIX
            A(J,I) = A(I,J)
         END DO
      END DO
C
C     Compute eigenvectors and eigenvalues
C     V will contain vectors, D values.
C
      IFAIL = 0
      WRITE(*,*) 'Computing eigenvectors ... can be slow'
      CALL EIGVEC(A,NSPIX,MAXD,D,V,NROT,E,R,IFAIL)
      IF(IFAIL.NE.0) GOTO 999
C
C     Sort into ascending order.
C     
      WRITE(*,*) 'Sorting eigenvectors by eigenvalue'
      CALL EIGSRT(D,V,NSPIX,MAXD)
C
C     Change sign of eigenvectors to ensure that all have a
C     positive mean
C
      DO I = 1, NSPIX
         SUM = 0.D0
         DO J = 1, NSPIX
            SUM = SUM + SCALE(J)*V(J,I)
         END DO
         IF(SUM.LT.0.D0) THEN
            DO J = 1, NSPIX
               V(J,I) = - V(J,I)
            END DO
         END IF
      END DO
C
C     Compute contributions from first NCPT components
C     and dump to pca.lis (effectively a light curve)
C
      IF(NCPT.GT.0) THEN
         DO J = 1, N
            DO K = 1, NCPT
               SUM = 0.D0
               DO I = 1, NSPIX
                  SUM = SUM + FLUX(I,J)*V(I,K)
               END DO
               CONT(NSPIX-K+1) = REAL(SUM)
            END DO
            WRITE(49,*,ERR=999) (CONT(K),K=1,NCPT)
         END DO
         CLOSE(UNIT=49)
         WRITE(*,*) 'Written pca.lis'
      END IF
C
C     Dump to a Molly file
C
      NCHAR = 1
      NDOUB = 0
      NINTR = 1
      NREAL = 1
      NMCHAR(1) = 'Object'
      NMREAL(1) = 'Eigenvalue'
      NMINTR(1) = 'Component number'
      HDCHAR(1) = 'Principal component'
      DO I = 1, NSPIX
         HDREAL(1) = D(I)
         HDINTR(1) = I
         DO J = 1, NSPIX
            YPLOT(J)  = SCALE(J)*V(J,I)
            COUNTS(J) = RATIO(J)*YPLOT(J)
            ERRORS(J) = REAL(RATIO(J)*SCALE(J)/SQRT(REAL(N))/SUM1)
            IF(COUNTS(J).EQ.0.) THEN
               YPLOT(J) = RATIO(J)
            END IF
         END DO
         CALL WTHEAD(48, FCODE, UNITS, NSPIX, NSARC, NCHAR, 
     &        NDOUB, NINTR, NREAL, MXCHAR, MXDOUB, MXINTR, MXREAL,
     &        NMCHAR, NMDOUB, NMINTR, NMREAL, HDCHAR, HDDOUB, HDINTR, 
     &        HDREAL, IFAIL)
         IF(IFAIL.NE.0) THEN
            WRITE(*,*) 'Failed during write to disk'
            GOTO 999
         END IF
         CALL WTDATA(48, COUNTS, ERRORS, YPLOT, AF, FCODE, 
     &        NSPIX, NSARC, IFAIL)
         IF(IFAIL.NE.0) THEN
            WRITE(*,*) 'Failed during write to disk'
            GOTO 999
         END IF
      END DO
      CLOSE(UNIT=48)
      STOP 'Successful completion'
 999  CLOSE(UNIT=47)
      CLOSE(UNIT=48)
      CLOSE(UNIT=49)
      STOP 'Program aborted'
      END
      
