################################################################################ ## ## ##The program calculates the time lag dependent trend and correlation test ## ##based the replacement of the Neumann trend test and the Durbin-Watson test ## ##by one-way ANOVA with resampling ## ##The time lag (h) dependent test can be interpreted for each h in terms of ## ##the well-known Neumann trend test and Durbin-Watson test (the later applied ## ##on the raw data instead of the residuals). This test is a sensitive way to ## ##to detect hidden correlations and trends in time series. ## ##The Neumann case: ## ## H0(Neumann): there is no gradual trend in the data. ## ## H1(Neumann): there is a gradual trend in the data. ## ## H0 is rejected, if 1-p() is less than alfa (e.g. alfa=0.05) ## ##The Durbin–Watson case (on raw data): ## ## H0(DW): there is no autocorrelation in the data. ## ## H1,low(DW): there is a positive autocorrelation in the data. ## ## H1,up(DW): there is a negative autocorrelation in the data. ## ## H0(DW) is rejected in the direction to H1,low, if 1-p() < alfa ## ## H0(DW) is rejected in the direction to H1,up, if if 1-p() > alfa ## ## ## ################################################################################ ## ## ##Cite the original research article as: ## ##The replacement of the Neumann trend test and the Durbin-Watson test on ## ##residuals by one-way ANOVA with resampling and an extension of the tests to ## ## different time lags ## ##Journal of Chemometrics, vol 24., pages 140-148, (2010). ## ##Author: Gergely Toth, toth@chem.elte.hu, http:##www.chem.elte.hu#toth ## ##Institute of Chemistry,Eötvös University H-1518 Budapest, P.O. Box 32, ## ##Hungary ## ## ## ##Programming code is written by Adam Csibi and Gergely Toth ## ##Version 14th May, 2010 ## ##Available at http://www.chem.elte.hu/toth ## ## ## ################################################################################ ## ## ##HOW TO RUN: ## ## Open the script in R ## ## Change the name of the inputfile to the name of your data file ## ## Set missingvalue according to your data ## ## select run all from the edit menu ## ## START EXECUTABLE WITH standard input#output redirection ## ## WARNING: The code is rather slow. For large datafiles use the C program ## ## code that is about 100 times faster! ## ## ## ################################################################################ ## ## ##FORMAT OF DATAFILE ## ## first row = number of data (na) ## ## 2 - (na+1) rows = data (one coloumn) ## ## if a data is smaller than 'missingvalue' (see in script), it is not used in## ## the calculations ## ## ## ##EXAMPLE: ## ## 1234 ## ## 1234.433 ## ## 3443.34 ## ## 123.312e-15 ## ## 12356.154 ## ## ... ## ## ## ## If 'missingvalue'is set to 1.0e-12, the third data is never selected in the## ## the resampling procedure of the ANOVA calculation ## ## ## ##OUTPUT: h and corresponding 1-p value pairs in each row ## ## ## ################################################################################ inputfile="l_bpph_no2.dat" #input data file name outputfile="testresults.dat" #output data file name na<-scan(file = inputfile, what = integer(0), n = 1) #scan first row in data file = number of data ad<-scan(file = inputfile, what = numeric(0), skip=1,blank.lines.skip=TRUE, n = na) #scan data vector #count missing values (a value smaller than missingvalue is never chosen in the resampling) missingvalue <- 1.0e-12 #Specify for your data set! nazero <- 0 i <- 1 for(i in i:na) {if(ad[i] < missingvalue) {nazero = nazero+1}} #set constants nh <- as.integer((na-2*nazero)/4) #max time lag ns <- as.integer((na-nazero)/4) #number of sets in a size ng <- as.integer((na-nazero)/4) #number of groups in an ANOVA ni <- 2 #number of data in a group nfintra <- ng*(ni-1) #degrees of freedom in F-distribution nfinter <- ng-1 na;nazero;nh;ns;ng #print constants ich <- as.integer(na) #flag for a data (0=free, 1=already chosen) faveg <- as.double(nh) #average f(h) #main calculation h <- 2 while ( h <= nh ) { #loop for diff. time lags faveg[h] <- 0.0 #zero f(h) average is <- 1 while ( is <= ns ){ #loop for a set at a time lag sumt <- 0.0 #zero cummulative variables and flags for ANOVA with resampling sumcf <- 0.0 suma <- 0.0 sumn <- 0 ia <- 1 while ( ia <= na ){ ich[ia]=0 ia <- ia+1 } #end of ia ig <- 1 while ( ig <= ng ){ #loop for groups within a set repeat { ja <- as.integer((runif(1,0,1)*na)+1) #random chosen of a data jah <- ja+h-1 if (jah <= na) { if ((ich[ja]==0 && ich[jah]==0) && (ad[ja]>missingvalue && ad[jah]>missingvalue)) break } } ich[ja] <- 1 ich[jah] <- 1 sumi <- ad[ja]+ad[jah] sumt <- sumt+(ad[ja]^2+ad[jah]^2) sumcf <- sumcf+sumi sumn <- sumn+ni suma <- suma+sumi^2/as.double(ni) ig <- ig+1 }# end of ig sumcf <- sumcf^2/as.double(sumn) f <- ((suma-sumcf)/as.double(ng-1))/((sumt-suma)/as.double(sumn-ng)) # CALCULATE F_DISTRIBUTION faveg[h] <- faveg[h]+f is <- is+1 } #end of is faveg[h] <- faveg[h]/as.double(ns) #calculate average for the sets faveg[h] <- 1.0-pf(faveg[h],nfinter,nfintra) #p-value of average f h h <- h+1 } #end of h # output of calculated data h <- 2 while(h<=nh){ cat(h,'\t') cat(faveg[h],'\n') h <- h+1 } faveg <- faveg[2:nh] #plot calculated data xrange <- c(0, nh) yrange <- c(0, 1) grafikai.parameterek <- par(mfcol = c(2, 1), pty = "m", bty = "o") plot(faveg,pch=10,axes=TRUE,xlim = xrange,ylim = yrange,main="VARTREND", xlab = "h", ylab = "p-value") plot(faveg,pch=19,xlim = xrange,ylim = yrange,type="b",main="VARTREND", xlab = "h", ylab = "p-value") par(grafikai.parameterek) write.table(faveg, file = outputfile,row.names =TRUE,col.names=FALSE) #end of main