Thurstonian scaling

#Thurstone case V  (assumption of equal and uncorrelated error variances)
#Version of March 22, 2005
#Do a Thurstone scaling (Case V) of either a square choice matrix or a rectangular rank order matrix
#Need to add output options to allow users to see progress
#data are either a set of rank orders or
#a set of choices (columns chosen over rows)
#output is a list of 
# scale scores
#goodness of fit estimate    (1 - sumsq(error)/sumsq(original))   -- not a very powerful measure
# the model choice matrix
#the error (residual) matrix
#the original choice matrix 

thurstone <- function(x, ranks = FALSE, opt = TRUE) {     #the main routine
    if (ranks) {choice <- choice.mat(x)      #convert rank order information to a choice matrix
       } else {choice <- as.matrix(x)}       #otherwise treat the input itself as the choice matrix
            
        scale.values <- scale.vect(choice)   #find the Thurstone scale values from the choice matrix
     
       fit <- thurstone.fit(scale.values,choice,opt)        #see how well this model fits
       thurstone <- list("scale values"=round(scale.values,2), "goodness of fit"= fit, "choice matrix"= choice)
       return(thurstone)
    }
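
#A usage sketch (added for illustration; the object names "prefs" and "rankings" are hypothetical):
#  thurstone(prefs)                  #prefs: a square matrix of proportions, columns chosen over rows
#  thurstone(rankings, ranks=TRUE)   #rankings: one row of ranks per judge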
    
#the functions used by thurstone:

 #this next function does almost all of the work, by transforming choices to normal deviates
 scale.mat <- function(x) {qnorm(x)}   #convert choice proportions to normal deviates
 
 #find the summed normal deviate score for each item, subtract the minimum, and average
 scale.vect <- function(x)           #do the actual Thurstonian scaling
     {nvar <- dim(x)[2]
     score <- colSums(scale.mat(x))   #find the summed scores
     minscore <- min(score)           #rescale so that the minimum is 0
     relative <- score - minscore
     relative <- relative/nvar        #average over the number of items
     return(relative) }         #the Thurstone scale values
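
 #A small worked example (added as a sketch, not part of the original code): three items
 #whose choice proportions are consistent with scale values about one z-unit apart
 p3 <- matrix(c(.50,.84,.98,
                .16,.50,.84,
                .02,.16,.50), 3, 3, byrow=TRUE)
 scale.vect(p3)     #roughly 0, 1.02, 2.03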
     
 #next two functions are used for finding goodness of fit   -- not a very powerful measure  
 thurstone.model <- function (x,opt) {      #convert scale value differences (lower triangle) into predicted choice proportions
    if (opt) {return(1-pnorm(dist(x,diag=TRUE)))} else return(pnorm(dist(x,diag=TRUE))) }
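
 #Under Case V the predicted probability of choosing one item over another is a normal
 #probability of their scale separation; since dist() drops the sign, the opt argument
 #selects which tail is reported.  A quick check (added as a sketch):
 thurstone.model(c(0,1), TRUE)      #about .16, i.e. 1 - pnorm(1)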
    
 thurstone.fit <- function (s,d,opt) {   #s is the thurstone scale values, d is the data matrix
     model <- thurstone.model(s,opt)
     dm <- as.dist(d)              #treat the choice matrix as a lower triangular (dist) object
     error <- model - dm           #error = model - data,  or more conventionally, data = model + error
     fit <- 1 - sum(error*error)/sum(dm*dm)   #a typical fit measure -- 1 - squared error/squared original
     thurstone.fit <- list("goodness of fit"=round(fit,2),"model matrix"=round(model,2),"error matrix"=round(error,2))
     return(thurstone.fit)
     }
     
#if we have rank order data, then convert them to a choice matrix
#convert a rank order matrix into a choice matrix
orders.to.choice <- function(x,y) {      #does one subject (row) at a time
   nvar <- length(x)          #x is a single subject's vector of ranks (works for a vector or a one-row matrix)
   for (i in 1:nvar) {
      for (j in 1:nvar) {
        if (x[j]< x[i] ) {y[i,j] <- y[i,j]+1
         } else if  (x[j] == x[i]) {
                       y[j,i] <- y[j,i]+.5    #take ties into account
                       y[i,j] <- y[i,j] + .5
              } else  y[j,i] <- y[j,i]+1        
          }} 
          return(y)}
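
 #A one-subject illustration (added as a sketch): the rank order 2 1 3 means the second
 #object was ranked first.  Because the double loop visits every ordered pair, each clear
 #win adds 2 to the winning column's cell for that subject.
 orders.to.choice(c(2,1,3), matrix(0,3,3))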
          
#  makes repeated calls to orders.to.choice  -- can we vectorize this?        
choice.mat <-function(x) {
   nsubs<- dim(x)[1]
   nvar <- dim(x)[2]
   y<- array(rep(0,nvar*nvar),dim=c(nvar,nvar))
   for (k in 1:nsubs) {
      y<-orders.to.choice(x[k,],y) }     #is there a way to vectorize this?
     d <- diag(y)              #each diagonal element equals the number of subjects
     y <- y/(2*d)              #each pair was counted twice per subject, so this yields choice proportions
     lower <- 1/(4*nsubs)      #in case of 0 or 1 proportions, we put limits 
     upper <- 1 - lower        #so that qnorm() does not return -Inf or +Inf
     y[y < lower] <- lower
     y[y > upper] <- upper
    return(y) }
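
 #A small end-to-end sketch (added for illustration): three judges rank three objects
 ranks3 <- matrix(c(1,2,3,
                    1,3,2,
                    2,1,3), nrow=3, byrow=TRUE)
 choice.mat(ranks3)               #the implied matrix of choice proportions
 thurstone(ranks3, ranks=TRUE)    #scale the rankings directly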
    
 

 ###end of thurstone functions
 #now some test data
 
 #Nunnally's data (Nunnally and Bernstein took these from Guilford)

.500 .818 .770 .811 .878 .892 .899 .892 .926
.182 .500 .601 .723 .743 .736 .811 .845 .858
.230 .399 .500 .561 .736 .676 .845 .797 .818
.189 .277 .439 .500 .561 .588 .676 .601 .730 
.122 .257 .264 .439 .500 .493 .574 .709 .764
.108 .264 .324 .412 .507 .500 .628 .682 .628
.101 .189 .155 .324 .426 .372 .500 .527 .642
.108 .155 .203 .399 .291 .318 .473 .500 .628
.074 .142 .182 .270 .236 .372 .358 .372 .500
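
#The proportions above can be entered directly as a matrix and scaled; a sketch of one
#way to do so (the object name "nunnally" is arbitrary):
nunnally <- matrix(c(
 .500,.818,.770,.811,.878,.892,.899,.892,.926,
 .182,.500,.601,.723,.743,.736,.811,.845,.858,
 .230,.399,.500,.561,.736,.676,.845,.797,.818,
 .189,.277,.439,.500,.561,.588,.676,.601,.730,
 .122,.257,.264,.439,.500,.493,.574,.709,.764,
 .108,.264,.324,.412,.507,.500,.628,.682,.628,
 .101,.189,.155,.324,.426,.372,.500,.527,.642,
 .108,.155,.203,.399,.291,.318,.473,.500,.628,
 .074,.142,.182,.270,.236,.372,.358,.372,.500), nrow=9, byrow=TRUE)
thurstone(nunnally)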

  
#ranking data
#data set 1   (from http://marketing.byu.edu/htmlpages/books/pcmds/THURSTONE.html#N_1_) 
1   2   3   4   5
5	4	3	2	1     
4	2	1	3	4     
3	2	1	4	5     
2	2	2	1	1     
3	3	3	3	2     
5	1	1	2	4     
5	2	1	2	4     
2	1	3	2	1     
1	2	2	2	4     
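
#A sketch of how these rankings might be scaled (assuming each row, including the first,
#is one respondent's ranking of the five objects, and that the rows have been read into
#a matrix; the file name below is hypothetical):
# ranked <- as.matrix(read.table("rankings.txt"))
# thurstone(ranked, ranks=TRUE)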


#data set 2  (from http://marketing.byu.edu/htmlpages/books/pcmds/THURSTONE.html#N_1_)

 0.50 0.44 0.42 0.43 0.52 0.48 0.37 
 0.56 0.50 0.40 0.56 0.63 0.35 0.33 
 0.58 0.60 0.50 0.44 0.45 0.44 0.32 
 0.57 0.44 0.56 0.50 0.35 0.48 0.59 
 0.48 0.38 0.55 0.65 0.50 0.50 0.44 
 0.52 0.65 0.56 0.52 0.50 0.50 0.80 
 0.63 0.67 0.68 0.41 0.56 0.20 0.50 


part of a short guide to R
Version of April 20, 2005
William Revelle
Department of Psychology
Northwestern University