# An R program by Natasa Kejzar for work in collaboration with D. R. White: This is for computing q-exponential parameters of city size distributions rather than inappropriate power laws. City size distributions are not scale-free.

# nonlinear least squares estimation of q-exponential curve to the data (the estimation of parameters y0, kappa, q)
# x_axis ... city-size
# y_axis ... cumulative population size
# kappa, q, y0 ... initial values (have to be very careful how to pick)

log_est <- function(x_axis,y_axis,kappa=2,q=1.2,y0=1){
   # do the "cleaning" of the data (get rid of all the zeros)
   x <- x_axis[1:length(y_axis)]
   y <- y_axis[y_axis > 0]
   x <- x[y_axis > 0]

   # the NLS estimation of the q-exp. function
   temp <- nls(y ~ (y0*(1-(1-q)*x/kappa)^(1/(1-q))),start=list(y0=y0,kappa=kappa,q=q))

   y0 <- summary(temp)$param[1]
   kappa <- summary(temp)$param[2]
   q <- summary(temp)$param[3]
   return(list(nls_model=temp,y0=y0, kappa=kappa, q=q))
}

# THE USE
#
city_size <- c(63.1,79.4,100,125.9,158.5,199.5,251.2,316.2,398.1,501.2,631,794.3,1000,1258.9,1584.9,1995.3,2511.9,3162.3,3981.1,5011.9,6309.6,7943.3,10000,12589.3,15848.9,19952.6)
pop_size <- c(0,0,0,0,0,0,28131,24763,21328,19107,17917,12229,10456,8391,8391,6491,4241,4241,4241,0,0,0,0,0,0,0)
params <- log_est(city_size,pop_size,kappa=100,q=1.8,y0=1*10^5)
params
#$nls_model
#Nonlinear regression model
#  model:  y ~ (y0 * (1 - (1 - q) * x/kappa)^(1/(1 - q)))
#   data:  parent.frame()
#          y0        kappa            q
#57173.424740   243.849159     2.000495
# residual sum-of-squares:  9278225
#
#$y0
#      y0
#57173.42
#
#$kappa
#   kappa
#243.8492
#
#$q
#       q
#2.000495


# --------------------------------------------- #

# weighted linear least squares estimation of q-exponential curve to the data (the estimation of a parameter kappa with respect to different parameters q and known y0)
# x_axis ... city-size
# y_axis ... cumulative population size
# w ... weights for respective x_axis, y_axis
# y0 ... the value of a parameter y0

# the following procedure estimates kappa parameters for given y0 and a set of qs (from 1.01 to 4.00, with the step of 0.01)

set_lin_est_w <- function(x_axis,y_axis,y0,w){
   i <- 1
   # do the "cleaning" of the data (get rid of all the zeros)
   x <- x_axis[1:length(y_axis)]
   y <- y_axis[y_axis > 0]
   x <- x[y_axis > 0]
   w <- w[y_axis > 0]

   # initialization of a list, where to save z values
   zlist <- list()

   q <- 1.01
   # compute z values
   z <- ((y/y0)^(1-q) - 1) / (1-q)
   zlist[[1]] <- z

   temp <- summary.lm(lm(z ~ x - 1,weights=w))
   r_sq <- temp$adj; est <- temp$coef[[1]]
   se <- temp$coef[[2]]; p_val <- temp$coef[[4]]

   # compute the kappa parameters for the respective qs
   q_set <- (102:400)/100
   for (q in q_set) {
      i <- i + 1
      z <- ((y/y0)^(1-q) - 1) / (1-q)
      zlist[[i]] <- z
      temp <- summary.lm(lm(z ~ x - 1,weights=w))
      r_sq <- c(r_sq, temp$adj); est <- c(est, temp$coef[[1]])
      se <- c(se, temp$coef[[2]]); p_val <- c(p_val, temp$coef[[4]])
   }

   return(list(x=x, z=zlist, r_sq=r_sq, est=est, se=se, p_val=p_val))
}

# to get the best q and kappa parameters out of the list, which we get from the function set_lin_est_w, I used the following function:


# x_axis ... city-size
# y_axis ... cumulative population size
# w ... weights for respective x_axis, y_axis
#y0 was estimated with NLS estimation or taken from interpolation between the nearest two periods od NLS estimation, where it converged

lin_est_w <- function(x_axis,y_axis,y0,w){
   q_set <- (101:400)/100
   # kappa parameters are estimated from the function above
   temp <- set_lin_est_w(x_axis,y_axis,y0,w)

   # get the index from the list, where the R^2 was the highest
   ind <- which(temp$r_sq==max(temp$r_sq))

   kappa <- -1/temp$est[ind]
   q <- q_set[ind]

   return(list(y0=y0, kappa=kappa, q=q))
}

# THE USE
city_size <- c(63.1,79.4,100,125.9,158.5,199.5,251.2,316.2,398.1,501.2,631,794.3,1000,1258.9,1584.9,1995.3,2511.9,3162.3,3981.1,5011.9,6309.6,7943.3,10000,12589.3,15848.9,19952.6)
pop_size <- c(0,0,0,0,0,0,28131,24763,21328,19107,17917,12229,10456,8391,8391,6491,4241,4241,4241,0,0,0,0,0,0,0)
weights <- c(75,75,75,75,75,71,44,32,22,17,15, 7, 5, 3, 3, 2, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0)
y0 <- 57173.42

params <- lin_est_w(city_size,pop_size,y0,weights)
params
#$y0
#[1] 57173.42
#
#$kappa
#[1] 241.0479
#
#$q
#[1] 2.04