#An R program for estimating q for degree distributions, Doug White,
#Modified from an original by Natasa Kejzar

#This program may be cut and pasted into the R workspace window

# nonlinear least squares estimation of q-exponential curve to the data (the estimation of parameters y0, kappa, q)
# x_axis ... degree (number of nodes in a network)
# y_axis ... degree distribution frequency (nodes with a given degree)
# kappa, q, y0 ... initial values (have to be very careful how to pick)
# a weighting option is provided
# zeros are treated as missing values

log_est <- function(x_axis,y_axis,kappa=2,q=1.2,y0=1){
   # do the "cleaning" of the data (get rid of all the zeros)
   x <- x_axis[1:length(y_axis)]
   y <- y_axis[y_axis > 0]
   x <- x[y_axis > 0]

   # the NLS estimation of the q-exp. function
   temp <- nls(y ~ (y0*(1-(1-q)*x/kappa)^(1/(1-q))),start=list(y0=y0,kappa=kappa,q=q))

   y0 <- summary(temp)$param[1]
   kappa <- summary(temp)$param[2]
   q <- summary(temp)$param[3]
   return(list(nls_model=temp,y0=y0, kappa=kappa, q=q))
}

# THE USE
#
degree_nums <- c(1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21, 22,23,24,25,26,27,28,29,30,3 1,32,33,34,35,36,37,38,39,40)
degree_freq <- c(0,0,0,0,0,0,55,54,53,52,51,50,49,48,47,46,45,44,43,42,41,40,39,38,37,36,35,34,33,32,31,30,29,28,27,26,25,24,24,0)
params <- log_est(degree_nums,degree_freq,kappa=100,q=1.8,y0=1*10^5)
params

#$nls_model
#Nonlinear regression model
#  model:  y ~ (y0 * (1 - (1 - q) * x/kappa)^(1/(1 - q)))
#   data:  parent.frame()
#          y0        kappa            q
#57173.424740   243.849159     2.000495
# residual sum-of-squares:  9278225
#
#$y0
#      y0
#57173.42
#
#$kappa
#   kappa
#243.8492
#
#$q
#       q
#2.000495


# --------------------------------------------- #

# weighted linear least squares estimation of q-exponential curve to the data (the estimation of a parameter kappa with respect to different parameters q and known y0)
# x_axis ... city-size
# y_axis ... cumulative population size
# w ... weights for respective x_axis, y_axis
# y0 ... the value of a parameter y0

# the following procedure estimates kappa parameters for given y0 and a #set of qs (from 1.01 to 4.00, with the step of 0.01)
#DWHITE CHANGE RANGE FOR INCREASING FUNCTION (q above 1 is for decreasing functions, below 1 for increasing functions)

set of qs (from 0.01 to 0.99, with the step of 0.01)

set_lin_est_w <- function(x_axis,y_axis,y0,w){
   i <- 1
   # do the "cleaning" of the data (get rid of all the zeros)
   x <- x_axis[1:length(y_axis)]
   y <- y_axis[y_axis > 0]
   x <- x[y_axis > 0]
   w <- w[y_axis > 0]

   # initialization of a list, where to save z values
   zlist <- list()

#  q <- 1.01 #decreasing function
#DWHITE CHANGE RANGE FOR INCREASING FUNCTION
   q <- .99
   # compute z values
   z <- ((y/y0)^(1-q) - 1) / (1-q)
   zlist[[1]] <- z

   temp <- summary.lm(lm(z ~ x - 1,weights=w))
   r_sq <- temp$adj; est <- temp$coef[[1]]
   se <- temp$coef[[2]]; p_val <- temp$coef[[4]]

   # compute the kappa parameters for the respective qs
#  q_set <- (102:400)/100
#DWHITE CHANGE RANGE FOR INCREASING FUNCTION
   q_set <- (002:099)/100
   for (q in q_set) {
      i <- i + 1
      z <- ((y/y0)^(1-q) - 1) / (1-q)
      zlist[[i]] <- z
      temp <- summary.lm(lm(z ~ x - 1,weights=w))
      r_sq <- c(r_sq, temp$adj); est <- c(est, temp$coef[[1]])
      se <- c(se, temp$coef[[2]]); p_val <- c(p_val, temp$coef[[4]])
   }

   return(list(x=x, z=zlist, r_sq=r_sq, est=est, se=se, p_val=p_val))
}

# to get the best q and kappa parameters out of the list, which we get from the function set_lin_est_w, I used the following function:

# x_axis ... city-size
# y_axis ... cumulative population size
# w ... weights for respective x_axis, y_axis
#y0 was estimated with NLS estimation or taken from interpolation between the nearest two periods od NLS estimation, where it converged

lin_est_w <- function(x_axis,y_axis,y0,w){
#  q_set <- (101:400)/100
#DWHITE CHANGE RANGE FOR INCREASING FUNCTION
   q_set <- (002:099)/100
   # kappa parameters are estimated from the function above
   temp <- set_lin_est_w(x_axis,y_axis,y0,w)

   # get the index from the list, where the R^2 was the highest
   ind <- which(temp$r_sq==max(temp$r_sq))

   kappa <- -1/temp$est[ind]
   q <- q_set[ind]

   return(list(y0=y0, kappa=kappa, q=q))
}
# THE USE
degree_nums <- c(1,2,3,4,5,6,7,8,9)
degree_freq <- c(43,42,34,17,7,3,5,4,2)
weights <- c(1,1,1,1,1,1,1,1,1)
y0 <- 65
params <- lin_est_w(degree_nums,degree_freq,y0,weights)
params


#
#65
#3.61
#0.73


#$y0
#[1] 55
#
#$kappa
#[1] 3.61
#
#$q
#[1] 0.73