# ABOUT -------------------------------------------------------------------
# This based on supplementary material for the following paper but was modified for this workshop.
# Article title: Scientific collaboration of researchers and organizations: A two-level blockmodeling approach
# Journal name: Scientometrics
# Authors: Marjan Cugmas (marjan.cugmas@fdv.uni-lj.si), Franc Mali (franc.mali@fdv.uni-lj.si) and Aleš Žiberna (ales.ziberna@fdv.uni-lj.si)
# Affiliation: Faculty of Social Sciences, University of Ljubljana, Kardeljeva pločšad 5, SI-1000 Ljubljana

# LOADING PACKAGES -----------------------------------------
library(blockmodeling)

# IMPORTING NETWORK DATA --------------------------------------------------

# The object network data is a list with the following elements:
# --- network The network is in a matrix form. The individuals and the organizations 
# are by lines and columns. The 1s indicate a link between two 
# individuals, two organizations or between individual and organization.
# --- clu A vector which defines if a given node is an individual 
# or an organization (1=individual level; 2=organizational level).
# --- fieldResearchers A vector: scientific field of an individual.
# 1 - Educational studies
# 2 - Economics
# 3 - Sociology
# 4 - Administrative and organizational sciences
# 5 - Criminology and social work
# 5 - Information science and librarianship
# 5 - Architecture and design
# 5 - Ethnic studies
# 6 - Political science
# 9 - Psychology
# 10 - Sport
# --- fieldOrganisatons A vector: scientific field of an organisation.
# 1 - Natural and biotechnical sciences
# 2 - Engineering sciences and technologies
# 3 - Medical sciences
# 4 - General social sciences
# 5 - Other social sciences
# 6 - Economics
# 7 - Humanities

# load the network data and the partition defining the level 
allF <- readRDS("networkData.RDS")
# extract the network for auterachers


netAut <- allF$network[allF$clu==1,allF$clu==1]

# Compute 3 core to reduce network size (otherwise computations would take too long.)
netAutC3<-netAut
while(min(apply(netAutC3,1,sum))<3){
  sel<-apply(netAutC3,1,sum)>3
  netAutC3<-netAutC3[sel,sel]
}
dim(netAutC3)
plotMat(netAutC3)
autC3<-colnames(netAutC3)

org<-colnames(allF$network)[allF$clu==2]
cluC3<-allF$clu[colnames(allF$network)%in%c(autC3,org)]
netTLC3<-allF$network[c(autC3,org),c(autC3,org)]
orgC3<-names(which(apply(netTLC3[cluC3==2,cluC3==1],1,sum)>0))
cluC3<-allF$clu[colnames(allF$network)%in%c(autC3,orgC3)]
netTLC3<-allF$network[c(autC3,orgC3),c(autC3,orgC3)]
dim(netTLC3)
plotMat(netTLC3,cluC3)

# count the number of individuals and the number of organizations
NC3 <- unclass(table(cluC3))

# CALCULATE THE WEIGHTS ---------------------------------------------------

# calculate the number of errors that would have been obtained if 
# each part would consists of just one block
EM <- blockmodeling::critFunC(netTLC3,
                              clu = rep(1:2,times = NC3),
                              approaches = "hom", 
                              blocks = "com")$EM[1,,]
# consider the inversively proportional values
wFix <- EM[1,1]/EM
# set the weight 0 to the empty part of the network
wFix[1,2] <- 0
# devide the weights for the two-mode part by two
wFix_1_2 <- wFix
wFix_1_2[2,1] <- wFix[2,1]/2
# apply these weights to all the network values/cells


# APPLY BLOCKMODELING -----------------------------------------------------

# set the number of clusters for individual level 
kAut <- 8
# set the number of clusters for organizational level 
kOrg <- 4
# the values are most likely not optimal, this is here for demonstration purpuses only.

resName<-paste0("resTLC3k",kAut,kOrg,collapse = "")

if(!file.exists(paste0(resName,".RDS",collapse = ""))){
  #Warning - with 1000 repetitions this takes about 45 minutes on 4 physical core laptop
  tmpTime<-system.time(resTmp <- optRandomParC(M = netTLC3, 
                             n = NC3, 
                             k = c(kAut,kOrg), 
                             rep = 1000, 
                             nCores = 0, 
                             approaches = "hom",
                             blocks = "com",
                             posWeights  = expandMat(wFix_1_2, nn = c(c(kAut,kOrg)))))
  attr(resTmp,"time")<-tmpTime
  saveRDS(resTmp,file=paste0(resName,".RDS",collapse = ""))
  assign(resName,resTmp)
} else assign(resName,readRDS(resTmp,file=paste0(resName,".RDS",collapse = "")))
plot(get(resName), main=resName,cex.axes=0.5)




# Compute 5 core to reduce network size (otherwise computations would take too long.)
# A very boring solution
netAutC5<-netAut
while(min(apply(netAutC5,1,sum))<5){
  sel<-apply(netAutC5,1,sum)>5
  netAutC5<-netAutC5[sel,sel]
}
dim(netAutC5)
plotMat(netAutC5)
autC5<-colnames(netAutC5)

org<-colnames(allF$network)[allF$clu==2]
cluC5<-allF$clu[colnames(allF$network)%in%c(autC5,org)]
netTLC5<-allF$network[c(autC5,org),c(autC5,org)]
orgC5<-names(which(apply(netTLC5[cluC5==2,cluC5==1],1,sum)>0))
cluC5<-allF$clu[colnames(allF$network)%in%c(autC5,orgC5)]
netTLC5<-allF$network[c(autC5,orgC5),c(autC5,orgC5)]
dim(netTLC5)
plotMat(netTLC5,cluC5)

# count the number of individuals and the number of organizations
NC5 <- unclass(table(cluC5))

# CALCULATE THE WEIGHTS ---------------------------------------------------

# calculate the number of errors that would have been obtained if 
# each part would consists of just one block
EM <- blockmodeling::critFunC(netTLC5,
                              clu = rep(1:2,times = NC5),
                              approaches = "hom", 
                              blocks = "com")$EM[1,,]
# consider the inversively proportional values
wFix <- EM[1,1]/EM
# set the weight 0 to the empty part of the network
wFix[1,2] <- 0
# devide the weights for the two-mode part by two
wFix_1_2 <- wFix
wFix_1_2[2,1] <- wFix[2,1]/2
# apply these weights to all the network values/cells


# APPLY BLOCKMODELING -----------------------------------------------------

# set the number of clusters for individual level 
kAut <- 5
# set the number of clusters for organizational level 
kOrg <- 3
# the values are most likely not optimal, this is here for demonstration purpuses only.
resName<-paste0("resTLC5k",kAut,kOrg,collapse = "")

if(!file.exists(paste0(resName,".RDS",collapse = ""))){
  #Warning - with 1000 repetitions this takes about 45 minutes on 4 physical core laptop
  tmpTime<-system.time(resTmp <- optRandomParC(M = netTLC5, 
                                               n = NC5, 
                                               k = c(kAut,kOrg), 
                                               rep = 1000, 
                                               nCores = 0, 
                                               approaches = "hom",
                                               blocks = "com",
                                               posWeights  = expandMat(wFix_1_2, nn = c(c(kAut,kOrg)))))
  attr(resTmp,"time")<-tmpTime
  saveRDS(resTmp,file=paste0(resName,".RDS",collapse = ""))
  assign(resName,resTmp)
} else assign(resName,readRDS(resTmp,file=paste0(resName,".RDS",collapse = "")))
plot(get(resName), main=resName,cex.axes=0.5)
