Prerequisite

You need a running installation of Docker, docker-compose and R. If you are working in a Windows 9, 10 Pro, Linux or Mac environment, you can simply follow the steps in the install_ilcm.R script. If you are working in a Windows Home edition environment, you have to use the Docker Toolbox instead. In that case, follow the instructions in the install_ilcm_toolbox.R script and enter some commands (docker_commands.R) into the Docker console by hand.

Install Script

Then you need to download the repository from https://git.informatik.uni-leipzig.de/mam10cip/iLCM-ShinyApp or the install file from the website and open the start_ilcm.R script in your R instance. In this script, we will get the Docker images for MariaDB, Solr, ZooKeeper and the iLCM Shiny server. Finally, we will import some example data into the database, forward it to Solr and start working with the iLCM. These steps will now be explained in more detail.

Install needed packages

# Install only the packages that are not available yet, so re-running the
# script does not reinstall everything. stringr is included because the
# parameter section below calls stringr:: functions.
needed_packages <- c("RMySQL", "RCurl", "dplyr", "stringr")
missing_packages <- needed_packages[
  !vapply(needed_packages, requireNamespace, logical(1), quietly = TRUE)
]
if (length(missing_packages) > 0) {
  install.packages(missing_packages)
}

library(RMySQL)
library(RCurl)
library(dplyr)

Set home directory

We need to check whether the HOME system variable points to the correct home directory.

# Set the HOME environment variable for this R session. The iLCM data
# directory (SZD_DATA_DIR) is built from Sys.getenv("HOME") further down,
# so replace the placeholder path with your own home directory.
Sys.setenv(HOME = "/home/user")
# Alternative for Windows (uncomment and adapt the user name):
#Sys.setenv(HOME = "C:/Users/User")

Specify parameters and names

# Number of ZooKeeper and SolrCloud nodes to prepare directories for.
ZK_CLUSTER_SIZE <- 1
SOLRCLOUD_CLUSTER_SIZE <- 1

# Executables used for every docker / docker-compose call in this script.
DOCKER_BIN <- "docker"
DOCKER_COMPOSE_BIN <- "docker-compose"

# Root directory on the host that holds the data of all containers.
SZD_DATA_DIR <- file.path(Sys.getenv("HOME"), ".iLCM", "data")

# Application name: the name of the current working directory, lower-cased
# and with all underscores removed.
APP <- stringr::str_replace_all(
  string = stringr::str_to_lower(basename(getwd())),
  pattern = "_",
  replacement = ""
)

Pull docker images

Now we will pull the Docker images if they are not already present in the latest version. Then we create the corresponding folders in the config directory and copy the configuration files there.

Mariadb

# Image coordinates of the MariaDB container. They are read below and must
# therefore be defined before the image check.
# NOTE(review): "mariadb" / "latest" are assumed here, in line with the other
# images of this script -- confirm against TMCA/docker-compose.yml.
m_container_name <- "mariadb"
m_container_version <- "latest"

# `docker images` prints one row per local image, starting with the
# REPOSITORY and TAG columns. Compare both columns exactly so that images
# which merely contain the name (e.g. "vendor/mariadb") do not count, and
# collapse the result to a single TRUE/FALSE so the `if` below never receives
# a vector (which is an error in R >= 4.2).
script <- system2(command = DOCKER_BIN, args = "images", stdout = TRUE)
image_fields <- strsplit(trimws(script), "\\s+")
there <- any(vapply(
  image_fields,
  function(fields) {
    length(fields) >= 2 &&
      fields[1] == m_container_name &&
      fields[2] == m_container_version
  },
  logical(1)
))

# Pull the image only when it is not available locally.
if (!there) {
  system2(
    command = DOCKER_BIN,
    args = paste0("pull", " ", m_container_name, ":", m_container_version),
    stdout = TRUE
  )
}

# Create the script/conf directories; no warning when they already exist
# (e.g. when the script is run again).
HOST_DATA <- paste0(SZD_DATA_DIR, "/mariadb")
dir.create(paste0(HOST_DATA, "/myconfig"),
           recursive = TRUE, showWarnings = FALSE)
dir.create(paste0(HOST_DATA, "/docker-entrypoint-initdb.d"),
           recursive = TRUE, showWarnings = FALSE)

# Copy the prepared database initialisation script there.
file.copy("TMCA/db/init_iLCM.sql",
          paste0(HOST_DATA, "/docker-entrypoint-initdb.d"),
          recursive = TRUE)

# Open up the permissions of everything below HOST_DATA.
# NOTE(review): with use_umask = TRUE the requested mode is restricted by the
# process umask, so the effective mode may be less permissive than 777 --
# confirm this is intended.
f <- list.files(HOST_DATA, all.files = TRUE, full.names = TRUE, recursive = TRUE)
d <- list.dirs(HOST_DATA, recursive = TRUE, full.names = TRUE)
Sys.chmod(c(f, d), mode = "777", use_umask = TRUE)

solr

# Image coordinates of the Solr container.
s_container_name <- "solr"
s_container_version <- "latest"

# `docker images` prints one row per local image, starting with the
# REPOSITORY and TAG columns. Compare both columns exactly and collapse the
# result to a single TRUE/FALSE: several local Solr tags would otherwise hand
# a vector to the `if` below, which is an error in R >= 4.2.
script <- system2(command = DOCKER_BIN, args = "images", stdout = TRUE)
image_fields <- strsplit(trimws(script), "\\s+")
there <- any(vapply(
  image_fields,
  function(fields) {
    length(fields) >= 2 &&
      fields[1] == s_container_name &&
      fields[2] == s_container_version
  },
  logical(1)
))

# Pull the image only when it is not available locally.
if (!there) {
  system2(
    command = DOCKER_BIN,
    args = paste0("pull", " ", s_container_name, ":", s_container_version),
    stdout = TRUE
  )
}

Zookeeper

# Image coordinates of the ZooKeeper container.
z_container_name <- "zookeeper"
z_container_version <- "latest"

# `docker images` prints one row per local image, starting with the
# REPOSITORY and TAG columns. Compare both columns exactly and collapse the
# result to a single TRUE/FALSE: several matching rows would otherwise hand a
# vector to the `if` below, which is an error in R >= 4.2.
script <- system2(command = DOCKER_BIN, args = "images", stdout = TRUE)
image_fields <- strsplit(trimws(script), "\\s+")
there <- any(vapply(
  image_fields,
  function(fields) {
    length(fields) >= 2 &&
      fields[1] == z_container_name &&
      fields[2] == z_container_version
  },
  logical(1)
))

# Pull the image only when it is not available locally.
if (!there) {
  system2(
    command = DOCKER_BIN,
    args = paste0("pull", " ", z_container_name, ":", z_container_version),
    stdout = TRUE
  )
}


# Need a volume to read the config from
conf_prefix <- "zoo-"
conf_container <- paste0(conf_prefix, 1)
cluster_size <- ZK_CLUSTER_SIZE

# One logs/ and one data/ directory per ZooKeeper node. seq_len() yields an
# empty sequence for a cluster size of 0, whereas 1:0 would iterate twice.
for (i in seq_len(cluster_size)) {
  HOST_DATA <- paste0(SZD_DATA_DIR, "/", conf_prefix, i)
  dir.create(paste0(HOST_DATA, "/logs"), recursive = TRUE, showWarnings = FALSE)
  dir.create(paste0(HOST_DATA, "/data"), recursive = TRUE, showWarnings = FALSE)
}

iLCM Shiny-Server

# Image coordinates of the iLCM Shiny server container.
r_container_name <- "ckahmann/ilcm_r"
r_container_version <- "latest"

# `docker images` prints one row per local image, starting with the
# REPOSITORY and TAG columns. Compare both columns exactly and collapse the
# result to a single TRUE/FALSE: several matching rows would otherwise hand a
# vector to the `if` below, which is an error in R >= 4.2.
script <- system2(command = DOCKER_BIN, args = "images", stdout = TRUE)
image_fields <- strsplit(trimws(script), "\\s+")
there <- any(vapply(
  image_fields,
  function(fields) {
    length(fields) >= 2 &&
      fields[1] == r_container_name &&
      fields[2] == r_container_version
  },
  logical(1)
))

# Pull the image only when it is not available locally.
if (!there) {
  system2(
    command = DOCKER_BIN,
    args = paste0("pull", " ", r_container_name, ":", r_container_version),
    stdout = TRUE
  )
}

Copy configuration files

SOLR_HEAP <- ""
# Host names of the Solr nodes are "<image name>-<index>", e.g. "solr-1".
HOST_PREFIX <- paste0(s_container_name, "-")

# Prepare one data directory per Solr node and copy the configuration into it.
# seq_len() yields an empty sequence for a cluster size of 0, whereas 1:0
# would iterate twice.
for (i in seq_len(SOLRCLOUD_CLUSTER_SIZE)) {
  SOLR_HOSTNAME <- paste0(HOST_PREFIX, i)
  HOST_DATA_DIR <- paste0(SZD_DATA_DIR, "/", SOLR_HOSTNAME)

  # Create the directory tree first; no warning when it already exists.
  for (sub_dir in c("logs", "store/solr", "store/sql-driver", "store/shared-lib")) {
    dir.create(paste0(HOST_DATA_DIR, "/", sub_dir),
               recursive = TRUE, showWarnings = FALSE)
  }

  # Copy the Solr configuration, init scripts and the MariaDB JDBC driver.
  file.copy("TMCA/solr/solr.xml",
            paste0(HOST_DATA_DIR, "/store/solr"), recursive = TRUE)
  file.copy("TMCA/solr/docker-entrypoint-initdb.d",
            HOST_DATA_DIR, recursive = TRUE)
  file.copy("TMCA/solr/mariadb-java-client-2.1.0.jar",
            paste0(HOST_DATA_DIR, "/store/sql-driver"), recursive = TRUE)
  file.copy("TMCA/solr/config",
            paste0(HOST_DATA_DIR, "/store/solr"), recursive = TRUE)

  # Set the permissions once the directory exists; a chmod before the
  # directory is created has nothing to act on during the first run.
  # NOTE(review): this changes the node directory itself only, and the mode
  # is restricted by the umask (use_umask = TRUE). If the containers cannot
  # write below it, a recursive chmod of SZD_DATA_DIR may be required --
  # confirm.
  Sys.chmod(HOST_DATA_DIR, mode = "777", use_umask = TRUE)
}

Set network name

# Name of the docker network that is created for the containers below.
NETWORK_NAME <- paste("tmca", "default", sep = "_")

stop running instances (for restarting)

# Stop the services of the compose file (so the script can be re-run) and
# show the captured output, then take the compose stack down.
print(
  system2(DOCKER_COMPOSE_BIN,
          args = "-f TMCA/docker-compose.yml stop",
          stdout = TRUE)
)
system2(DOCKER_COMPOSE_BIN,
        args = "-f TMCA/docker-compose.yml down",
        stdout = TRUE)

create network

# Create the docker network named by NETWORK_NAME and capture the output.
system2(DOCKER_BIN,
        args = paste0("network create", " ", NETWORK_NAME),
        stdout = TRUE)

create images

# Run `docker-compose create` for the services defined in the compose file.
system2(DOCKER_COMPOSE_BIN,
        args = "-f TMCA/docker-compose.yml create",
        stdout = TRUE)

start images

# Start the services of the compose file and show the captured output.
print(
  system2(DOCKER_COMPOSE_BIN,
          args = "-f TMCA/docker-compose.yml start",
          stdout = TRUE)
)

Now calling ‘docker ps’ in a terminal should result in something like this:

# Embed the screenshot of the expected `docker ps` output.
knitr::include_graphics(path = "docker_ps.png")

Upload example data

For a test scenario we will upload the 7 Harry Potter books and some New York Times articles to our database.

# Connect to the MariaDB container started above.
mydb <- dbConnect(MySQL(), user = "root", password = "ilcm", dbname = "ilcm",
                  host = "0.0.0.0")

# All statements below modify data and return no rows, so they are run with
# dbExecute(): it sends the statement, returns the number of affected rows
# and releases the result set, instead of leaving one open per statement as
# dbSendQuery() does. The count is assigned to `rs` to keep the output quiet.
rs <- dbExecute(mydb, 'set character set "utf8"')

# upload Guardian: document metadata first, then the token table
query <- paste0("LOAD DATA LOCAL INFILE 'TMCA/Beispieldaten/meta_1999_12_GU.csv' INTO TABLE ilcm.documents  FIELDS TERMINATED BY ',' ENCLOSED BY '\"' LINES TERMINATED BY '","\n","' (dataset,id_doc,section,title,date,body,publisher,token,author,type,language) ;")
rs <- dbExecute(mydb, query)
query <- paste0("LOAD DATA LOCAL INFILE 'TMCA/Beispieldaten/token_1999_12_GU.csv' INTO TABLE ilcm.token  FIELDS TERMINATED BY ',' ENCLOSED BY '\"' LINES TERMINATED BY '","\n","';")
rs <- dbExecute(mydb, query)

# update metadata-tables: each ilcm.meta_<field> table receives the distinct
# (dataset, <field>) pairs found in the documents table
meta_fields <- c("type", "publisher", "section", "token", "date")
for (meta_field in meta_fields) {
  rs <- dbExecute(
    mydb,
    paste0("Insert into ilcm.meta_", meta_field,
           " (Select distinct dataset,", meta_field, " from documents);")
  )
}

# The connection is not used by the remaining steps; close it so it does not
# leak.
dbDisconnect(mydb)

configure solr and import data

# Upload the iLCM config set to ZooKeeper from inside the Solr container and
# echo the command output.
upconfig_args <- paste0("exec", " -i --user=solr tmca_solr-1_1 bin/solr zk upconfig -n iLCM -d /store/solr/config/iLCM")
cat(system2(DOCKER_BIN, args = upconfig_args, stdout = TRUE))

# Create the iLCM collection through the Solr Collections API.
create_collection_url <- "http://localhost:8081/solr/admin/collections?action=CREATE&collection.configName=iLCM&maxShardsPerNode=1&name=iLCM&numShards=1&replicationFactor=1&router.name=compositeId&routerName=compositeId&wt=json"
z <- getURL(create_collection_url, followlocation = TRUE)

# Request a full data import into the collection.
full_import_url <- "http://localhost:8081/solr/iLCM/dataimport?command=full-import"
z <- getURL(full_import_url, followlocation = TRUE)

# Request a build of the suggester.
suggest_build_url <- "http://localhost:8081/solr/iLCM/suggest?suggest.build=true"
z <- getURL(suggest_build_url, followlocation = TRUE)

Start iLCM

Now you need to open a Browser Window (tested on Chrome) and go to: https://localhost:3837

If you are using the toolbox, you can reach the app at: 192.168.99.100:3837