configr is an integrated parser package that can process configuration files in JSON, INI, YAML and TOML formats. This vignette will walk you through the basics of using configr to extend the existing parsers in R.

Built-in examples of configuration files

Example JSON, INI, YAML and TOML configuration files are bundled with the package and can be located by following the instructions below.

library(configr)
config.json <- system.file('extdata', 'config.json', package='configr')
config.ini <- system.file('extdata', 'config.ini', package='configr')
config.yaml <- system.file('extdata', 'config.yaml', package='configr')
config.toml <- system.file('extdata', 'config.toml', package='configr')
config.glob <- system.file('extdata', 'config.global.toml', package='configr')

Check the configuration file type

is.json.file, is.ini.file, is.yaml.file and is.toml.file can be used to check the configuration file type. If the input file matches the required type, the function returns TRUE. get.config.type uses the above functions to get the file type name: json, ini, yaml, toml, or FALSE.

is.json.file(config.json)
#> [1] TRUE
is.toml.file(config.toml)
#> [1] TRUE
is.ini.file(config.ini)
#> [1] TRUE
is.yaml.file(config.yaml)
#> [1] TRUE
get.config.type(config.json)
#> [1] "json"
get.config.type(config.yaml)
#> [1] "yaml"
get.config.type(config.ini)
#> [1] "ini"
get.config.type(config.toml)
#> [1] "toml"

Get the configuration section names

Section names of a configuration file can be obtained using eval.config.sections. The sections method of the Python package ConfigParser inspired us to add this function.

eval.config.sections(config.ini)
#> [1] "default"            "comments"           "extra_list_parse"  
#> [4] "other_config_parse" "rcmd_parse"         "bash_parse"        
#> [7] "mulitple_parse"     "glue_parse"
eval.config.sections(config.toml)
#> [1] "bash_parse"         "comments"           "default"           
#> [4] "extra_list_parse"   "glue_parse"         "mulitple_parse"    
#> [7] "other_config_parse" "title"

Read the configuration file

read.config reads a configuration file into R as a list object, and can pass parameters to the underlying read function (fromJSON/read.ini/yaml.load_file/parseToml) accordingly.

# Read in R as a list (JSON/INI/YAML/TOML are supported)
# fromJSON/read.ini/readLines/yaml.load parameters can be automatically matched by parameter name (encoding, etc.)
read.config(file = config.toml)
#> List of 8
#>  $ bash_parse        :List of 2
#>   ..$ parsed: chr "bash"
#>   ..$ raw   : chr "#>#echo bash#<#"
#>  $ comments          :List of 1
#>   ..$ version: chr "0.2.3"
#>  $ default           :List of 1
#>   ..$ debug: chr "{{debug}} {{debug2}}"
#>  $ extra_list_parse  :List of 2
#>   ..$ parsed: chr "1"
#>   ..$ raw   : chr "{{yes}}"
#>  $ glue_parse        :List of 4
#>   ..$ parsed_1: chr [1:10] "1" "2" "3" "4" ...
#>   ..$ parsed_2: int [1:10] 1 2 3 4 5 6 7 8 9 10
#>   ..$ raw_1   : chr "!!glue {1:10}"
#>   ..$ raw_2   : chr "!!glue_numeric {1:10}"
#>  $ mulitple_parse    :List of 2
#>   ..$ parsed: chr "configr, configr, yes, 1, config, config, no, 0"
#>   ..$ raw   : chr "@>@str_replace('config','g$','gr')@<@, #>#echo configr#<#, {{key:yes_flag}}, {{yes}}, @>@str_replace('configr',"| __truncated__
#>  $ other_config_parse:List of 2
#>   ..$ parsed: chr "yes no"
#>   ..$ raw   : chr "{{key:yes_flag}} {{key:no_flag}}"
#>  $ title             : chr "TOML Example"

eval.config returns a value or a list object with the file path, config group and file type attached as attributes.

# Get the same object as the config package would: only the
# 'default' (or R_CONFIG_ACTIVE) config set in config.cfg (or R_CONFIGFILE_ACTIVE) is returned
eval.config(file = config.yaml)
#> $debug
#> [1] "{{debug}} {{debug2}}"
#> 
#> attr(,"config")
#> [1] "default"
#> attr(,"configtype")
#> [1] "yaml"
#> attr(,"file")
#> [1] "/private/var/folders/gg/t4t06x3s54dfs9p8cpzr_d300000gn/T/RtmpHXLUk0/Rinst458512c6ffeb/configr/extdata/config.yaml"

# Read designated section
eval.config(file = config.json, config = "comments")
#> $version
#> [1] "0.2.3"
#> 
#> attr(,"config")
#> [1] "comments"
#> attr(,"configtype")
#> [1] "json"
#> attr(,"file")
#> [1] "/private/var/folders/gg/t4t06x3s54dfs9p8cpzr_d300000gn/T/RtmpHXLUk0/Rinst458512c6ffeb/configr/extdata/config.json"

# Read a single value from the designated section
eval.config(file = config.ini, config = "comments", value = "version")
#> [1] "0.2.3"

eval.config.merge merges multiple sections (the sections argument is equivalent to config in the eval.config function) and flattens the configuration file by one level.

eval.config.merge(file = config.json, sections = c('default', 'comments'))
#> $debug
#> [1] "{{debug}} {{debug2}}"
#> 
#> $version
#> [1] "0.2.3"
#> 
#> attr(,"config")
#> [1] "default"  "comments"
#> attr(,"configtype")
#> [1] "json"
#> attr(,"file")
#> [1] "/private/var/folders/gg/t4t06x3s54dfs9p8cpzr_d300000gn/T/RtmpHXLUk0/Rinst458512c6ffeb/configr/extdata/config.json"
eval.config.merge(file = config.toml, sections = c('default', 'comments'))
#> $debug
#> [1] "{{debug}} {{debug2}}"
#> 
#> $version
#> [1] "0.2.3"
#> 
#> attr(,"config")
#> [1] "default"  "comments"
#> attr(,"configtype")
#> [1] "toml"
#> attr(,"file")
#> [1] "/private/var/folders/gg/t4t06x3s54dfs9p8cpzr_d300000gn/T/RtmpHXLUk0/Rinst458512c6ffeb/configr/extdata/config.toml"

fetch.config can parse configuration files from the internet and from local paths, merge them, and return a single list.

links <- c("https://raw.githubusercontent.com/JhuangLab/BioInstaller/master/inst/extdata/config/db/db_annovar.toml", 
           "https://raw.githubusercontent.com/JhuangLab/BioInstaller/master/inst/extdata/config/db/db_main.toml", 
           system.file('extdata', 'config.toml', package = "configr"))
x <- fetch.config(links)
x[c(1:5, length(x))]
#> $db_annovar_1000g
#> $db_annovar_1000g$buildver_available
#> $db_annovar_1000g$buildver_available$`1000g`
#> [1] "hg18"
#> 
#> $db_annovar_1000g$buildver_available$`1000g2010`
#> [1] "hg18"
#> 
#> $db_annovar_1000g$buildver_available$`1000g2012apr`
#> [1] "hg19" "hg18"
#> 
#> $db_annovar_1000g$buildver_available$`1000g2012jul`
#> [1] "hg18"
#> 
#> $db_annovar_1000g$buildver_available$`1000g2014oct`
#> [1] "hg38" "hg19" "hg18"
#> 
#> $db_annovar_1000g$buildver_available$`1000g2015aug`
#> [1] "hg38" "hg19"
#> 
#> $db_annovar_1000g$buildver_available$other
#> [1] "hg19"
#> 
#> 
#> $db_annovar_1000g$description
#> [1] "alternative allele frequency data in 1000 Genomes Project"
#> 
#> $db_annovar_1000g$source_url
#> [1] "http://www.openbioinformatics.org/annovar/download/{{buildver}}_{{version}}.zip"
#> 
#> $db_annovar_1000g$version_available
#>  [1] "1000g2015aug" "1000g2014oct" "1000g2014sep" "1000g2014aug" "1000g2012apr"
#>  [6] "1000g2012feb" "1000g2011may" "1000g2010nov" "1000g2012apr" "1000g2010jul"
#> [11] "1000g2010"    "1000g"       
#> 
#> $db_annovar_1000g$version_newest
#> [1] "1000g2015aug"
#> 
#> 
#> $db_annovar_1000g_sqlite
#> $db_annovar_1000g_sqlite$buildver_available
#> [1] "hg19"
#> 
#> $db_annovar_1000g_sqlite$install
#> [1] "#R#for(i in c('all', 'afr', 'eas', 'eur', 'sas', 'amr')) {\\n  x <- set.1000g.db(sprintf('{{version}}_%s', i), '{{buildver}}', \\\"sql\\\");\\n  params <- list(sql.file = x, dbname = str_replace(x, '.sql$', ''));\\n  do.call(sql2sqlite, params)\\n}\\n#R#"
#> 
#> $db_annovar_1000g_sqlite$source_url
#> [1] "http://bioinfo.rjh.com.cn/download/annovarR/humandb/{{buildver}}_{{version}}.tar.gz"
#> 
#> $db_annovar_1000g_sqlite$version_available
#> [1] "1000g2015aug"
#> 
#> $db_annovar_1000g_sqlite$version_newest
#> [1] "1000g2015aug"
#> 
#> 
#> $db_annovar_abraom
#> $db_annovar_abraom$buildver_available
#> [1] "hg19" "hg38"
#> 
#> $db_annovar_abraom$description
#> [1] "abraom: 2.3 million [Brazilian genomic variants](https://www.ncbi.nlm.nih.gov/pubmed/28332257)"
#> 
#> $db_annovar_abraom$source_url
#> [1] "http://www.openbioinformatics.org/annovar/download/{{buildver}}_{{version}}.txt.gz"
#> 
#> $db_annovar_abraom$version_available
#> [1] "abraom"
#> 
#> 
#> $db_annovar_avsift
#> $db_annovar_avsift$buildver_available
#> [1] "hg19" "hg18"
#> 
#> $db_annovar_avsift$decompress
#> [1] TRUE TRUE
#> 
#> $db_annovar_avsift$description
#> [1] "whole-exome SIFT scores for non-synonymous variants (obselete and should not be uesd any more)"
#> 
#> $db_annovar_avsift$source_url
#> [1] "http://www.openbioinformatics.org/annovar/download/{{buildver}}_{{version}}.txt.gz"    
#> [2] "http://www.openbioinformatics.org/annovar/download/{{buildver}}_{{version}}.txt.idx.gz"
#> 
#> $db_annovar_avsift$version_available
#> [1] "avsift"
#> 
#> $db_annovar_avsift$version_newest
#> [1] "avsift"
#> 
#> 
#> $db_annovar_avsnp
#> $db_annovar_avsnp$buildver_available
#> $db_annovar_avsnp$buildver_available$avsnp138
#> [1] "hg19"
#> 
#> $db_annovar_avsnp$buildver_available$avsnp142
#> [1] "hg38" "hg19"
#> 
#> $db_annovar_avsnp$buildver_available$avsnp144
#> [1] "hg38" "hg19"
#> 
#> $db_annovar_avsnp$buildver_available$avsnp147
#> [1] "hg38" "hg19"
#> 
#> $db_annovar_avsnp$buildver_available$avsnp150
#> [1] "hg38" "hg19"
#> 
#> 
#> $db_annovar_avsnp$decompress
#> [1] TRUE TRUE
#> 
#> $db_annovar_avsnp$description
#> $db_annovar_avsnp$description$avsnp138
#> [1] "dbSNP138 with allelic splitting and left-normalization"
#> 
#> $db_annovar_avsnp$description$avsnp142
#> [1] "dbSNP142 with allelic splitting and left-normalization"
#> 
#> $db_annovar_avsnp$description$avsnp144
#> [1] "dbSNP144 with allelic splitting and left-normalization (http://annovar.openbioinformatics.org/en/latest/articles/dbSNP/#additional-discussions)"
#> 
#> $db_annovar_avsnp$description$avsnp147
#> [1] "dbSNP147 with allelic splitting and left-normalization"
#> 
#> 
#> $db_annovar_avsnp$source_url
#> [1] "http://www.openbioinformatics.org/annovar/download/{{buildver}}_{{version}}.txt.gz"    
#> [2] "http://www.openbioinformatics.org/annovar/download/{{buildver}}_{{version}}.txt.idx.gz"
#> 
#> $db_annovar_avsnp$version_available
#> [1] "avsnp150" "avsnp147" "avsnp144" "avsnp142" "avsnp138"
#> 
#> $db_annovar_avsnp$version_newest
#> [1] "avsnp150"
#> 
#> 
#> $title
#> [1] "TOML Example"

Converting and writing configuration file

convert.config reads a configuration file and writes it out as a configuration file of the specified file type (json, ini, yaml). Moreover, write.config is similar to convert.config but takes a list object rather than a file as input.

# Convert YAML configuration file to JSON format
out.json <- tempfile(fileext = ".json")
convert.config(file = config.yaml, out.file = out.json, convert.to = "JSON")
#> [1] TRUE
get.config.type(out.json)
#> [1] "json"

# Generate a JSON format configuration file
list.test <- list(a=c(123,456))
out.fn <- sprintf("%s/test.json", tempdir())
write.config(config.dat = list.test, file.path = out.fn, write.type = "json")
#> [1] TRUE
get.config.type(out.fn)
#> [1] "json"

# Generate a YAML format configuration file with defined indent
write.config(config.dat = list.test, file.path = out.fn, write.type = "yaml", indent = 4)
#> [1] TRUE
get.config.type(out.fn)
#> [1] FALSE

# Generate a YAML format configuration file with defined indent and specified sections
#write.config(config.dat = list.test, file.path = out.fn, write.type = "yaml", sections = "a", indent = 4)
#get.config.type(out.fn)

Configr specific extra parse

configr provides several useful extra parse functions; you can use parse.extra to apply them to any list object. Of course, read.config, eval.config and eval.config.merge can use parse.extra directly by passing the relevant parameters through to it.

Note: glue.parse uses the glue function from the glue package, just like glue('{1:5}'), and the result is processed by unname(unlist(x)). The !!glue flag can be changed by setting glue.flag. Note that only items containing the glue.flag string are parsed, and that the positions of the following items shift when a glue result has multiple values, e.g. ['{a}', '!!glue {1:5}', '{{a}}'] will be parsed to ['{a}', '1', '2', '3', '4', '5', '{{a}}'].

other.config <- system.file('extdata', 'config.other.yaml', package='configr')

read.config(file = other.config)
#> $key
#> $key$test_parse
#> [1] 123
#> 
#> $key$test_parse2
#> [1] 234
#> 
#> $key$yes_flag
#> [1] "yes"
#> 
#> $key$no_flag
#> [1] "no"
#> 
#> 
#> $`samtools@1.3.1`
#> $`samtools@1.3.1`$source_dir
#> [1] "/tmp"

config.1 <- read.config(file = config.json)
config.1$default
#> $debug
#> [1] "{{debug}} {{debug2}}"
read.config(file = config.json, extra.list = list(debug = "self", debug2 = "self2"))$default
#> $debug
#> [1] "self self2"

sections <- c('default', 'other_config_parse')
config.1[sections]
#> $default
#> $default$debug
#> [1] "{{debug}} {{debug2}}"
#> 
#> 
#> $other_config_parse
#> $other_config_parse$raw
#> [1] "{{key:yes_flag}} {{key:no_flag}}"
#> 
#> $other_config_parse$parsed
#> [1] "yes no"
read.config(file = config.json, extra.list = list(debug = "self", debug2 = "self2"), 
  other.config = other.config)[sections]
#> $default
#> $default$debug
#> [1] "self self2"
#> 
#> 
#> $other_config_parse
#> $other_config_parse$raw
#> [1] "yes no"
#> 
#> $other_config_parse$parsed
#> [1] "yes no"

sections <- c('default', 'other_config_parse', 'rcmd_parse')
# The read.config and parse.extra calls below return the same value
config.1[sections]
#> $default
#> $default$debug
#> [1] "{{debug}} {{debug2}}"
#> 
#> 
#> $other_config_parse
#> $other_config_parse$raw
#> [1] "{{key:yes_flag}} {{key:no_flag}}"
#> 
#> $other_config_parse$parsed
#> [1] "yes no"
#> 
#> 
#> $rcmd_parse
#> $rcmd_parse$raw
#> [1] "@>@ Sys.Date() @<@"
read.config(file = config.json, extra.list = list(debug = "self", debug2 = "self2"), 
  other.config = other.config, rcmd.parse = T)[sections]
#> $default
#> $default$debug
#> [1] "self self2"
#> 
#> 
#> $other_config_parse
#> $other_config_parse$raw
#> [1] "yes no"
#> 
#> $other_config_parse$parsed
#> [1] "yes no"
#> 
#> 
#> $rcmd_parse
#> $rcmd_parse$raw
#> [1] "2020-07-17"
parse.extra(config.1, extra.list = list(debug = "self", debug2 = "self2"), 
  other.config = other.config, rcmd.parse = T)[sections]
#> $default
#> $default$debug
#> [1] "self self2"
#> 
#> 
#> $other_config_parse
#> $other_config_parse$raw
#> [1] "yes no"
#> 
#> $other_config_parse$parsed
#> [1] "yes no"
#> 
#> 
#> $rcmd_parse
#> $rcmd_parse$raw
#> [1] "2020-07-17"


sections <- c('default', 'other_config_parse', 'rcmd_parse', 'mulitple_parse')
config.1[sections]
#> $default
#> $default$debug
#> [1] "{{debug}} {{debug2}}"
#> 
#> 
#> $other_config_parse
#> $other_config_parse$raw
#> [1] "{{key:yes_flag}} {{key:no_flag}}"
#> 
#> $other_config_parse$parsed
#> [1] "yes no"
#> 
#> 
#> $rcmd_parse
#> $rcmd_parse$raw
#> [1] "@>@ Sys.Date() @<@"
#> 
#> 
#> $mulitple_parse
#> $mulitple_parse$raw
#> [1] "@>@str_replace('config','g$','gr')@<@, #>#echo configr#<#, {{key:yes_flag}}, {{yes}}, @>@str_replace('configr','r','')@<@, #># echo config#<#, {{key:no_flag}}, {{no}}"
#> 
#> $mulitple_parse$parsed
#> [1] "configr, configr, yes, 1, config, config, no, 0"
parse.extra(config.1, extra.list = list(debug = "self", debug2 = "self2", yes = "1", no = "0"), 
  other.config = other.config, rcmd.parse = T, bash.parse = T)[sections]
#> $default
#> $default$debug
#> [1] "self self2"
#> 
#> 
#> $other_config_parse
#> $other_config_parse$raw
#> [1] "yes no"
#> 
#> $other_config_parse$parsed
#> [1] "yes no"
#> 
#> 
#> $rcmd_parse
#> $rcmd_parse$raw
#> [1] "2020-07-17"
#> 
#> 
#> $mulitple_parse
#> $mulitple_parse$raw
#> [1] "configr, configr, yes, 1, config, config, no, 0"
#> 
#> $mulitple_parse$parsed
#> [1] "configr, configr, yes, 1, config, config, no, 0"

# glue parse
raw <- c("a", "!!glue{1:5}", "c")
list.raw <- list(glue = raw, nochange = 1:10)
list.raw
#> $glue
#> [1] "a"           "!!glue{1:5}" "c"          
#> 
#> $nochange
#>  [1]  1  2  3  4  5  6  7  8  9 10
expect.parsed.1 <- c("a", "1", "2", "3", "4", "5", "c")
expect.parsed.2 <- list(glue = expect.parsed.1, nochange = 1:10)
parse.extra(list.raw, glue.parse = TRUE, glue.flag = "!!glue")
#> $glue
#> [1] "a" "1" "2" "3" "4" "5" "c"
#> 
#> $nochange
#>  [1]  1  2  3  4  5  6  7  8  9 10


read.config(config.glob, global.vars.field = NULL)
#> List of 7
#>  $ global_vars: chr [1:4] "gvar_1" "gvar_2" "gvar_3" "gvar_5"
#>  $ gvar_1     : chr "G1"
#>  $ gvar_2     : chr "G2"
#>  $ gvar_3     : chr "G3"
#>  $ gvar_5     : chr "G5"
#>  $ subsection :List of 4
#>   ..$ value_1: chr "{{gvar_1}}/value_1"
#>   ..$ value_2: chr "{{gvar_2}}/value_2"
#>   ..$ value_3: chr "{{gvar_3}}/value_3"
#>   ..$ value_5: chr "{{gvar_5}}/value_5"
#>  $ title      : chr "Demo of global vars of configuration files"

read.config(config.glob)
#> List of 7
#>  $ global_vars: chr [1:4] "gvar_1" "gvar_2" "gvar_3" "gvar_5"
#>  $ gvar_1     : chr "G1"
#>  $ gvar_2     : chr "G2"
#>  $ gvar_3     : chr "G3"
#>  $ gvar_5     : chr "G5"
#>  $ subsection :List of 4
#>   ..$ value_1: chr "G1/value_1"
#>   ..$ value_2: chr "G2/value_2"
#>   ..$ value_3: chr "G3/value_3"
#>   ..$ value_5: chr "G5/value_5"
#>  $ title      : chr "Demo of global vars of configuration files"

External urls about configuration format and others

configr provides a function, config.help, to access external resources about various configuration formats and other related materials.

Show all external urls stored in configr
config.help()
# Open item in browser
# config.help('toml_stackoverflow_search')
# Or use the row number to access
# config.help(23)

Other usage

config.sections.del can be used to delete a section of a config; it is equivalent to config$section <- NULL.

config <- read.config(file = config.json, extra.list = list(debug = "self", debug2 = "self2"), 
  other.config = other.config)[sections]
names(config)
#> [1] "default"            "other_config_parse" "rcmd_parse"        
#> [4] "mulitple_parse"
config <- config.sections.del(config, 'default')
names(config)
#> [1] "other_config_parse" "rcmd_parse"         "mulitple_parse"

str2config can be used to parse a string object to a configuration list

json_string <- '{"city" : "Crich"}\n'
yaml_string <- 'foo: 123\n'
json_config <- str2config(json_string)
yaml_config <- str2config(yaml_string)
print(json_config)
#> $city
#> [1] "Crich"
print(yaml_config)
#> $foo
#> [1] 123

Session info

Here is the output of sessionInfo() on the system on which this document was compiled:

#> R version 4.0.0 (2020-04-24)
#> Platform: x86_64-apple-darwin17.0 (64-bit)
#> Running under: macOS Catalina 10.15.5
#> 
#> Matrix products: default
#> BLAS:   /Library/Frameworks/R.framework/Versions/4.0/Resources/lib/libRblas.dylib
#> LAPACK: /Library/Frameworks/R.framework/Versions/4.0/Resources/lib/libRlapack.dylib
#> 
#> locale:
#> [1] en_US.UTF-8/en_US.UTF-8/en_US.UTF-8/C/en_US.UTF-8/en_US.UTF-8
#> 
#> attached base packages:
#> [1] stats     graphics  grDevices utils     datasets  methods   base     
#> 
#> other attached packages:
#> [1] configr_0.3.5
#> 
#> loaded via a namespace (and not attached):
#>  [1] compiler_4.0.0 RcppTOML_0.1.6 magrittr_1.5   ini_0.3.1      tools_4.0.0   
#>  [6] glue_1.4.1     yaml_2.2.1     Rcpp_1.0.4.6   stringi_1.4.6  knitr_1.28    
#> [11] jsonlite_1.6.1 stringr_1.4.0  xfun_0.14      evaluate_0.14