posted an update

Just got a script running for batch causal discovery processing using Tetrad/r-causal:

####### frame script:

#!/usr/bin/env Rscript

# Batch driver: for every (group, ade, path) combination, run the
# simonMagus.R worker once per file found in the matching "processed"
# input directory, writing the result under the parallel "ccd..." tree.

library(stringi)
library(stringr)

workingDir <- getwd()  # the relative paths below resolve against this
sourceRepo <- "UT"
groups <- c("0", "1")
ades <- "gib"  # alternative set: c("aki", "ali")
paths <- c("PREDISPOSES-INV==", "CAUSES-INV==", "TREATS==COEXISTS_WITH-INV")

for (g in groups) {
  for (a in ades) {
    for (p in paths) {
      source.drug.dir <- file.path(
        paste0("tetrad", sourceRepo), a, g, p, "processed"
      )
      target.drug.dir <- file.path(paste0("ccd", sourceRepo), a, g, p)

      # The worker writes straight into this directory, so make sure it
      # exists before any file is processed.
      dir.create(target.drug.dir, recursive = TRUE, showWarnings = FALSE)

      drugs <- list.files(path = source.drug.dir)
      for (d in drugs) {
        sourceFileName <- file.path(source.drug.dir, d)
        targetFileName <- file.path(target.drug.dir, d)

        # Quote both paths: file names may contain spaces or characters
        # that the shell would otherwise interpret.
        cmd.txt <- paste(
          "Rscript simonMagus.R",
          shQuote(sourceFileName),
          shQuote(targetFileName),
          sep = " "
        )
        print(cmd.txt)

        # Keep going on failure so one bad input does not abort the batch,
        # but make the failure visible.
        status <- system(cmd.txt)
        if (status != 0) {
          warning(
            "simonMagus.R exited with status ", status, " for ",
            sourceFileName,
            call. = FALSE
          )
        }
      }
    }
  }
}

############## ... and the worker bee:

#!/usr/bin/env Rscript

# simonMagus.R -- worker script: runs GFCI (discrete) on one input table and
# writes the resulting edge list to the given output file.

library(rJava)
library(rcausal)

# Usage example:
#   sudo Rscript ./simonMagus.R \
#     tetradUT/gib/1/CAUSES-INV==/processed/Ketoprofen.tsv \
#     Ketoprofen.gfci.txt

# Dump `data` to the file `filename` with write.table(), suppressing both
# the row names and the column header. All other write.table() settings
# (including quoting of character values) are left at their defaults.
writeVec <- function(filename, data) {
  write.table(
    data,
    file = filename,
    col.names = FALSE,
    row.names = FALSE
  )
}

# ---- Command-line arguments ------------------------------------------------
# args[1]: input table (read with a header row)
# args[2]: output file that receives the numbered edge list
args <- commandArgs(trailingOnly = TRUE)
if (length(args) < 2) {
  stop(
    "usage: Rscript simonMagus.R <sourceFileName> <targetFileName>",
    call. = FALSE
  )
}

sourceFileName <- as.character(args[1])
targetFileName <- as.character(args[2])

# ---- Load data and build prior knowledge -----------------------------------
data <- read.table(file = sourceFileName, header = TRUE)

# Every column except the outcome 'ade' goes into the first temporal tier.
# NOTE(review): 'medication' is not excluded here (the exclusion below was
# disabled by the author), so it is listed in both tier 1 and tier 2 --
# confirm this is what the knowledge builder should receive.
CVCs <- colnames(data)
CVCs <- CVCs[CVCs != "ade"]
# CVCs <- CVCs[CVCs != "medication"]
print(CVCs)

# Temporal tiers, earliest first: covariates, then medication, then ade.
temporal <- list(CVCs, "medication", "ade")

# Only temporal knowledge is supplied. priorKnowledge() also accepts
# `forbiddirect` / `requiredirect`; they are deliberately not passed here.
prior <- priorKnowledge(addtemporal = temporal)

# ---- Run GFCI for discrete data --------------------------------------------
dat.gfci <- gfci.discrete(
  df = data,
  structurePrior = 1.0,
  samplePrior = 1.0,
  depth = 1,
  verbose = TRUE,
  priorKnowledge = prior,
  java.parameters = "-Xmx52g"
)

print(dat.gfci$parameters)  # the parameters the GFCI run used
print(dat.gfci$datasets)    # the dataset
print(dat.gfci$nodes)       # the result's nodes
print(dat.gfci$edges)       # the result's edges

# ---- Write the numbered edge list ------------------------------------------
model.edges <- dat.gfci$edges

# seq_along() (rather than 1:length()) yields no entries for an empty graph
# instead of bogus "1. NA" / "0. " lines.
edges.list <- c(
  "Graph Edges:",
  paste(seq_along(model.edges), model.edges, sep = ". ")
)

# Write plain, unquoted lines directly to the target. This replaces the
# earlier scratch-file + `sed` post-processing, whose `s/x//g` step removed
# every "x" from the output and so corrupted any variable name containing one.
writeLines(edges.list, con = targetFileName)

input: raw case list format

output: input for tetrad/Cytoscape that looks like this:

Graph Edges:

  1. abdominal_pain --> ade
  2. abdominal_pain --> colonic_polyps
  3. abdominal_pain --> well_nourished
  4. ade --> chronic_radiation_proctitis
  5. ade --> colonic_polyps
  6. ade --> enterocolitis
  7. ade --> proctosigmoiditis
  8. ade --> well_nourished
  9. agitation --- abdominal_pain
  10. agitation --> ade
  11. colonic_polyps --> premature_ejaculation
  12. dementia --> abdominal_pain
  13. dementia --> ade
  14. dementia --> agitation
  15. dementia --> medication
  16. medication --- abdominal_pain
  17. medication --> ade
  18. medication --- agitation
  19. medication --> colonic_polyps
  20. medication --> premature_ejaculation
  21. panic_disorder --> abdominal_pain
  22. panic_disorder --> agitation
  23. panic_disorder --> medication

Yay!

--Scott

Log in or sign up for Devpost to join the conversation.