removed ggm dependency

santikka · Dec 18, 2020 · 346caea · 346caea
1 parent 1d49517
commit 346caea
Show file tree

Hide file tree

Showing 51 changed files with 440 additions and 319 deletions.
diff --git a/.Rbuildignore b/.Rbuildignore
@@ -0,0 +1,2 @@
+^.*\.git$
+^README.md$
diff --git a/DESCRIPTION b/DESCRIPTION
@@ -1,11 +1,18 @@
 Package: causaleffect
-Version: 1.3.11
-Date: 2020-05-14
+Version: 1.3.12
+Date: 2020-12-18
 Title: Deriving Expressions of Joint Interventional Distributions and Transport Formulas in Causal Models
-Author: Santtu Tikka
-Maintainer: Santtu Tikka <[email protected]>
-Imports: ggm, igraph, XML
-Suggests: R.rsp
+Authors@R: person(given = "Santtu",
+    family = "Tikka",
+    role = c("aut", "cre"),
+    email = "[email protected]",
+    comment = c(ORCID = "0000-0003-4039-4342"))
+BugReports: https://github.com/santikka/causaleffect/issues
+Description: Functions for identification and transportation of causal effects. Provides a conditional causal effect identification algorithm (IDC) by Shpitser, I. and Pearl, J. (2006) <http://ftp.cs.ucla.edu/pub/stat_ser/r329-uai.pdf>, an algorithm for transportability from multiple domains with limited experiments by Bareinboim, E. and Pearl, J. (2014) <http://ftp.cs.ucla.edu/pub/stat_ser/r443.pdf> and a selection bias recovery algorithm by Bareinboim, E. and Tian, J. (2015) <http://ftp.cs.ucla.edu/pub/stat_ser/r445.pdf>. All of the previously mentioned algorithms are based on a causal effect identification algorithm by Tian , J. (2002) <http://ftp.cs.ucla.edu/pub/stat_ser/r309.pdf>.
+License: GPL (>= 2)
+Imports: igraph
+Suggests: R.rsp, XML
 VignetteBuilder: R.rsp
-Description: Functions for identification and transportation of causal effects. Provides a conditional causal effect identification algorithm (IDC) by Shpitser, I. and Pearl, J. (2006) <http://ftp.cs.ucla.edu/pub/stat_ser/r329-uai.pdf>, an algorithm for transportability from multiple domains with limited experiments by Bareinboim, E. and Pearl, J. (2014) <http://ftp.cs.ucla.edu/pub/stat_ser/r443.pdf> and a selection bias recovery algorithm by Bareinboim, E. and Tian, J. (2015) <http://ftp.cs.ucla.edu/pub/stat_ser/r445.pdf>. All of the previously mentioned algorithms are based on a causal effect identification algorithm by Tian , J. (2002) <http://ftp.cs.ucla.edu/pub/stat_ser/r309.pdf>. 
-License: GPL-2
+NeedsCompilation: no
+Author: Santtu Tikka [aut, cre] (<https://orcid.org/0000-0003-4039-4342>)
+Maintainer: Santtu Tikka <[email protected]>
diff --git a/NAMESPACE b/NAMESPACE
@@ -1,5 +1,31 @@
-export(aux.effect, causal.effect, generalize, get.expression, meta.transport, parse.graphml, recover, surrogate.outcome, transport, verma.constraints)
-import(igraph, XML)
-importFrom(ggm, dSep, powerset)
+export(aux.effect)
+export(generalize)
+export(causal.effect)
+export(get.expression)
+export(meta.transport)
+export(parse.graphml)
+export(transport)
+export(recover)
+export(surrogate.outcome)
+export(verma.constraints)
+importFrom(igraph, get.vertex.attribute)
+importFrom(igraph, set.edge.attribute)
+importFrom(igraph, vertex.attributes)
+importFrom(igraph, topological.sort)
+importFrom(igraph, induced.subgraph)
+importFrom(igraph, decompose.graph)
+importFrom(igraph, edge.attributes)
+importFrom(igraph, subgraph.edges)
+importFrom(igraph, get.adjacency)
+importFrom(igraph, neighborhood)
+importFrom(igraph, read.graph)
+importFrom(igraph, get.edges)
+importFrom(igraph, vertices)
+importFrom(igraph, vcount)
+importFrom(igraph, is.dag)
+importFrom(igraph, edges)
+importFrom(igraph, `%->%`)
+importFrom(igraph, V)
+importFrom(igraph, E)
 importFrom(stats, setNames)
 importFrom(utils, combn)
diff --git a/NEWS b/NEWS
@@ -1,3 +1,7 @@
+Changes from version 1.3.11 to 1.3.12
+ * The package no longer depends on the 'ggm' package.
+ * The package no longer requires the 'XML' package, now suggests instead.
+
 Changes from version 1.3.10 to 1.3.11
  * Fixed inconsistency with function arguments when computing causal effects with surrogate experiments using 'aux.effect'.
  * Fixed a rare issue with simplification.

diff --git a/R/ancestors.R b/R/ancestors.R
@@ -1,6 +1,12 @@
 ancestors <- function(node, G, topo) {
-  an.ind <- unique(unlist(neighborhood(G, order = vcount(G), nodes = node, mode = "in")))
-  an <- V(G)[an.ind]$name
+  an.ind <- unique(unlist(igraph::neighborhood(G, order = igraph::vcount(G), nodes = node, mode = "in")))
+  an <- igraph::V(G)[an.ind]$name
   an <- an %ts% topo
-  return (an)
+  return(an)
+}
+
+ancestors_unsrt <- function(node, G) {
+  an.ind <- unique(unlist(igraph::neighborhood(G, order = igraph::vcount(G), nodes = node, mode = "in")))
+  an <- igraph::V(G)[an.ind]$name
+  return(an)
 }
diff --git a/R/c.components.R b/R/c.components.R
@@ -1,18 +1,18 @@
 c.components <- function(G, topo) {
-  A <- as.matrix(get.adjacency(G))
-  v <- get.vertex.attribute(G, "name")
+  A <- as.matrix(igraph::get.adjacency(G))
+  v <- igraph::get.vertex.attribute(G, "name")
   indices <- which(A >= 1 & t(A) >= 1, arr.ind = TRUE)
   bidirected <- NULL
-  e <- E(G)
+  e <- igraph::E(G)
   if (nrow(indices) > 0) {
     bidirected <- unlist(apply(indices, 1, function(x) {
       e[v[x[1]] %->% v[x[2]]]
     }))
   }
-  G.bidirected <- subgraph.edges(G, bidirected, delete.vertices = FALSE)
-  subgraphs <- decompose.graph(G.bidirected)
+  G.bidirected <- igraph::subgraph.edges(G, bidirected, delete.vertices = FALSE)
+  subgraphs <- igraph::decompose.graph(G.bidirected)
   cc <- lapply(subgraphs, function(x) {
-    v.sub <- get.vertex.attribute(x, "name")
+    v.sub <- igraph::get.vertex.attribute(x, "name")
     return(v.sub %ts% topo)
   })
   cc.rank <- order(sapply(cc, function(x) {

diff --git a/R/causal.effect.R b/R/causal.effect.R
@@ -1,11 +1,11 @@
 causal.effect <- function(y, x, z = NULL, G, expr = TRUE, simp = FALSE, steps = FALSE, primes = FALSE, prune = FALSE, stop_on_nonid = TRUE) {
-  if (length(edge.attributes(G)) == 0) {
-    G <- set.edge.attribute(G, "description", 1:length(E(G)), NA)
+  if (length(igraph::edge.attributes(G)) == 0) {
+    G <- igraph::set.edge.attribute(G, "description", 1:length(igraph::E(G)), NA)
   }
   G.obs <- observed.graph(G)
-  if (!is.dag(G.obs)) stop("Graph 'G' is not a DAG")
-  topo <- topological.sort(G.obs)
-  topo <- get.vertex.attribute(G, "name")[topo]
+  if (!igraph::is.dag(G.obs)) stop("Graph 'G' is not a DAG")
+  topo <- igraph::topological.sort(G.obs)
+  topo <- igraph::get.vertex.attribute(G, "name")[topo]
   if (length(setdiff(y, topo)) > 0) stop("Set 'y' contains variables not present in the graph.")
   if (length(setdiff(x, topo)) > 0) stop("Set 'x' contains variables not present in the graph.")
   if (length(z) > 0 && !identical(z, "")) {
@@ -40,11 +40,8 @@ causal.effect <- function(y, x, z = NULL, G, expr = TRUE, simp = FALSE, steps =
   if (res$tree$call$id) {
     if (simp) {
       G.unobs <- unobserved.graph(G)
-      G.adj <- as.matrix(get.adjacency(G.unobs))
-      topo.u <- topological.sort(G.unobs)
-      topo.u <- get.vertex.attribute(G.unobs, "name")[topo.u]
       res.prob <- deconstruct(res.prob, probability(), topo)
-      res.prob <- parse.expression(res.prob, topo, G.adj, G, G.obs)
+      res.prob <- parse.expression(res.prob, topo, G.unobs, G, G.obs)
     }
     attr(res.prob, "algorithm") <- algo
     attr(res.prob, "query") <- list(y = y, x = x, z = z)

diff --git a/R/causal.parents.R b/R/causal.parents.R
@@ -1,5 +1,5 @@
 causal.parents <- function(node, vi, G, G.obs, topo) {
-  G.vi <- induced.subgraph(G, vi)
+  G.vi <- igraph::induced.subgraph(G, vi)
   cc <- c.components(G.vi, topo)
   t <- Find(function(x) node %in% x, cc)
   pa.t <- parents(t, G.obs)

diff --git a/R/children.R b/R/children.R
@@ -1,6 +1,12 @@
 children <- function(node, G, topo) {
-  ch.ind <- unique(unlist(neighborhood(G, order = 1, nodes = node, mode = "out")))
-  ch <- V(G)[ch.ind]$name
+  ch.ind <- unique(unlist(igraph::neighborhood(G, order = 1, nodes = node, mode = "out")))
+  ch <- igraph::V(G)[ch.ind]$name
   ch <- ch %ts% topo
   return(ch)
 }
+
+children_unsrt <- function(node, G) {
+  ch.ind <- unique(unlist(igraph::neighborhood(G, order = 1, nodes = node, mode = "out")))
+  ch <- igraph::V(G)[ch.ind]$name
+  return(ch)
+}
diff --git a/R/compare.graphs.R b/R/compare.graphs.R
@@ -1,15 +1,15 @@
 compare.graphs <- function(G1, G2) {
-  e1 <- as.data.frame(get.edges(G1, E(G1)))
-  e1[ ,3] <- edge.attributes(G1)
-  e2 <- as.data.frame(get.edges(G2, E(G2)))
-  e2[ ,3] <- edge.attributes(G2)
+  e1 <- as.data.frame(igraph::get.edges(G1, igraph::E(G1)))
+  e1[ ,3] <- igraph::edge.attributes(G1)
+  e2 <- as.data.frame(igraph::get.edges(G2, igraph::E(G2)))
+  e2[ ,3] <- igraph::edge.attributes(G2)
   n1 <- nrow(e1)
   n2 <- nrow(e2)
   if (n1 != n2) return(FALSE)
   if (ncol(e1) == 2) e1$description <- "O"
   if (ncol(e2) == 2) e2$description <- "O"
   e1[which(is.na(e1[,3])), 3] <- "O"
   e2[which(is.na(e2[,3])), 3] <- "O"
-  if (all(duplicated(rbind(e1,e2))[(n1+1):(2*n1)])) return(TRUE)
+  if (all(duplicated(rbind(e1, e2))[(n1+1):(2*n1)])) return(TRUE)
   return(FALSE)
 }
diff --git a/R/connected.R b/R/connected.R
@@ -1,6 +1,6 @@
 connected <- function(y, G, topo) {
-  connected <- unique(unlist(neighborhood(G, order = vcount(G), nodes = y, mode = "all")))
-  co <- V(G)[connected]$name
+  connected <- unique(unlist(igraph::neighborhood(G, order = igraph::vcount(G), nodes = y, mode = "all")))
+  co <- igraph::V(G)[connected]$name
   co <- co %ts% topo
   return(co)
 }
diff --git a/R/dSep.R b/R/dSep.R
@@ -0,0 +1,110 @@
+# Implements relevant path separation (rp-separation) for testing d-separation. For details, see:
+#
+# Relevant Path Separation: A Faster Method for Testing Independencies in Bayesian Networks
+# Cory J. Butz, Andre E. dos Santos, Jhonatan S. Oliveira;
+# Proceedings of the Eighth International Conference on Probabilistic Graphical Models,
+# PMLR 52:74-85, 2016.
+#
+# Note that the roles of Y and Z have been reversed from the paper, meaning that
+# we are testing whether X is separated from Y given Z in G.
+
+dSep <- function(G, x, y, z) {
+  an_z <- ancestors_unsrt(z, G)
+  an_xyz <- ancestors_unsrt(union(union(x, y), z), G)
+  stack_top <- length(x)
+  stack_size <- max(stack_top, 64)
+  stack <- rep(FALSE, stack_size)
+  stack[1:stack_top] <- TRUE
+  names(stack)[1:stack_top] <- x
+  visited_top <- 0
+  visited_size <- 64
+  visited <- rep(FALSE, visited_size)
+  names(visited) <- rep(NA, visited_size)
+  is_visited <- FALSE
+  while (stack_top > 0) {
+    is_visited <- FALSE
+    el <- stack[stack_top]
+    el_name <- names(el)
+    stack_top <- stack_top - 1
+    if (visited_top > 0) {
+      for (i in 1:visited_top) {
+        if (el == visited[i] && identical(el_name, names(visited[i]))) {
+          is_visited <- TRUE
+          break
+        }
+      }
+    }
+    if (!is_visited) {
+      if (el_name %in% y) return(FALSE)
+      visited_top <- visited_top + 1
+      if (visited_top > visited_size) {
+        visited_old <- visited
+        visited_size_old <- visited_size
+        visited_size <- visited_size * 2
+        visited <- rep(FALSE, visited_size)
+        visited[1:visited_size_old] <- visited_old
+        names(visited[1:visited_size_old]) <- names(visited_old)
+      }
+      visited[visited_top] <- el
+      names(visited)[visited_top] <- el_name
+      if (el && !(el_name %in% z)) {
+        visitable_parents <- intersect(setdiff(parents_unsrt(el_name, G), el_name), an_xyz)
+        visitable_children <- intersect(setdiff(children_unsrt(el_name, G), el_name), an_xyz)
+        n_vis_pa <- length(visitable_parents)
+        n_vis_ch <- length(visitable_children)
+        if (n_vis_pa + n_vis_ch > 0) {
+          while (n_vis_pa + n_vis_ch + stack_top > stack_size) {
+            stack_old <- stack
+            stack_size_old <- stack_size
+            stack_size <- stack_size * 2
+            stack <- rep(FALSE, stack_size)
+            stack[1:stack_size_old] <- stack_old
+            names(stack[1:stack_size_old]) <- names(stack_old)
+          }
+          stack_add <- stack_top + n_vis_pa + n_vis_ch
+          stack[(stack_top + 1):(stack_add)] <- c(rep(TRUE, n_vis_pa), rep(FALSE, n_vis_ch))
+          names(stack)[(stack_top + 1):(stack_add)] <- c(visitable_parents, visitable_children)
+          stack_top <- stack_add
+        }
+      } else if (!el) {
+        if (!(el_name %in% z)) {
+          visitable_children <- intersect(setdiff(children_unsrt(el_name, G), el_name), an_xyz)
+          n_vis_ch <- length(visitable_children)
+          if (n_vis_ch > 0) {
+            while (n_vis_ch + stack_top > stack_size) {
+              stack_old <- stack
+              stack_size_old <- stack_size
+              stack_size <- stack_size * 2
+              stack <- rep(FALSE, stack_size)
+              stack[1:stack_size_old] <- stack_old
+              names(stack[1:stack_size_old]) <- names(stack_old)
+            }
+            stack_add <- stack_top + n_vis_ch
+            stack[(stack_top + 1):(stack_add)] <- rep(FALSE, n_vis_ch)
+            names(stack)[(stack_top + 1):(stack_add)] <- visitable_children
+            stack_top <- stack_add
+          }
+        }
+        if (el_name %in% an_z) {
+          visitable_parents <- intersect(setdiff(parents_unsrt(el_name, G), el_name), an_xyz)
+          n_vis_pa <- length(visitable_parents)
+          if (n_vis_pa > 0) {
+            while (n_vis_pa + stack_top > stack_size) {
+              stack_old <- stack
+              stack_size_old <- stack_size
+              stack_size <- stack_size * 2
+              stack <- rep(FALSE, stack_size)
+              stack[1:stack_size_old] <- stack_old
+              names(stack[1:stack_size_old] <- stack_old)
+            }
+            stack_add <- stack_top + n_vis_pa
+            stack[(stack_top + 1):(stack_add)] <- rep(TRUE, n_vis_pa)
+            names(stack)[(stack_top + 1):(stack_add)] <- visitable_parents
+            stack_top <- stack_add
+          }
+        }
+      }
+    }
+  }
+  return(TRUE)
+}
diff --git a/R/descendants.R b/R/descendants.R
@@ -1,6 +1,6 @@
 descendants <- function(node, G, topo) {
-  de.ind <- unique(unlist(neighborhood(G, order = vcount(G), nodes = node, mode = "out")))
-  de <- V(G)[de.ind]$name
+  de.ind <- unique(unlist(igraph::neighborhood(G, order = igraph::vcount(G), nodes = node, mode = "out")))
+  de <- igraph::V(G)[de.ind]$name
   de <- de %ts% topo
   return(de)
 }
diff --git a/R/descendent.sets.R b/R/descendent.sets.R
@@ -15,13 +15,12 @@ descendent.sets <- function(node, s, G.s.obs, topo) {
   desc <- desc[!vapply(desc, is.null, logical(1))]
   n.desc <- length(desc)
   if (n.desc > 0) {
-    desc.pow <- powerset(1:n.desc, nonempty = TRUE)
+    desc.pow <- powerset(1:n.desc)[-1]
     n.sets <- 2^n.desc - 1
     D <- vector(mode = "list", length = n.sets)
     for (i in 1:n.sets) {
       D[[i]] <- Reduce(union, desc[desc.pow[[i]]])
     }
-    # cat("Descendant sets of ", s, " not containing ", node, " are" , as.character(unique(D)), "\n")
     return(unique(D))
   }
   return(list())

diff --git a/R/exclusion.restrictions.R b/R/exclusion.restrictions.R
@@ -1,12 +1,12 @@
 exclusion.restrictions <- function(G) {
   G.obs <- observed.graph(G)
-  topo <- topological.sort(G.obs)
-  v <- get.vertex.attribute(G, "name")[topo]
+  topo <- igraph::topological.sort(G.obs)
+  v <- igraph::get.vertex.attribute(G, "name")[topo]
   ex <- lapply(v, function(y) {
     pa <- setdiff(parents(y, G.obs, topo), y)
     Z <- setdiff(v, union(y, pa))
     if (length(Z) > 0) {
-      Z.pow <- powerset(setdiff(v, union(y, pa)), nonempty = TRUE)
+      Z.pow <- powerset(setdiff(v, union(y, pa)))[-1]
       return(list(pa = pa, Z = Z.pow))
     } else return(NULL)
   })

diff --git a/R/gather.R b/R/gather.R
diff --git a/R/generalize.R b/R/generalize.R
@@ -1,21 +1,21 @@
 generalize <- function(y, x, Z, D, expr = TRUE, simp = FALSE, steps = FALSE, primes = FALSE, stop_on_nonid = TRUE) {
   d <- length(D)
   z <- length(Z)
-  v <- get.vertex.attribute(D[[1]], "name")
-  s <- v[which(vertex.attributes(D[[1]])$description == "S")]
+  v <- igraph::get.vertex.attribute(D[[1]], "name")
+  s <- v[which(igraph::vertex.attributes(D[[1]])$description == "S")]
   if (length(s) > 0) stop("The causal diagram cannot contain selection variables.")
   if (d != z) stop("Number of available experiments does not match number of domains.")
   if (length(intersect(x, y)) > 0) stop("Sets 'x' and 'y' are not disjoint.")
-  topo <- lapply(D, function(k) topological.sort(observed.graph(k)))
-  topo <- lapply(1:d, function(k) get.vertex.attribute(D[[k]], "name")[topo[[k]]])
+  topo <- lapply(D, function(k) igraph::topological.sort(observed.graph(k)))
+  topo <- lapply(1:d, function(k) igraph::get.vertex.attribute(D[[k]], "name")[topo[[k]]])
   D <- lapply(D, function(k) {
-    if (length(edge.attributes(k)) == 0) {
-      k <- set.edge.attribute(k, "description", 1:length(E(k)), NA)
+    if (length(igraph::edge.attributes(k)) == 0) {
+      k <- igraph::set.edge.attribute(k, "description", 1:length(igraph::E(k)), NA)
     }
     return(k)
   })
   for (i in 1:d) {
-    if (!is.dag(observed.graph(D[[i]]))) {
+    if (!igraph::is.dag(observed.graph(D[[i]]))) {
       if (i > 1) stop("Selection diagram 'D[", i, "]' is not a DAG.")
       else stop("Causal diagram 'D[", i, "]' is not a DAG.")
     }