Skip to content

Commit

Permalink
Fixed bug in DiskDGraph loading
Browse files Browse the repository at this point in the history
  • Loading branch information
pbloem committed Aug 23, 2016
1 parent f80beb1 commit 4e90420
Show file tree
Hide file tree
Showing 7 changed files with 88,431 additions and 42 deletions.
11 changes: 6 additions & 5 deletions nodes/pom.xml
Original file line number Diff line number Diff line change
Expand Up @@ -114,11 +114,12 @@
<artifactId>mapdb</artifactId>
<version>3.0.1</version>
</dependency>
<dependency>
<groupId>com.github.pbloem</groupId>
<artifactId>externalsortinginjava</artifactId>
<version>v0.1.0</version>
</dependency>

<dependency>
<groupId>com.github.pbloem</groupId>
<artifactId>externalsortinginjava</artifactId>
<version>v0.1.0</version>
</dependency>
</dependencies>

<repositories>
Expand Down
48 changes: 29 additions & 19 deletions nodes/src/main/java/org/nodes/DiskDGraph.java
Original file line number Diff line number Diff line change
Expand Up @@ -94,6 +94,8 @@ public DiskDGraph(File dir)
*/
public DiskDGraph(File dir, boolean nullLabels)
{
this.nullLabels = nullLabels;

dir.mkdirs();
File dbFile = new File(dir, "graph."+id+".db");

Expand Down Expand Up @@ -523,11 +525,11 @@ public List<DLink<String>> links()

List<DLink<String>> list = new ArrayList<DLink<String>>(degree());

for(int neighbor : out.get(index))
for(int neighbor : out.get((int)index))
list.add(new DiskDLink(index, neighbor));

for(int neighbor : in.get(index))
if(neighbor != index) // no double reflexive links
for(int neighbor : in.get((int)index))
if(neighbor != ((int)index)) // no double reflexive links
list.add(new DiskDLink(neighbor, index));

return list;
Expand All @@ -539,7 +541,7 @@ public List<DLink<String>> linksOut()
check();

List<DLink<String>> list = new ArrayList<DLink<String>>(outDegree());
for(int neighbor : out.get(index))
for(int neighbor : out.get((int)index))
list.add(new DiskDLink(index, neighbor));

return list;
Expand All @@ -551,7 +553,7 @@ public List<DLink<String>> linksIn()
check();

List<DLink<String>> list = new ArrayList<DLink<String>>(inDegree());
for(int neighbor : in.get(index))
for(int neighbor : in.get((int)index))
list.add(new DiskDLink(neighbor, index));

return list;
Expand All @@ -565,12 +567,12 @@ public Collection<? extends DLink<String>> links(Node<String> other)
List<DLink<String>> list = new ArrayList<DLink<String>>();

int o = other.index();
for(int neighbor : out.get(index))
for(int neighbor : out.get((int)index))
if(neighbor == o)
list.add(new DiskDLink(index, neighbor));

if(index != o)
for(int neighbor : in.get(index))
for(int neighbor : in.get((int)index))
if(neighbor == o)
list.add(new DiskDLink(neighbor, index));

Expand All @@ -585,8 +587,8 @@ public Collection<? extends DLink<String>> linksOut(DNode<String> other)
List<DLink<String>> list = new ArrayList<DLink<String>>(outDegree());

int o = other.index();
for(int neighbor : out.get(index))
if(neighbor == o)
for(int neighbor : out.get((int)index))
if(((int)neighbor) == ((int)o))
list.add(new DiskDLink(index, neighbor));

return list;
Expand All @@ -600,7 +602,7 @@ public Collection<? extends DLink<String>> linksIn(DNode<String> other)
List<DLink<String>> list = new ArrayList<DLink<String>>(inDegree());

int o = other.index();
for(int neighbor : in.get(index))
for(int neighbor : in.get((int)index))
if(neighbor == o)
list.add(new DiskDLink(neighbor, index));

Expand All @@ -612,7 +614,6 @@ private class DiskDLink implements DLink<String>
{
private DNode<String> from, to;


private long nodeModState = nodeModCount;

private boolean dead = false;
Expand Down Expand Up @@ -901,7 +902,7 @@ private void read()
public DNode<String> add(String label)
{

if(!nullLabels && label != null)
if(nullLabels && label != null)
throw new IllegalArgumentException("Graph is set to null labels only.");

if(! nullLabels)
Expand Down Expand Up @@ -1169,11 +1170,12 @@ public static DiskDGraph fromFile(File file, File dir)
// * sort the input file by first element
File forward = new File(dir, "forward.edgelist");

ExternalSort.mergeSortedFiles(
ExternalSort.sortInBatch(

List<File> files = ExternalSort.sortInBatch(
file,
new LComp(true), ExternalSort.DEFAULTMAXTEMPFILES,
Charset.defaultCharset(), dir, false), forward);
Charset.defaultCharset(), dir, false);
ExternalSort.mergeSortedFiles(files, forward, new LComp(true), Charset.defaultCharset());

System.out.println("Forward sort finished");

Expand All @@ -1184,11 +1186,11 @@ public static DiskDGraph fromFile(File file, File dir)
forward.delete();
File backward = new File(dir, "backward.edgelist");

ExternalSort.mergeSortedFiles(
ExternalSort.sortInBatch(
files = ExternalSort.sortInBatch(
file,
new LComp(false), ExternalSort.DEFAULTMAXTEMPFILES,
Charset.defaultCharset(), dir, false), backward);
Charset.defaultCharset(), dir, false);
ExternalSort.mergeSortedFiles(files, backward, new LComp(false), Charset.defaultCharset());

System.out.println("Backward sort finished");

Expand Down Expand Up @@ -1272,9 +1274,17 @@ private static long readSorted(List<List<Integer>> list, File file, boolean forw

if(a != (int) current)
{
list.add(neighbors);
try {
list.add(neighbors);
} catch(AssertionError e)
{
throw new AssertionError("Failed to add list to IndexTreeList. current list size: "+list.size()+", list to be added "+neighbors);
}
neighbors.clear();

if(a < list.size())
throw new IllegalStateException("Next index is "+a+", while list size is already " + list.size() + ". It seems like the sorting ot the file went wrong.");

while(list.size() < a)
list.add(Collections.EMPTY_LIST);

Expand Down
20 changes: 20 additions & 0 deletions nodes/src/main/java/org/nodes/data/Examples.java
Original file line number Diff line number Diff line change
Expand Up @@ -148,4 +148,24 @@ public static UGraph<String> citations()
throw new RuntimeException("Could not load the file for the citations graph from the classpath.", e);
}
}

/**
 * Loads a snapshot of part of the Gnutella P2P network as a directed graph.
 * Source: http://snap.stanford.edu/data/p2p-Gnutella30.html
 *
 * The edge list is read from the classpath resource
 * <code>graphs/p2p/p2p.txt</code>.
 *
 * @return The directed graph produced by {@link Data#edgeListDirected(File)}
 *         for the p2p edge list.
 * @throws RuntimeException If the resource is not on the classpath, cannot
 *         be resolved to a file on disk, or cannot be read.
 */
public static DGraph<String> p2p()
{
	String resource = "graphs/p2p/p2p.txt";

	java.net.URL url = Examples.class.getClassLoader().getResource(resource);
	if(url == null)
		throw new RuntimeException("Could not load the file for the P2P graph from the classpath. Resource not found: " + resource);

	File file;
	try
	{
		// * URL.getFile() returns the path with URL escapes intact (%20 for
		//   a space, for instance). Converting through a URI decodes them.
		file = new File(url.toURI());
	} catch (java.net.URISyntaxException e)
	{
		throw new RuntimeException("Could not load the file for the P2P graph from the classpath.", e);
	} catch (IllegalArgumentException e)
	{
		// * Thrown by File(URI) when the resource is not a plain file
		//   (inside a jar, for instance).
		throw new RuntimeException("Could not load the file for the P2P graph from the classpath.", e);
	}

	try
	{
		return Data.edgeListDirected(file);
	} catch (IOException e)
	{
		throw new RuntimeException("Could not load the file for the P2P graph from the classpath.", e);
	}
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -63,6 +63,7 @@ public List<Integer> generate()

result.clear();
result.add(Global.random().nextInt(graph.size()));

restarts ++;
if(restarts > RESTARTS)
checkLCC(depth);
Expand Down
60 changes: 42 additions & 18 deletions nodes/src/test/java/org/nodes/DiskDGraphTest.java
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,7 @@
import org.junit.Test;
import org.nodes.data.Data;
import org.nodes.data.Examples;
import org.nodes.random.RandomGraphs;
import org.omg.Messaging.SyncScopeHelper;

import nl.peterbloem.kit.FileIO;
Expand Down Expand Up @@ -279,34 +280,38 @@ public void testImportBig()
System.out.println(graph.numLinks());
}


/**
* Run with low heap space...
*
* @throws IOException
*/
@Test
public void testImport()
throws IOException
{
Global.randomSeed();

FileIO.copy("graphs/citations/citations.txt", DIR);
FileIO.copy("graphs/p2p/p2p.txt", DIR);

DGraph<String> diskGraph = DiskDGraph.fromFile(new File(DIR, "citations.txt"), DIR);
DGraph<String> diskGraph = DiskDGraph.fromFile(new File(DIR, "p2p.txt"), DIR);
assertEquals(diskGraph.size(), new ArrayList<DNode<String>>(diskGraph.nodes()).size());
assertEquals(diskGraph.numLinks(), new ArrayList<DLink<String>>(diskGraph.links()).size());

DGraph<String> memGraph = Data.edgeListDirectedUnlabeled(new File(DIR, "citations.txt"), true);

// diskGraph = LightDGraph.copy(diskGraph);

for(int i : series(memGraph.size()))
{
String a = diskGraph.get(i).out() + " " + diskGraph.get(i).in();
String b = memGraph.get(i).out() + " " + memGraph.get(i).in();

if(! a.equals(b))
{
System.out.println("d " + a);
System.out.println("m " + b);
}
DGraph<String> memGraph = Data.edgeListDirectedUnlabeled(new File(DIR, "p2p.txt"), true);

}
// for(int i : series(memGraph.size()))
// {
// String a = diskGraph.get(i).out() + " " + diskGraph.get(i).in();
//
// String b = memGraph.get(i).out() + " " + memGraph.get(i).in();
//
// if(! a.equals(b))
// {
// System.out.println("d " + a);
// System.out.println("m " + b);
// }
// }

assertEquals(memGraph, diskGraph);
}
Expand Down Expand Up @@ -578,7 +583,26 @@ public void testNeighborsFast()
}
}

@After
/**
 * Copies the directed JBC example graph to disk, extracts the subgraph for
 * the node indices 13, 15 and 16, and checks the number of nodes and links
 * of the result.
 */
@Test
public void testJBC()
{
	List<Integer> indices = Arrays.asList(13, 15, 16);

	DGraph<String> onDisk = DiskDGraph.copy(Graphs.jbcDirected(), DIR);
	DGraph<String> extracted = Subgraph.dSubgraphIndices(onDisk, indices);

	System.out.println(extracted);

	assertEquals(3, extracted.size());
	assertEquals(2, extracted.numLinks());
}

// @After
public void cleanup()
{
for(File file : DIR.listFiles())
Expand Down
1 change: 1 addition & 0 deletions nodes/src/test/resources/graphs/p2p/metadata.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
http://snap.stanford.edu/data/p2p-Gnutella30.html
Loading

0 comments on commit 4e90420

Please sign in to comment.