[SOLVED] Parsing GEXF in PHP

All questions about the GEXF format (see http://gexf.net first)
Post Reply [phpBB Debug] PHP Warning: in file [ROOT]/vendor/twig/twig/lib/Twig/Extension/Core.php on line 1275: count(): Parameter must be an array or an object that implements Countable
elijah
Gephi Community Support
Posts:169
Joined:11 Sep 2010 18:09
Location:Stanford, CA
Contact:
[SOLVED] Parsing GEXF in PHP

Post by elijah » 05 Jun 2011 04:22

I'm using PHP to translate a .gexf output to JSON to look into representation of Gephi output in Protovis/D3 but I'm having a bear of a time accessing the viz: attributes of a node. For some reason, when I check the children of node, I see "attvalues" but I don't see viz:size, viz:position or viz:color. I'm just using simplexml_load_file so if someone has a better process or a built parser, let me know.

Of course, the real thing to do is build a Protovis/D3 exporter (unfortunately, it doesn't parse id values for source/target but instead treats the id value as the order of the node in the JSON) which I suppose I should do.

User avatar
jacomyma
Gephi Core Developer
Posts:61
Joined:09 Feb 2010 23:23
Contact:

Re: Parsing GEXF in PHP

Post by jacomyma » 05 Jun 2011 07:29

I had a similar need and I used DOMXPath that works well. You have to register the namespaces with the xPath object:

Code: Select all

# Bind the prefixes used in the XPath queries to namespace URIs.
# NOTE(review): the viz URI is written with three slashes; it has to match the
# xmlns:viz declared in the GEXF file character for character, so check it
# against your own export before changing it.
$gexfNamespaces = array(
	'gexf' => 'http://www.gexf.net/1.1draft',
	'viz' => 'http:///www.gexf.net/1.1draft/viz',
);
foreach($gexfNamespaces as $prefix => $uri){
	$xp->registerNamespace($prefix, $uri);
}
After that, queries give you all the attributes you want, but you have to specify the namespace prefixes explicitly in each query:

Code: Select all

# Every step of the path carries the 'gexf' prefix registered on $xp.
$nodeAttributesPath = "//gexf:graph[1]/gexf:attributes[@class='node']/gexf:attribute";
$nodesattributes = $xp->query($nodeAttributesPath);
Here is all my code (stores the gexf in a database). I don't claim it's clean or well done, but it works and could help you...
NB: dbh is the database handler.

Code: Select all

<?php

# GEXF import.
# Loads the GEXF file with DOM, inspects the default namespace of the root
# <gexf> element to pick the format version, then through $this->dbh:
#   1. creates the "nodes" and "edges" tables (one column per declared
#      attribute) and records each attribute in nodesattributes/edgesattributes,
#   2. inserts one row per node and per edge,
#   3. queues and finally runs the inserts for "liststring" (tag) attributes.
# Stops with die() on an unsupported attribute type or GEXF version.
$gexfDom = new DomDocument();
$gexfDom->load($file->get('path'));
$xp = new DOMXPath($gexfDom);

# Read the root namespace once. If the file could not be parsed or has no
# <gexf> element, item(0) is null; fall through to the "unsupported" branch
# instead of calling a method on null.
$gexfRoot = $gexfDom->getElementsByTagName('gexf')->item(0);
$gexfNamespace = $gexfRoot ? $gexfRoot->getAttribute('xmlns') : null;

if($gexfNamespace === "http://www.gephi.org/gexf"){
	# 'Old style' GEXF
	$xp->registerNamespace('gexf', 'http://www.gephi.org/gexf');
	$xp->registerNamespace('viz', 'http://www.gephi.org/gexf/viz');

	$this->dbh->beginTransaction();
	# Nodes table
	$nodesQuery = "CREATE TABLE nodes (id INTEGER PRIMARY KEY AUTOINCREMENT, importId TEXT, label TEXT, x REAL, y REAL, size REAL";
	# Nodes Attributes
	$nodesattributes = $xp->query("//gexf:graph[1]/gexf:attributes[@class='node']/gexf:attribute");
	$existingNames = array("id", "importId", "label", "x", "y", "size");
	foreach($nodesattributes as $attribute){
		# Column name: attribute id reduced to [a-z0-9_], prefixed with "old_"
		# until it no longer collides with a column that already exists.
		$name = $attribute->getAttribute('id');
		$name = preg_replace('/[^a-z0-9_]*/i','',$name);
		while(in_array($name, $existingNames)){
			$name = "old_$name";
		}
		$existingNames[] = $name;
		$name = $this->dbh->quote($name);
		$title = $this->dbh->quote($attribute->getAttribute('title'));
		$default = $this->dbh->quote($attribute->getAttribute('default'));
		$truetype = strtolower($attribute->getAttribute('type'));
		# Map the GEXF type to a column type. "liststring" gets no column:
		# its values are stored as tags further down.
		if($truetype == "integer" || $truetype == "long"){
			$type = "INTEGER";
			$nodesQuery .= ", $name $type DEFAULT $default";
		} else if($truetype == "double" || $truetype == "float"){
			$type = "REAL";
			$nodesQuery .= ", $name $type DEFAULT $default";
		} else if($truetype == "boolean" || $truetype == "string" || $truetype == "anyuri"){
			$type = "TEXT";
			$nodesQuery .= ", $name $type DEFAULT $default";
		} else if($truetype == "liststring"){
			$type = "TAGS";
		} else {
			die("Error (graph): unsupported attribute type ".strtolower($attribute->getAttribute('type')));
		}
		$truetype = $this->dbh->quote($truetype);
		$type = $this->dbh->quote($type);
		$this->dbh->exec(utf8_decode("INSERT INTO nodesattributes ('name', 'title', 'truetype', 'type', 'defaultvalue', 'viztype') VALUES ($name, $title, $truetype, $type, $default, 'unpublished')"));
	}
	$nodesQuery .= ")";
	$this->dbh->exec(utf8_decode($nodesQuery));
	$this->dbh->commit();

	$this->dbh->beginTransaction();
	# Edges table
	$edgesQuery = "CREATE TABLE edges (id INTEGER PRIMARY KEY AUTOINCREMENT, importId TEXT, source INTEGER, target INTEGER";
	# Edges Attributes
	$edgesattributes = $xp->query("//gexf:graph[1]/gexf:attributes[@class='edge']/gexf:attribute");
	$existingNames = array("id", "importId", "source", "target");
	foreach($edgesattributes as $attribute){
		# Same naming and type mapping as for the node attributes above.
		$name = $attribute->getAttribute('id');
		$name = preg_replace('/[^a-z0-9_]*/i','',$name);
		while(in_array($name, $existingNames)){
			$name = "old_$name";
		}
		$existingNames[] = $name;
		$name = $this->dbh->quote($name);
		$title = $this->dbh->quote($attribute->getAttribute('title'));
		$default = $this->dbh->quote($attribute->getAttribute('default'));
		$truetype = strtolower($attribute->getAttribute('type'));
		if($truetype == "integer" || $truetype == "long"){
			$type = "INTEGER";
			$edgesQuery .= ", $name $type DEFAULT $default";
		} else if($truetype == "double" || $truetype == "float"){
			$type = "REAL";
			$edgesQuery .= ", $name $type DEFAULT $default";
		} else if($truetype == "boolean" || $truetype == "string" || $truetype == "anyuri"){
			$type = "TEXT";
			$edgesQuery .= ", $name $type DEFAULT $default";
		} else if($truetype == "liststring"){
			$type = "TAGS";
		} else {
			die("Error (graph): unsupported attribute type ".strtolower($attribute->getAttribute('type')));
		}
		$truetype = $this->dbh->quote($truetype);
		$type = $this->dbh->quote($type);
		$this->dbh->exec(utf8_decode("INSERT INTO 'edgesattributes' ('name', 'title', 'truetype', 'type', 'defaultvalue', 'viztype') VALUES ($name, $title, $truetype, $type, $default, 'unpublished')"));
	}
	$edgesQuery .= ")";
	$this->dbh->exec(utf8_decode($edgesQuery));
	$this->dbh->commit();

	# Defined elsewhere; presumably refreshes the attribute definitions that
	# the get*Attribute* lookups below rely on - TODO confirm.
	$this->loadAttributes();
	# Tag inserts select from nodes/edges, so they are collected here and run
	# only after all rows exist.
	$tagQueries = array();

	# Fill nodes table
	$this->dbh->beginTransaction();
	$nodes = $xp->query("//gexf:graph[1]/gexf:nodes/gexf:node");
	foreach($nodes as $node){
		$query_insert = "INSERT INTO nodes ('importid', 'label', 'x', 'y', 'size'";
		$importid = $this->dbh->quote($node->getAttribute('id'));
		$label = $this->dbh->quote($node->getAttribute('label'));
		# Fallbacks used when the node carries no viz information.
		$x = rand(-100, 100);
		$y = rand(-100, 100);
		$size = 1;
		# Query relative to the current node (no id spliced into the XPath) and
		# test ->length: DOMXPath::query() returns a DOMNodeList object even
		# when nothing matches, so its return value alone is always truthy.
		$positionElement = $xp->query("viz:position[1]", $node);
		if($positionElement && $positionElement->length > 0){
			$x = $this->dbh->quote($positionElement->item(0)->getAttribute('x'));
			$y = $this->dbh->quote($positionElement->item(0)->getAttribute('y'));
		}
		$sizeElement = $xp->query("viz:size[1]", $node);
		if($sizeElement && $sizeElement->length > 0){
			$size = $this->dbh->quote($sizeElement->item(0)->getAttribute('value'));
		}
		$query_values = "VALUES ($importid, $label, $x, $y, $size";
		if($node->getElementsByTagName('attvalues')->length > 0 && $node->getElementsByTagName('attvalues')->item(0)->getElementsByTagName('attvalue')->length > 0){
			foreach($node->getElementsByTagName('attvalues')->item(0)->getElementsByTagName('attvalue') as $value){
				# The file's attribute id is shifted by one before the lookup.
				$attId = $value->getAttribute('id') + 1;
				$attValue = $this->dbh->quote($value->getAttribute('value'));
				$att = $this->getNodeAttributeById($attId);
				$attName = $this->dbh->quote($att['name']);
				if($att['type'] == 'TAGS'){
					# "a|b|c" becomes one nodestags row per non-empty tag.
					$tags = explode("|", $value->getAttribute('value'));
					foreach($tags as $tag){
						$tag = trim($tag);
						if($tag != ""){
							$tag = $this->dbh->quote($tag);
							$tagQueries[] = "INSERT INTO nodestags (attribute, node, value) SELECT $attId, n.id, $tag FROM nodes n WHERE n.importId=$importid";
						}
					}
				} else {
					$query_insert .= ", $attName";
					$query_values .= ", $attValue";
				}
			}
		}
		$this->dbh->exec(utf8_decode("$query_insert ) $query_values )"));
	}
	$this->dbh->commit();

	# Fill edges table
	$this->dbh->beginTransaction();
	$edges = $xp->query("//gexf:graph[1]/gexf:edges/gexf:edge");
	foreach($edges as $edge){
		# Edges are inserted with INSERT ... SELECT so that the file's
		# source/target ids are translated to the nodes table's own ids.
		$query_insert = "INSERT INTO 'edges' ('importid', 'source', 'target'";
		$importid = $this->dbh->quote($edge->getAttribute('id'));
		$source = $this->dbh->quote($edge->getAttribute('source'));
		$target = $this->dbh->quote($edge->getAttribute('target'));
		$query_select = "SELECT $importid, sourcenodes.id, targetnodes.id";
		$query_from = " FROM nodes sourcenodes, nodes targetnodes WHERE sourcenodes.importId=$source AND targetnodes.importId=$target";
		if($edge->getElementsByTagName('attvalues')->length > 0 && $edge->getElementsByTagName('attvalues')->item(0)->getElementsByTagName('attvalue')->length > 0){
			foreach($edge->getElementsByTagName('attvalues')->item(0)->getElementsByTagName('attvalue') as $value){
				$attId = $value->getAttribute('id') + 1;
				$attValue = $this->dbh->quote($value->getAttribute('value'));
				# NOTE(review): an edge attribute is resolved through the *node*
				# lookup here; an edge-specific by-id lookup is not visible in
				# this code - confirm which one is intended.
				$att = $this->getNodeAttributeById($attId);
				$attName = $this->dbh->quote($att['name']);
				if($att['type'] == 'TAGS'){
					$tags = explode("|", $value->getAttribute('value'));
					foreach($tags as $tag){
						$tag = trim($tag);
						if($tag != ""){
							$tag = $this->dbh->quote($tag);
							$tagQueries[] = "INSERT INTO 'edgestags' (attribute, edge, value) SELECT $attId, e.id, $tag FROM edges e WHERE e.importId=$importid";
						}
					}
				} else {
					# The value must extend the SELECT list so it lines up with
					# the column appended to the INSERT list.
					$query_insert .= ", $attName";
					$query_select .= ", $attValue";
				}
			}
		}
		$this->dbh->exec(utf8_decode("$query_insert ) $query_select $query_from"));
	}
	$this->dbh->commit();

	# Execute the tag queries
	$this->dbh->beginTransaction();
	foreach($tagQueries as $query){
		$this->dbh->exec(utf8_decode($query));
	}
	$this->dbh->commit();



} else if($gexfNamespace === "http://www.gexf.net/1.1draft"){
	# 1.1 draft GEXF
	$xp->registerNamespace('gexf', 'http://www.gexf.net/1.1draft');
	# NOTE(review): three slashes after "http:" - the URI must equal the
	# xmlns:viz declared in the files being imported; confirm before changing.
	$xp->registerNamespace('viz', 'http:///www.gexf.net/1.1draft/viz');

	$this->dbh->beginTransaction();
	# Nodes table
	$nodesQuery = "CREATE TABLE nodes (id INTEGER PRIMARY KEY AUTOINCREMENT, importid TEXT, label TEXT, x REAL, y REAL, size REAL";
	# Nodes Attributes
	$nodesattributes = $xp->query("//gexf:graph[1]/gexf:attributes[@class='node']/gexf:attribute");
	$existingNames = array("id", "importid", "label", "x", "y", "size");
	foreach($nodesattributes as $attribute){
		# Column name: "attr_" + attribute id, reduced to [a-z0-9_] and
		# prefixed with "old_" while it collides with an existing column.
		$name = "attr_".$attribute->getAttribute('id');
		$name = preg_replace('/[^a-z0-9_]*/i','',$name);
		while(in_array($name, $existingNames)){
			$name = "old_$name";
		}
		$existingNames[] = $name;
		$name = $this->dbh->quote($name);
		$title = $this->dbh->quote($attribute->getAttribute('title'));
		$default = $this->dbh->quote($attribute->getAttribute('default'));
		$truetype = strtolower($attribute->getAttribute('type'));
		if($truetype == "int" || $truetype == "integer" || $truetype == "long"){
			$type = "INTEGER";
			$nodesQuery .= ", $name $type DEFAULT $default";
		} else if($truetype == "double" || $truetype == "float"){
			$type = "REAL";
			$nodesQuery .= ", $name $type DEFAULT $default";
		} else if($truetype == "boolean" || $truetype == "string" || $truetype == "anyuri"){
			$type = "TEXT";
			$nodesQuery .= ", $name $type DEFAULT $default";
		} else if($truetype == "liststring"){
			$type = "TAGS";
		} else {
			die("Error (graph): unsupported attribute type ".strtolower($attribute->getAttribute('type')));
		}
		$truetype = $this->dbh->quote($truetype);
		$type = $this->dbh->quote($type);
		$this->dbh->exec(utf8_decode("INSERT INTO nodesattributes ('name', 'title', 'truetype', 'type', 'defaultvalue', 'viztype') VALUES ($name, $title, $truetype, $type, $default, 'unpublished')"));
	}
	$nodesQuery .= ")";
	$this->dbh->exec(utf8_decode($nodesQuery));
	$this->dbh->commit();

	$this->dbh->beginTransaction();
	# Edges table
	$edgesQuery = "CREATE TABLE edges (id INTEGER PRIMARY KEY AUTOINCREMENT, importid TEXT, source INTEGER, target INTEGER";
	# Edges Attributes
	$edgesattributes = $xp->query("//gexf:graph[1]/gexf:attributes[@class='edge']/gexf:attribute");
	$existingNames = array("id", "importid", "source", "target", "weight");
	foreach($edgesattributes as $attribute){
		$name = "attr_".$attribute->getAttribute('id');
		$name = preg_replace('/[^a-z0-9_]*/i','',$name);
		while(in_array($name, $existingNames)){
			$name = "old_$name";
		}
		$existingNames[] = $name;
		$name = $this->dbh->quote($name);
		$title = $this->dbh->quote($attribute->getAttribute('title'));
		$default = $this->dbh->quote($attribute->getAttribute('default'));
		$truetype = strtolower($attribute->getAttribute('type'));
		if($truetype == "int" || $truetype == "integer" || $truetype == "long"){
			$type = "INTEGER";
			$edgesQuery .= ", $name $type DEFAULT $default";
		} else if($truetype == "double" || $truetype == "float"){
			$type = "REAL";
			$edgesQuery .= ", $name $type DEFAULT $default";
		} else if($truetype == "boolean" || $truetype == "string" || $truetype == "anyuri"){
			$type = "TEXT";
			$edgesQuery .= ", $name $type DEFAULT $default";
		} else if($truetype == "liststring"){
			$type = "TAGS";
		} else {
			die("Error (graph): unsupported attribute type ".strtolower($attribute->getAttribute('type')));
		}
		$truetype = $this->dbh->quote($truetype);
		$type = $this->dbh->quote($type);
		$this->dbh->exec(utf8_decode("INSERT INTO 'edgesattributes' ('name', 'title', 'truetype', 'type', 'defaultvalue', 'viztype') VALUES ($name, $title, $truetype, $type, $default, 'unpublished')"));
	}
	$edgesQuery .= ")";
	$this->dbh->exec(utf8_decode($edgesQuery));
	$this->dbh->commit();

	# Defined elsewhere; presumably refreshes the attribute definitions that
	# the get*AttributeByName lookups below rely on - TODO confirm.
	$this->loadAttributes();
	$tagQueries = array();

	# Fill nodes table
	$this->dbh->beginTransaction();
	$nodes = $xp->query("//gexf:graph[1]/gexf:nodes/gexf:node");
	foreach($nodes as $node){
		$query_insert = "INSERT INTO nodes ('importid', 'label', 'x', 'y', 'size'";
		$importid = $this->dbh->quote($node->getAttribute('id'));
		$label = $this->dbh->quote($node->getAttribute('label'));
		# Fallbacks used when the node carries no viz information.
		$x = rand(-100, 100);
		$y = rand(-100, 100);
		$size = 1;
		# Relative query + ->length check: an empty DOMNodeList is still a
		# truthy object, so the query result alone cannot signal "not found".
		$positionElement = $xp->query("viz:position[1]", $node);
		if($positionElement && $positionElement->length > 0){
			$x = $this->dbh->quote($positionElement->item(0)->getAttribute('x'));
			$y = $this->dbh->quote($positionElement->item(0)->getAttribute('y'));
		}
		$sizeElement = $xp->query("viz:size[1]", $node);
		if($sizeElement && $sizeElement->length > 0){
			$size = $this->dbh->quote($sizeElement->item(0)->getAttribute('value'));
		}
		$query_values = "VALUES ($importid, $label, $x, $y, $size";

		foreach($node->getElementsByTagName('attvalue') as $value){
			# Rebuild the column name the same way it was built at table creation.
			$attName = "attr_".preg_replace('/[^a-z0-9_]*/i','',$value->getAttribute('for'));
			$attValue = $this->dbh->quote($value->getAttribute('value'));
			$att = $this->getNodeAttributeByName($attName);
			$attName = $this->dbh->quote($att['name']);
			if($att['type'] == 'TAGS'){
				$tags = explode("|", $value->getAttribute('value'));
				foreach($tags as $tag){
					$tag = $this->dbh->quote($tag);
					$tagQueries[] = "INSERT INTO nodestags (attribute, node, value) SELECT a.id, n.id, $tag FROM nodes n, nodesattributes a WHERE n.importid=$importid AND a.name=$attName";
				}
			} else {
				$query_insert .= ", $attName";
				$query_values .= ", $attValue";
			}
		}
		$this->dbh->exec(utf8_decode("$query_insert ) $query_values )"));
	}
	$this->dbh->commit();

	# Fill edges table
	$this->dbh->beginTransaction();
	$edges = $xp->query("//gexf:graph[1]/gexf:edges/gexf:edge");
	foreach($edges as $edge){
		# INSERT ... SELECT translates the file's source/target ids into the
		# nodes table's own ids.
		$query_insert = "INSERT INTO 'edges' ('importid', 'source', 'target'";
		$importid = $this->dbh->quote($edge->getAttribute('id'));
		$source = $this->dbh->quote($edge->getAttribute('source'));
		$target = $this->dbh->quote($edge->getAttribute('target'));
		$query_select = "SELECT $importid, sourcenodes.id, targetnodes.id";
		$query_from = " FROM nodes sourcenodes, nodes targetnodes WHERE sourcenodes.importid=$source AND targetnodes.importid=$target";

		foreach($edge->getElementsByTagName('attvalue') as $value){
			$attName = "attr_".preg_replace('/[^a-z0-9_]*/i','',$value->getAttribute('for'));
			$attValue = $this->dbh->quote($value->getAttribute('value'));
			$att = $this->getEdgeAttributeByName($attName);
			$attName = $this->dbh->quote($att['name']);
			# Skip values whose attribute lookup produced an empty name.
			if($attName != "''"){
				if($att['type'] == 'TAGS'){
					# Split the raw value, not the already-quoted $attValue, and
					# resolve the attribute id by name as the node branch does
					# (no $attId exists in this branch).
					# Assumes edgesattributes has an id column like
					# nodesattributes, and that edgestags uses the
					# (attribute, edge, value) columns seen in the old-style
					# branch - TODO confirm against the schema.
					$tags = explode("|", $value->getAttribute('value'));
					foreach($tags as $tag){
						$tag = $this->dbh->quote($tag);
						$tagQueries[] = "INSERT INTO 'edgestags' (attribute, edge, value) SELECT a.id, e.id, $tag FROM edges e, edgesattributes a WHERE e.importid=$importid AND a.name=$attName";
					}
				} else {
					$query_insert .= ", $attName";
					$query_select .= ", $attValue";
				}
			}
		}
		$this->dbh->exec(utf8_decode("$query_insert ) $query_select $query_from"));
	}
	$this->dbh->commit();

	# Execute the tag queries
	$this->dbh->beginTransaction();
	foreach($tagQueries as $query){
		$this->dbh->exec(utf8_decode($query));
	}
	$this->dbh->commit();
} else {
	die("unsupported version of GEXF format");
}

Post Reply
[phpBB Debug] PHP Warning: in file [ROOT]/vendor/twig/twig/lib/Twig/Extension/Core.php on line 1275: count(): Parameter must be an array or an object that implements Countable
[phpBB Debug] PHP Warning: in file [ROOT]/vendor/twig/twig/lib/Twig/Extension/Core.php on line 1275: count(): Parameter must be an array or an object that implements Countable