Commit 2a1b1ca5 authored by manxilin

init update
# Byte-compiled / optimized / DLL files
__pycache__/
*.py[cod]
*$py.class
# C extensions
*.so
# Distribution / packaging
.Python
env/
build/
develop-eggs/
dist/
downloads/
eggs/
.eggs/
lib/
lib64/
parts/
sdist/
var/
wheels/
*.egg-info/
.installed.cfg
*.egg
# PyInstaller
# Usually these files are written by a python script from a template
# before PyInstaller builds the exe, so as to inject date/other infos into it.
*.manifest
*.spec
# Installer logs
pip-log.txt
pip-delete-this-directory.txt
# Unit test / coverage reports
htmlcov/
.tox/
.coverage
.coverage.*
.cache
nosetests.xml
coverage.xml
*.cover
.hypothesis/
# Translations
*.mo
*.pot
# Django stuff:
*.log
local_settings.py
# Flask stuff:
instance/
.webassets-cache
# Scrapy stuff:
.scrapy
# Sphinx documentation
docs/_build/
# PyBuilder
target/
# Jupyter Notebook
.ipynb_checkpoints
# pyenv
.python-version
# celery beat schedule file
celerybeat-schedule
# SageMath parsed files
*.sage.py
# dotenv
.env
# virtualenv
.venv
venv/
ENV/
# Spyder project settings
.spyderproject
.spyproject
# Rope project settings
.ropeproject
# mkdocs documentation
/site
# mypy
.mypy_cache/
# editor
*.sw?
# data
figs/
results/
tmp/
log/
runs/
vis/
data/
data/REDDIT-MULTI-12K/
data/COLLAB
data/PROTEINS
data/PROTEINS_full
data/NCI1
## Authors
- Manxi Lin (s192230)
- Mengge Hu (s192113)
- Guangya Shen (s200104)
## Data set
https://drive.google.com/file/d/1nTM9c4HgIeb6iFauLQABuGjqDGpc43iv/view?usp=sharing
Download the data set from the link above and unzip it in the repository root.
## Check our main results
- See `main.ipynb`
- Evidence for our results: `./screenshots`
import torch
import torch.nn as nn
from torch.autograd import Variable
import random
"""
Set of modules for aggregating embeddings of neighbors.
"""
class MeanAggregator(nn.Module):
"""
Aggregates a node's embeddings using mean of neighbors' embeddings
"""
def __init__(self, features, cuda=False, gcn=False):
"""
Initializes the aggregator for a specific graph.
features -- function mapping LongTensor of node ids to FloatTensor of feature values.
cuda -- whether to use GPU
gcn --- whether to perform concatenation GraphSAGE-style, or add self-loops GCN-style
"""
super(MeanAggregator, self).__init__()
self.features = features
self.cuda = cuda
self.gcn = gcn
def forward(self, nodes, to_neighs, num_sample=10):
"""
nodes --- list of nodes in a batch
to_neighs --- list of sets, each set is the set of neighbors for node in batch
num_sample --- number of neighbors to sample. No sampling if None.
"""
# Local pointers to functions (speed hack)
_set = set
        if num_sample is not None:
            _sample = random.sample
            samp_neighs = [_set(_sample(to_neigh, num_sample))
                           if len(to_neigh) >= num_sample else to_neigh
                           for to_neigh in to_neighs]
else:
samp_neighs = to_neighs
if self.gcn:
            samp_neighs = [samp_neigh | {nodes[i]} for i, samp_neigh in enumerate(samp_neighs)]  # set union; sets do not support +
unique_nodes_list = list(set.union(*samp_neighs))
unique_nodes = {n:i for i,n in enumerate(unique_nodes_list)}
mask = Variable(torch.zeros(len(samp_neighs), len(unique_nodes))) # (n x n')
column_indices = [unique_nodes[n] for samp_neigh in samp_neighs for n in samp_neigh] # for each neigh, get index in unique_nodes_list
row_indices = [i for i in range(len(samp_neighs)) for j in range(len(samp_neighs[i]))] # node index
        mask[row_indices, column_indices] = 1  # binary adjacency mask (batch nodes x unique neighbors)
if self.cuda:
mask = mask.cuda()
num_neigh = mask.sum(1, keepdim=True) # sum for each node (n x 1)
mask = mask.div(num_neigh)
if self.cuda:
embed_matrix = self.features(torch.LongTensor(unique_nodes_list).cuda())
else:
embed_matrix = self.features(torch.LongTensor(unique_nodes_list))
to_feats = mask.mm(embed_matrix) # mean over each node (AF)
return to_feats
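# --- Hedged usage sketch (illustrative, not part of the original file) ---
# MeanAggregator only assumes `features` is a callable mapping a LongTensor of
# node ids to their feature rows; a toy nn.Embedding over 5 nodes suffices.
if __name__ == '__main__':
    toy_features = nn.Embedding(5, 3)
    agg = MeanAggregator(features=toy_features, cuda=False, gcn=False)
    nodes = [0, 1]
    to_neighs = [{1, 2}, {0, 3, 4}]
    out = agg(nodes, to_neighs, num_sample=None)  # row i = mean of node i's neighbor features
    print(out.shape)  # torch.Size([2, 3])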
# ENZYMES
python -m train --datadir=data --bmname=ENZYMES --cuda=3 --max-nodes=100 --num-classes=6
# ENZYMES - Diffpool
python -m train --bmname=ENZYMES --assign-ratio=0.1 --hidden-dim=30 --output-dim=30 --cuda=1 --num-classes=6 --method=soft-assign
# DD
python -m train --datadir=data --bmname=DD --cuda=0 --max-nodes=500 --epochs=1000 --num-classes=2
# DD - Diffpool
python -m train --bmname=DD --assign-ratio=0.1 --hidden-dim=64 --output-dim=64 --cuda=1 --num-classes=2 --method=soft-assign
import networkx as nx
import numpy as np
import torch
import pickle
import random
from graph_sampler import GraphSampler
def prepare_val_data(graphs, args, val_idx, max_nodes=0):
    random.shuffle(graphs)
    # size of one validation fold; the split below assumes 10-fold cross-validation
    val_size = int(np.ceil(len(graphs) * (1 - args.train_ratio - args.test_ratio)))
    train_graphs = graphs[:val_idx * val_size]
    if val_idx < 9:  # every fold except the last also trains on the graphs after it
        train_graphs = train_graphs + graphs[(val_idx + 1) * val_size:]
    val_graphs = graphs[val_idx * val_size: (val_idx + 1) * val_size]
print('Num training graphs: ', len(train_graphs),
'; Num validation graphs: ', len(val_graphs))
print('Number of graphs: ', len(graphs))
print('Number of edges: ', sum([G.number_of_edges() for G in graphs]))
    print('Max, avg, std of graph size: ',
          max([G.number_of_nodes() for G in graphs]), ', ',
          "{0:.2f}".format(np.mean([G.number_of_nodes() for G in graphs])), ', ',
          "{0:.2f}".format(np.std([G.number_of_nodes() for G in graphs])))
# minibatch
dataset_sampler = GraphSampler(train_graphs, normalize=False, max_num_nodes=max_nodes,
features=args.feature_type)
train_dataset_loader = torch.utils.data.DataLoader(
dataset_sampler,
batch_size=args.batch_size,
shuffle=True,
num_workers=args.num_workers)
dataset_sampler = GraphSampler(val_graphs, normalize=False, max_num_nodes=max_nodes,
features=args.feature_type)
val_dataset_loader = torch.utils.data.DataLoader(
dataset_sampler,
batch_size=args.batch_size,
shuffle=False,
num_workers=args.num_workers)
return train_dataset_loader, val_dataset_loader, \
dataset_sampler.max_num_nodes, dataset_sampler.feat_dim, dataset_sampler.assign_feat_dim
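# --- Hedged usage sketch (illustrative; assumes the ENZYMES data set is
# unzipped under ./data and `args` mirrors the fields train.py's parser provides) ---
if __name__ == '__main__':
    from argparse import Namespace
    from load_data import read_graphfile
    args = Namespace(train_ratio=0.8, test_ratio=0.1, feature_type='default',
                     batch_size=20, num_workers=1)
    graphs = read_graphfile('data', 'ENZYMES', max_nodes=100)
    for fold in range(10):  # 10-fold cross-validation
        train_loader, val_loader, max_n, feat_dim, assign_dim = \
            prepare_val_data(graphs, args, fold, max_nodes=100)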
# ENZYMES
# python -m train --datadir=data --bmname=ENZYMES --cuda=0 --max-nodes=100 --num-classes=6
# ENZYMES - Diffpool
#python -m train --bmname=ENZYMES --assign-ratio=0.1 --hidden-dim=30 --output-dim=30 --cuda=0 --epochs=1 --num-classes=6 --method=soft-assign --dropout=0.8
# DD
# python -m train --datadir=data --bmname=DD --cuda=0 --max-nodes=500 --epochs=1000 --num-classes=2
# DD - Diffpool
#python -m train --bmname=DD --assign-ratio=0.1 --hidden-dim=64 --output-dim=64 --cuda=0 --num-classes=2 --method=soft-assign --epochs=1
!python -m train --bmname=DD --batch-size=30 \
--dropout=0.3 --assign-ratio=0.5 --unpool-ratio=0.5 \
--hidden-dim=64 --output-dim=64 --cuda=0 --num-classes=2 \
--method=soft-assign --num-pool=3 --epochs=100 --num-unpool=3 \
--weight-decay=0
import networkx as nx
import numpy as np
import random
import gen.feat as featgen
import util
def gen_ba(n_range, m_range, num_graphs, feature_generator=None):
graphs = []
for i in np.random.choice(n_range, num_graphs):
for j in np.random.choice(m_range, 1):
graphs.append(nx.barabasi_albert_graph(i,j))
    if feature_generator is None:
        feature_generator = featgen.ConstFeatureGen(0)  # module-qualified; the bare name is undefined here
for G in graphs:
feature_generator.gen_node_features(G)
return graphs
def gen_er(n_range, p, num_graphs, feature_generator=None):
graphs = []
for i in np.random.choice(n_range, num_graphs):
graphs.append(nx.erdos_renyi_graph(i,p))
    if feature_generator is None:
        feature_generator = featgen.ConstFeatureGen(0)
for G in graphs:
feature_generator.gen_node_features(G)
return graphs
def gen_2community_ba(n_range, m_range, num_graphs, inter_prob, feature_generators):
''' Each community is a BA graph.
Args:
inter_prob: probability of one node connecting to any node in the other community.
'''
    if feature_generators is None:
        mu0 = np.zeros(10)
        mu1 = np.ones(10)
        sigma0 = np.ones((10, 10)) * 0.1  # np.ones takes a shape tuple
        sigma1 = np.ones((10, 10)) * 0.1
        fg0 = featgen.GaussianFeatureGen(mu0, sigma0)
        fg1 = featgen.GaussianFeatureGen(mu1, sigma1)
else:
fg0 = feature_generators[0]
fg1 = feature_generators[1] if len(feature_generators) > 1 else feature_generators[0]
graphs1 = []
graphs2 = []
#for (i1, i2) in zip(np.random.choice(n_range, num_graphs),
# np.random.choice(n_range, num_graphs)):
# for (j1, j2) in zip(np.random.choice(m_range, num_graphs),
# np.random.choice(m_range, num_graphs)):
graphs0 = gen_ba(n_range, m_range, num_graphs, fg0)
graphs1 = gen_ba(n_range, m_range, num_graphs, fg1)
graphs = []
for i in range(num_graphs):
G = nx.disjoint_union(graphs0[i], graphs1[i])
n0 = graphs0[i].number_of_nodes()
for j in range(n0):
if np.random.rand() < inter_prob:
target = np.random.choice(G.number_of_nodes() - n0) + n0
G.add_edge(j, target)
graphs.append(G)
return graphs
def gen_2hier(num_graphs, num_clusters, n, m_range, inter_prob1, inter_prob2, feat_gen):
''' Each community is a BA graph.
Args:
inter_prob1: probability of one node connecting to any node in the other community within
the large cluster.
inter_prob2: probability of one node connecting to any node in the other community between
the large cluster.
'''
graphs = []
for i in range(num_graphs):
clusters2 = []
for j in range(len(num_clusters)):
clusters = gen_er(range(n, n+1), 0.5, num_clusters[j], feat_gen[0])
G = nx.disjoint_union_all(clusters)
for u1 in range(G.number_of_nodes()):
if np.random.rand() < inter_prob1:
target = np.random.choice(G.number_of_nodes() - n)
# move one cluster after to make sure it's not an intra-cluster edge
if target // n >= u1 // n:
target += n
G.add_edge(u1, target)
clusters2.append(G)
G = nx.disjoint_union_all(clusters2)
cluster_sizes_cum = np.cumsum([cluster2.number_of_nodes() for cluster2 in clusters2])
curr_cluster = 0
for u1 in range(G.number_of_nodes()):
if u1 >= cluster_sizes_cum[curr_cluster]:
curr_cluster += 1
if np.random.rand() < inter_prob2:
target = np.random.choice(G.number_of_nodes() -
clusters2[curr_cluster].number_of_nodes())
# move one cluster after to make sure it's not an intra-cluster edge
if curr_cluster == 0 or target >= cluster_sizes_cum[curr_cluster - 1]:
target += cluster_sizes_cum[curr_cluster]
G.add_edge(u1, target)
graphs.append(G)
return graphs
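# --- Hedged usage sketch (illustrative, not part of the original file) ---
# Generate 20 two-community BA graphs with constant 10-dim node features;
# the parameter values below are arbitrary.
if __name__ == '__main__':
    import gen.feat as featgen
    fg = featgen.ConstFeatureGen(np.ones(10))
    graphs = gen_2community_ba(range(20, 30), range(2, 4), 20, 0.05, [fg])
    print(len(graphs), graphs[0].number_of_nodes())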
import abc
import networkx as nx
import numpy as np
import random
class FeatureGen(metaclass=abc.ABCMeta):
@abc.abstractmethod
def gen_node_features(self, G):
pass
class ConstFeatureGen(FeatureGen):
def __init__(self, val):
self.val = val
def gen_node_features(self, G):
feat_dict = {i:{'feat': self.val} for i in G.nodes()}
nx.set_node_attributes(G, feat_dict)
class GaussianFeatureGen(FeatureGen):
def __init__(self, mu, sigma):
self.mu = mu
self.sigma = sigma
def gen_node_features(self, G):
        feat = np.random.multivariate_normal(self.mu, self.sigma, G.number_of_nodes())
feat_dict = {i:{'feat': feat[i]} for i in range(feat.shape[0])}
nx.set_node_attributes(G, feat_dict)
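# --- Hedged usage sketch (illustrative, not part of the original file) ---
# Attach a constant 7-dim feature vector to every node of a toy graph.
if __name__ == '__main__':
    G = nx.karate_club_graph()
    ConstFeatureGen(np.ones(7)).gen_node_features(G)
    print(G.nodes[0]['feat'])  # array of seven ones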
import networkx as nx
import numpy as np
import torch
import torch.utils.data
import util
class GraphSampler(torch.utils.data.Dataset):
''' Sample graphs and nodes in graph
'''
def __init__(self, G_list, features='default', normalize=True, assign_feat='default', max_num_nodes=0):
self.adj_all = []
self.len_all = []
self.feature_all = []
self.label_all = []
self.assign_feat_all = []
if max_num_nodes == 0:
self.max_num_nodes = max([G.number_of_nodes() for G in G_list])
else:
self.max_num_nodes = max_num_nodes
self.feat_dim = util.node_dict(G_list[0])[0]['feat'].shape[0]
for G in G_list:
adj = np.array(nx.to_numpy_matrix(G))
if normalize:
sqrt_deg = np.diag(1.0 / np.sqrt(np.sum(adj, axis=0, dtype=float).squeeze()))
adj = np.matmul(np.matmul(sqrt_deg, adj), sqrt_deg)
self.adj_all.append(adj)
self.len_all.append(G.number_of_nodes())
self.label_all.append(G.graph['label'])
# feat matrix: max_num_nodes x feat_dim
if features == 'default':
f = np.zeros((self.max_num_nodes, self.feat_dim), dtype=float)
for i,u in enumerate(G.nodes()):
f[i,:] = util.node_dict(G)[u]['feat']
self.feature_all.append(f)
elif features == 'id':
self.feature_all.append(np.identity(self.max_num_nodes))
elif features == 'deg-num':
degs = np.sum(np.array(adj), 1)
                degs = np.expand_dims(
                    np.pad(degs, [0, self.max_num_nodes - G.number_of_nodes()], 'constant'),
                    axis=1)  # np.pad's third argument is the mode string, not 0
self.feature_all.append(degs)
elif features == 'deg':
self.max_deg = 10
degs = np.sum(np.array(adj), 1).astype(int)
                degs[degs > self.max_deg] = self.max_deg
feat = np.zeros((len(degs), self.max_deg + 1))
feat[np.arange(len(degs)), degs] = 1
feat = np.pad(feat, ((0, self.max_num_nodes - G.number_of_nodes()), (0, 0)),
'constant', constant_values=0)
f = np.zeros((self.max_num_nodes, self.feat_dim), dtype=float)
for i,u in enumerate(util.node_iter(G)):
f[i,:] = util.node_dict(G)[u]['feat']
feat = np.concatenate((feat, f), axis=1)
self.feature_all.append(feat)
elif features == 'struct':
self.max_deg = 10
degs = np.sum(np.array(adj), 1).astype(int)
degs[degs>10] = 10
feat = np.zeros((len(degs), self.max_deg + 1))
feat[np.arange(len(degs)), degs] = 1
degs = np.pad(feat, ((0, self.max_num_nodes - G.number_of_nodes()), (0, 0)),
'constant', constant_values=0)
clusterings = np.array(list(nx.clustering(G).values()))
clusterings = np.expand_dims(np.pad(clusterings,
[0, self.max_num_nodes - G.number_of_nodes()],
'constant'),
axis=1)
g_feat = np.hstack([degs, clusterings])
if 'feat' in util.node_dict(G)[0]:
node_feats = np.array([util.node_dict(G)[i]['feat'] for i in range(G.number_of_nodes())])
node_feats = np.pad(node_feats, ((0, self.max_num_nodes - G.number_of_nodes()), (0, 0)),
'constant')
g_feat = np.hstack([g_feat, node_feats])
self.feature_all.append(g_feat)
if assign_feat == 'id':
self.assign_feat_all.append(
np.hstack((np.identity(self.max_num_nodes), self.feature_all[-1])) )
else:
self.assign_feat_all.append(self.feature_all[-1])
self.feat_dim = self.feature_all[0].shape[1]
self.assign_feat_dim = self.assign_feat_all[0].shape[1]
def __len__(self):
return len(self.adj_all)
def __getitem__(self, idx):
adj = self.adj_all[idx]
num_nodes = adj.shape[0]
adj_padded = np.zeros((self.max_num_nodes, self.max_num_nodes))
adj_padded[:num_nodes, :num_nodes] = adj
# use all nodes for aggregation (baseline)
return {'adj':adj_padded,
'feats':self.feature_all[idx].copy(),
'label':self.label_all[idx],
'num_nodes': num_nodes,
'assign_feats':self.assign_feat_all[idx].copy()}
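# --- Hedged usage sketch (illustrative; assumes the ENZYMES data set is
# unzipped under ./data) ---
if __name__ == '__main__':
    from load_data import read_graphfile
    graphs = read_graphfile('data', 'ENZYMES', max_nodes=100)
    sampler = GraphSampler(graphs, features='default', normalize=False)
    loader = torch.utils.data.DataLoader(sampler, batch_size=20, shuffle=True)
    batch = next(iter(loader))
    print(batch['adj'].shape, batch['feats'].shape)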
import torch
import torch.nn as nn
from torch.autograd import Variable
from torch.nn import init
import numpy as np
class SupervisedGraphSage(nn.Module):
''' GraphSage embeddings
'''
def __init__(self, num_classes, enc):
super(SupervisedGraphSage, self).__init__()
self.enc = enc
self.xent = nn.CrossEntropyLoss()
self.weight = nn.Parameter(torch.FloatTensor(enc.embed_dim, num_classes))
        init.xavier_uniform_(self.weight)
def forward(self, nodes):
embeds = self.enc(nodes)
scores = embeds.mm(self.weight)
return scores
def loss(self, nodes, labels):
scores = self.forward(nodes)
        # CrossEntropyLoss expects raw logits; applying softmax first is incorrect
        return self.xent(scores, labels.squeeze())
import networkx as nx
import numpy as np
import scipy as sc
import os
import re
import util
def read_graphfile(datadir, dataname, max_nodes=None):
''' Read data from https://ls11-www.cs.tu-dortmund.de/staff/morris/graphkerneldatasets
graph index starts with 1 in file
Returns:
List of networkx objects with graph and node labels
'''
prefix = os.path.join(datadir, dataname, dataname)
filename_graph_indic = prefix + '_graph_indicator.txt'
# index of graphs that a given node belongs to
graph_indic={}
    with open(filename_graph_indic) as f:
i=1
for line in f:
line=line.strip("\n")
graph_indic[i]=int(line)
i+=1
filename_nodes=prefix + '_node_labels.txt'
node_labels=[]
try:
with open(filename_nodes) as f:
for line in f:
line=line.strip("\n")
node_labels+=[int(line) - 1]
num_unique_node_labels = max(node_labels) + 1
except IOError:
print('No node labels')
filename_node_attrs=prefix + '_node_attributes.txt'
node_attrs=[]
try:
with open(filename_node_attrs) as f:
for line in f:
line = line.strip("\s\n")
attrs = [float(attr) for attr in re.split("[,\s]+", line) if not attr == '']
node_attrs.append(np.array(attrs))
except IOError:
print('No node attributes')
label_has_zero = False
filename_graphs=prefix + '_graph_labels.txt'
graph_labels=[]
# assume that all graph labels appear in the dataset
#(set of labels don't have to be consecutive)
label_vals = []
with open(filename_graphs) as f:
for line in f:
line=line.strip("\n")
val = int(line)
#if val == 0:
# label_has_zero = True
if val not in label_vals:
label_vals.append(val)
graph_labels.append(val)
#graph_labels = np.array(graph_labels)
label_map_to_int = {val: i for i, val in enumerate(label_vals)}
graph_labels = np.array([label_map_to_int[l] for l in graph_labels])
#if label_has_zero:
# graph_labels += 1
filename_adj=prefix + '_A.txt'
adj_list={i:[] for i in range(1,len(graph_labels)+1)}
index_graph={i:[] for i in range(1,len(graph_labels)+1)}
num_edges = 0
with open(filename_adj) as f:
for line in f:
line=line.strip("\n").split(",")
e0,e1=(int(line[0].strip(" ")),int(line[1].strip(" ")))
adj_list[graph_indic[e0]].append((e0,e1))
index_graph[graph_indic[e0]]+=[e0,e1]
num_edges += 1
for k in index_graph.keys():
index_graph[k]=[u-1 for u in set(index_graph[k])]
graphs=[]
for i in range(1,1+len(adj_list)):
# indexed from 1 here
G=nx.from_edgelist(adj_list[i])
if max_nodes is not None and G.number_of_nodes() > max_nodes:
continue
# add features and labels
G.graph['label'] = graph_labels[i-1]
for u in util.node_iter(G):
if len(node_labels) > 0:
node_label_one_hot = [0] * num_unique_node_labels
node_label = node_labels[u-1]
node_label_one_hot[node_label] = 1
util.node_dict(G)[u]['label'] = node_label_one_hot
if len(node_attrs) > 0:
util.node_dict(G)[u]['feat'] = node_attrs[u-1]
if len(node_attrs) > 0:
G.graph['feat_dim'] = node_attrs[0].shape[0]
# relabeling
mapping={}
it=0
for n in util.node_iter(G):
mapping[n]=it
it+=1
# indexed from 0
graphs.append(nx.relabel_nodes(G, mapping))
return graphs
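# --- Hedged usage sketch (illustrative; assumes the benchmark files are
# unzipped under ./data in the TU-Dortmund format described above) ---
if __name__ == '__main__':
    graphs = read_graphfile('data', 'DD', max_nodes=500)
    print('Loaded {} graphs; first label: {}'.format(len(graphs), graphs[0].graph['label']))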
import networkx
import numpy as np
def partition(embeddings):
    ''' Compute a partition of embeddings, where each partition is pooled together.
    Args:
        embeddings: N-by-D matrix, where N is the number of node embeddings, and D
            is the embedding dimension.
    '''
    # np.dot needs two operands; use the inner-product similarity matrix.
    # The partitioning itself is left unimplemented in this file.
    dist = np.dot(embeddings, embeddings.T)
    return dist
def kruskal(adj):
    ''' Minimum spanning tree of a weighted adjacency matrix via Kruskal's algorithm. '''
    # initialize MST
    MST = set()
    edges = set()
    num_nodes = adj.shape[0]
    # collect all edges from the adjacency matrix (the original referenced an undefined G)
    for j in range(num_nodes):
        for k in range(num_nodes):
            if adj[j][k] != 0 and (k, j) not in edges:
                edges.add((j, k))
    # sort all edges by weight from smallest to largest
    sorted_edges = sorted(edges, key=lambda e: adj[e[0]][e[1]])
    uf = UnionFind(num_nodes)  # see the UnionFind sketch below
    for e in sorted_edges:
        u, v = e
        # if u, v are already connected, skip this edge
        if uf.connected(u, v):
            continue
        # otherwise, connect them and add this edge to the MST
        uf.union(u, v)
        MST.add(e)
    return MST
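# --- Hedged sketch (assumption): kruskal() above relies on a union-find
# structure that this file never defines; a minimal stand-in follows. ---
class UnionFind:
    def __init__(self, n):
        self.parent = list(range(n))
    def find(self, u):
        while self.parent[u] != u:
            self.parent[u] = self.parent[self.parent[u]]  # path halving
            u = self.parent[u]
        return u
    def connected(self, u, v):
        return self.find(u) == self.find(v)
    def union(self, u, v):
        self.parent[self.find(u)] = self.find(v)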
'''
author: lmx
date: 11/17/2020
'''
# importation
import networkx as nx
import numpy
import sys
from glob import glob
from load_data import read_graphfile
from matplotlib import pyplot as plt
import torch
import math
from collections import Counter
#----------CONSTANTS----------
DATA_DIR = './data/' # parent folder
DATA_NAME = 'DD'
############
NEW_DIR = DATA_DIR # new path
NAME = 'DDD' # new name
NUM = 10 # new graph number
############
# fetch graphs
# g = read_graphfile(DATA_DIR, DATA_NAME)
# assert NUM <= len(g),'exceed maximum graph number'
# nodes = list(map(lambda x: len(x.nodes), g))
# edges = list(map(lambda x: len(x.edges), g))
#-> PART 1
# fetch basic information
base = '{}{}/{}_graph_indicator.txt'
old_dir = base.format(DATA_DIR, DATA_NAME, DATA_NAME)
new_dir = base.format(NEW_DIR, NAME, NAME)
with open(old_dir, 'r') as f:
c = f.read()
a = c.split('\n')
cnter = Counter(a)
cnter = list(zip(cnter.values(), cnter.keys()))
cnter = cnter[:-1]  # drop the trailing empty line from the split
def foo(x):
    return int(x[1])  # sort (count, graph_id) pairs by graph id; int is safer than eval
cnter = list(sorted(cnter, key=foo))
cnter = list(map(lambda x: x[0], cnter))  # node counts per graph, ordered by graph id
base = '{}{}/{}_A.txt'
old_dir = base.format(DATA_DIR, DATA_NAME, DATA_NAME)
new_dir = base.format(NEW_DIR, NAME, NAME)
with open(old_dir, 'r') as f:
c = f.read()
old_adj = c.split('\n')
def foo(x):
    x = x.split(',')
    try:
        assert 2 == len(x)
        return [int(x[0]), int(x[1])]
    except AssertionError:
        return None  # the trailing empty line yields a malformed row
old_adj = list(map(foo, old_adj))
old_adj = old_adj[:-1]  # drop the trailing None
NODE_NUM = sum(cnter[:NUM])  # total number of nodes in the first NUM graphs
i = 0
for i in range(len(old_adj)):
    tmp = old_adj[i]
    e0, e1 = tmp[0], tmp[1]
    if max([e0, e1]) > NODE_NUM:
        break  # first edge that references a node outside the kept graphs
EDGE_NUM = i  # number of edges to keep
print('There are {} graphs, {} nodes and {} edges.'.format(NUM, NODE_NUM, EDGE_NUM))
#-> PART 2
# Adjacency matrix
base = '{}{}/{}_A.txt'
old_dir = base.format(DATA_DIR, DATA_NAME, DATA_NAME)
new_dir = base.format(NEW_DIR, NAME, NAME)
with open(old_dir, 'r') as f:
c = f.read()
old_adj = c.split('\n')
new_adj = old_adj[:EDGE_NUM]
new_adj = '\n'.join(new_adj)
with open(new_dir, 'w') as f:
f.write(new_adj)
print('Wrote adjacency matrix to {}'.format(new_dir))
#-> PART 3
# Graph indicator
base = '{}{}/{}_graph_indicator.txt'
old_dir = base.format(DATA_DIR, DATA_NAME, DATA_NAME)
new_dir = base.format(NEW_DIR, NAME, NAME)
with open(old_dir, 'r') as f:
c = f.read()
old_ind = c.split('\n')
new_ind = old_ind[:NODE_NUM]
new_ind = '\n'.join(new_ind)
with open(new_dir, 'w') as f:
f.write(new_ind)
print('Wrote graph indicator to {}'.format(new_dir))
#-> PART 4
# Graph labels
base = '{}{}/{}_graph_labels.txt'
old_dir = base.format(DATA_DIR, DATA_NAME, DATA_NAME)
new_dir = base.format(NEW_DIR, NAME, NAME)
with open(old_dir, 'r') as f:
c = f.read()
old_label = c.split('\n')
new_label = old_label[:NUM]
new_label = '\n'.join(new_label)
with open(new_dir, 'w') as f:
f.write(new_label)
print('Wrote graph labels to {}'.format(new_dir))
#-> PART 5
# Node labels
base = '{}{}/{}_node_labels.txt'
old_dir = base.format(DATA_DIR, DATA_NAME, DATA_NAME)
new_dir = base.format(NEW_DIR, NAME, NAME)
with open(old_dir, 'r') as f:
c = f.read()
old_label = c.split('\n')
new_label = old_label[:NODE_NUM]
new_label = '\n'.join(new_label)
with open(new_dir, 'w') as f:
f.write(new_label)
print('Wrote node labels to {}'.format(new_dir))
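# --- Hedged sanity check (assumption: the files above were written
# successfully and the truncated data set is internally consistent) ---
g_small = read_graphfile(NEW_DIR, NAME)
print('Reloaded {} graphs from the truncated data set.'.format(len(g_small)))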
import torch
import torch.nn as nn
from torch.nn import init
import torch.nn.functional as F
import numpy as np
class Set2Set(nn.Module):
def __init__(self, input_dim, hidden_dim, act_fn=nn.ReLU, num_layers=1):
'''
Args:
input_dim: input dim of Set2Set.
hidden_dim: the dim of set representation, which is also the INPUT dimension of
the LSTM in Set2Set.
This is a concatenation of weighted sum of embedding (dim input_dim), and the LSTM
hidden/output (dim: self.lstm_output_dim).
'''
super(Set2Set, self).__init__()
self.input_dim = input_dim
self.hidden_dim = hidden_dim
self.num_layers = num_layers
        if hidden_dim <= input_dim:
            print('ERROR: Set2Set hidden_dim should be larger than input_dim')
# the hidden is a concatenation of weighted sum of embedding and LSTM output
self.lstm_output_dim = hidden_dim - input_dim
self.lstm = nn.LSTM(hidden_dim, input_dim, num_layers=num_layers, batch_first=True)
# convert back to dim of input_dim
self.pred = nn.Linear(hidden_dim, input_dim)
self.act = act_fn()
def forward(self, embedding):
'''
Args:
embedding: [batch_size x n x d] embedding matrix
Returns:
aggregated: [batch_size x d] vector representation of all embeddings
'''
batch_size = embedding.size()[0]
n = embedding.size()[1]
hidden = (torch.zeros(self.num_layers, batch_size, self.lstm_output_dim).cuda(),
torch.zeros(self.num_layers, batch_size, self.lstm_output_dim).cuda())
q_star = torch.zeros(batch_size, 1, self.hidden_dim).cuda()
for i in range(n):
# q: batch_size x 1 x input_dim
q, hidden = self.lstm(q_star, hidden)
# e: batch_size x n x 1
e = embedding @ torch.transpose(q, 1, 2)
a = nn.Softmax(dim=1)(e)
r = torch.sum(a * embedding, dim=1, keepdim=True)
q_star = torch.cat((q, r), dim=2)
q_star = torch.squeeze(q_star, dim=1)
out = self.act(self.pred(q_star))
return out
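# --- Hedged usage sketch (illustrative, not part of the original file) ---
# The forward pass allocates .cuda() tensors, so a GPU is required, and the
# dimensions only line up when hidden_dim == 2 * input_dim (so that
# hidden_dim - input_dim matches the LSTM's hidden size of input_dim).
if __name__ == '__main__':
    s2s = Set2Set(input_dim=10, hidden_dim=20).cuda()
    emb = torch.rand(4, 7, 10).cuda()
    print(s2s(emb).shape)  # expected: torch.Size([4, 10])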
import torch
import torch.nn as nn
from torch.nn import init
import torch.nn.functional as F
import numpy as np
from torch.nn.modules.activation import ReLU
from set2set import Set2Set
from encoders import SoftPoolingGcnEncoder
# GCN basic operation
class GraphConv(nn.Module):
def __init__(self, input_dim, output_dim, add_self=False, normalize_embedding=False,
dropout=0.0, bias=True):
super(GraphConv, self).__init__()
self.add_self = add_self
self.dropout = dropout
if dropout > 0.001:
self.dropout_layer = nn.Dropout(p=dropout)
self.normalize_embedding = normalize_embedding
self.input_dim = input_dim
self.output_dim = output_dim
        # NOTE: parameters are allocated uninitialized here; they are expected
        # to be initialized elsewhere (e.g. Xavier initialization) before training.
        self.weight = nn.Parameter(torch.FloatTensor(input_dim, output_dim).cuda())
        if bias:
            self.bias = nn.Parameter(torch.FloatTensor(output_dim).cuda())
        else:
            self.bias = None
def forward(self, x, adj):
if self.dropout > 0.001:
x = self.dropout_layer(x)
y = torch.matmul(adj, x)
if self.add_self:
y += x
y = torch.matmul(y,self.weight)
if self.bias is not None:
y = y + self.bias
if self.normalize_embedding:
y = F.normalize(y, p=2, dim=2)
#print(y[0][0])
return y
class GConvModule(nn.Module):
def __init__(self, input_dim, hidden_dim, embedding_dim, label_dim, num_layers,
pred_hidden_dims=[], concat=True, bn=True, dropout=0.0, normalize=False, num_aggs=1,
args=None):
super(GConvModule, self).__init__()
add_self = not concat
self.conv_first = GraphConv(input_dim=input_dim, output_dim=hidden_dim, add_self=add_self,
normalize_embedding=normalize, bias=True)
self.conv_block = nn.ModuleList(
[GraphConv(input_dim=hidden_dim, output_dim=hidden_dim, add_self=add_self,
normalize_embedding=normalize, dropout=dropout, bias=True)
for i in range(num_layers-2)])
self.conv_last = GraphConv(input_dim=hidden_dim, output_dim=embedding_dim, add_self=add_self,
normalize_embedding=normalize, bias=True)
self.act = nn.ReLU()
self.bn = bn
self.num_aggs = num_aggs
self.concat = concat
if concat:
pred_input_dim = hidden_dim * (num_layers - 1) + embedding_dim
else:
pred_input_dim = embedding_dim
pred_input_dim = pred_input_dim * num_aggs
if len(pred_hidden_dims) == 0:
pred_model = nn.Linear(pred_input_dim, label_dim)
else:
pred_layers = []
for pred_dim in pred_hidden_dims:
pred_layers.append(nn.Linear(pred_input_dim, pred_dim))
pred_layers.append(self.act)
pred_input_dim = pred_dim
pred_layers.append(nn.Linear(pred_dim, label_dim))
pred_model = nn.Sequential(*pred_layers)
self.pred_block = pred_model
def apply_bn(self, x):
''' Batch normalization of 3D tensor x
'''
bn_module = nn.BatchNorm1d(x.size()[1]).cuda()
return bn_module(x)
def forward(self, x, adj, embedding_mask=None):
x = self.conv_first(x, adj)
x = self.act(x)
if self.bn:
x = self.apply_bn(x)
x_all = [x]
for i in range(len(self.conv_block)):
x = self.conv_block[i](x,adj)
x = self.act(x)
if self.bn:
x = self.apply_bn(x)
x_all.append(x)
x = self.conv_last(x,adj)
x_all.append(x)
# x_tensor: [batch_size x num_nodes x embedding]
x_tensor = torch.cat(x_all, dim=2)
if embedding_mask is not None:
x_tensor = x_tensor * embedding_mask
ypred = self.pred_block(x_tensor)
return ypred
if __name__=='__main__':
x = torch.rand(20, 100, 10).cuda()
adj = torch.rand(20, 100, 100).cuda()
#net = GConvModule(10, 10, 5, 3, num_layers=5)
net = SoftPoolingGcnEncoder(100, 10, 10, 5, 3, 5, 5, num_pooling=2,
assign_ratio=0.1, num_unpooling=2, unpool_ratio=0.1)
net = net.cuda()
a = net.forward(x, adj, range(20), True, True)
print(a)
'''
from load_data import read_graphfile
g = read_graphfile('./data', 'DND')
print(len(g))
print(g)
'''