Commit 2a1b1ca5 authored by manxilin

init update
# Byte-compiled / optimized / DLL files
__pycache__/
*.py[cod]
*$py.class
# C extensions
*.so
# Distribution / packaging
.Python
env/
build/
develop-eggs/
dist/
downloads/
eggs/
.eggs/
lib/
lib64/
parts/
sdist/
var/
wheels/
*.egg-info/
.installed.cfg
*.egg
# PyInstaller
# Usually these files are written by a python script from a template
# before PyInstaller builds the exe, so as to inject date/other infos into it.
*.manifest
*.spec
# Installer logs
pip-log.txt
pip-delete-this-directory.txt
# Unit test / coverage reports
htmlcov/
.tox/
.coverage
.coverage.*
.cache
nosetests.xml
coverage.xml
*.cover
.hypothesis/
# Translations
*.mo
*.pot
# Django stuff:
*.log
local_settings.py
# Flask stuff:
instance/
.webassets-cache
# Scrapy stuff:
.scrapy
# Sphinx documentation
docs/_build/
# PyBuilder
target/
# Jupyter Notebook
.ipynb_checkpoints
# pyenv
.python-version
# celery beat schedule file
celerybeat-schedule
# SageMath parsed files
*.sage.py
# dotenv
.env
# virtualenv
.venv
venv/
ENV/
# Spyder project settings
.spyderproject
.spyproject
# Rope project settings
.ropeproject
# mkdocs documentation
/site
# mypy
.mypy_cache/
# editor
*.sw?
# data
figs/
results/
tmp/
log/
runs/
vis/
data/
data/REDDIT-MULTI-12K/
data/COLLAB
data/PROTEINS
data/PROTEINS_full
data/NCI1
## Authors
- Manxi Lin s192230
- Mengge Hu s192113
- Guangya Shen s200104
## Data set
https://drive.google.com/file/d/1nTM9c4HgIeb6iFauLQABuGjqDGpc43iv/view?usp=sharing
Download it and unzip it in the repository root, so the `data/` folder sits alongside the code
## Check our main result
- See `main.ipynb`
- Proof of our result: `./screenshots`
import torch
import torch.nn as nn
from torch.autograd import Variable
import random
"""
Set of modules for aggregating embeddings of neighbors.
"""
class MeanAggregator(nn.Module):
"""
Aggregates a node's embeddings using mean of neighbors' embeddings
"""
def __init__(self, features, cuda=False, gcn=False):
"""
Initializes the aggregator for a specific graph.
features -- function mapping LongTensor of node ids to FloatTensor of feature values.
cuda -- whether to use GPU
gcn --- whether to perform concatenation GraphSAGE-style, or add self-loops GCN-style
"""
super(MeanAggregator, self).__init__()
self.features = features
self.cuda = cuda
self.gcn = gcn
def forward(self, nodes, to_neighs, num_sample=10):
"""
nodes --- list of nodes in a batch
to_neighs --- list of sets, each set is the set of neighbors for node in batch
num_sample --- number of neighbors to sample. No sampling if None.
"""
# Local pointers to functions (speed hack)
_set = set
        if num_sample is not None:
            _sample = random.sample
            # materialize each set as a list: random.sample no longer accepts sets
            samp_neighs = [_set(_sample(list(to_neigh),
                                        num_sample,
                                        )) if len(to_neigh) >= num_sample else to_neigh for to_neigh in to_neighs]
else:
samp_neighs = to_neighs
if self.gcn:
            samp_neighs = [samp_neigh | {nodes[i]} for i, samp_neigh in enumerate(samp_neighs)]  # set union; '+' is undefined for sets
unique_nodes_list = list(set.union(*samp_neighs))
unique_nodes = {n:i for i,n in enumerate(unique_nodes_list)}
mask = Variable(torch.zeros(len(samp_neighs), len(unique_nodes))) # (n x n')
column_indices = [unique_nodes[n] for samp_neigh in samp_neighs for n in samp_neigh] # for each neigh, get index in unique_nodes_list
row_indices = [i for i in range(len(samp_neighs)) for j in range(len(samp_neighs[i]))] # node index
mask[row_indices, column_indices] = 1 # adjacent matrix
if self.cuda:
mask = mask.cuda()
num_neigh = mask.sum(1, keepdim=True) # sum for each node (n x 1)
mask = mask.div(num_neigh)
if self.cuda:
embed_matrix = self.features(torch.LongTensor(unique_nodes_list).cuda())
else:
embed_matrix = self.features(torch.LongTensor(unique_nodes_list))
to_feats = mask.mm(embed_matrix) # mean over each node (AF)
return to_feats
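# Usage sketch (not part of the original file): exercise MeanAggregator on a
# toy 4-node graph, assuming `features` is backed by an nn.Embedding lookup.
# Illustrative only.
if __name__ == '__main__':
    feat = nn.Embedding(4, 8)  # 4 nodes, 8-dim features
    agg = MeanAggregator(features=lambda ids: feat(ids))
    nodes = [0, 1]
    to_neighs = [{1, 2}, {0, 3}]  # neighbor sets for each node in the batch
    out = agg(nodes, to_neighs, num_sample=2)
    print(out.shape)  # torch.Size([2, 8])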
# ENZYMES
python -m train --datadir=data --bmname=ENZYMES --cuda=3 --max-nodes=100 --num-classes=6
# ENZYMES - Diffpool
python -m train --bmname=ENZYMES --assign-ratio=0.1 --hidden-dim=30 --output-dim=30 --cuda=1 --num-classes=6 --method=soft-assign
# DD
python -m train --datadir=data --bmname=DD --cuda=0 --max-nodes=500 --epochs=1000 --num-classes=2
# DD - Diffpool
python -m train --bmname=DD --assign-ratio=0.1 --hidden-dim=64 --output-dim=64 --cuda=1 --num-classes=2 --method=soft-assign
import networkx as nx
import numpy as np
import torch
import pickle
import random
from graph_sampler import GraphSampler
def prepare_val_data(graphs, args, val_idx, max_nodes=0):
random.shuffle(graphs)
val_size = int(np.ceil((len(graphs)*(1-args.train_ratio-args.test_ratio))))
    train_graphs = graphs[:val_idx * val_size]
    if val_idx < 9:  # assumes 10 folds; all graphs outside the val slice go to training
        train_graphs = train_graphs + graphs[(val_idx+1) * val_size :]
val_graphs = graphs[val_idx*val_size: (val_idx+1)*val_size]
print('Num training graphs: ', len(train_graphs),
'; Num validation graphs: ', len(val_graphs))
print('Number of graphs: ', len(graphs))
print('Number of edges: ', sum([G.number_of_edges() for G in graphs]))
print('Max, avg, std of graph size: ',
max([G.number_of_nodes() for G in graphs]), ', '
"{0:.2f}".format(np.mean([G.number_of_nodes() for G in graphs])), ', '
"{0:.2f}".format(np.std([G.number_of_nodes() for G in graphs])))
# minibatch
dataset_sampler = GraphSampler(train_graphs, normalize=False, max_num_nodes=max_nodes,
features=args.feature_type)
train_dataset_loader = torch.utils.data.DataLoader(
dataset_sampler,
batch_size=args.batch_size,
shuffle=True,
num_workers=args.num_workers)
dataset_sampler = GraphSampler(val_graphs, normalize=False, max_num_nodes=max_nodes,
features=args.feature_type)
val_dataset_loader = torch.utils.data.DataLoader(
dataset_sampler,
batch_size=args.batch_size,
shuffle=False,
num_workers=args.num_workers)
return train_dataset_loader, val_dataset_loader, \
dataset_sampler.max_num_nodes, dataset_sampler.feat_dim, dataset_sampler.assign_feat_dim
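# Sketch (not part of the original file): the fold arithmetic above on a plain
# list, assuming train_ratio=0.8 and test_ratio=0.1 so that val_size is 10% of
# the graphs and val_idx ranges over the 10 folds.
if __name__ == '__main__':
    items = list(range(100))
    val_size = 10
    for val_idx in range(10):
        val = items[val_idx * val_size:(val_idx + 1) * val_size]
        train = items[:val_idx * val_size]
        if val_idx < 9:
            train = train + items[(val_idx + 1) * val_size:]
        assert len(val) == 10 and len(train) == 90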
# ENZYMES
# python -m train --datadir=data --bmname=ENZYMES --cuda=0 --max-nodes=100 --num-classes=6
# ENZYMES - Diffpool
#python -m train --bmname=ENZYMES --assign-ratio=0.1 --hidden-dim=30 --output-dim=30 --cuda=0 --epochs=1 --num-classes=6 --method=soft-assign --dropout=0.8
# DD
# python -m train --datadir=data --bmname=DD --cuda=0 --max-nodes=500 --epochs=1000 --num-classes=2
# DD - Diffpool
#python -m train --bmname=DD --assign-ratio=0.1 --hidden-dim=64 --output-dim=64 --cuda=0 --num-classes=2 --method=soft-assign --epochs=1
!python -m train --bmname=DD --batch-size=30 \
--dropout=0.3 --assign-ratio=0.5 --unpool-ratio=0.5 \
--hidden-dim=64 --output-dim=64 --cuda=0 --num-classes=2 \
--method=soft-assign --num-pool=3 --epochs=100 --num-unpool=3 \
--weight-decay=0
import networkx as nx
import numpy as np
import random
import gen.feat as featgen
import util
def gen_ba(n_range, m_range, num_graphs, feature_generator=None):
graphs = []
for i in np.random.choice(n_range, num_graphs):
for j in np.random.choice(m_range, 1):
graphs.append(nx.barabasi_albert_graph(i,j))
if feature_generator is None:
        feature_generator = featgen.ConstFeatureGen(0)  # defined in gen.feat
for G in graphs:
feature_generator.gen_node_features(G)
return graphs
def gen_er(n_range, p, num_graphs, feature_generator=None):
graphs = []
for i in np.random.choice(n_range, num_graphs):
graphs.append(nx.erdos_renyi_graph(i,p))
if feature_generator is None:
        feature_generator = featgen.ConstFeatureGen(0)  # defined in gen.feat
for G in graphs:
feature_generator.gen_node_features(G)
return graphs
def gen_2community_ba(n_range, m_range, num_graphs, inter_prob, feature_generators):
''' Each community is a BA graph.
Args:
inter_prob: probability of one node connecting to any node in the other community.
'''
if feature_generators is None:
        mu0 = np.zeros(10)
        mu1 = np.ones(10)
        sigma0 = np.ones((10, 10)) * 0.1  # np.ones takes a shape tuple
        sigma1 = np.ones((10, 10)) * 0.1
        fg0 = featgen.GaussianFeatureGen(mu0, sigma0)
        fg1 = featgen.GaussianFeatureGen(mu1, sigma1)
else:
fg0 = feature_generators[0]
fg1 = feature_generators[1] if len(feature_generators) > 1 else feature_generators[0]
graphs1 = []
graphs2 = []
#for (i1, i2) in zip(np.random.choice(n_range, num_graphs),
# np.random.choice(n_range, num_graphs)):
# for (j1, j2) in zip(np.random.choice(m_range, num_graphs),
# np.random.choice(m_range, num_graphs)):
graphs0 = gen_ba(n_range, m_range, num_graphs, fg0)
graphs1 = gen_ba(n_range, m_range, num_graphs, fg1)
graphs = []
for i in range(num_graphs):
G = nx.disjoint_union(graphs0[i], graphs1[i])
n0 = graphs0[i].number_of_nodes()
for j in range(n0):
if np.random.rand() < inter_prob:
target = np.random.choice(G.number_of_nodes() - n0) + n0
G.add_edge(j, target)
graphs.append(G)
return graphs
def gen_2hier(num_graphs, num_clusters, n, m_range, inter_prob1, inter_prob2, feat_gen):
''' Each community is a BA graph.
Args:
        inter_prob1: probability of one node connecting to any node in another community
            within the same large cluster.
        inter_prob2: probability of one node connecting to any node in another community
            across large clusters.
'''
graphs = []
for i in range(num_graphs):
clusters2 = []
for j in range(len(num_clusters)):
clusters = gen_er(range(n, n+1), 0.5, num_clusters[j], feat_gen[0])
G = nx.disjoint_union_all(clusters)
for u1 in range(G.number_of_nodes()):
if np.random.rand() < inter_prob1:
target = np.random.choice(G.number_of_nodes() - n)
# move one cluster after to make sure it's not an intra-cluster edge
if target // n >= u1 // n:
target += n
G.add_edge(u1, target)
clusters2.append(G)
G = nx.disjoint_union_all(clusters2)
cluster_sizes_cum = np.cumsum([cluster2.number_of_nodes() for cluster2 in clusters2])
curr_cluster = 0
for u1 in range(G.number_of_nodes()):
if u1 >= cluster_sizes_cum[curr_cluster]:
curr_cluster += 1
if np.random.rand() < inter_prob2:
target = np.random.choice(G.number_of_nodes() -
clusters2[curr_cluster].number_of_nodes())
# move one cluster after to make sure it's not an intra-cluster edge
if curr_cluster == 0 or target >= cluster_sizes_cum[curr_cluster - 1]:
target += cluster_sizes_cum[curr_cluster]
G.add_edge(u1, target)
graphs.append(G)
return graphs
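# Usage sketch (not part of the original file): generate a few synthetic graphs
# with the helpers above, assuming constant node features via featgen.
if __name__ == '__main__':
    fgs = [featgen.ConstFeatureGen(np.ones(10)), featgen.ConstFeatureGen(np.zeros(10))]
    two_comm = gen_2community_ba(range(20, 30), range(2, 4), num_graphs=3,
                                 inter_prob=0.05, feature_generators=fgs)
    # m_range is unused by gen_2hier, which builds ER clusters internally
    hier = gen_2hier(num_graphs=2, num_clusters=[2, 3], n=10, m_range=None,
                     inter_prob1=0.1, inter_prob2=0.02, feat_gen=[fgs[0]])
    print([g.number_of_nodes() for g in two_comm])
    print([g.number_of_nodes() for g in hier])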
import abc
import networkx as nx
import numpy as np
import random
class FeatureGen(metaclass=abc.ABCMeta):
@abc.abstractmethod
def gen_node_features(self, G):
pass
class ConstFeatureGen(FeatureGen):
def __init__(self, val):
self.val = val
def gen_node_features(self, G):
feat_dict = {i:{'feat': self.val} for i in G.nodes()}
nx.set_node_attributes(G, feat_dict)
class GaussianFeatureGen(FeatureGen):
def __init__(self, mu, sigma):
self.mu = mu
self.sigma = sigma
    def gen_node_features(self, G):
        feat = np.random.multivariate_normal(self.mu, self.sigma, G.number_of_nodes())
feat_dict = {i:{'feat': feat[i]} for i in range(feat.shape[0])}
nx.set_node_attributes(G, feat_dict)
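# Usage sketch (not part of the original file): attach 5-dim Gaussian features
# to a small graph.
if __name__ == '__main__':
    G = nx.karate_club_graph()
    gen = GaussianFeatureGen(np.zeros(5), np.eye(5) * 0.1)
    gen.gen_node_features(G)
    print(nx.get_node_attributes(G, 'feat')[0].shape)  # (5,)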
import networkx as nx
import numpy as np
import torch
import torch.utils.data
import util
class GraphSampler(torch.utils.data.Dataset):
''' Sample graphs and nodes in graph
'''
def __init__(self, G_list, features='default', normalize=True, assign_feat='default', max_num_nodes=0):
self.adj_all = []
self.len_all = []
self.feature_all = []
self.label_all = []
self.assign_feat_all = []
if max_num_nodes == 0:
self.max_num_nodes = max([G.number_of_nodes() for G in G_list])
else:
self.max_num_nodes = max_num_nodes
self.feat_dim = util.node_dict(G_list[0])[0]['feat'].shape[0]
for G in G_list:
adj = np.array(nx.to_numpy_matrix(G))
if normalize:
sqrt_deg = np.diag(1.0 / np.sqrt(np.sum(adj, axis=0, dtype=float).squeeze()))
adj = np.matmul(np.matmul(sqrt_deg, adj), sqrt_deg)
self.adj_all.append(adj)
self.len_all.append(G.number_of_nodes())
self.label_all.append(G.graph['label'])
# feat matrix: max_num_nodes x feat_dim
if features == 'default':
f = np.zeros((self.max_num_nodes, self.feat_dim), dtype=float)
for i,u in enumerate(G.nodes()):
f[i,:] = util.node_dict(G)[u]['feat']
self.feature_all.append(f)
elif features == 'id':
self.feature_all.append(np.identity(self.max_num_nodes))
elif features == 'deg-num':
degs = np.sum(np.array(adj), 1)
                degs = np.expand_dims(np.pad(degs, [0, self.max_num_nodes - G.number_of_nodes()],
                                             'constant'),
                                      axis=1)
self.feature_all.append(degs)
elif features == 'deg':
self.max_deg = 10
degs = np.sum(np.array(adj), 1).astype(int)
                degs[degs > self.max_deg] = self.max_deg
feat = np.zeros((len(degs), self.max_deg + 1))
feat[np.arange(len(degs)), degs] = 1
feat = np.pad(feat, ((0, self.max_num_nodes - G.number_of_nodes()), (0, 0)),
'constant', constant_values=0)
f = np.zeros((self.max_num_nodes, self.feat_dim), dtype=float)
for i,u in enumerate(util.node_iter(G)):
f[i,:] = util.node_dict(G)[u]['feat']
feat = np.concatenate((feat, f), axis=1)
self.feature_all.append(feat)
elif features == 'struct':
self.max_deg = 10
degs = np.sum(np.array(adj), 1).astype(int)
                degs[degs > self.max_deg] = self.max_deg
feat = np.zeros((len(degs), self.max_deg + 1))
feat[np.arange(len(degs)), degs] = 1
degs = np.pad(feat, ((0, self.max_num_nodes - G.number_of_nodes()), (0, 0)),
'constant', constant_values=0)
clusterings = np.array(list(nx.clustering(G).values()))
clusterings = np.expand_dims(np.pad(clusterings,
[0, self.max_num_nodes - G.number_of_nodes()],
'constant'),
axis=1)
g_feat = np.hstack([degs, clusterings])
if 'feat' in util.node_dict(G)[0]:
node_feats = np.array([util.node_dict(G)[i]['feat'] for i in range(G.number_of_nodes())])
node_feats = np.pad(node_feats, ((0, self.max_num_nodes - G.number_of_nodes()), (0, 0)),
'constant')
g_feat = np.hstack([g_feat, node_feats])
self.feature_all.append(g_feat)
if assign_feat == 'id':
self.assign_feat_all.append(
np.hstack((np.identity(self.max_num_nodes), self.feature_all[-1])) )
else:
self.assign_feat_all.append(self.feature_all[-1])
self.feat_dim = self.feature_all[0].shape[1]
self.assign_feat_dim = self.assign_feat_all[0].shape[1]
def __len__(self):
return len(self.adj_all)
def __getitem__(self, idx):
adj = self.adj_all[idx]
num_nodes = adj.shape[0]
adj_padded = np.zeros((self.max_num_nodes, self.max_num_nodes))
adj_padded[:num_nodes, :num_nodes] = adj
# use all nodes for aggregation (baseline)
return {'adj':adj_padded,
'feats':self.feature_all[idx].copy(),
'label':self.label_all[idx],
'num_nodes': num_nodes,
'assign_feats':self.assign_feat_all[idx].copy()}
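# Usage sketch (not part of the original file): wrap two toy graphs in
# GraphSampler and batch them. Assumes util.node_dict returns the graph's
# node-attribute mapping and a networkx old enough to provide nx.to_numpy_matrix.
if __name__ == '__main__':
    G_list = []
    for label in (0, 1):
        G = nx.erdos_renyi_graph(8, 0.3)
        nx.set_node_attributes(G, {u: {'feat': np.ones(4)} for u in G.nodes()})
        G.graph['label'] = label
        G_list.append(G)
    sampler = GraphSampler(G_list, features='default', normalize=False)
    loader = torch.utils.data.DataLoader(sampler, batch_size=2)
    batch = next(iter(loader))
    print(batch['adj'].shape, batch['feats'].shape)  # (2, 8, 8) and (2, 8, 4)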
import torch
import torch.nn as nn
from torch.nn import init
import numpy as np
class SupervisedGraphSage(nn.Module):
''' GraphSage embeddings
'''
def __init__(self, num_classes, enc):
super(SupervisedGraphSage, self).__init__()
self.enc = enc
self.xent = nn.CrossEntropyLoss()
self.weight = nn.Parameter(torch.FloatTensor(enc.embed_dim, num_classes))
        init.xavier_uniform_(self.weight)
def forward(self, nodes):
embeds = self.enc(nodes)
scores = embeds.mm(self.weight)
return scores
    def loss(self, nodes, labels):
        scores = self.forward(nodes)
        # CrossEntropyLoss expects raw logits, so no softmax here
        return self.xent(scores, labels.squeeze())
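# Usage sketch (not part of the original file): drive SupervisedGraphSage with
# a stand-in encoder that maps node ids to learnable embeddings.
if __name__ == '__main__':
    class DummyEnc(nn.Module):
        def __init__(self, embed_dim=16):
            super(DummyEnc, self).__init__()
            self.embed_dim = embed_dim
            self.embed = nn.Embedding(10, embed_dim)
        def forward(self, nodes):
            return self.embed(torch.LongTensor(nodes))
    model = SupervisedGraphSage(num_classes=3, enc=DummyEnc())
    labels = torch.LongTensor([[0], [1], [2]])
    print(model.loss([0, 1, 2], labels))  # scalar cross-entropy loss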
import networkx as nx
import numpy as np
import scipy as sc
import os
import re
import util
def read_graphfile(datadir, dataname, max_nodes=None):
''' Read data from https://ls11-www.cs.tu-dortmund.de/staff/morris/graphkerneldatasets
graph index starts with 1 in file
Returns:
List of networkx objects with graph and node labels
'''
prefix = os.path.join(datadir, dataname, dataname)
filename_graph_indic = prefix + '_graph_indicator.txt'
# index of graphs that a given node belongs to
graph_indic={}
with open(filename_graph_indic) as f: # no problem
i=1
for line in f:
line=line.strip("\n")
graph_indic[i]=int(line)
i+=1
filename_nodes=prefix + '_node_labels.txt'
node_labels=[]
try:
with open(filename_nodes) as f:
for line in f:
line=line.strip("\n")
node_labels+=[int(line) - 1]
num_unique_node_labels = max(node_labels) + 1
except IOError:
print('No node labels')
filename_node_attrs=prefix + '_node_attributes.txt'
node_attrs=[]
try:
with open(filename_node_attrs) as f:
for line in f:
                line = line.strip()
attrs = [float(attr) for attr in re.split("[,\s]+", line) if not attr == '']
node_attrs.append(np.array(attrs))
except IOError:
print('No node attributes')
label_has_zero = False
filename_graphs=prefix + '_graph_labels.txt'
graph_labels=[]
    # assume that all graph labels appear in the dataset
    # (the set of labels need not be consecutive)
label_vals = []
with open(filename_graphs) as f:
for line in f:
line=line.strip("\n")
val = int(line)
#if val == 0:
# label_has_zero = True
if val not in label_vals:
label_vals.append(val)
graph_labels.append(val)
#graph_labels = np.array(graph_labels)
label_map_to_int = {val: i for i, val in enumerate(label_vals)}
graph_labels = np.array([label_map_to_int[l] for l in graph_labels])
#if label_has_zero:
# graph_labels += 1
filename_adj=prefix + '_A.txt'
adj_list={i:[] for i in range(1,len(graph_labels)+1)}
index_graph={i:[] for i in range(1,len(graph_labels)+1)}
num_edges = 0
with open(filename_adj) as f:
for line in f:
line=line.strip("\n").split(",")
e0,e1=(int(line[0].strip(" ")),int(line[1].strip(" ")))
adj_list[graph_indic[e0]].append((e0,e1))
index_graph[graph_indic[e0]]+=[e0,e1]
num_edges += 1
for k in index_graph.keys():
index_graph[k]=[u-1 for u in set(index_graph[k])]
graphs=[]
for i in range(1,1+len(adj_list)):
# indexed from 1 here
G=nx.from_edgelist(adj_list[i])
if max_nodes is not None and G.number_of_nodes() > max_nodes:
continue
# add features and labels
G.graph['label'] = graph_labels[i-1]
for u in util.node_iter(G):
if len(node_labels) > 0:
node_label_one_hot = [0] * num_unique_node_labels
node_label = node_labels[u-1]
node_label_one_hot[node_label] = 1
util.node_dict(G)[u]['label'] = node_label_one_hot
if len(node_attrs) > 0:
util.node_dict(G)[u]['feat'] = node_attrs[u-1]
if len(node_attrs) > 0:
G.graph['feat_dim'] = node_attrs[0].shape[0]
# relabeling
mapping={}
it=0
for n in util.node_iter(G):
mapping[n]=it
it+=1
# indexed from 0
graphs.append(nx.relabel_nodes(G, mapping))
return graphs
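# Usage sketch (not part of the original file): load the DD benchmark, assuming
# it has been unzipped under ./data as described in the README.
if __name__ == '__main__':
    graphs = read_graphfile('data', 'DD', max_nodes=500)
    print(len(graphs), graphs[0].number_of_nodes(), graphs[0].graph['label'])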
import numpy as np

class UnionFind:
    ''' Minimal union-find over nodes 0..n-1, used by kruskal below. '''
    def __init__(self, n):
        self.parent = list(range(n))
    def find(self, u):
        while self.parent[u] != u:
            self.parent[u] = self.parent[self.parent[u]]  # path halving
            u = self.parent[u]
        return u
    def connected(self, u, v):
        return self.find(u) == self.find(v)
    def union(self, u, v):
        self.parent[self.find(u)] = self.find(v)

def partition(embeddings):
    ''' Compute a partition of embeddings, where each partition is pooled together.
    Args:
        embeddings: N-by-D matrix, where N is the number of node embeddings, and D
            is the embedding dimension.
    '''
    # pairwise similarities between embeddings
    dist = np.dot(embeddings, embeddings.T)
    # assumed completion: partition via an MST over the similarity graph
    return kruskal(dist)

def kruskal(adj):
    # initialize MST
    MST = set()
    edges = set()
    num_nodes = adj.shape[0]
    # collect all edges of the weighted adjacency matrix adj
    for j in range(num_nodes):
        for k in range(num_nodes):
            if adj[j][k] != 0 and (k, j) not in edges:
                edges.add((j, k))
    # sort all edges by weight from smallest to largest
    sorted_edges = sorted(edges, key=lambda e: adj[e[0]][e[1]])
    uf = UnionFind(num_nodes)
    for e in sorted_edges:
        u, v = e
        # if u, v already connected, skip this edge
        if uf.connected(u, v):
            continue
        # if not, connect them and add this edge to the MST
        uf.union(u, v)
        MST.add(e)
    return MST
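# Sketch (not part of the original file): Kruskal on a tiny weighted graph.
if __name__ == '__main__':
    adj = np.array([[0., 1., 4.],
                    [1., 0., 2.],
                    [4., 2., 0.]])
    print(kruskal(adj))  # {(0, 1), (1, 2)} (set order may vary)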
'''
author: lmx
date: 11/17/2020
'''
# imports
import networkx as nx
import numpy
import sys
from glob import glob
from load_data import read_graphfile
from matplotlib import pyplot as plt
import torch
import math
from collections import Counter
#----------CONSTANTS----------
DATA_DIR = './data/' # parent folder
DATA_NAME = 'DD'
############
NEW_DIR = DATA_DIR # new path
NAME = 'DDD' # new name
NUM = 10 # new graph number
############
# fetch graphs
# g = read_graphfile(DATA_DIR, DATA_NAME)
# assert NUM <= len(g),'exceed maximum graph number'
# nodes = list(map(lambda x: len(x.nodes), g))
# edges = list(map(lambda x: len(x.edges), g))
#-> PART 1
# fetch basic information
base = '{}{}/{}_graph_indicator.txt'
old_dir = base.format(DATA_DIR, DATA_NAME, DATA_NAME)
new_dir = base.format(NEW_DIR, NAME, NAME)
with open(old_dir, 'r') as f:
c = f.read()
a = c.split('\n')
cnter = Counter(a)
cnter = list(zip(cnter.values(), cnter.keys()))
cnter = cnter[:-1]
# sort by graph id (the dict key), then keep only the per-graph node counts
cnter = sorted(cnter, key=lambda x: int(x[1]))
cnter = [x[0] for x in cnter]
base = '{}{}/{}_A.txt'
old_dir = base.format(DATA_DIR, DATA_NAME, DATA_NAME)
new_dir = base.format(NEW_DIR, NAME, NAME)
with open(old_dir, 'r') as f:
c = f.read()
old_adj = c.split('\n')
def parse_edge(x):
    # parse one "u, v" line of the edge list; malformed lines become None
    parts = x.split(',')
    if len(parts) == 2:
        return [int(parts[0]), int(parts[1])]
    return None
old_adj = list(map(parse_edge, old_adj))
old_adj = old_adj[:-1]
NODE_NUM = sum(cnter[:NUM])
i = 0
for i in range(len(old_adj)):
tmp = old_adj[i]
e0, e1 = tmp[0], tmp[1]
if max([e0, e1]) > NODE_NUM:
break
EDGE_NUM = i
print('There are {} graphs, {} nodes and {} edges.'.format(NUM, NODE_NUM, EDGE_NUM))
#-> PART 2
# Adjacency matrix
base = '{}{}/{}_A.txt'
old_dir = base.format(DATA_DIR, DATA_NAME, DATA_NAME)
new_dir = base.format(NEW_DIR, NAME, NAME)
with open(old_dir, 'r') as f:
c = f.read()
old_adj = c.split('\n')
new_adj = old_adj[:EDGE_NUM]
new_adj = '\n'.join(new_adj)
with open(new_dir, 'w') as f:
f.write(new_adj)
print('Write Adjacency Matrix in {}'.format(new_dir))
#-> PART 3
# Graph indicator
base = '{}{}/{}_graph_indicator.txt'
old_dir = base.format(DATA_DIR, DATA_NAME, DATA_NAME)
new_dir = base.format(NEW_DIR, NAME, NAME)
with open(old_dir, 'r') as f:
c = f.read()
old_ind = c.split('\n')
new_ind = old_ind[:NODE_NUM]
new_ind = '\n'.join(new_ind)
with open(new_dir, 'w') as f:
f.write(new_ind)
print('Write Graph Indicator in {}'.format(new_dir))
#-> PART 4
# Graph labels
base = '{}{}/{}_graph_labels.txt'
old_dir = base.format(DATA_DIR, DATA_NAME, DATA_NAME)
new_dir = base.format(NEW_DIR, NAME, NAME)
with open(old_dir, 'r') as f:
c = f.read()
old_label = c.split('\n')
new_label = old_label[:NUM]
new_label = '\n'.join(new_label)
with open(new_dir, 'w') as f:
f.write(new_label)
print('Write Graph Labels in {}'.format(new_dir))
#-> PART 5
# Node labels
base = '{}{}/{}_node_labels.txt'
old_dir = base.format(DATA_DIR, DATA_NAME, DATA_NAME)
new_dir = base.format(NEW_DIR, NAME, NAME)
with open(old_dir, 'r') as f:
c = f.read()
old_label = c.split('\n')
new_label = old_label[:NODE_NUM]
new_label = '\n'.join(new_label)
with open(new_dir, 'w') as f:
f.write(new_label)
print('Write Node Labels in {}'.format(new_dir))
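#-> Verification sketch (not part of the original script): reload the truncated
# dataset with the repo's own reader; assumes the four files above were written
# into DATA_DIR/NAME/ and that read_graphfile tolerates the missing
# node-attribute file (it prints 'No node attributes' and continues).
g = read_graphfile(NEW_DIR, NAME)
print('Reloaded {} graphs.'.format(len(g)))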
import torch
import torch.nn as nn
from torch.nn import init
import torch.nn.functional as F
import numpy as np
class Set2Set(nn.Module):
def __init__(self, input_dim, hidden_dim, act_fn=nn.ReLU, num_layers=1):
'''
Args:
input_dim: input dim of Set2Set.
hidden_dim: the dim of set representation, which is also the INPUT dimension of
the LSTM in Set2Set.
This is a concatenation of weighted sum of embedding (dim input_dim), and the LSTM
hidden/output (dim: self.lstm_output_dim).
'''
super(Set2Set, self).__init__()
self.input_dim = input_dim
self.hidden_dim = hidden_dim
self.num_layers = num_layers
        if hidden_dim <= input_dim:
            print('ERROR: Set2Set hidden_dim should be larger than input_dim')
        # the hidden is a concatenation of weighted sum of embedding and LSTM output
        self.lstm_output_dim = hidden_dim - input_dim
        # note: the zero states built in forward() have size lstm_output_dim, which
        # matches this LSTM's hidden size (input_dim) only when hidden_dim == 2 * input_dim
        self.lstm = nn.LSTM(hidden_dim, input_dim, num_layers=num_layers, batch_first=True)
# convert back to dim of input_dim
self.pred = nn.Linear(hidden_dim, input_dim)
self.act = act_fn()
def forward(self, embedding):
'''
Args:
embedding: [batch_size x n x d] embedding matrix
Returns:
aggregated: [batch_size x d] vector representation of all embeddings
'''
batch_size = embedding.size()[0]
n = embedding.size()[1]
hidden = (torch.zeros(self.num_layers, batch_size, self.lstm_output_dim).cuda(),
torch.zeros(self.num_layers, batch_size, self.lstm_output_dim).cuda())
q_star = torch.zeros(batch_size, 1, self.hidden_dim).cuda()
for i in range(n):
# q: batch_size x 1 x input_dim
q, hidden = self.lstm(q_star, hidden)
# e: batch_size x n x 1
e = embedding @ torch.transpose(q, 1, 2)
a = nn.Softmax(dim=1)(e)
r = torch.sum(a * embedding, dim=1, keepdim=True)
q_star = torch.cat((q, r), dim=2)
q_star = torch.squeeze(q_star, dim=1)
out = self.act(self.pred(q_star))
return out
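# Usage sketch (not part of the original file): pool 7 16-dim embeddings per
# graph into one 16-dim vector. Assumes a CUDA device, since forward()
# hard-codes .cuda(), and hidden_dim == 2 * input_dim as noted above.
if __name__ == '__main__':
    s2s = Set2Set(input_dim=16, hidden_dim=32).cuda()
    emb = torch.rand(4, 7, 16).cuda()
    print(s2s(emb).shape)  # torch.Size([4, 16])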
import torch
import torch.nn as nn
from torch.nn import init
import torch.nn.functional as F
import numpy as np
from set2set import Set2Set
from encoders import SoftPoolingGcnEncoder
# GCN basic operation
class GraphConv(nn.Module):
def __init__(self, input_dim, output_dim, add_self=False, normalize_embedding=False,
dropout=0.0, bias=True):
super(GraphConv, self).__init__()
self.add_self = add_self
self.dropout = dropout
if dropout > 0.001:
self.dropout_layer = nn.Dropout(p=dropout)
self.normalize_embedding = normalize_embedding
self.input_dim = input_dim
self.output_dim = output_dim
        # xavier-init the weight; a bare torch.FloatTensor(...) is uninitialized memory
        self.weight = nn.Parameter(init.xavier_uniform_(torch.empty(input_dim, output_dim)).cuda())
        if bias:
            self.bias = nn.Parameter(torch.zeros(output_dim).cuda())
else:
self.bias = None
def forward(self, x, adj):
if self.dropout > 0.001:
x = self.dropout_layer(x)
y = torch.matmul(adj, x)
if self.add_self:
y += x
y = torch.matmul(y,self.weight)
if self.bias is not None:
y = y + self.bias
if self.normalize_embedding:
y = F.normalize(y, p=2, dim=2)
#print(y[0][0])
return y
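# Usage sketch (not part of the original file): one graph-convolution step on a
# random batch. Assumes a CUDA device, since the layer puts its parameters on
# the GPU.
if __name__ == '__main__':
    conv = GraphConv(input_dim=10, output_dim=16)
    x_demo = torch.rand(4, 25, 10).cuda()
    adj_demo = torch.rand(4, 25, 25).cuda()
    print(conv(x_demo, adj_demo).shape)  # torch.Size([4, 25, 16])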
class GConvModule(nn.Module):
def __init__(self, input_dim, hidden_dim, embedding_dim, label_dim, num_layers,
pred_hidden_dims=[], concat=True, bn=True, dropout=0.0, normalize=False, num_aggs=1,
args=None):
super(GConvModule, self).__init__()
add_self = not concat
self.conv_first = GraphConv(input_dim=input_dim, output_dim=hidden_dim, add_self=add_self,
normalize_embedding=normalize, bias=True)
self.conv_block = nn.ModuleList(
[GraphConv(input_dim=hidden_dim, output_dim=hidden_dim, add_self=add_self,
normalize_embedding=normalize, dropout=dropout, bias=True)
for i in range(num_layers-2)])
self.conv_last = GraphConv(input_dim=hidden_dim, output_dim=embedding_dim, add_self=add_self,
normalize_embedding=normalize, bias=True)
self.act = nn.ReLU()
self.bn = bn
self.num_aggs = num_aggs
self.concat = concat
if concat:
pred_input_dim = hidden_dim * (num_layers - 1) + embedding_dim
else:
pred_input_dim = embedding_dim
pred_input_dim = pred_input_dim * num_aggs
if len(pred_hidden_dims) == 0:
pred_model = nn.Linear(pred_input_dim, label_dim)
else:
pred_layers = []
for pred_dim in pred_hidden_dims:
pred_layers.append(nn.Linear(pred_input_dim, pred_dim))
pred_layers.append(self.act)
pred_input_dim = pred_dim
pred_layers.append(nn.Linear(pred_dim, label_dim))
pred_model = nn.Sequential(*pred_layers)
self.pred_block = pred_model
def apply_bn(self, x):
''' Batch normalization of 3D tensor x
'''
bn_module = nn.BatchNorm1d(x.size()[1]).cuda()
return bn_module(x)
def forward(self, x, adj, embedding_mask=None):
x = self.conv_first(x, adj)
x = self.act(x)
if self.bn:
x = self.apply_bn(x)
x_all = [x]
for i in range(len(self.conv_block)):
x = self.conv_block[i](x,adj)
x = self.act(x)
if self.bn:
x = self.apply_bn(x)
x_all.append(x)
x = self.conv_last(x,adj)
x_all.append(x)
# x_tensor: [batch_size x num_nodes x embedding]
x_tensor = torch.cat(x_all, dim=2)
if embedding_mask is not None:
x_tensor = x_tensor * embedding_mask
ypred = self.pred_block(x_tensor)
return ypred
if __name__=='__main__':
x = torch.rand(20, 100, 10).cuda()
adj = torch.rand(20, 100, 100).cuda()
#net = GConvModule(10, 10, 5, 3, num_layers=5)
net = SoftPoolingGcnEncoder(100, 10, 10, 5, 3, 5, 5, num_pooling=2,
assign_ratio=0.1, num_unpooling=2, unpool_ratio=0.1)
net = net.cuda()
a = net.forward(x, adj, range(20), True, True)
print(a)
'''
from load_data import read_graphfile
g = read_graphfile('./data', 'DND')
print(len(g))
print(g)
'''