Tikiten / encode_onehot.py
Last active April 3, 2023 11:56
[encode_onehot] #python #deeplearning
import numpy as np

# Path to the Cora dataset in the pygcn layout.
path = "D:/Spyder/pygcn-master/data/cora/"
dataset = "cora"

# Each row of cora.content is: <paper id> <binary word features> <class label>.
idx_features_labels = np.genfromtxt("{}{}.content".format(path, dataset),
                                    dtype=np.dtype(str))
raw_labels = idx_features_labels[:, -1]   # last column: the class label of each paper
classes = set(raw_labels)                 # the set of distinct class names
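The gist preview ends here; a minimal sketch of how classes is typically turned into one-hot labels in pygcn-style code (the classes_dict and labels_onehot names are my own):

# Map each class name to a row of the identity matrix, i.e. its one-hot vector.
classes_dict = {c: np.identity(len(classes))[i, :] for i, c in enumerate(classes)}
labels_onehot = np.array(list(map(classes_dict.get, raw_labels)), dtype=np.int32)
# labels_onehot has shape (num_papers, num_classes) with exactly one 1 per row.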
Tikiten / trans_to_continuous_number.py
Last active April 3, 2023 11:56
[compress discontinuous IDs into a contiguous integer range] #python #map-discontinuous-ids-to-a-contiguous-range
# idx holds the original sample (paper) IDs; they are discontinuous, 2708 in total.
idx = np.array(idx_features_labels[:, 0], dtype=np.int32)
# Build a dict keyed by paper ID so the original IDs are compressed into the range 0-2707.
# This maps each original sample ID to a consecutive index (0, 1, 2, ..., 2707).
# Sample IDs are unique (one per sample, no duplicates), so enumerating them from 0 is enough.
idx_map = {j: i for i, j in enumerate(idx)}
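A toy illustration of the mapping (the IDs below are made up, not real Cora paper IDs):

toy_idx = [35, 40, 114, 12]                       # made-up, discontinuous IDs
toy_map = {j: i for i, j in enumerate(toy_idx)}   # {35: 0, 40: 1, 114: 2, 12: 3}
print(toy_map[114])                               # 2 -> compressed index of "paper" 114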
Tikiten / build_adj.py
Created August 5, 2020 07:58
[construct graph and build adjacency matrix] #python #pytorch #GNN #GCN #build-graph-and-adjacency-matrix
# Read the graph edges (the citation relations between papers).
# cora.cites has 5429 lines; each line holds two paper IDs: the paper with the first ID was
# written first, and the paper with the second ID cites the paper with the first ID.
# Note: these are still the original sample IDs, not yet mapped to the preprocessed indices.
edges_unordered = np.genfromtxt("{}{}.cites".format(path, dataset),
                                dtype=np.int32)
# Apply the paper-ID mapping once.
# The raw paper IDs themselves carry no meaning, so they are re-numbered from 0 and the
# original IDs are replaced, i.e. the scattered original IDs become the contiguous indices 0-2707.
# edges_unordered has shape (5429, 2); flattening it gives (10858,); .get yields a callable.
# map(func, iterable, ...) needs a function: without .get, idx_map is just a dict and
# cannot be passed to map.
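The preview stops before the remapping line itself; a sketch of how pygcn-style code usually completes this step (sp is scipy.sparse):

import scipy.sparse as sp

# Remap both endpoints of every edge through idx_map, then restore the (num_edges, 2) shape.
edges = np.array(list(map(idx_map.get, edges_unordered.flatten())),
                 dtype=np.int32).reshape(edges_unordered.shape)

# Sparse adjacency matrix with a 1 for every directed edge; idx.shape[0] == 2708 nodes.
adj = sp.coo_matrix((np.ones(edges.shape[0]), (edges[:, 0], edges[:, 1])),
                    shape=(idx.shape[0], idx.shape[0]), dtype=np.float32)

# Symmetrize so that every edge is present in both directions.
adj = adj + adj.T.multiply(adj.T > adj) - adj.multiply(adj.T > adj)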
Tikiten / normalize.py
Last active April 3, 2023 11:56
[normalization function] #python #pytorch #normalize
'''Normalization functions.
This covers the D^-1 A (row-normalized) form and the symmetric D^-1/2 A D^-1/2 form.'''
import numpy as np
import scipy.sparse as sp

def normalize(mx):
    """Row-normalize a sparse matrix (the D^-1 A form)."""
    rowsum = np.array(mx.sum(1))            # shape (2708, 1): sum of each row
    r_inv = np.power(rowsum, -1).flatten()  # shape (2708,): a flat 1-D array, not a column vector
    r_inv[np.isinf(r_inv)] = 0.             # rows that sum to 0 give inf; reset those to 0
    # sp.diags builds a sparse diagonal matrix from r_inv, i.e. D^-1.
    r_mat_inv = sp.diags(r_inv)
    mx = r_mat_inv.dot(mx)                  # sparse matrix product: D^-1 @ mx
    return mx
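The preview only shows the row-normalized D^-1 A case. A sketch of the symmetric D^-1/2 A D^-1/2 variant mentioned in the docstring (the name normalize_adj is my own; it is usually applied to the adjacency matrix plus the identity):

def normalize_adj(adj):
    """Symmetrically normalize a sparse adjacency matrix: D^-1/2 A D^-1/2."""
    rowsum = np.array(adj.sum(1)).flatten()
    d_inv_sqrt = np.power(rowsum, -0.5)
    d_inv_sqrt[np.isinf(d_inv_sqrt)] = 0.
    d_mat_inv_sqrt = sp.diags(d_inv_sqrt)
    return d_mat_inv_sqrt.dot(adj).dot(d_mat_inv_sqrt)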
Tikiten / accuracy.py
Last active April 3, 2023 11:56
[accuracy] #python #compute-accuracy
'''Compute accuracy.
labels is passed in as:
[4 2 0 ... 1 6 4]
<class 'numpy.ndarray'>
shape (2708,)'''
def accuracy(output, labels):
    '''output is a tensor; tensor.max(1)[1] returns the indices of the row-wise maxima.
    >>> a = torch.arange(6).view(3, 2)
    >>> a
    tensor([[0, 1],
            [2, 3],
            [4, 5]])
    >>> a.max(1)[1]
    tensor([1, 1, 1])
    '''
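    # The gist preview cuts off here; a minimal sketch of the usual pygcn-style body
    # (assumes output holds per-class scores of shape (N, C) and labels holds class indices):
    preds = output.max(1)[1].type_as(labels)   # predicted class index for each sample
    correct = preds.eq(labels).double().sum()  # number of correct predictions
    return correct / len(labels)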
Tikiten / sparse_mx_to_torch_sparse_tensor.py
Created August 5, 2020 08:13
[sparse_mx_to_torch_sparse_tensor] #python #scipy-sparse-matrix-to-torch-sparse-tensor
'''Convert a scipy sparse matrix to a torch sparse tensor.
>>> i = torch.LongTensor([[0, 1, 1],
                          [2, 0, 2]])
>>> v = torch.FloatTensor([3, 4, 5])  # e.g. the value at row 0, col 2 is 3
>>> torch.sparse.FloatTensor(i, v, torch.Size([2, 3])).to_dense()
 0  0  3
 4  0  5
[torch.FloatTensor of size 2x3]
'''
def sparse_mx_to_torch_sparse_tensor(sparse_mx):
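    # Preview ends at the signature; a sketch of the standard pygcn-style body
    # (assumes numpy as np and torch are imported):
    sparse_mx = sparse_mx.tocoo().astype(np.float32)   # COO gives aligned row/col/data arrays
    indices = torch.from_numpy(
        np.vstack((sparse_mx.row, sparse_mx.col)).astype(np.int64))
    values = torch.from_numpy(sparse_mx.data)
    shape = torch.Size(sparse_mx.shape)
    return torch.sparse.FloatTensor(indices, values, shape)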
Tikiten / S_data_with_t-SNE.py
Last active April 3, 2023 11:56
[S_data_with_t-SNE] #python #t-SNE-visualization
# coding=utf-8
"""An illustration of several dimensionality-reduction methods on the S-curve dataset."""
from time import time
import matplotlib.pyplot as plt
from mpl_toolkits.mplot3d import Axes3D
from matplotlib.ticker import NullFormatter
from sklearn import manifold, datasets
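The preview stops at the imports. A minimal sketch of what a script with these imports typically goes on to do; the point count, TSNE parameters, and plot layout below are illustrative assumptions, not taken from the gist:

n_points = 1000
X, color = datasets.make_s_curve(n_points, random_state=0)   # 3-D S-curve points

# Plot the original 3-D data next to its 2-D t-SNE embedding.
fig = plt.figure(figsize=(10, 5))
ax = fig.add_subplot(121, projection='3d')
ax.scatter(X[:, 0], X[:, 1], X[:, 2], c=color, cmap=plt.cm.Spectral)
ax.set_title("Original S-curve")

t0 = time()
Y = manifold.TSNE(n_components=2, init='pca', random_state=0).fit_transform(X)
ax2 = fig.add_subplot(122)
ax2.scatter(Y[:, 0], Y[:, 1], c=color, cmap=plt.cm.Spectral)
ax2.set_title("t-SNE (%.2f s)" % (time() - t0))
ax2.xaxis.set_major_formatter(NullFormatter())
ax2.yaxis.set_major_formatter(NullFormatter())
plt.show()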
Tikiten / MNIST_with_t-SNE.py
Last active April 3, 2023 11:56
[MNIST_with_t-SNE] #python #t-SNE-visualization-of-MNIST
# coding=utf-8
"""Visualize handwritten digits with t-SNE."""
from time import time
import numpy as np
import matplotlib.pyplot as plt
from sklearn import datasets
from sklearn.manifold import TSNE
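Again the preview ends at the imports. A sketch of a typical continuation; sklearn's small load_digits set stands in for MNIST here, and the parameter values are illustrative assumptions:

digits = datasets.load_digits(n_class=10)      # 8x8 handwritten digits
X, y = digits.data, digits.target              # X: (1797, 64), y: (1797,)

t0 = time()
X_tsne = TSNE(n_components=2, init='pca', random_state=0).fit_transform(X)

# Scale to [0, 1] and scatter, coloring each point by its digit label.
x_min, x_max = X_tsne.min(0), X_tsne.max(0)
X_norm = (X_tsne - x_min) / (x_max - x_min)
plt.figure(figsize=(8, 8))
plt.scatter(X_norm[:, 0], X_norm[:, 1], c=y, cmap=plt.cm.tab10, s=10)
plt.title("t-SNE of digits (%.2f s)" % (time() - t0))
plt.show()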
Tikiten / class_name.py
Created August 6, 2020 08:18
[class_name] #python #add-readable-print-output-to-a-class-or-its-instances
# self.__class__ points from the instance to its class, and __name__ then gives the class
# name attribute (not the module-level __name__, whose value is usually '__main__').
def __repr__(self):
    return self.__class__.__name__ + ' (' + str(self.in_features) + ' -> ' + str(self.out_features) + ')'
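A quick usage sketch; ToyLayer is a made-up class with the in_features/out_features attributes the method expects:

class ToyLayer:
    def __init__(self, in_features, out_features):
        self.in_features = in_features
        self.out_features = out_features
    def __repr__(self):
        return self.__class__.__name__ + ' (' + str(self.in_features) + ' -> ' + str(self.out_features) + ')'

print(ToyLayer(1433, 16))   # ToyLayer (1433 -> 16)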
Tikiten / adjust_learning_rate.py
Last active April 3, 2023 11:56
[adjust_learning_rate] #python #pytorch #adjust-lr-at-specific-iterations #learning-rate #dynamic-lr-schedule
'''# SSD300 CONFIGS
voc = {
    'num_classes': 13,
    'lr_steps': (80000, 100000, 120000),
    'max_iter': 120000,
    'feature_maps': [38, 19, 10, 5, 3, 1],
    'min_dim': 300,
    'steps': [8, 16, 32, 64, 100, 300],
    'min_sizes': [30, 60, 111, 162, 213, 264],
    'max_sizes': [60, 111, 162, 213, 264, 315],
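The preview ends inside the config. A sketch of the SSD-style step-decay function this gist is named after; the argument names and the use of gamma ** step are the common pattern, assumed here rather than taken from the preview:

def adjust_learning_rate(optimizer, base_lr, gamma, step):
    """Decay the learning rate to base_lr * gamma^step.
    Typically called when the iteration count reaches one of voc['lr_steps'];
    step is the index of the milestone that was just reached."""
    lr = base_lr * (gamma ** step)
    for param_group in optimizer.param_groups:
        param_group['lr'] = lr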