# (Line-number gutter "1 ... 83" left over from the web-page extraction removed; it was not part of the script.)
"""Visualise a saved model's intermediate outputs with t-SNE.

The script loads a pickled PyTorch model, runs it over the AliExpress dataset
stored in ``tsne.csv`` and projects the two collected groups of vectors to 2-D.
"""
import random
from time import time

import matplotlib.pyplot as plt
import numpy as np
import torch
import tqdm
from sklearn import datasets
from sklearn.decomposition import PCA
from sklearn.manifold import TSNE
from torch.utils.data import DataLoader

from datasets.aliexpress import AliExpressDataset

# Batches are moved to this device in main(); fall back to the CPU when no GPU
# is available instead of failing on the first ``.to(device)``.
device = 'cuda:0' if torch.cuda.is_available() else 'cpu'

# NOTE(security): torch.load unpickles arbitrary Python objects -- only load
# checkpoints that come from a trusted source.
# NOTE(review): PyTorch >= 2.6 defaults to weights_only=True, which rejects
# whole-module checkpoints such as this one appears to be (``.eval()`` is
# called on the result); pass weights_only=False there -- confirm against the
# installed version.
# map_location keeps the model on the same device the inputs are sent to.
model = torch.load('t-sne模型文件/AliExpress_US_sharedbottom.pt', map_location=device)
model.eval()

train_dataset = AliExpressDataset('tsne.csv')

# Seed every RNG the script may touch so repeated runs are comparable.
seed = 2022
random.seed(seed)
np.random.seed(seed)
torch.manual_seed(seed)
torch.cuda.manual_seed(seed)
torch.cuda.manual_seed_all(seed)

# One sample per batch, in file order (shuffle disabled).
train_data_loader = DataLoader(train_dataset, batch_size=1, num_workers=4, shuffle=False)
def plot_embedding(data, label, title):
    """Scatter-plot the first two columns of an embedding, coloured by label.

    Args:
        data: 2-D array of points; each column is min-max scaled to [0, 1]
            before columns 0 and 1 are plotted.
        label: Sequence of integer labels (0 -> red, 1 -> blue), one per row
            of ``data``.
        title: Title placed above the plot.

    Returns:
        The newly created matplotlib figure.
    """
    x_min, x_max = np.min(data, 0), np.max(data, 0)
    span = x_max - x_min
    # A constant column has zero span; dividing by it would turn the whole
    # column into NaN and silently drop every point from the plot.
    span = np.where(span == 0, 1, span)
    data = (data - x_min) / span

    fig = plt.figure()
    ax = fig.add_subplot(111)
    colors = ['r', 'b']
    # One scatter call for all points instead of one artist per sample.
    point_colors = [colors[lab] for lab in label[:data.shape[0]]]
    ax.scatter(data[:, 0], data[:, 1], c=point_colors, alpha=0.5)
    # The scaled coordinates carry no meaning, so hide the tick marks.
    ax.set_xticks([])
    ax.set_yticks([])
    ax.set_title(title)
    return fig
def main():
    """Run the model over the data loader and display a t-SNE plot.

    For every batch the model's third return value (``cross``) is read;
    rows of ``cross[0]`` are collected with label 0 and rows of ``cross[1]``
    with label 1. The combined rows are embedded into two dimensions with
    t-SNE and shown via ``plot_embedding``.
    """
    data1 = []
    data2 = []
    with torch.no_grad():
        for categorical_fields, numerical_fields, labels in tqdm.tqdm(
                train_data_loader, smoothing=0, mininterval=1.0):
            categorical_fields = categorical_fields.to(device)
            numerical_fields = numerical_fields.to(device)
            # Only the third output is needed here; the targets in ``labels``
            # are not used for the visualisation.
            _, _, cross = model(categorical_fields, numerical_fields)
            data1.extend(cross[0].cpu().numpy().tolist())
            data2.extend(cross[1].cpu().numpy().tolist())

    data = np.concatenate((data1, data2), axis=0)
    # Label encodes which of the two outputs a row came from, not the target.
    label = len(data1) * [0] + len(data2) * [1]
    n_samples, n_features = data.shape

    print(data.shape)
    print('data.shape', data.shape)
    print('label', label)
    print('label中数字有', len(set(label)), '个不同的数字')
    print('data有', n_samples, '个样本')
    print('每个样本', n_features, '维数据')

    print('Computing t-SNE embedding')
    tsne = TSNE(n_components=2, init='pca', random_state=0)
    t0 = time()
    result = tsne.fit_transform(data)
    print('result.shape', result.shape)

    plot_embedding(result, label,
                   't-SNE embedding of the digits (time %.2fs)' % (time() - t0))
    # pyplot.show() takes no positional arguments in current matplotlib
    # (``show(*, block=None)``); passing the figure raised TypeError. Older
    # releases treated the figure as a truthy ``block``, so block explicitly.
    plt.show(block=True)
# Run the visualisation only when the file is executed as a script, not when
# it is imported.
if __name__ == '__main__':
    main()