#!/bin/python3
"""Rank the nodes of the link graph in ``videospiele.json`` PageRank-style.

The JSON file is read as a mapping ``node -> list of linked nodes``. From it
the script builds a row-stochastic transition matrix with damping factor
``d = 0.85`` and takes the left eigenvector for eigenvalue 1, normalised so
that its entries sum to 1.

The pipeline only runs when the file is executed as a script (for example
``python3 -i``); the results are left in module-level names such as ``ev``,
``M`` and ``hits``. Importing the file only defines the helpers.
"""
import sys
import json
from itertools import chain

import numpy as np
from numpy import array  # not used below; retained so the name stays in scope
from numpy import linalg as LA


def colorize(word):
    """Return *word* wrapped in the ANSI escape codes ``0;33`` ... ``0``."""
    return f"\u001b[0;33m{word}\u001b[0m"


def softmax(distr, temperature=1.5):
    """Return the softmax of *distr* at the given *temperature*.

    Args:
        distr: 1-D sequence or array of real numbers. It is not modified.
        temperature: Divisor applied to every value before exponentiation.

    Returns:
        A new float ndarray of the same length whose entries sum to 1
        (an empty array for empty input).
    """
    scaled = np.asarray(distr, dtype=float) / temperature
    if scaled.size == 0:
        return scaled
    # Shifting by the maximum leaves the result unchanged mathematically but
    # keeps np.exp from overflowing on large inputs.
    weights = np.exp(scaled - scaled.max())
    return weights / weights.sum()


def load_adjacency(path="videospiele.json"):
    """Read the adjacency mapping from the JSON file at *path*.

    The first (stripped) line of the file is echoed to stdout, then all
    stripped lines are joined with single spaces and parsed as one JSON
    document.

    Raises:
        OSError: If the file cannot be opened.
        json.JSONDecodeError: If the content is not valid JSON.
    """
    with open(path, encoding="utf-8") as f:
        lines = [line.strip() for line in f]
    if lines:
        print(lines[0])
    return json.loads(" ".join(lines))


def index_links(adjlist):
    """Assign a dense integer id to every node mentioned in *adjlist*.

    Nodes are numbered in first-seen order (all keys first, then every linked
    node), so the numbering is reproducible from one run to the next.

    Returns:
        A pair ``(id_to_link, link_to_id)`` of mutually inverse dicts.
    """
    nodes = dict.fromkeys(
        chain(adjlist, chain.from_iterable(adjlist.values()))
    )
    id_to_link = dict(enumerate(nodes))
    link_to_id = {link: idx for idx, link in id_to_link.items()}
    return id_to_link, link_to_id


def mark_links(m, adjlist, link_to_id):
    """Set ``m[i, j] = 1`` for every edge ``i -> j`` in *adjlist*.

    *m* is modified in place and also returned.
    """
    for source, targets in adjlist.items():
        row = link_to_id[source]
        for target in targets:
            m[row, link_to_id[target]] = 1
    return m


def build_transition_matrix(m, d=0.85):
    """Turn the 0/1 adjacency matrix *m* into a row-stochastic matrix.

    Rows without any outgoing link become the uniform distribution; every
    other row is ``(1 - d) * uniform + d * row / row_sum``.

    Args:
        m: Square adjacency matrix.
        d: Damping factor.

    Returns:
        A new float ndarray with the same shape as *m*.

    Raises:
        ValueError: If *m* has no rows.
    """
    n = m.shape[0]
    if n == 0:
        raise ValueError("cannot build a transition matrix for an empty graph")
    uniform = np.full(n, 1 / n)
    out_degree = m.sum(axis=1)
    linked = out_degree != 0

    transition = np.empty((n, n), dtype=float)
    transition[~linked] = uniform
    scale = 1.0 / out_degree[linked]
    transition[linked] = (1 - d) * uniform + d * m[linked] * scale[:, None]
    return transition


def unit_eigenvalues(ews, tol=0.00001):
    """Return ``(index, value)`` for each eigenvalue within *tol* of 1."""
    return [(i, v) for (i, v) in enumerate(ews) if abs(v - 1) < tol]


def normalize_to_pdf(vec):
    """Scale *vec* so its entries sum to 1 and return the real part."""
    return np.real(vec / vec.sum())


def pages_above_uniform(ev, id_to_link):
    """List the nodes whose probability exceeds the uniform value ``1/N``.

    Args:
        ev: Probability per node id; ``N`` is ``len(ev)``.
        id_to_link: Mapping from node id to node name.

    Returns:
        ``(percentage_string, name)`` tuples in ascending order of percentage,
        ties broken by name.
    """
    threshold = 1 / len(ev)
    hits = [
        (f"{v*100:.2f}%", id_to_link[i])
        for (i, v) in enumerate(ev)
        if v > threshold
    ]
    # Order by the numeric value of the formatted percentage; comparing the
    # strings themselves would place "10.00%" before "9.00%".
    hits.sort(key=lambda hit: (float(hit[0][:-1]), hit[1]))
    return hits


if __name__ == "__main__":
    adjlist = load_adjacency("videospiele.json")

    id_to_link, link_to_id = index_links(adjlist)
    indices = set(link_to_id)
    N = len(indices)
    if N == 0:
        sys.exit("videospiele.json contains no nodes; nothing to rank")

    print("allocating array")
    m = np.zeros((N, N), dtype=int)

    print("processing bigrams")
    mark_links(m, adjlist, link_to_id)

    print("normalizing matrix")
    d = 0.85
    uniform = np.full(N, 1 / N)
    M = build_transition_matrix(m, d)

    print("Done preparing")

    print("Computing eigenvalues")
    # Left eigenvectors of M are the (right) eigenvectors of its transpose.
    ews, evs = LA.eig(M.transpose())
    ews_1 = unit_eigenvalues(ews)
    print(f"Found {len(ews_1)} eigenvectors to eigenvalue 1")
    if not ews_1:
        sys.exit("no eigenvalue within tolerance of 1; cannot rank nodes")

    ew_idx = ews_1[0][0]
    ev = evs[:, ew_idx]

    print("Normalizing eigenvalue to a PDF")
    ev = normalize_to_pdf(ev)

    print("Stored eigenvector for eigenvalue 1 in `ev`")
    print("Verify that `ev @ M == ev`")

    print("All pages with P > 1/N")
    # The list is only stored, not printed.
    hits = pages_above_uniform(ev, id_to_link)