76 lines
1.8 KiB
Python
Executable File
76 lines
1.8 KiB
Python
Executable File
#!/bin/python3
|
|
import sys
|
|
import json
|
|
import numpy as np
|
|
|
|
with open("videospiele.json") as f:
|
|
lines = [ line.strip() for line in f.readlines()]
|
|
print(lines[0])
|
|
lines = " ".join(lines)
|
|
adjlist = json.loads(lines)
|
|
|
|
def colorize(word):
|
|
return f"\u001b[0;33m{word}\u001b[0m"
|
|
|
|
def softmax(distr):
|
|
temperature = 1.5
|
|
def f(x):
|
|
return np.exp(x / temperature)
|
|
for i in range(len(distr)):
|
|
distr[i] = f(distr[i])
|
|
Σ = sum(distr)
|
|
distr /= Σ
|
|
return distr
|
|
|
|
indices = set([ *adjlist.keys(), *sum(adjlist.values(), [])])
|
|
id_to_link = dict(enumerate(indices))
|
|
link_to_id = dict([ [word, idx] for [idx, word] in enumerate(indices)])
|
|
|
|
N = len(indices)
|
|
from numpy import matrix as M, array
|
|
|
|
print("allocating array")
|
|
m = array([0])
|
|
m.resize(N, N)
|
|
M = array([0.0])
|
|
M.resize(N, N)
|
|
|
|
print("processing bigrams")
|
|
for i in indices:
|
|
if i not in adjlist: continue
|
|
for j in adjlist[i]:
|
|
# i -> j
|
|
iid, jid = link_to_id[i], link_to_id[j]
|
|
#print(f"{i} {iid} -> {j} {jid}")
|
|
m[iid, jid] = 1
|
|
|
|
print("normalizing matrix")
|
|
d = 0.85
|
|
uniform = np.full(N, 1/N)
|
|
|
|
for i in range(N):
|
|
row = m[i]
|
|
Σ = sum(row)
|
|
if Σ == 0:
|
|
M[i] = uniform
|
|
else:
|
|
M[i] = (1-d)*uniform + d * m[i] * (1.0/Σ)
|
|
|
|
print("Done preparing")
|
|
|
|
from numpy import linalg as LA
|
|
print("Computing eigenvalues")
|
|
ews, evs = LA.eig(M.transpose())
|
|
ews_1 = [ (i, v) for (i, v) in enumerate(ews) if abs(v - 1) < 0.00001 ]
|
|
print(f"Found {len(ews_1)} eigenvectors to eigenvalue 1")
|
|
ew_idx = ews_1[0][0]
|
|
ev = evs[:,ew_idx]
|
|
print("Normalizing eigenvalue to a PDF")
|
|
ev = np.real(ev / sum(ev))
|
|
|
|
print("Stored eigenvector for eigenvalue 1 in `ev`")
|
|
print("Verify that `ev @ M == ev`")
|
|
|
|
print("All pages with P > 1/N")
|
|
hits = sorted([ (f"{v*100:.2f}%", id_to_link[i] ) for (i, v) in enumerate(ev) if v > 1/N ])
|