Update scripts

This commit is contained in:
Dominic Zimmer
2023-06-16 12:32:22 +02:00
parent 6b99136661
commit 4d10148e49
4 changed files with 36 additions and 87 deletions

View File

@@ -45,6 +45,10 @@ while i < len(lines):
# Build one corpus string from the parsed lines and derive the vocabulary.
print("Parsing lines")
# truelines is built earlier in the script (outside this hunk); its items are
# joined with single spaces into one string.
corpus = " ".join(truelines)
# Disabled dump of the corpus to hp_parsed.txt, kept for reference:
#with open("hp_parsed.txt", "w") as f:
# f.writelines(corpus)
# Split on a literal single space (not on arbitrary whitespace), so any run of
# consecutive spaces in the corpus produces empty-string tokens.
tokens = corpus.split(" ")
# Distinct tokens, i.e. the vocabulary.
words = set(tokens)
# Map integer id -> word. The ids follow the set's iteration order, which is
# arbitrary. NOTE(review): with string hash randomization the ids may differ
# between runs; confirm nothing persists them across runs.
id_to_word = dict(enumerate(words))
@@ -55,8 +59,10 @@ from numpy import matrix as M, array
# Earlier nested-list allocation, disabled in favour of the arrays below:
#m = M( [ [ 0 for _ in range(len(words)) ] for _ in range(len(words)) ] )
print("allocating array")
# Counts of all successor words i→j.
# Start from a one-element integer array and grow it in place to N x N;
# ndarray.resize fills the added entries with zeros.
# N is defined outside this hunk; presumably the vocabulary size, TODO confirm.
m = array([0])
m.resize(N, N)
# The probabilities: same in-place allocation, but with a float dtype.
# NOTE(review): this rebinds M, which the import shown in the hunk header uses
# as the alias for numpy.matrix; confirm the alias is not needed after this point.
M = array([0.0])
M.resize(N, N)

1
hp/hp_parsed.txt Normal file

File diff suppressed because one or more lines are too long