Update scripts

This commit is contained in:
Dominic Zimmer 2023-06-16 12:32:22 +02:00
parent 6b99136661
commit 4d10148e49
4 changed files with 36 additions and 87 deletions

View File

@ -45,6 +45,10 @@ while i < len(lines):
print("Parsing lines")
corpus = " ".join(truelines)
#with open("hp_parsed.txt", "w") as f:
# f.writelines(corpus)
tokens = corpus.split(" ")
words = set(tokens)
id_to_word = dict(enumerate(words))
@ -55,8 +59,10 @@ from numpy import matrix as M, array
#m = M( [ [ 0 for _ in range(len(words)) ] for _ in range(len(words)) ] )
print("allocating array")
# ich zähle alle folgeworte i→j
m = array([0])
m.resize(N, N)
# die wahrscheinlichkeiten
M = array([0.0])
M.resize(N, N)

1
hp/hp_parsed.txt Normal file

File diff suppressed because one or more lines are too long

View File

@ -15218,12 +15218,12 @@ body[data-format='mobile'] .jp-OutputArea-child .jp-OutputArea-output {
</div>
</div>
</div>
</div><div id="cell-id=f8a3a4d9" class="jp-Cell jp-CodeCell jp-Notebook-cell ">
</div><div id="cell-id=f8a3a4d9" class="jp-Cell jp-CodeCell jp-Notebook-cell jp-mod-noOutputs ">
<div class="jp-Cell-inputWrapper">
<div class="jp-Collapser jp-InputCollapser jp-Cell-inputCollapser">
</div>
<div class="jp-InputArea jp-Cell-inputArea">
<div class="jp-InputPrompt jp-InputArea-prompt">In&nbsp;[11]:</div>
<div class="jp-InputPrompt jp-InputArea-prompt">In&nbsp;[7]:</div>
<div class="jp-CodeMirrorEditor jp-Editor jp-InputArea-editor" data-type="inline">
<div class="CodeMirror cm-s-jupyter">
<div class=" highlight hl-ipython3"><pre><span></span><span class="kn">import</span> <span class="nn">numpy</span> <span class="k">as</span> <span class="nn">np</span>
@ -15235,24 +15235,6 @@ body[data-format='mobile'] .jp-OutputArea-child .jp-OutputArea-output {
<span class="n">N</span> <span class="o">=</span> <span class="mi">1_000_000</span> <span class="c1"># Simulationen</span>
<span class="c1"># Los geht&#39;s!</span>
<span class="n">position</span> <span class="o">=</span> <span class="mi">0</span> <span class="c1"># 0 : links, 1: rechts</span>
<span class="n">counter</span> <span class="o">=</span> <span class="p">[</span> <span class="mi">1</span><span class="p">,</span> <span class="mi">0</span> <span class="p">]</span> <span class="c1"># anzahl besuche links/rechts</span>
<span class="k">for</span> <span class="n">_</span> <span class="ow">in</span> <span class="nb">range</span><span class="p">(</span><span class="n">N</span><span class="p">):</span>
<span class="k">if</span> <span class="n">position</span> <span class="o">==</span> <span class="mi">0</span><span class="p">:</span>
<span class="k">if</span> <span class="n">uniform</span><span class="p">()</span> <span class="o">&lt;</span> <span class="n">p</span><span class="p">:</span> <span class="c1"># [0.0, 0.1, ..., 1.0] &lt; 0.6 ??</span>
<span class="n">counter</span><span class="p">[</span><span class="mi">1</span><span class="p">]</span> <span class="o">+=</span> <span class="mi">1</span>
<span class="n">position</span> <span class="o">=</span> <span class="mi">1</span> <span class="o">-</span> <span class="n">position</span>
<span class="k">else</span><span class="p">:</span>
<span class="n">counter</span><span class="p">[</span><span class="mi">0</span><span class="p">]</span> <span class="o">+=</span> <span class="mi">1</span>
<span class="k">else</span><span class="p">:</span>
<span class="k">if</span> <span class="n">uniform</span><span class="p">()</span> <span class="o">&lt;</span> <span class="n">q</span><span class="p">:</span> <span class="c1"># [0.0, 0.1, ..., 1.0] &lt; 0.2 ??</span>
<span class="n">counter</span><span class="p">[</span><span class="mi">0</span><span class="p">]</span> <span class="o">+=</span> <span class="mi">1</span>
<span class="n">position</span> <span class="o">=</span> <span class="mi">1</span> <span class="o">-</span> <span class="n">position</span>
<span class="k">else</span><span class="p">:</span>
<span class="n">counter</span><span class="p">[</span><span class="mi">1</span><span class="p">]</span> <span class="o">+=</span> <span class="mi">1</span>
<span class="nb">print</span><span class="p">(</span><span class="sa">f</span><span class="s2">&quot;Der Frosch war </span><span class="si">{</span><span class="n">counter</span><span class="p">[</span><span class="mi">0</span><span class="p">]</span><span class="si">}</span><span class="s2"> mal links und </span><span class="si">{</span><span class="n">counter</span><span class="p">[</span><span class="mi">1</span><span class="p">]</span><span class="si">}</span><span class="s2"> mal rechts&quot;</span><span class="p">)</span>
<span class="nb">print</span><span class="p">(</span><span class="sa">f</span><span class="s2">&quot;In dezimal: </span><span class="si">{</span><span class="n">counter</span><span class="p">[</span><span class="mi">0</span><span class="p">]</span><span class="o">/</span><span class="n">N</span><span class="si">:</span><span class="s2">.4f</span><span class="si">}</span><span class="s2"> und </span><span class="si">{</span><span class="n">counter</span><span class="p">[</span><span class="mi">1</span><span class="p">]</span><span class="o">/</span><span class="n">N</span><span class="si">:</span><span class="s2">.4f</span><span class="si">}</span><span class="s2">&quot;</span><span class="p">)</span>
</pre></div>
</div>
@ -15260,34 +15242,12 @@ body[data-format='mobile'] .jp-OutputArea-child .jp-OutputArea-output {
</div>
</div>
<div class="jp-Cell-outputWrapper">
<div class="jp-Collapser jp-OutputCollapser jp-Cell-outputCollapser">
</div>
<div class="jp-OutputArea jp-Cell-outputArea">
<div class="jp-OutputArea-child">
<div class="jp-OutputPrompt jp-OutputArea-prompt"></div>
<div class="jp-RenderedText jp-OutputArea-output" data-mime-type="text/plain">
<pre>Der Frosch war 250283 mal links und 749718 mal rechts
In dezimal: 0.2503 und 0.7497
</pre>
</div>
</div>
</div>
</div>
</div><div id="cell-id=41017b8e" class="jp-Cell jp-CodeCell jp-Notebook-cell ">
<div class="jp-Cell-inputWrapper">
<div class="jp-Collapser jp-InputCollapser jp-Cell-inputCollapser">
</div>
<div class="jp-InputArea jp-Cell-inputArea">
<div class="jp-InputPrompt jp-InputArea-prompt">In&nbsp;[2]:</div>
<div class="jp-InputPrompt jp-InputArea-prompt">In&nbsp;[8]:</div>
<div class="jp-CodeMirrorEditor jp-Editor jp-InputArea-editor" data-type="inline">
<div class="CodeMirror cm-s-jupyter">
<div class=" highlight hl-ipython3"><pre><span></span><span class="n">visits</span> <span class="o">=</span> <span class="p">[</span><span class="mi">0</span><span class="p">,</span> <span class="mi">0</span><span class="p">]</span>
@ -15328,9 +15288,9 @@ In dezimal: 0.2503 und 0.7497
<div class="jp-RenderedText jp-OutputArea-output" data-mime-type="text/plain">
<pre>100000/100000
0.2509 of the time at 0 (25094 total)
0.7491 of the time at 1 (74906 total)
<pre>1000000/1000000
0.2501 of the time at 0 (250098 total)
0.7499 of the time at 1 (749902 total)
</pre>
</div>
</div>
@ -15848,7 +15808,7 @@ $$
<div class="jp-Collapser jp-InputCollapser jp-Cell-inputCollapser">
</div>
<div class="jp-InputArea jp-Cell-inputArea">
<div class="jp-InputPrompt jp-InputArea-prompt">In&nbsp;[29]:</div>
<div class="jp-InputPrompt jp-InputArea-prompt">In&nbsp;[19]:</div>
<div class="jp-CodeMirrorEditor jp-Editor jp-InputArea-editor" data-type="inline">
<div class="CodeMirror cm-s-jupyter">
<div class=" highlight hl-ipython3"><pre><span></span><span class="kn">from</span> <span class="nn">numpy</span> <span class="kn">import</span> <span class="n">array</span>
@ -15859,8 +15819,8 @@ $$
<span class="p">[</span><span class="mf">0.1</span><span class="p">,</span> <span class="mf">0.5</span><span class="p">,</span> <span class="mf">0.4</span><span class="p">]]</span>
<span class="p">)</span>
<span class="n">x0</span> <span class="o">=</span> <span class="p">[</span><span class="mi">1</span><span class="o">/</span><span class="mi">3</span><span class="p">,</span> <span class="mi">1</span><span class="o">/</span><span class="mi">3</span><span class="p">,</span> <span class="mi">1</span><span class="o">/</span><span class="mi">3</span><span class="p">]</span> <span class="c1"># regen</span>
<span class="n">x0</span> <span class="o">@</span> <span class="n">M</span> <span class="o">@</span> <span class="n">M</span> <span class="o">@</span> <span class="n">M</span> <span class="o">@</span> <span class="n">M</span> <span class="o">@</span> <span class="n">M</span> <span class="o">@</span> <span class="n">M</span> <span class="o">@</span> <span class="n">M</span> <span class="o">@</span> <span class="n">M</span> <span class="o">@</span> <span class="n">M</span> <span class="o">@</span> <span class="n">M</span> <span class="o">@</span> <span class="n">M</span> <span class="o">@</span> <span class="n">M</span> <span class="o">@</span> <span class="n">M</span> <span class="o">@</span> <span class="n">M</span>
<span class="n">x0</span> <span class="o">=</span> <span class="p">[</span><span class="mi">1</span><span class="p">,</span> <span class="mi">0</span><span class="p">,</span> <span class="mi">0</span><span class="p">]</span>
<span class="n">x0</span>
</pre></div>
</div>
@ -15876,13 +15836,13 @@ $$
<div class="jp-OutputArea jp-Cell-outputArea">
<div class="jp-OutputArea-child jp-OutputArea-executeResult">
<div class="jp-OutputPrompt jp-OutputArea-prompt">Out[29]:</div>
<div class="jp-OutputPrompt jp-OutputArea-prompt">Out[19]:</div>
<div class="jp-RenderedText jp-OutputArea-output jp-OutputArea-executeResult" data-mime-type="text/plain">
<pre>array([0.40909064, 0.34848494, 0.24242442])</pre>
<pre>[1, 0, 0]</pre>
</div>
</div>
@ -15896,11 +15856,12 @@ $$
<div class="jp-Collapser jp-InputCollapser jp-Cell-inputCollapser">
</div>
<div class="jp-InputArea jp-Cell-inputArea">
<div class="jp-InputPrompt jp-InputArea-prompt">In&nbsp;[34]:</div>
<div class="jp-InputPrompt jp-InputArea-prompt">In&nbsp;[21]:</div>
<div class="jp-CodeMirrorEditor jp-Editor jp-InputArea-editor" data-type="inline">
<div class="CodeMirror cm-s-jupyter">
<div class=" highlight hl-ipython3"><pre><span></span><span class="kn">from</span> <span class="nn">numpy</span> <span class="kn">import</span> <span class="n">linalg</span>
<span class="n">x0</span> <span class="o">@</span> <span class="n">linalg</span><span class="o">.</span><span class="n">matrix_power</span><span class="p">(</span><span class="n">M</span><span class="p">,</span> <span class="mi">100</span><span class="p">)</span> <span class="c1"># schnelles Potenzieren</span>
<span class="n">x0</span> <span class="o">=</span> <span class="p">[</span><span class="mi">0</span><span class="p">,</span> <span class="mi">0</span><span class="p">,</span> <span class="mi">1</span><span class="p">]</span>
<span class="n">x0</span> <span class="o">@</span> <span class="n">linalg</span><span class="o">.</span><span class="n">matrix_power</span><span class="p">(</span><span class="n">M</span><span class="p">,</span> <span class="mi">14</span><span class="p">)</span> <span class="c1"># schnelles Potenzieren</span>
</pre></div>
</div>
@ -15916,13 +15877,13 @@ $$
<div class="jp-OutputArea jp-Cell-outputArea">
<div class="jp-OutputArea-child jp-OutputArea-executeResult">
<div class="jp-OutputPrompt jp-OutputArea-prompt">Out[34]:</div>
<div class="jp-OutputPrompt jp-OutputArea-prompt">Out[21]:</div>
<div class="jp-RenderedText jp-OutputArea-output jp-OutputArea-executeResult" data-mime-type="text/plain">
<pre>array([0.40909091, 0.34848485, 0.24242424])</pre>
<pre>array([0.40908903, 0.34848547, 0.2424255 ])</pre>
</div>
</div>
@ -15955,12 +15916,12 @@ $$<p>aus. Hierbei ist $\lambda \neq 0$ eine Zahl, die man auch <em>Eigenwert</em>
</div>
</div>
</div>
</div><div id="cell-id=9b76997c" class="jp-Cell jp-CodeCell jp-Notebook-cell ">
</div><div id="cell-id=9b76997c" class="jp-Cell jp-CodeCell jp-Notebook-cell jp-mod-noOutputs ">
<div class="jp-Cell-inputWrapper">
<div class="jp-Collapser jp-InputCollapser jp-Cell-inputCollapser">
</div>
<div class="jp-InputArea jp-Cell-inputArea">
<div class="jp-InputPrompt jp-InputArea-prompt">In&nbsp;[5]:</div>
<div class="jp-InputPrompt jp-InputArea-prompt">In&nbsp;[&nbsp;]:</div>
<div class="jp-CodeMirrorEditor jp-Editor jp-InputArea-editor" data-type="inline">
<div class="CodeMirror cm-s-jupyter">
<div class=" highlight hl-ipython3"><pre><span></span><span class="c1"># linalg.eig berechnet Rechts-Eigenvektoren von Matrizen, wir suchen allerdings Links-Eigenvektoren</span>
@ -15986,29 +15947,6 @@ $$<p>aus. Hierbei ist $\lambda != 0$ eine Zahl, die man auch <em>Eigenwert</em>
</div>
</div>
<div class="jp-Cell-outputWrapper">
<div class="jp-Collapser jp-OutputCollapser jp-Cell-outputCollapser">
</div>
<div class="jp-OutputArea jp-Cell-outputArea">
<div class="jp-OutputArea-child">
<div class="jp-OutputPrompt jp-OutputArea-prompt"></div>
<div class="jp-RenderedText jp-OutputArea-output" data-mime-type="text/plain">
<pre>0.9999999999999989
[0.40909091 0.34848485 0.24242424]
[0.40909091 0.34848485 0.24242424]
</pre>
</div>
</div>
</div>
</div>
</div>
<div id="cell-id=b7f0b716" class="jp-Cell jp-MarkdownCell jp-Notebook-cell">
<div class="jp-Cell-inputWrapper">
@ -16058,7 +15996,7 @@ Wie können wir aus den 7 Fröschen <strong>fair</strong> einen auswählen?</p>
<div class="jp-Collapser jp-InputCollapser jp-Cell-inputCollapser">
</div>
<div class="jp-InputArea jp-Cell-inputArea">
<div class="jp-InputPrompt jp-InputArea-prompt">In&nbsp;[45]:</div>
<div class="jp-InputPrompt jp-InputArea-prompt">In&nbsp;[28]:</div>
<div class="jp-CodeMirrorEditor jp-Editor jp-InputArea-editor" data-type="inline">
<div class="CodeMirror cm-s-jupyter">
<div class=" highlight hl-ipython3"><pre><span></span><span class="kn">from</span> <span class="nn">numpy</span> <span class="kn">import</span> <span class="n">array</span>
@ -16080,7 +16018,7 @@ Wie können wir aus den 7 Fröschen <strong>fair</strong> einen auswählen?</p>
<span class="p">[</span> <span class="mf">0.0</span> <span class="p">,</span> <span class="mf">0.0</span> <span class="p">,</span> <span class="mf">0.0</span> <span class="p">,</span> <span class="mf">0.0</span> <span class="p">,</span> <span class="mf">0.0</span> <span class="p">,</span> <span class="mf">0.0</span> <span class="p">,</span> <span class="mf">0.0</span> <span class="p">,</span> <span class="mf">0.0</span> <span class="p">,</span> <span class="mf">0.0</span> <span class="p">,</span> <span class="mf">0.0</span> <span class="p">,</span> <span class="mf">0.0</span> <span class="p">,</span> <span class="mf">0.0</span> <span class="p">,</span> <span class="mf">0.0</span> <span class="p">,</span> <span class="mf">1.0</span><span class="p">]</span>
<span class="p">])</span>
<span class="n">v</span> <span class="o">=</span> <span class="n">array</span><span class="p">([</span><span class="mi">1</span><span class="p">,</span> <span class="mi">0</span><span class="p">,</span> <span class="mi">0</span><span class="p">,</span> <span class="mi">0</span><span class="p">,</span> <span class="mi">0</span><span class="p">,</span> <span class="mi">0</span><span class="p">,</span> <span class="mi">0</span><span class="p">,</span> <span class="mi">0</span><span class="p">,</span> <span class="mi">0</span><span class="p">,</span> <span class="mi">0</span><span class="p">,</span> <span class="mi">0</span><span class="p">,</span> <span class="mi">0</span><span class="p">,</span> <span class="mi">0</span><span class="p">,</span> <span class="mi">0</span> <span class="p">])</span>
<span class="n">v</span> <span class="o">@</span> <span class="n">linalg</span><span class="o">.</span><span class="n">matrix_power</span><span class="p">(</span><span class="n">M</span><span class="p">,</span> <span class="mi">3</span><span class="o">*</span><span class="mi">7</span><span class="p">)</span>
<span class="n">v</span> <span class="o">@</span> <span class="n">linalg</span><span class="o">.</span><span class="n">matrix_power</span><span class="p">(</span><span class="n">M</span><span class="p">,</span> <span class="mi">3</span><span class="o">*</span><span class="mi">100</span><span class="p">)</span>
</pre></div>
</div>
@ -16096,16 +16034,16 @@ Wie können wir aus den 7 Fröschen <strong>fair</strong> einen auswählen?</p>
<div class="jp-OutputArea jp-Cell-outputArea">
<div class="jp-OutputArea-child jp-OutputArea-executeResult">
<div class="jp-OutputPrompt jp-OutputArea-prompt">Out[45]:</div>
<div class="jp-OutputPrompt jp-OutputArea-prompt">Out[28]:</div>
<div class="jp-RenderedText jp-OutputArea-output jp-OutputArea-executeResult" data-mime-type="text/plain">
<pre>array([4.76837158e-07, 0.00000000e+00, 0.00000000e+00, 0.00000000e+00,
0.00000000e+00, 0.00000000e+00, 0.00000000e+00, 1.42857075e-01,
1.42857075e-01, 1.42857075e-01, 1.42857075e-01, 1.42857075e-01,
1.42857075e-01, 1.42857075e-01])</pre>
<pre>array([4.90909347e-91, 0.00000000e+00, 0.00000000e+00, 0.00000000e+00,
0.00000000e+00, 0.00000000e+00, 0.00000000e+00, 1.42857143e-01,
1.42857143e-01, 1.42857143e-01, 1.42857143e-01, 1.42857143e-01,
1.42857143e-01, 1.42857143e-01])</pre>
</div>
</div>
@ -16157,6 +16095,10 @@ Wie können wir aus den 7 Fröschen <strong>fair</strong> einen auswählen?</p>
<div class="alert alert-info">
<p>Generiere mit <code>hp.txt</code> Harry-Potter-Fanfiction! Ein brandneues Universum an Unsinn erwartet dich!</p>
</div>
<p><a href="https://dominic.leafbla.de/hp_parsed.txt">https://dominic.leafbla.de/hp_parsed.txt</a>
<a href="https://dominic.leafbla.de/forschungstage">https://dominic.leafbla.de/forschungstage</a></p>
<h3 id="Code-Beispiele">Code Beispiele<a class="anchor-link" href="#Code-Beispiele">&#182;</a></h3><p><a href="https://dominic.leafbla.de/crawler.py">https://dominic.leafbla.de/crawler.py</a> (Crawlt Wiki)
<a href="https://dominic.leafbla.de/markov.py">https://dominic.leafbla.de/markov.py</a> (Rechnet mit den Daten davon)</p>
</div>
</div>