Commit 21423e5f authored by David Michael Barr
Browse files

doc: ingest archive for rate-control regression

parent 2e673191
......@@ -7,10 +7,12 @@
from glob import glob
from matplotlib import pyplot as plt
import numpy as np
from pprint import pprint
import tarfile
from tqdm import tqdm_notebook
# Klotz, Jerome H. "UPDATING SIMPLE LINEAR REGRESSION."
# Statistica Sinica 5, no. 1 (1995): 399-403.
# http://www.jstor.org/stable/24305577
def online_simple_regression(accumulator, x, y):
......@@ -59,16 +61,22 @@
def aggregate(queues, partials):
    """Fold all queued sample batches into the running regressions, then empty the queues.

    Args:
        queues: dict keyed by ``fti``; each value is indexable, with element 0
            holding a sequence of x-sample arrays and element 1 a sequence of
            y-sample arrays.
        partials: dict keyed by the same ``fti`` values, holding whatever
            accumulator ``online_simple_regression`` returns. Updated in place.

    Returns:
        None. ``partials`` is updated and ``queues`` is cleared in place.
    """
    for fti, queue in queues.items():
        # Join the per-file batches into one flat array per axis.
        x = np.concatenate(queue[0])
        y = np.concatenate(queue[1])
        # A key seen for the first time has no accumulator yet, so None is passed.
        partials[fti] = online_simple_regression(partials.get(fti), x, y)
    # Clear only after the loop: mutating a dict while iterating it raises RuntimeError.
    queues.clear()
# Running regression accumulators; aggregate() stores one entry per fti key.
partials = dict()
# NOTE(review): this page shows a diff without +/- markers and without
# indentation. The next four lines look like the pre-change loop over
# per-clip directories that the tarfile loop below replaces -- confirm
# against the commit before treating them as live code.
for base in sorted(glob('*.y4m')):
queues = dict()
for q in range(1, 256):
collect('%s/%d.txt' % (base, q), queues)
# https://ba.rr-dav.id.au/data/rav1e/rc-data.tar.xz
# Read the samples directly from the xz-compressed archive.
with tarfile.open('rc-data.tar.xz', 'r:xz') as tf:
queues, last_name = dict(), None
# total= only sizes the progress bar; presumably 1077 clips x 255 quantizer
# settings -- TODO confirm against the archive contents.
for ti in tqdm_notebook(tf, total=1077*255, leave=False):
# The first path component of the member name identifies the group.
name = ti.name.split('/')[0]
# When the group changes, fold the queued samples into partials.
if last_name and name != last_name:
aggregate(queues, partials)
last_name = name
collect(tf.extractfile(ti), queues)
# Fold whatever is still queued once the loop has finished.
aggregate(queues, partials)
pprint(partials)
```
%% Cell type:code id: tags:
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment