notebook
This commit is contained in:
+52
-14
@@ -149,6 +149,50 @@ def filer_url(filer, seaweed_path):
|
||||
return filer.rstrip("/") + "/" + enc
|
||||
|
||||
|
||||
SLICE = 16 << 20 # bytes per HTTP Range slice (16 MiB); a plain full GET from the filer is pathologically slow
|
||||
|
||||
|
||||
def download_resumable(url, dst, size, retries=8):
    """Download ``url`` into ``dst`` in Range slices, resuming from a ``.part`` file.

    The file is fetched with ``SLICE``-sized HTTP Range requests that are
    appended to ``dst.with_suffix(".part")`` (per the original author, the
    filer serves a plain full GET roughly 50x slower). After a failure the
    download continues from however many bytes are already on disk. Once the
    partial file is exactly ``size`` bytes long it is renamed to ``dst``.

    Args:
        url: Address of the file to fetch.
        dst: Target path; must offer the ``pathlib.Path`` interface.
        size: Expected total size of the file in bytes.
        retries: Number of consecutive failed attempts tolerated before
            giving up. The counter is reset after every slice that makes
            progress.

    Returns:
        A ``(ok, last_error)`` tuple. ``ok`` is True only when the finished
        file has exactly ``size`` bytes and has been moved to ``dst``.
        ``last_error`` is the most recent exception seen; it can be non-None
        on success when an earlier attempt failed and a retry recovered.
        When ``ok`` is False, ``last_error`` is always set.
    """
    tmp = dst.with_suffix(".part")
    have = tmp.stat().st_size if tmp.exists() else 0
    if have > size:  # leftover is larger than the file -> corrupt, start over
        tmp.unlink()
        have = 0
    last = None
    fails = 0
    while have < size:
        end = min(have + SLICE, size) - 1  # the Range end offset is inclusive
        try:
            with requests.get(url, headers={"Range": f"bytes={have}-{end}"},
                              stream=True, timeout=(15, 90)) as r:
                if r.status_code not in (200, 206):
                    r.raise_for_status()
                    # raise_for_status() only raises for 4xx/5xx; any other
                    # status must not have its body appended to the file.
                    raise OSError(f"unexpected HTTP status {r.status_code}")
                if r.status_code == 200:  # server ignored Range -> whole file
                    tmp.unlink(missing_ok=True)
                    with open(tmp, "wb") as f:
                        for chunk in r.iter_content(1 << 20):
                            f.write(chunk)
                    break
                with open(tmp, "ab") as f:
                    for chunk in r.iter_content(1 << 20):
                        f.write(chunk)
            got = tmp.stat().st_size
            if got <= have:
                # An empty 206 body would otherwise reset the failure counter
                # and spin forever without any back-off.
                raise OSError(f"no data received for bytes {have}-{end}")
            have = got
            fails = 0  # slice went through -> reset the retry counter
        except Exception as e:  # deliberately broad: any error means retry
            last = e
            fails += 1
            if fails > retries:
                break
            time.sleep(min(2 ** fails, 20))  # exponential back-off, max 20 s
            # Bytes written before the failure are kept; resume after them.
            have = tmp.stat().st_size if tmp.exists() else 0
    final = tmp.stat().st_size if tmp.exists() else None
    ok = final == size
    if ok:
        tmp.replace(dst)
    elif last is None:
        # The loop ended without an exception but the size is wrong (e.g. a
        # full 200 body of a different length, or size == 0 so nothing was
        # fetched). Give the caller something better than "None" to log.
        # NOTE(review): zero-byte files are still reported as failures,
        # as before - confirm whether they should be created instead.
        last = OSError(f"size mismatch: have {final} bytes, expected {size}")
    return ok, last
|
||||
|
||||
|
||||
def build_plan(db):
|
||||
kurzy = {k["_id"]: k for k in db.kurzy.find({})}
|
||||
vids = list(db.materialy.find(
|
||||
@@ -231,24 +275,18 @@ def main():
|
||||
skipped += 1
|
||||
continue
|
||||
print(f"[{n}/{len(plan)}] ↓ {size/1024**2:.1f}MB {fn}", flush=True)
|
||||
try:
|
||||
url = filer_url(args.filer, path)
|
||||
ts = time.time()
|
||||
# timeout=(connect, read) -> zaseknute spojeni spadne rychle
|
||||
with requests.get(url, stream=True, timeout=(15, 90)) as r:
|
||||
r.raise_for_status()
|
||||
tmp = dst.with_suffix(".part")
|
||||
with open(tmp, "wb") as f:
|
||||
for chunk in r.iter_content(1 << 20):
|
||||
f.write(chunk)
|
||||
tmp.replace(dst)
|
||||
url = filer_url(args.filer, path)
|
||||
ts = time.time()
|
||||
ok, err = download_resumable(url, dst, size)
|
||||
if ok:
|
||||
done += 1
|
||||
dl_bytes += size
|
||||
sp = size / 1024**2 / max(time.time() - ts, 0.1)
|
||||
msg = f"[{n}/{len(plan)}] OK {size/1024**2:.1f}MB ({sp:.1f} MB/s) {fn}"
|
||||
except Exception as e:
|
||||
msg = (f"[{n}/{len(plan)}] OK {size/1024**2:.1f}MB "
|
||||
f"({sp:.1f} MB/s) {fn}")
|
||||
else:
|
||||
failed += 1
|
||||
msg = f"[{n}/{len(plan)}] FAIL {fn} :: {e}"
|
||||
msg = f"[{n}/{len(plan)}] FAIL {fn} :: {err}"
|
||||
print(msg, flush=True)
|
||||
log.write(msg + "\n")
|
||||
log.flush()
|
||||
|
||||
Reference in New Issue
Block a user