Source code for tensorpack.utils.fs

# -*- coding: utf-8 -*-
# File: fs.py


import errno
import os
import tqdm
from six.moves import urllib

from . import logger
from .utils import execute_only_once

__all__ = ['mkdir_p', 'download', 'recursive_walk', 'get_dataset_path', 'normpath']


def mkdir_p(dirname):
    """ Like "mkdir -p", make a dir recursively, but do nothing if the dir exists

    Args:
        dirname(str): the directory to create.
    """
    assert dirname is not None
    if dirname == '' or os.path.isdir(dirname):
        return
    try:
        os.makedirs(dirname)
    except OSError as e:
        if e.errno != errno.EEXIST:
            raise e
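
# Example usage (an illustrative sketch; the path below is hypothetical):
#
#   mkdir_p('/tmp/demo/a/b/c')   # creates all intermediate directories
#   mkdir_p('/tmp/demo/a/b/c')   # calling it again is a silent no-op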


def download(url, dir, filename=None, expect_size=None):
    """
    Download URL to a directory.
    Will figure out the filename automatically from URL, if not given.
    """
    mkdir_p(dir)
    if filename is None:
        filename = url.split('/')[-1]
    fpath = os.path.join(dir, filename)

    if os.path.isfile(fpath):
        if expect_size is not None and os.stat(fpath).st_size == expect_size:
            logger.info("File {} exists! Skip download.".format(filename))
            return fpath
        else:
            logger.warn("File {} exists. Will overwrite with a new download!".format(filename))

    def hook(t):
        # Adapt urlretrieve's reporthook(block_count, block_size, total_size)
        # to a tqdm progress bar, tracking the last seen block count.
        last_b = [0]

        def inner(b, bsize, tsize=None):
            if tsize is not None:
                t.total = tsize
            t.update((b - last_b[0]) * bsize)
            last_b[0] = b
        return inner

    try:
        with tqdm.tqdm(unit='B', unit_scale=True, miniters=1, desc=filename) as t:
            fpath, _ = urllib.request.urlretrieve(url, fpath, reporthook=hook(t))
        statinfo = os.stat(fpath)
        size = statinfo.st_size
    except IOError:
        logger.error("Failed to download {}".format(url))
        raise
    assert size > 0, "Downloaded an empty file from {}!".format(url)

    if expect_size is not None and size != expect_size:
        logger.error("File downloaded from {} does not match the expected size!".format(url))
        logger.error("You may have downloaded a broken file, or the upstream may have modified the file.")

    # TODO human-readable size
    logger.info('Successfully downloaded ' + filename + ". " + str(size) + ' bytes.')
    return fpath
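
# Example usage (a sketch; the URL, destination and size are hypothetical):
#
#   fpath = download('https://example.com/model.npz', '/tmp/models',
#                    expect_size=1048576)
#   # With expect_size given, an existing file of exactly that size is
#   # reused; otherwise the file is re-downloaded and overwritten.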


def recursive_walk(rootdir):
    """
    Yields:
        str: All files in rootdir, recursively.
    """
    for r, dirs, files in os.walk(rootdir):
        for f in files:
            yield os.path.join(r, f)
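
# Example usage (illustrative; the directory is hypothetical):
#
#   for fname in recursive_walk('/tmp/dataset'):
#       print(fname)   # one path per file, e.g. '/tmp/dataset/sub/x.jpg'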


def get_dataset_path(*args):
    """
    Get the path to some dataset under ``$TENSORPACK_DATASET``.

    Args:
        args: strings to be joined to form path.

    Returns:
        str: path to the dataset.
    """
    d = os.environ.get('TENSORPACK_DATASET', None)
    if d is None:
        d = os.path.join(os.path.expanduser('~'), 'tensorpack_data')
        if execute_only_once():
            logger.warn("Env var $TENSORPACK_DATASET not set, using {} for datasets.".format(d))
        if not os.path.isdir(d):
            mkdir_p(d)
            logger.info("Created the directory {}.".format(d))
    assert os.path.isdir(d), d
    return os.path.join(d, *args)
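
# Example usage (a sketch; the environment variable value is hypothetical):
#
#   os.environ['TENSORPACK_DATASET'] = '/data/tensorpack'  # optional override
#   get_dataset_path('mnist')           # -> '/data/tensorpack/mnist'
#   get_dataset_path('cifar', 'train')  # joins multiple path components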


def normpath(path):
    """
    Normalize a path to a folder, taking into account remote storage
    (e.g. cloud storage) referenced by '://' in the path.

    Args:
        path: path to be normalized.

    Returns:
        str: normalized path.
    """
    return path if '://' in path else os.path.normpath(path)
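
# Example usage (illustrative paths):
#
#   normpath('foo//bar/../baz')    # -> 'foo/baz' (local paths are normalized)
#   normpath('s3://bucket//key')   # returned unchanged: '://' marks remote storage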


if __name__ == '__main__':
    download('http://dl.caffe.berkeleyvision.org/caffe_ilsvrc12.tar.gz', '.')