diff options
author | Tibor Frank <tifrank@cisco.com> | 2018-05-21 08:50:42 +0200 |
---|---|---|
committer | Tibor Frank <tifrank@cisco.com> | 2018-05-24 11:47:06 +0000 |
commit | 372eab0eac428149d547b2d6eb2ce43cd0d750f6 (patch) | |
tree | d066fe7d4cffce1c061490365e982558b36fc979 /resources/tools/presentation/input_data_files.py | |
parent | 1a6941f8af88693f19ea5c4b372234cc7078f04e (diff) |
CSIT-1101: Optimize input data processing
- Add multiprocessing
Change-Id: Iaeb9dcf0f4523de379ecb7f775b941d9deb080b2
Signed-off-by: Tibor Frank <tifrank@cisco.com>
Diffstat (limited to 'resources/tools/presentation/input_data_files.py')
-rw-r--r-- | resources/tools/presentation/input_data_files.py | 136 |
1 files changed, 75 insertions, 61 deletions
diff --git a/resources/tools/presentation/input_data_files.py b/resources/tools/presentation/input_data_files.py index 9e0cfa6931..cde6d1acc4 100644 --- a/resources/tools/presentation/input_data_files.py +++ b/resources/tools/presentation/input_data_files.py @@ -16,11 +16,9 @@ Download all data. """ import re -import logging -from os import rename +from os import rename, mkdir from os.path import join -from shutil import move from zipfile import ZipFile, is_zipfile, BadZipfile from httplib import responses from requests import get, codes, RequestException, Timeout, TooManyRedirects, \ @@ -38,31 +36,33 @@ SEPARATOR = "__" REGEX_RELEASE = re.compile(r'(\D*)(\d{4}|master)(\D*)') -def _download_file(url, file_name): +def _download_file(url, file_name, log): """Download a file with input data. :param url: URL to the file to download. :param file_name: Name of file to download. + :param log: List of log messages. :type url: str :type file_name: str + :type log: list of tuples (severity, msg) :returns: True if the download was successful, otherwise False. :rtype: bool """ success = False try: - logging.info(" Connecting to '{0}' ...".format(url)) + log.append(("INFO", " Connecting to '{0}' ...".format(url))) response = get(url, stream=True) code = response.status_code - logging.info(" {0}: {1}".format(code, responses[code])) + log.append(("INFO", " {0}: {1}".format(code, responses[code]))) if code != codes["OK"]: return False - logging.info(" Downloading the file '{0}' to '{1}' ...". - format(url, file_name)) + log.append(("INFO", " Downloading the file '{0}' to '{1}' ...". + format(url, file_name))) file_handle = open(file_name, "wb") for chunk in response.iter_content(chunk_size=CHUNK_SIZE): @@ -71,38 +71,38 @@ def _download_file(url, file_name): file_handle.close() success = True except ConnectionError as err: - logging.error("Not possible to connect to '{0}'.".format(url)) - logging.debug(str(err)) + log.append(("ERROR", "Not possible to connect to '{0}'.".format(url))) + log.append(("DEBUG", str(err))) except HTTPError as err: - logging.error("Invalid HTTP response from '{0}'.".format(url)) - logging.debug(str(err)) + log.append(("ERROR", "Invalid HTTP response from '{0}'.".format(url))) + log.append(("DEBUG", str(err))) except TooManyRedirects as err: - logging.error("Request exceeded the configured number " - "of maximum re-directions.") - logging.debug(str(err)) + log.append(("ERROR", "Request exceeded the configured number " + "of maximum re-directions.")) + log.append(("DEBUG", str(err))) except Timeout as err: - logging.error("Request timed out.") - logging.debug(str(err)) + log.append(("ERROR", "Request timed out.")) + log.append(("DEBUG", str(err))) except RequestException as err: - logging.error("Unexpected HTTP request exception.") - logging.debug(str(err)) + log.append(("ERROR", "Unexpected HTTP request exception.")) + log.append(("DEBUG", str(err))) except (IOError, ValueError, KeyError) as err: - logging.error("Download failed.") - logging.debug(str(err)) + log.append(("ERROR", "Download failed.")) + log.append(("DEBUG", str(err))) - logging.info(" Download finished.") + log.append(("INFO", " Download finished.")) return success -def _unzip_file(spec, job, build): +def _unzip_file(spec, build, pid, log): """Unzip downloaded source file. :param spec: Specification read form the specification file. - :param job: Name of the Jenkins job. :param build: Information about the build. + :param log: List of log messages. :type spec: Specification - :type job: str :type build: dict + :type log: list of tuples (severity, msg) :returns: True if the download was successful, otherwise False. :rtype: bool """ @@ -110,44 +110,48 @@ def _unzip_file(spec, job, build): data_file = spec.input["extract"] file_name = build["file-name"] directory = spec.environment["paths"]["DIR[WORKING,DATA]"] + tmp_dir = join(directory, str(pid)) + try: + mkdir(tmp_dir) + except OSError: + pass new_name = "{0}{1}{2}".format(file_name.rsplit('.')[-2], SEPARATOR, data_file.split("/")[-1]) - logging.info(" Unzipping: '{0}' from '{1}'.". - format(data_file, file_name)) + + log.append(("INFO", " Unzipping: '{0}' from '{1}'.". + format(data_file, file_name))) try: with ZipFile(file_name, 'r') as zip_file: - zip_file.extract(data_file, directory) - logging.info(" Moving {0} to {1} ...". - format(join(directory, data_file), directory)) - move(join(directory, data_file), directory) - logging.info(" Renaming the file '{0}' to '{1}'". - format(join(directory, data_file.split("/")[-1]), - new_name)) - rename(join(directory, data_file.split("/")[-1]), - new_name) - spec.set_input_file_name(job, build["build"], - new_name) + zip_file.extract(data_file, tmp_dir) + log.append(("INFO", " Renaming the file '{0}' to '{1}'". + format(join(tmp_dir, data_file), new_name))) + rename(join(tmp_dir, data_file), new_name) + build["file-name"] = new_name return True except (BadZipfile, RuntimeError) as err: - logging.error("Failed to unzip the file '{0}': {1}.". - format(file_name, str(err))) + log.append(("ERROR", "Failed to unzip the file '{0}': {1}.". + format(file_name, str(err)))) return False except OSError as err: - logging.error("Failed to rename the file '{0}': {1}.". - format(data_file, str(err))) + log.append(("ERROR", "Failed to rename the file '{0}': {1}.". + format(data_file, str(err)))) return False -def download_and_unzip_data_file(spec, job, build): +def download_and_unzip_data_file(spec, job, build, pid, log): """Download and unzip a source file. :param spec: Specification read form the specification file. :param job: Name of the Jenkins job. :param build: Information about the build. + :param pid: PID of the process executing this method. + :param log: List of log messages. :type spec: Specification :type job: str :type build: dict + :type pid: int + :type log: list of tuples (severity, msg) :returns: True if the download was successful, otherwise False. :rtype: bool """ @@ -158,7 +162,7 @@ def download_and_unzip_data_file(spec, job, build): elif spec.input["file-name"].endswith(".gz"): url = spec.environment["urls"]["URL[NEXUS,LOG]"] else: - logging.error("Not supported file format.") + log.append(("ERROR", "Not supported file format.")) return False elif job.startswith("hc2vpp-"): url = spec.environment["urls"]["URL[JENKINS,HC]"] @@ -174,27 +178,35 @@ def download_and_unzip_data_file(spec, job, build): format(job=job, sep=SEPARATOR, build=build["build"], name=file_name)) # Download the file from the defined source (Jenkins, logs.fd.io): - success = _download_file(url, new_name) + success = _download_file(url, new_name, log) + + if success and new_name.endswith(".zip"): + if not is_zipfile(new_name): + success = False # If not successful, download from docs.fd.io: if not success: - logging.info(" Trying to download from https://docs.fd.io:") + log.append(("INFO", " Trying to download from https://docs.fd.io:")) release = re.search(REGEX_RELEASE, job).group(2) - nexus_file_name = "{job}{sep}{build}{sep}{name}". \ - format(job=job, sep=SEPARATOR, build=build["build"], name=file_name) - try: - release = "rls{0}".format(int(release)) - except ValueError: - pass - url = "{url}/{release}/{dir}/{file}". \ - format(url=spec.environment["urls"]["URL[NEXUS]"], - release=release, - dir=spec.environment["urls"]["DIR[NEXUS]"], - file=nexus_file_name) - success = _download_file(url, new_name) + for rls in (release, "master"): + nexus_file_name = "{job}{sep}{build}{sep}{name}". \ + format(job=job, sep=SEPARATOR, build=build["build"], + name=file_name) + try: + rls = "rls{0}".format(int(rls)) + except ValueError: + pass + url = "{url}/{release}/{dir}/{file}". \ + format(url=spec.environment["urls"]["URL[NEXUS]"], + release=rls, + dir=spec.environment["urls"]["DIR[NEXUS]"], + file=nexus_file_name) + success = _download_file(url, new_name, log) + if success: + break if success: - spec.set_input_file_name(job, build["build"], new_name) + build["file-name"] = new_name else: return False @@ -205,12 +217,14 @@ def download_and_unzip_data_file(spec, job, build): else: rename(new_name, new_name[:-3]) execute_command("gzip --keep {0}".format(new_name[:-3])) - spec.set_input_file_name(job, build["build"], new_name[:-3]) + build["file-name"] = new_name[:-3] if new_name.endswith(".zip"): if is_zipfile(new_name): - return _unzip_file(spec, job, build) + return _unzip_file(spec, build, pid, log) else: + log.append(("ERROR", + "Zip file '{0}' is corrupted.".format(new_name))) return False else: return True |