"""S3 publish library."""
+import glob
import gzip
import logging
import os
import shutil
import subprocess
import sys
import tempfile
-from mimetypes import MimeTypes
import boto3
from botocore.exceptions import ClientError
logging.getLogger(u"botocore").setLevel(logging.INFO)
-COMPRESS_MIME = (
-    u"text/html",
-    u"text/xml",
-    u"text/plain",
-    u"application/octet-stream"
-)
-
-def compress(src_fpath):
- """Compress a single file.
-
- :param src_fpath: Input file path.
- :type src_fpath: str
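+# Extension-to-MIME map used when uploading; extensions not listed here
+# fall back to application/octet-stream via FILE_TYPE.get() below.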
+FILE_TYPE = {
+ u"xml": u"application/xml",
+ u"html": u"text/html",
+ u"txt": u"text/plain",
+ u"log": u"text/plain",
+ u"css": u"text/css",
+ u"md": u"text/markdown",
+ u"rst": u"text/x-rst",
+ u"csv": u"text/csv",
+ u"svg": u"image/svg+xml",
+ u"jpg": u"image/jpeg",
+ u"png": u"image/png",
+ u"gif": u"image/gif",
+ u"js": u"application/javascript",
+ u"pdf": u"application/pdf",
+ u"json": u"application/json",
+ u"otf": u"font/otf",
+ u"ttf": u"font/ttf",
+ u"woff": u"font/woff",
+ u"woff2": u"font/woff2"
+}
+
+
+def compress_text(src_dpath):
+ """Compress all text files in directory.
+
+    :param src_dpath: Input dir path.
+    :type src_dpath: str
"""
- with open(src_fpath, u"rb") as orig_file:
- with gzip.open(src_fpath + ".gz", u"wb") as zipped_file:
- zipped_file.writelines(orig_file)
+    save_dir = os.getcwd()
+    os.chdir(src_dpath)
+
+    compress_types = [
+        u"**/*.html",
+        u"**/*.log",
+        u"**/*.txt",
+        u"**/*.xml",
+        u"**/*.json"
+    ]
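+    # ``**`` only matches across subdirectories when glob is invoked
+    # with recursive=True, as below.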
+    paths = []
+    for _type in compress_types:
+        search = os.path.join(src_dpath, _type)
+        paths.extend(glob.glob(search, recursive=True))
+
+    for _file in paths:
+        # glob may follow symlink paths that open can't find
+        if os.path.exists(_file):
+            gz_file = u"{}.gz".format(_file)
+            with open(_file, u"rb") as src, gzip.open(gz_file, u"wb") as dest:
+                shutil.copyfileobj(src, dest)
+            os.remove(_file)
+
+    os.chdir(save_dir)
def copy_archives(workspace):
raise RuntimeError(u"Not a directory.")
else:
logging.debug("Archives dir {} does exist.".format(archives_dir))
- for file_or_dir in os.listdir(archives_dir):
- f = os.path.join(archives_dir, file_or_dir)
+ for item in os.listdir(archives_dir):
+ src = os.path.join(archives_dir, item)
+ dst = os.path.join(dest_dir, item)
try:
- logging.debug(u"Copying " + f)
- shutil.copy(f, dest_dir)
+ if os.path.isdir(src):
+ shutil.copytree(src, dst, symlinks=False, ignore=None)
+ else:
+ shutil.copy2(src, dst)
except shutil.Error as e:
logging.error(e)
- raise RuntimeError(u"Could not copy " + f)
+ raise RuntimeError(u"Could not copy " + src)
else:
logging.error(u"Archives dir does not exist.")
raise RuntimeError(u"Missing directory " + archives_dir)
    :type src_fpath: str
    :type s3_path: str
    """
-    mime_guess = MimeTypes().guess_type(src_fpath)
-    mime = mime_guess[0]
-    encoding = mime_guess[1]
-    if not mime:
-        mime = u"application/octet-stream"
-
-    if u"logs" in s3_bucket:
-        if mime in COMPRESS_MIME and encoding != u"gzip":
-            compress(src_fpath)
-            src_fpath = src_fpath + u".gz"
-            s3_path = s3_path + u".gz"
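+    # Gzip streams begin with the two magic bytes 0x1f 0x8b (RFC 1952).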
+    def is_gzip_file(filepath):
+        with open(filepath, u"rb") as test_f:
+            return test_f.read(2) == b"\x1f\x8b"
+
+    if os.path.isdir(src_fpath):
+        return
+    if os.path.isfile(src_fpath):
+        file_name, file_extension = os.path.splitext(src_fpath)
+        content_encoding = u""
+        content_type = u"application/octet-stream"
+        if is_gzip_file(src_fpath):
+            file_name, file_extension = os.path.splitext(file_name)
+            content_encoding = u"gzip"
+        content_type = FILE_TYPE.get(
+            file_extension.strip(u"."),
+            u"application/octet-stream"
+        )
- extra_args = {u"ContentType": mime}
+ extra_args = dict()
+ extra_args[u"ContentType"] = content_type
+ if content_encoding:
+ extra_args[u"ContentEncoding"] = content_encoding
    try:
-        logging.info(u"Attempting to upload file " + src_fpath)
        s3_resource.Bucket(s3_bucket).upload_file(
            src_fpath, s3_path, ExtraArgs=extra_args
        )
    :type s3_path: str
    :type docs_dir: str
    """
- s3_resource = boto3.resource(u"s3")
+ try:
+ s3_resource = boto3.resource(
+ u"s3",
+ endpoint_url=os.environ[u"AWS_ENDPOINT_URL"]
+ )
+ except KeyError:
+ s3_resource = boto3.resource(
+ u"s3"
+ )
    upload_recursive(
        s3_resource=s3_resource,
    :type build_url: str
    :type workspace: str
    """
-    s3_resource = boto3.resource(
-        u"s3",
-        endpoint_url=os.environ[u"AWS_ENDPOINT_URL"]
-    )
+    s3_resource = boto3.resource(
+        u"s3",
+        endpoint_url=os.environ.get(u"AWS_ENDPOINT_URL")
+    )
    previous_dir = os.getcwd()
    work_dir = tempfile.mkdtemp(prefix="backup-s3.")
    with open(u"_build-details.log", u"w+") as f:
        f.write(u"build-url: " + build_url)
- with open(u"_sys-info.log", u"w+") as f:
- sys_cmds = []
-
- logging.debug(u"Platform: " + sys.platform)
- if sys.platform == u"linux" or sys.platform == u"linux2":
- sys_cmds = [
- [u"uname", u"-a"],
- [u"lscpu"],
- [u"nproc"],
- [u"df", u"-h"],
- [u"free", u"-m"],
- [u"ip", u"addr"],
- [u"sar", u"-b", u"-r", u"-n", u"DEV"],
- [u"sar", u"-P", u"ALL"],
- ]
-
- for c in sys_cmds:
- try:
- output = subprocess.check_output(c).decode(u"utf-8")
- except FileNotFoundError:
- logging.debug(u"Command not found: " + c)
- continue
-
- cmd = u" ".join(c)
- output = u"---> " + cmd + "\n" + output + "\n"
- f.write(output)
- logging.info(output)
-
    # Magic string used to trim console logs at the appropriate level during
    # wget.
    MAGIC_STRING = u"-----END_OF_BUILD-----"
        six.text_type(resp.content.decode(u"utf-8").split(MAGIC_STRING)[0])
    )
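+    # Gzip the text logs in place so they upload (and serve) compressed.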
+    compress_text(work_dir)
+
    upload_recursive(
        s3_resource=s3_resource,
        s3_bucket=s3_bucket,