import os
import zipfile
import numpy as np
from PIL import Image, ImageFilter
from cluster.data.data_node import DataNode
from master.workflow.data.workflow_data_image import WorkFlowDataImage
from time import gmtime, strftime
from common import utils
from common.utils import *
import shutil
import tensorflow as tf
from third_party.yolo.yolo.net.yolo_tiny_net import YoloTinyNet
import cv2
import requests
import logging
class DataNodeImage(DataNode):
    """
    Data node that converts source image folders into HDF5 training
    files, optionally cropping each image with a pre-trained YOLO
    (tiny) network first.
    """

    # yolo
    def get_confirm_token(self, response):
        """Return the Google Drive download-warning cookie value, if any."""
        for key, value in response.cookies.items():
            if key.startswith('download_warning'):
                return value
        return None
    def save_response_content(self, response, destination):
        """Stream the response body to ``destination`` in fixed-size chunks."""
        CHUNK_SIZE = 32768
        with open(destination, "wb") as f:
            for chunk in response.iter_content(CHUNK_SIZE):
                if chunk:  # filter out keep-alive chunks
                    f.write(chunk)
    def download_file_from_google_drive(self, URL, destination):
        """Download a (possibly large) file from Google Drive.

        Large files trigger a virus-scan warning page; Google then sets a
        ``download_warning`` cookie whose value must be sent back as a
        ``confirm`` parameter to obtain the real file. The file id is
        already part of ``URL``, so no extra ``id`` parameter is passed.
        """
        session = requests.Session()
        response = session.get(URL, stream=True)
        token = self.get_confirm_token(response)
        if token:
            response = session.get(URL, params={'confirm': token}, stream=True)
        self.save_response_content(response, destination)
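    # A minimal usage sketch for the Google Drive helper above. The URL is
    # the yolo_tiny checkpoint URL configured later in image_convert(); the
    # destination path is illustrative only:
    #
    #   node = DataNodeImage()
    #   url = 'https://drive.google.com/uc?id=0B-yiAeTLLamRekxqVE01Yi1RRlk&export=download'
    #   node.download_file_from_google_drive(url, '/tmp/yolo_tiny.ckpt')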
    def process_predicts(self, predicts):
        """Pick the single most confident box from a YOLO-tiny prediction.

        ``predicts`` has shape (1, 7, 7, 30) per image: 20 class
        probabilities, 2 box confidences and 2 * 4 box coordinates for
        each cell of the 7 x 7 grid.
        """
        p_classes = predicts[0, :, :, 0:20]
        C = predicts[0, :, :, 20:22]
        coordinate = predicts[0, :, :, 22:]
        p_classes = np.reshape(p_classes, (7, 7, 1, 20))
        C = np.reshape(C, (7, 7, 2, 1))
        # class-specific confidence: P[i, j, b, c] = C[i, j, b] * p_classes[i, j, c]
        P = C * p_classes
        # flat argmax, then recover the (row, col, box, class) indices
        index = np.unravel_index(np.argmax(P), P.shape)
        class_num = index[3]
        coordinate = np.reshape(coordinate, (7, 7, 2, 4))
        max_coordinate = coordinate[index[0], index[1], index[2], :]
        xcenter = max_coordinate[0]
        ycenter = max_coordinate[1]
        w = max_coordinate[2]
        h = max_coordinate[3]
        # the center is predicted relative to its grid cell; scale to pixels
        xcenter = (index[1] + xcenter) * (self.x_size / 7.0)
        ycenter = (index[0] + ycenter) * (self.y_size / 7.0)
        w = w * self.x_size
        h = h * self.y_size
        xmin = xcenter - w / 2.0
        ymin = ycenter - h / 2.0
        xmax = xmin + w
        ymax = ymin + h
        return xmin, ymin, xmax, ymax, class_num
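    # A tiny worked example (made-up values) of the argmax/unravel_index
    # step above:
    #
    #   P = np.zeros((7, 7, 2, 20))
    #   P[3, 5, 1, 14] = 0.9                     # best (row, col, box, class)
    #   np.unravel_index(np.argmax(P), P.shape)  # -> (3, 5, 1, 14)
    #
    # so index[0]/index[1] pick the grid cell, index[2] the box and
    # index[3] the predicted class.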
    def yolo_detection(self):
        """Build the YOLO-tiny inference graph; return (saver, predicts, img)."""
        set_filepaths(self.output_yolo)
        common_params = {'image_size': self.x_size, 'num_classes': 20, 'batch_size': 1}
        net_params = {'cell_size': 7, 'boxes_per_cell': 2, 'weight_decay': 0.0005}
        net = YoloTinyNet(common_params, net_params, test=True)
        img = tf.placeholder(tf.float32, (1, self.x_size, self.y_size, self.channel))
        predicts = net.inference(img)
        saver = tf.train.Saver(net.trainable_collection)
        return saver, predicts, img
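    # The checkpoint must be restored into the same session before running
    # inference; image_convert() below does exactly this:
    #
    #   saver, predicts, img_ph = self.yolo_detection()
    #   saver.restore(sess, self.yolo_model)
    #   np_predict = sess.run(predicts, feed_dict={img_ph: batch})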
    # image convert
    def image_convert(self, sess, dataconf, img, filename, forder=None):
        """Pad, resize and (optionally) YOLO-crop one PIL image.

        ``forder`` is None for predict-time calls; during HDF5 creation
        it is the label folder currently being processed.
        """
        set_flag = "N"
        if forder is None:  # forder None = predict call
            set_flag = "Y"
            forder = "tmp"
        else:  # forder given = HDF5 creation; initialise once per run
            if self.set_flag == "N":
                self.set_flag = "Y"
                set_flag = "Y"
        if set_flag == "Y":
            self.x_size = dataconf["preprocess"]["x_size"]
            self.y_size = dataconf["preprocess"]["y_size"]
            self.channel = dataconf["preprocess"]["channel"]
            self.directory = dataconf["source_path"]
            self.output_yolo = dataconf["source_path"] + "_yolo"
            self.model_yolo = get_yolo_path()
            self.yolo_tiny = self.model_yolo + '/yolo_tiny.ckpt'
            self.yolo_face = self.model_yolo + '/YOLO_face.tar.gz'
            self.yolo_model = self.yolo_tiny
            self.tiny_url = 'https://drive.google.com/uc?id=0B-yiAeTLLamRekxqVE01Yi1RRlk&export=download'
            self.face_url = "https://drive.google.com/uc?id=0B2JbaJSrWLpzMzR5eURGN2dMTk0&export=download"
            try:
                self.yolo = dataconf["preprocess"]["yolo"]
                if self.yolo in ("Y", "y"):
                    if not os.path.isfile(self.yolo_model):
                        # download the YOLO checkpoints on first use
                        try:
                            self.download_file_from_google_drive(self.tiny_url, self.yolo_tiny)
                            self.download_file_from_google_drive(self.face_url, self.yolo_face)
                        except Exception:
                            logging.info("Error: yolo_tiny.ckpt download failed.")
                    saver, self.predicts, self.img_ph = self.yolo_detection()
                    saver.restore(sess, self.yolo_model)
            except Exception:
                self.yolo = "N"
        # PNG -> JPG: flatten the alpha channel onto a white background
        if str(type(img)).find("PngImageFile") > -1:
            img = img.convert("RGBA")
            bg = Image.new("RGBA", img.size, (255, 255, 255))
            bg.paste(img, (0, 0), img)
            filename = "Conv_" + str(filename)
            bg.save(self.directory + '/' + forder + '/' + filename)
            img = Image.open(self.directory + '/' + forder + '/' + filename)
        # greyscale
        if self.channel == 1:
            img = img.convert('L')
        # pad the shorter side to make the image square; cropping with
        # negative offsets pads with black
        longer_side = max(img.size)
        horizontal_padding = (longer_side - img.size[0]) / 2
        vertical_padding = (longer_side - img.size[1]) / 2
        img = img.crop(
            (
                -horizontal_padding,
                -vertical_padding,
                img.size[0] + horizontal_padding,
                img.size[1] + vertical_padding
            )
        )
        # resize to the configured input size
        img = img.resize((self.x_size, self.y_size), Image.ANTIALIAS)
        img = np.array(img)
        # yolo: crop the image to the most confident detection
        if self.yolo in ("Y", "y"):
            if self.x_size < 385 or self.y_size < 385:
                logging.info("Error: with YOLO enabled, x_size and y_size must be at least 385 pixels")
            else:
                try:
                    resized_img = cv2.resize(img, (self.x_size, self.y_size))
                    y_img = cv2.cvtColor(resized_img, cv2.COLOR_BGR2RGB)
                    y_img = y_img.astype(np.float32)
                    y_img = y_img / 255.0 * 2 - 1  # scale pixels to [-1, 1]
                    # reshape to the placeholder shape (assumes x_size == y_size)
                    y_img = np.reshape(y_img, (1, self.x_size, self.y_size, self.channel))
                    np_predict = sess.run(self.predicts, feed_dict={self.img_ph: y_img})
                    xmin, ymin, xmax, ymax, class_num = self.process_predicts(np_predict)
                    resized_img = resized_img[int(ymin):int(ymax), int(xmin):int(xmax)]
                    if self.yolo == "y":
                        # lowercase "y" also saves the cropped image to disk
                        set_filepaths(self.output_yolo + '/' + forder)
                        np_img = Image.fromarray(resized_img)
                        np_img.save(self.output_yolo + '/' + forder + '/' + filename)
                    img = cv2.resize(resized_img, (self.x_size, self.y_size))
                except Exception as e:
                    print("yolo crop/save error: " + str(filename))
                    print(e)
        return sess, img
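    # A minimal predict-time sketch (sess and dataconf are assumed to come
    # from the caller; the file name is illustrative):
    #
    #   with tf.Session() as sess:
    #       img = Image.open('/path/to/sample.jpg')
    #       sess, arr = node.image_convert(sess, dataconf, img, 'sample.jpg')
    #
    # With forder=None the image is routed through the "tmp" folder and arr
    # comes back as a numpy array of the resized (optionally cropped) image.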
    def run(self, conf_data):
        """Convert every image under ``source_path`` and write the results
        to HDF5 files under ``store_path``."""
        try:
            logging.info("run DataNodeImage")
            nnid = conf_data['nn_id']
            node_id = conf_data['node_id']
            wf_ver = conf_data['wf_ver']
            net_conf_id = self._find_netconf_node_id(nnid, wf_ver=wf_ver)
            netconf = WorkFlowDataImage().get_step_source(net_conf_id)
            dataconf = WorkFlowDataImage().get_step_source(node_id)
            if dataconf == {}:
                logging.info("/cluster/data/data_node_image DataNodeImage run: dataconf(" + node_id + ") does not exist")
                return
            logging.info(node_id)
            directory = dataconf["source_path"]
            output_directory = dataconf["store_path"]
            self.set_flag = "N"
            output_filename = strftime("%Y-%m-%d-%H:%M:%S", gmtime())
            output_path = os.path.join(output_directory, output_filename)
            labels = netconf['labels']
            # images per HDF5 file (default 1,000,000)
            try:
                filesize = dataconf["preprocess"]["filesize"]
            except Exception:
                filesize = 1000000
            # unzip archives into the source directory, then remove them
            for zipname in os.listdir(directory):
                if zipname.endswith(".zip"):
                    print("Zip=" + zipname)
                    with zipfile.ZipFile(directory + '/' + zipname) as fantasy_zip:
                        fantasy_zip.extractall(directory)
                    os.remove(directory + "/" + zipname)
            forderlist = os.listdir(directory)
            forderlist.sort()
            filecnt = 0
            image_arr = []
            lable_arr = []
            shape_arr = []
            name_arr = []
            processcnt = 1
            createcnt = 1
            tf.reset_default_graph()
            with tf.Session() as sess:
                for forder in forderlist:
                    try:
                        filelist = os.listdir(directory + '/' + forder)
                    except Exception as e:
                        logging.info(e)
                        continue
                    for filename in filelist:
                        try:
                            # PNG -> JPEG conversion happens inside image_convert
                            img = Image.open(directory + '/' + forder + '/' + filename)
                            sess, img = self.image_convert(sess, dataconf, img, filename, forder)
                            img = img.reshape([-1, self.x_size, self.y_size, self.channel])
                            img = img.flatten()
                            image_arr.append(img)
                            shape_arr.append(img.shape)
                            lable_arr.append(forder.encode('utf8'))
                            name_arr.append(filename.encode('utf8'))
                            filecnt += 1
                            # flush a chunk to HDF5 once filesize images are buffered
                            if filecnt >= filesize:
                                output_path_sub = output_path + "_" + str(createcnt)
                                hdf_create(self, output_path_sub, filecnt, self.channel, image_arr, shape_arr, lable_arr, name_arr)
                                filecnt = 0
                                image_arr = []
                                lable_arr = []
                                shape_arr = []
                                name_arr = []
                                createcnt += 1
                            print("Processcnt=" + str(processcnt) + " File=" + directory + " forder=" + forder + " name=" + filename)
                        except Exception:
                            print("Processcnt=" + str(processcnt) + " ErrorFile=" + directory + " forder=" + forder + " name=" + filename)
                        processcnt += 1
                    # the source folder has been fully consumed; remove it
                    shutil.rmtree(self.directory + "/" + forder)
                    if forder not in labels:
                        labels.append(forder)
            # flush whatever is left in the buffers
            if filecnt > 0:
                output_path_sub = output_path + "_" + str(createcnt)
                hdf_create(self, output_path_sub, filecnt, self.channel, image_arr, shape_arr, lable_arr, name_arr)
            netconf["labels"] = labels
            WorkFlowDataImage().put_step_source_ori(net_conf_id, netconf)
            return None
        except Exception as e:
            logging.info(e)
            raise
    def _init_node_parm(self, node_id):
        return None

    def _set_progress_state(self):
        return None
    def load_data(self, node_id="", parm='all'):
        """Return the paths of the HDF5 files stored for ``node_id``."""
        dataconf = WorkFlowDataImage().get_step_source(node_id)
        output_directory = dataconf["store_path"]
        return utils.get_filepaths(output_directory)
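# A minimal end-to-end sketch (the ids below are assumed example values,
# not real workflow ids):
#
#   node = DataNodeImage()
#   node.run({'nn_id': 'nn0001', 'wf_ver': '1', 'node_id': 'nn0001_1_datasrc'})
#   hdf5_files = node.load_data('nn0001_1_datasrc')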