Machine Learning

Serving

// paint classifies the drawing stored at path with the saved model.
//
// The image is preprocessed by processImage, fed to the model as a
// 1x64x64x1 float tensor, and the class with the highest score is reported.
// It returns that score, its index in the model output, and the class name
// looked up in classes.json. When the model cannot be loaded, the input tensor
// cannot be built, or the output has an unexpected shape, the failure is
// logged and (0, 0, "") is returned. Errors while running the session still
// panic, because Exec and Op panic.
func paint(path string) (probability float32, index int, category string) {
	float32s := processImage(path)

	model := LoadModel("/home/jiaanguo/codespace/python/paint/models/final/", []string{"serve"}, nil)
	if model == nil || model.saved == nil {
		// LoadModel has already logged the cause; using the model would
		// dereference a nil SavedModel.
		return 0, 0, ""
	}
	// The model is loaded on every call, so its session has to be released
	// on every call as well.
	defer func() {
		if err := model.saved.Session.Close(); err != nil {
			glog.Errorf("closing model session: %v", err)
		}
	}()

	input, err := tf.NewTensor(float32s)
	if err != nil {
		glog.Errorf("NewTensor(): %v", err)
		return 0, 0, ""
	}
	if err := input.Reshape([]int64{1, 64, 64, 1}); err != nil {
		glog.Errorf("Reshape(): %v", err)
		return 0, 0, ""
	}

	results := model.Exec([]tf.Output{
		model.Op("StatefulPartitionedCall", 0),
	}, map[tf.Output]*tf.Tensor{
		model.Op("serving_default_input_2", 0): input,
	})
	if len(results) == 0 {
		glog.Errorf("model returned no output tensors")
		return 0, 0, ""
	}

	scores, ok := results[0].Value().([][]float32)
	if !ok || len(scores) == 0 {
		glog.Errorf("unexpected model output type %T", results[0].Value())
		return 0, 0, ""
	}
	predictions := scores[0]

	indexMap := readJson("/home/jiaanguo/codespace/python/paint/bin/classes.json")

	// Arg-max over the scores. Only strictly positive scores can win, so an
	// all-zero output yields (0, 0, "").
	maxP, maxI, maxC := float32(0.0), 0, ""
	for i, p := range predictions {
		if p > maxP {
			maxP = p
			maxI = i
			maxC = indexMap[i]
		}
	}
	//glog.Infoln(predictions)
	glog.Infof("Max P %f\n", maxP)
	glog.Infof("Max I %d\n", maxI)
	glog.Infof("Max C %s\n", maxC)
	return maxP, maxI, maxC
}

// Model represents a trained model.
// It wraps the *tf.SavedModel that LoadModel obtains from tf.LoadSavedModel.
type Model struct {
	// saved is the loaded SavedModel: Exec runs saved.Session and Op looks up
	// operations in saved.Graph.
	// NOTE(review): presumably nil when LoadModel logged a load failure — confirm.
	saved *tf.SavedModel
}

// LoadModel loads the SavedModel found at modelPath and wraps it in a Model.
//
// modelPath, modelNames and options are forwarded unchanged to
// tf.LoadSavedModel. A load failure is only logged: the Model is still
// returned, holding whatever tf.LoadSavedModel gave back, so callers must not
// assume the result is usable.
func LoadModel(modelPath string, modelNames []string, options *tf.SessionOptions) (model *Model) {
	saved, loadErr := tf.LoadSavedModel(modelPath, modelNames, options)
	if loadErr != nil {
		glog.Errorf("LoadSavedModel(): %v", loadErr)
	}

	// Uncomment to list every operation name available in the graph:
	//log.Println("List possible ops in graphs")
	//for _, operation := range saved.Graph.Operations() {
	//	log.Printf("Op name: %v", operation.Name())
	//}
	return &Model{saved: saved}
}

// Exec runs the session of the loaded model and returns the fetched tensors.
//
// tensors lists the outputs to fetch; feedDict maps placeholder outputs (which
// must exist in the saved model) to the tensors fed into them.
// Exec panics with the error returned by Session.Run when the run fails.
func (model *Model) Exec(tensors []tf.Output, feedDict map[tf.Output]*tf.Tensor) (results []*tf.Tensor) {
	fetched, runErr := model.saved.Session.Run(feedDict, tensors, nil)
	if runErr != nil {
		panic(runErr)
	}
	return fetched
}

// Op extracts the output in position idx of the operation with the specified
// name from the model graph.
//
// It logs and then panics when the operation does not exist or when idx is
// not a valid output index for it. Continuing past either condition would
// dereference a nil operation or hand an invalid tf.Output to the session.
func (model *Model) Op(name string, idx int) tf.Output {
	op := model.saved.Graph.Operation(name)
	if op == nil {
		glog.Errorf("op %s not found", name)
		panic("op " + name + " not found")
	}
	nout := op.NumOutputs()
	if idx < 0 || nout <= idx {
		glog.Errorf("op %s has %d outputs. Requested output number %d", name, nout, idx)
		panic("op " + name + ": requested output index out of range")
	}
	return op.Output(idx)
}

// readJson loads the class-name -> index JSON object stored at path and
// returns the inverted mapping (index -> class name).
//
// Failures are logged, never returned: when the file cannot be read or does
// not decode into a map[string]int, an empty (non-nil) map is returned, so
// lookups on the result yield "".
func readJson(path string) map[int]string {
	indexMap := make(map[int]string)

	raw, err := ioutil.ReadFile(path)
	if err != nil {
		// Stop here: unmarshalling the missing content would only add a
		// second, misleading error to the log.
		glog.Errorf("Unable to read %s: %v", path, err)
		return indexMap
	}

	classMap := make(map[string]int)
	if err := json.Unmarshal(raw, &classMap); err != nil {
		glog.Errorf("Unable to unmarshal %s: %v", path, err)
		return indexMap
	}

	for className, classIndex := range classMap {
		indexMap[classIndex] = className
	}
	return indexMap
}

// processImage reads the image at path and converts it into the 64x64
// single-channel input expected by the network, flattened row by row and
// scaled to [0, 1].
//
// Pipeline: centre crop to a square -> resize to 256x256 -> grayscale ->
// invert -> resize to 64x64 -> binary Otsu threshold. Every intermediate
// image is also written to resized0.png ... resized5.png in the current
// working directory.
//
// Failures are logged and an all-zero array is returned.
func processImage(path string) [4096]float32 {
	var pixels [4096]float32

	srcImg := gocv.IMRead(path, gocv.IMReadColor)
	// Registered before the emptiness check so the Mat is released on every path.
	defer srcImg.Close()
	if srcImg.Empty() {
		glog.Errorf("Error reading image from: %v\n", path)
		return pixels
	}

	dims := srcImg.Size()
	maxY, maxX := dims[0], dims[1]

	// Crop a centred square whose side is the shorter image dimension. The
	// portrait branch is the original formula; the landscape branch mirrors
	// it so wide images no longer produce a rectangle with negative Y.
	var crop image.Rectangle
	if maxX <= maxY {
		crop = image.Rect(0, maxY/2-maxX/2, maxX, maxY/2+maxX/2)
	} else {
		crop = image.Rect(maxX/2-maxY/2, 0, maxX/2+maxY/2, maxY)
	}
	if crop.Empty() {
		glog.Errorf("Image too small to crop: %v (%dx%d)\n", path, maxX, maxY)
		return pixels
	}

	// Region returns its own Mat, so it gets its own variable and Close
	// instead of overwriting (and leaking) a Mat created with NewMat.
	cropped := srcImg.Region(crop)
	defer cropped.Close()

	resized := gocv.NewMat()
	defer resized.Close()
	gray := gocv.NewMat()
	defer gray.Close()
	inverted := gocv.NewMat()
	defer inverted.Close()
	small := gocv.NewMat()
	defer small.Close()
	thresholded := gocv.NewMat()
	defer thresholded.Close()

	// resize to dataset size
	gocv.Resize(cropped, &resized, image.Point{X: 256, Y: 256}, 0, 0, gocv.InterpolationArea)

	// IMRead delivers pixels in BGR order, so convert with the BGR code.
	gocv.CvtColor(resized, &gray, gocv.ColorBGRToGray)
	gocv.BitwiseNot(gray, &inverted)

	// resize to network size
	gocv.Resize(inverted, &small, image.Point{X: 64, Y: 64}, 0, 0, gocv.InterpolationLanczos4)
	gocv.Threshold(small, &thresholded, 50, 255, gocv.ThresholdBinary|gocv.ThresholdOtsu)

	//glog.Infoln(thresholded.Type())
	uint8s, err := thresholded.DataPtrUint8()
	switch {
	case err != nil:
		glog.Errorf("Unable to extract floats: %v", err)
	case len(uint8s) != len(pixels):
		// Guards the fixed-size array against an unexpected Mat layout.
		glog.Errorf("Unexpected pixel count %d, want %d", len(uint8s), len(pixels))
	default:
		for i, v := range uint8s {
			pixels[i] = float32(v) / 255.0
		}
	}

	// Debug output of every stage, written on each call.
	//glog.Infoln(pixels)
	gocv.IMWrite("resized0.png", cropped)
	gocv.IMWrite("resized1.png", resized)
	gocv.IMWrite("resized2.png", gray)
	gocv.IMWrite("resized3.png", inverted)
	gocv.IMWrite("resized4.png", small)
	gocv.IMWrite("resized5.png", thresholded)

	return pixels
}

Training

Dataset: Google Quick, Draw!

Code (accuracy only reaches 61%; it still needs a lot of improvement.)

Some preprocessing code adapted from here

# Create a session configuration with gpu_options.allow_growth switched on and
# open an InteractiveSession that uses it.
# NOTE(review): presumably this keeps TensorFlow from reserving all GPU memory
# up front — confirm against the TensorFlow version in use.
config = ConfigProto()
config.gpu_options.allow_growth = True
session = InteractiveSession(config=config)

def get_available_gpus():
    """Return the names of all local devices whose ``device_type`` is ``'GPU'``."""
    gpu_names = []
    for device in device_lib.list_local_devices():
        if device.device_type == 'GPU':
            gpu_names.append(device.name)
    return gpu_names


############################### Global Variable ###############################
# Output directory: the training script deletes it, recreates it and writes the
# model files, weights, summary and loss history into it.
name = "/home/jiaanguo/codespace/python/paint/models"
# Dataset directory handed to QDPrep: the *.bin files are read from it and
# classes.json is written into it.
path = "/home/jiaanguo/codespace/python/paint/bin"
# Multiplier for the batch size (64 * G); the model is only built when G <= 1.
# NOTE(review): presumably the number of GPUs — confirm.
G = 1


################################################################################
def unpack_drawing(file_handle):
    """Read one drawing record from a binary file handle.

    The record is read field by field, in this order: an 8-byte key id, a
    2-byte country code, a 1-byte recognized flag, a 4-byte timestamp and a
    2-byte stroke count, followed for every stroke by a 2-byte point count and
    then that many x bytes and that many y bytes.

    :param file_handle: object with a ``read(size)`` method returning bytes.
    :return: dict with the keys ``key_id``, ``countrycode``, ``recognized``,
        ``timestamp`` and ``image``; ``image`` is a list of ``(xs, ys)`` tuples,
        one per stroke.
    :raises struct.error: when a read returns fewer bytes than the field needs
        (for example at the end of the file).
    """

    def read_field(fmt, size):
        # Read exactly `size` bytes and decode them with the given struct format.
        return unpack(fmt, file_handle.read(size))

    (key_id,) = read_field('Q', 8)
    (countrycode,) = read_field('2s', 2)
    (recognized,) = read_field('b', 1)
    (timestamp,) = read_field('I', 4)
    (n_strokes,) = read_field('H', 2)

    strokes = []
    for _ in range(n_strokes):
        (n_points,) = read_field('H', 2)
        point_fmt = '%dB' % n_points
        xs = read_field(point_fmt, n_points)
        ys = read_field(point_fmt, n_points)
        strokes.append((xs, ys))

    drawing = {}
    drawing['key_id'] = key_id
    drawing['countrycode'] = countrycode
    drawing['recognized'] = recognized
    drawing['timestamp'] = timestamp
    drawing['image'] = strokes
    return drawing


def distance(a, b):
    """Return the Euclidean distance between the 2-D points ``a`` and ``b``.

    Only the first two components of each point are used.
    """
    dx = a[0] - b[0]
    dy = a[1] - b[1]
    squared = np.power(dx, 2) + np.power(dy, 2)
    return np.power(squared, 1. / 2)


def norm(image):
    """Convert ``image`` to float32 and divide every value by 255."""
    as_float = image.astype('float32')
    return as_float / 255.


def global_min_max(coords):
    """Return the bounding box of a drawing as ``(min_x, max_x, min_y, max_y)``.

    ``coords`` is a sequence of strokes, each stroke being ``(xs, ys)``:
    ``[((x1, x2, x3), (y1, y2, y3)), ((x1, x2), (y1, y2)), ...]``.
    Each stroke's minimum and maximum are converted with ``int()`` before the
    global minimum and maximum are taken.
    """
    x_bounds, y_bounds = [], []
    for stroke in coords:
        xs, ys = stroke[0], stroke[1]
        # per-stroke extremes along x
        x_bounds.append(int(min(xs)))
        x_bounds.append(int(max(xs)))
        # per-stroke extremes along y
        y_bounds.append(int(min(ys)))
        y_bounds.append(int(max(ys)))

    # global min is the min of the mins, global max the max of the maxes
    lowest_x, highest_x = min(x_bounds), max(x_bounds)
    lowest_y, highest_y = min(y_bounds), max(y_bounds)
    return lowest_x, highest_x, lowest_y, highest_y


class QDPrep:
    """Reads per-class ``*.bin`` drawing files and serves rasterized batches.

    On construction every ``*.bin`` file in ``path`` becomes one class, the
    class -> index mapping is written to ``<path>/classes.json``, up to
    ``images_per_class`` drawings are loaded per class, and the sample names
    (``<class>_<n>``) are shuffled with a seeded generator.
    ``run_generator`` then yields ``(images, one_hot_targets)`` batches forever.
    """

    def __init__(self, path, to_drop, random_state=42, chunk_size=64, max_dataset_size=1000000, trsh=100, normed=True,
                 train_portion=0.9, k=0.05, min_points=10, min_edges=3, dot_size=3, offset=5, img_size=(64, 64)):
        """
        :param path: directory holding the ``*.bin`` files; ``classes.json`` is written here.
        :param to_drop: collection of class names to leave out.
        :param random_state: seed for shuffling the sample names.
        :param chunk_size: number of samples per yielded batch.
        :param max_dataset_size: upper bound used to derive ``images_per_class`` and ``train_portion``.
        :param trsh: threshold value passed to ``cv2.threshold``.
        :param normed: when true, images are scaled with ``norm``.
        :param train_portion: fraction of ``max_dataset_size`` used as the train/validation split index.
        :param k: stored on the instance; not used by this class.
        :param min_points: stored on the instance; not used by this class.
        :param min_edges: stored on the instance; not used by this class.
        :param dot_size: line width used when drawing strokes.
        :param offset: margin around the drawing; half of ``dot_size`` is added to it.
        :param img_size: ``(width, height)`` of the network input.
        """
        # seeded pseudo random number generator (used to shuffle the names)
        self.prng = RandomState(random_state)

        self.dot_size = dot_size
        # margin grows by half the pen width (integer division)
        self.offset = offset + dot_size // 2
        self.trsh = trsh

        self.normed = normed
        self.img_size = img_size
        self.max_dataset_size = max_dataset_size
        # index in the shuffled name list where validation samples start
        self.train_portion = int(max_dataset_size * train_portion)

        self.min_edges = min_edges
        self.min_points = min_points

        self.path = path
        self.k = k
        self.chunk_size = chunk_size

        # One class per *.bin file; glob.glob() returns a list of all matching
        # file paths. basename/splitext keeps the name correct on any path
        # separator and for class names that contain a dot.
        found = [os.path.splitext(os.path.basename(f))[0] for f in glob.glob(os.path.join(self.path, '*.bin'))]

        # Drop unwanted classes BEFORE numbering them, so indices stay
        # contiguous (0 .. len(classes) - 1) and always fit the one-hot width.
        kept = [c for c in found if c not in to_drop]
        # {'train': 0, 'bottlecap': 1, 'beard': 2, ...}  key: class, value: index
        self.classes = {c: i for i, c in enumerate(kept)}

        # drawings loaded per class, e.g. 1,000,000 // 345 = 2898
        self.images_per_class = max_dataset_size // len(self.classes)

        with open(self.path + '/classes.json', 'w') as f:
            json.dump(self.classes, f)

        self.names = []
        self.binaries = {}
        for key in tqdm(self.classes, desc='read classes binaries', ascii=True):
            # Keep only the stroke data of each drawing, e.g.
            # [((53, 56), (255, 110)), ((56, 61, 4, 0), (255, 97, 91, 35))]
            self.binaries[key] = [i['image'] for i in list(self.unpack_drawings('%s/%s.bin' % (self.path, key)))]
            # ['train_0', 'train_1', 'train_2', ...]
            self.names.extend([key + '_' + str(i) for i in range(len(self.binaries[key]))])
        self.prng.shuffle(self.names)
        print(" [INFO] %s files & %s classes prepared" % (len(self.names), len(self.classes)))

    def unpack_drawings(self, filename):
        """Yield at most ``images_per_class`` drawings from ``filename``.

        Iteration stops early when the file runs out of complete records.
        """
        with open(filename, 'rb') as f:
            # range() yields exactly images_per_class records; the former
            # `while i <= images_per_class` loop produced one extra.
            for _ in range(self.images_per_class):
                try:
                    yield unpack_drawing(f)
                except struct.error:
                    break

    def OHE(self, y):
        """One-hot encode ``y`` against the number of classes.

        A Python or numpy integer gives a 1-D vector; an array of class indices
        gives a 2-D array with one row per index.
        """
        if isinstance(y, (int, np.integer)):
            ohe = np.zeros(len(self.classes))
            ohe[y] = 1
        else:
            ohe = np.zeros((len(y), len(self.classes)))
            ohe[np.arange(len(y)), y.astype('int64')] = 1
        return ohe

    def quickdraw_coords2img(self, image):
        """Rasterize stroke coordinates into a PIL RGB image (black on white).

        :param image: list of strokes, each ``(xs, ys)``.
        :return: ``{'img': img}``; when ``img_size`` is set the dict also holds
            ``scaleX``, ``scaleY`` and ``start_point`` used for normalization.
        """
        # Plain nested lists: strokes have different lengths, which np.array
        # cannot represent as a regular array, and an integer array would
        # silently truncate the scaled coordinates below.
        image = [[list(axis) for axis in stroke] for stroke in image]
        if self.img_size:
            # Find the point closest to the origin and move it to (0, 0).
            min_dists, dists = {}, [[] for _ in range(len(image))]
            for i in range(len(image)):
                for j in range(len(image[i][0])):
                    dists[i].append(distance([0, 0], [image[i][0][j], image[i][1][j]]))
                min_dists[min(dists[i])] = i

            min_dist = min(list(min_dists.keys()))
            min_index = min_dists[min_dist]
            start_point = [image[min_index][0][dists[min_index].index(min_dist)],
                           image[min_index][1][dists[min_index].index(min_dist)]]
            for i in range(len(image)):
                for j in range(len(image[i][0])):
                    image[i][0][j] = image[i][0][j] - start_point[0]
                    image[i][1][j] = image[i][1][j] - start_point[1]

            # Scale the drawing so its extent matches the target size minus margins.
            min_x, max_x, min_y, max_y = global_min_max(image)
            scaleX = ((max_x - min_x) / (self.img_size[0] - (self.offset * 2 - 1)))
            scaleY = ((max_y - min_y) / (self.img_size[1] - (self.offset * 2 - 1)))
            # A drawing with no extent along an axis (e.g. a straight horizontal
            # line) has scale 0; leave that axis unscaled instead of dividing by zero.
            if scaleX == 0:
                scaleX = 1.0
            if scaleY == 0:
                scaleY = 1.0
            for i in range(len(image)):
                for j in range(len(image[i][0])):
                    image[i][0][j] = image[i][0][j] / scaleX
                    image[i][1][j] = image[i][1][j] / scaleY

        min_x, max_x, min_y, max_y = global_min_max(image)
        img = Image.new("RGB", (max_x - min_x + self.offset * 2, max_y - min_y + self.offset * 2), "white")
        draw = ImageDraw.Draw(img)

        # Draw every stroke as connected segments, shifted into the margin.
        for j in range(len(image)):
            for i in range(len(image[j][0]))[1:]:
                x, y = image[j][0][i - 1], image[j][1][i - 1]
                x_n, y_n = image[j][0][i], image[j][1][i]
                x -= min_x - self.offset
                y -= min_y - self.offset
                x_n -= min_x - self.offset
                y_n -= min_y - self.offset
                draw.line([(x, y), (x_n, y_n)], fill="black", width=self.dot_size)

        if self.img_size:
            return {'img': img, 'scaleX': scaleX, 'scaleY': scaleY, 'start_point': start_point}
        return {'img': img}

    def run_generator(self, val_mode=False):
        """Yield ``(images, one_hot_targets)`` batches forever.

        Training mode walks ``names[:train_portion]``, validation mode
        ``names[train_portion:]``. Batches hold ``chunk_size`` samples; the last
        batch of each pass holds the remainder. The generator ends immediately
        when the selected range is empty.
        """
        pics, targets, i, n = [], [], 0, 0
        lims = [0, self.train_portion]
        if val_mode:
            lims = [self.train_portion, None]
        length = len(self.names[lims[0]:lims[1]])
        if length == 0:
            # Nothing to serve; looping would spin forever without yielding.
            return
        N = length // self.chunk_size
        tail = length % self.chunk_size
        while True:
            for name in self.names[lims[0]:lims[1]]:
                # Split on the LAST underscore so class names may contain one.
                class_name, no = name.rsplit('_', 1)
                target = self.classes[class_name]
                coords = self.binaries[class_name][int(no)]

                img = np.array(self.quickdraw_coords2img(coords)['img'])
                img = cv2.cvtColor(img, cv2.COLOR_RGB2GRAY)
                img = cv2.bitwise_not(img)
                # interpolation must be passed by keyword: the third positional
                # argument of cv2.resize is `dst`, not the interpolation flag.
                img = cv2.resize(img, self.img_size, interpolation=cv2.INTER_LANCZOS4)
                img = cv2.threshold(img, self.trsh, 255,
                                    cv2.THRESH_BINARY | cv2.THRESH_OTSU)[1]

                if self.normed:
                    img = norm(img)

                img = img[:, :, np.newaxis]
                pics.append(img)
                targets.append(self.OHE(target))
                i += 1
                if n == N and i == tail:
                    # Partial batch that closes the pass: reset all counters so
                    # the next pass starts clean instead of re-yielding these samples.
                    out_pics, out_target = np.array(pics), np.array(targets)
                    pics, targets, i, n = [], [], 0, 0
                    yield (out_pics, out_target)

                elif i == self.chunk_size:
                    out_pics, out_target = np.array(pics), np.array(targets)
                    pics, targets, i = [], [], 0
                    n += 1
                    if tail == 0 and n == N:
                        # No partial batch in this pass; the pass ends here.
                        n = 0
                    yield (out_pics, out_target)

if __name__ == '__main__':
    # Training entry point: prepares the dataset, builds MobileNetV2 for
    # 64x64x1 inputs, trains it and writes every artifact below `name`.
    print("[INFO] GPU devices:%s" % get_available_gpus())

    # Start from an empty model directory. Only "directory does not exist" is
    # expected here; any other failure should surface instead of being hidden.
    try:
        rmtree(name)
    except FileNotFoundError:
        pass
    # recreate model directory
    os.mkdir(name)

    ################################################################################

    batch_size = 64 * G
    num_epochs = 15  # 15
    img_size = (64, 64)
    network = 'MobileNetV2'  # 'InceptionV3' or 'MobileNetV2'
    params = {
        'include_top': True,
        'weights': None,
        'input_tensor': Input(shape=img_size + (1,))  # shape=(None, 64, 64, 1) dtype=float32
    }
    reader = QDPrep(path, [], random_state=42, chunk_size=batch_size,
                    max_dataset_size=1000000, trsh=100, normed=True,
                    train_portion=0.9, k=0.05, min_points=10,
                    min_edges=3, dot_size=3, offset=5, img_size=img_size)

    ################################################################################

    num_classes = len(reader.classes)
    params['classes'] = num_classes

    if G <= 1:
        print("[INFO] training with 1 GPU...")

        # network = 'MobileNetV2', params = params
        # multi_model = get_model(network, params)
        # model_json = multi_model.to_json()

        multi_model = tf.keras.applications.MobileNetV2(
            input_shape=None,
            alpha=1.0,
            include_top=True,
            weights=None,
            input_tensor=Input(shape=img_size + (1,)),
            pooling=None,
            classes=num_classes,
            classifier_activation="softmax",
        )
        model_json = multi_model.to_json()
        with open(name + "/model.json", "w") as json_file:
            json_file.write(model_json)
    else:
        # No model is built for G > 1; fail with a clear message instead of a
        # NameError on `multi_model` further down.
        raise NotImplementedError("training with G > 1 GPUs is not implemented")

    adam = optimizers.Adam(lr=0.001, beta_1=0.9, beta_2=0.999, epsilon=1e-7, decay=0.0, clipnorm=5)
    multi_model.compile(optimizer=adam, loss='categorical_crossentropy',
                        metrics=["accuracy", lambda x, y: top_k_categorical_accuracy(x, y, 5)])

    multi_model.summary()
    with open(name + '/model_summary.txt', 'w') as f:
        multi_model.summary(print_fn=lambda x: f.write(x + '\n'))

    train_steps = reader.train_portion // batch_size
    val_steps = (reader.max_dataset_size - reader.train_portion) // batch_size

    checkpoint = ModelCheckpoint(name + '/checkpoint_weights.hdf5', monitor='val_loss', verbose=1,
                                 save_best_only=True, mode='min', save_weights_only=False)
    clr = CyclicLR(base_lr=0.001, max_lr=0.006, step_size=train_steps * 2, mode='exp_range', gamma=0.99994)

    print("[INFO] training network...")

    H = multi_model.fit_generator(reader.run_generator(val_mode=False),
                                  steps_per_epoch=train_steps, epochs=num_epochs, shuffle=False, verbose=1,
                                  validation_data=reader.run_generator(val_mode=True), validation_steps=val_steps,
                                  use_multiprocessing=False, workers=1, callbacks=[checkpoint, clr])

    # Use TF to save the graph model instead of Keras save model to load it in Golang
    tf.saved_model.save(multi_model, name + "/final")

    multi_model.save_weights(name + "/final_weights.h5")
    multi_model.save(name + "/final_model.hdf5")
    # Context manager so the history file is flushed and closed.
    with open(name + '/loss_history.pickle.dat', 'wb') as history_file:
        pickle.dump(H.history, history_file)
    print("[INFO] Finished!")