Similar Product Recommender system using Deep Learning for an online e-commerce store
A tutorial on building a recommender that lets users select a specific shirt and search the inventory for shirts with a similar pattern
- Import libraries required for file operations
- Data preparation
- Model training
- Inference pipeline
- Finding top 10 similar shirts
- Model persistence
import os
import pickle
from glob import glob
# import basic numerical libraries
import numpy as np
import pandas as pd
# import keras libraries for image recognition
from keras.applications import VGG16
from keras.applications.vgg16 import preprocess_input
from keras.preprocessing import image as kimage
# download and unzip shirts folder from the directory
!wget https://raw.githubusercontent.com/sparsh-ai/rec-data-public/master/shirts.zip
!unzip shirts.zip
# Map each shirt id (image file name without directory or extension) to its
# preprocessed image array.
shirts_dict = dict()
# glob() returns files in arbitrary order; sorting keeps the insertion order
# of shirts_dict (and therefore the matrix row of each shirt) reproducible.
for shirt_path in sorted(glob('shirts/*.jpg')):  # load all shirts
    # images are resized to 224 x 224 pixels before being fed to VGG16
    img = kimage.load_img(shirt_path, target_size=(224, 224))
    # add a leading batch axis, then apply the VGG16 input preprocessing
    img = preprocess_input(np.expand_dims(kimage.img_to_array(img), axis=0))
    # os.path handles any path separator and keeps dots inside the file name,
    # so 'shirts/' + shirt_id + '.jpg' always points back to the same file
    shirt_id = os.path.splitext(os.path.basename(shirt_path))[0]
    shirts_dict[shirt_id] = img  # map image & shirt id
# VGG16 without its classifier head, with ImageNet weights, used purely as a
# feature extractor.
model = VGG16(include_top=False, weights='imagenet')

# One row per loaded shirt. `no_of_shirts` was previously never defined.
no_of_shirts = len(shirts_dict)
# 25088 = 7 * 7 * 512: flattened size of the model output for a 224 x 224 input
shirts_matrix = np.zeros([no_of_shirts, 25088])  # initialize the matrix with zeros
for i, img in enumerate(shirts_dict.values()):
    shirts_matrix[i, :] = model.predict(img).ravel()  # flatten the feature maps
model.summary()

# NOTE(review): `similarity` is dumped and queried further down in this file
# but was never computed anywhere. Pairwise cosine similarity between the
# feature rows is assumed here -- confirm against the original notebook.
norms = np.linalg.norm(shirts_matrix, axis=1, keepdims=True)
norms[norms == 0] = 1.0  # guard against 0/0 for an all-zero feature row
unit_rows = shirts_matrix / norms
similarity = unit_rows.dot(unit_rows.T)
# Two-way lookup between matrix row indices and shirt ids. Row i belongs to
# the i-th key of shirts_dict, the same order used to fill shirts_matrix.
# Built without a module-level loop variable named `id`, which would shadow
# the builtin for the rest of the session.
matrix_id_to_shirt_id = dict(enumerate(shirts_dict))
shirt_id_to_matrix_id = {
    shirt_id: matrix_id for matrix_id, shirt_id in matrix_id_to_shirt_id.items()
}
from IPython.display import Image

# Build an IPython image object for the picture of shirt 1015; a notebook
# renders it inline when this is the last expression of a cell.
query_image_path = 'shirts/1015.jpg'
Image(query_image_path)
import glob
import matplotlib.pyplot as plt
import matplotlib.image as mpimg
%matplotlib inline
images = []
for shirt in closest_shirts:
shirt = 'shirts/'+shirt+'.jpg'
for img_path in glob.glob(shirt):
images.append(mpimg.imread(img_path))
plt.figure(figsize=(20,10))
columns = 5
for i, image in enumerate(images):
plt.subplot(len(images) / columns + 1, columns, i + 1)
plt.imshow(image)
# sklearn.externals.joblib was deprecated in scikit-learn 0.21 and removed in
# 0.23; the standalone joblib package (a scikit-learn dependency) replaces it.
try:
    import joblib
except ImportError:  # old scikit-learn installs without standalone joblib
    from sklearn.externals import joblib

# Persist the similarity matrix and both id lookup tables to the working
# directory so they can be reloaded without recomputing them.
joblib.dump(similarity, 'similarity.pkl')
joblib.dump(shirt_id_to_matrix_id, 'shirt_id_to_matrix_id.pkl')
joblib.dump(matrix_id_to_shirt_id, 'matrix_id_to_shirt_id.pkl')
# Reload the persisted similarity matrix.
loaded_model = joblib.load('similarity.pkl')

# NOTE(review): `target_id` was never defined in this file. The query is
# assumed to be shirt '1015' (the image previewed earlier) -- confirm.
target_shirt_id = '1015'
target_id = shirt_id_to_matrix_id[target_shirt_id]

# Sort the query's row by descending similarity and keep the first 10 matrix
# ids, then translate them back to shirt ids.
closest_ids = np.argsort(loaded_model[target_id, :])[::-1][:10]
closest_shirts = [matrix_id_to_shirt_id[matrix_id] for matrix_id in closest_ids]
closest_shirts