# Current Path : /home/deltalab/PMS/recommendations/recommender-system-batch/
# Current File : /home/deltalab/PMS/recommendations/recommender-system-batch/hybrid_RS.py
from datetime import datetime
from os import path
import numpy as np
from numpy import timedelta64
from _library.data_utils import data_loader
from components.collectionBased_RS import collectionBased_RS
from components.coPurchase_RS import associationRules_RS
from components.semiRandom_RS import semiRandom_RS
from components.typeBased_RS import typeBased_RS
from components.vendorBased_RS import vendorBased_RS
# "enhanced_assRules", "collectionBased", "simple_assRules", "vendorBased", "typeBased"
def load_data(relevant_columns=None, main_product_identifier='SKU', excluded_products=None, debug=False):
    """Load the orders and derive the inTrentino platform products from them.

    Args:
        relevant_columns: Product attributes forwarded as ``col_names`` to
            ``data_loader.generate_platformData``. When ``None`` (default) the
            columns 'Vendor', 'SKU', 'Title', 'Product Type', 'Type id' and
            'Linked regions' are used.
        main_product_identifier: Column of the products table whose values
            are returned as the product names.
        excluded_products: Forwarded as ``excluded_products`` to
            ``data_loader.generate_platformData``. ``None`` (default) means
            an empty list.
        debug: Currently unused; the order sub-sampling it was meant to
            toggle is commented out below.

    Returns:
        A tuple ``(orders_df, products_df, product_names)`` where
        ``product_names`` is the ``main_product_identifier`` column as a list.
    """
    # Build the defaults per call: list literals in the signature would be
    # shared (and possibly mutated) across invocations.
    if relevant_columns is None:
        relevant_columns = ['Vendor', 'SKU', 'Title', 'Product Type', 'Type id', 'Linked regions']
    if excluded_products is None:
        excluded_products = []

    print("\n" + 120 * "-")
    print("-" * 26, "Loading the old inTrentino data (i.e., orders, platform products)", "-" * 27)
    print(120 * "-", "\n")

    # Load the orders
    orders_df = data_loader.load_orders()
    #orders_df = orders_df if not debug else orders_df.sample(frac = 0.01, random_state = 101)

    # 0) Load all the products of inTrentino
    products_df = data_loader.generate_platformData(orders_df, col_names=relevant_columns,
                                                    inTrentino_flag=True,
                                                    excluded_products=excluded_products)
    print(f"PRODUCT ATTRIBUTES ({len(products_df.columns)}):", ', '.join(products_df.columns), '\n')

    # 0) ALTERNATIVE: load the pre-computed platform products
    # products_df, product_names = data_loader.load_inTrentino_products(app_settings['excluded_products'])

    # Extract the product names
    product_names = products_df[main_product_identifier].tolist()
    return orders_df, products_df, product_names
def initialize_assRules_engine(products_df, app_settings, coPurchase_versionName, main_product_identifier, product_identifier,load_remotely):
    """Create an ``associationRules_RS`` engine and configure it from ``app_settings``.

    Args:
        products_df: Forwarded as first constructor argument of the engine.
        app_settings: Settings mapping. Read keys: 'excluded_products',
            'excluded_link_types' and, under 'associationRules_RS',
            'force_perfect_match', 'hide_raw_scores',
            'filter_source_platform' and 'verbose'.
        coPurchase_versionName: Forwarded to the engine constructor.
        main_product_identifier: Passed to ``set_params`` as
            ``unique_product_identifier``.
        product_identifier: Forwarded to the engine constructor.
        load_remotely: Forwarded to the engine constructor.

    Returns:
        The configured ``associationRules_RS`` instance.
    """
    # [RS_1A] Initialization
    engine = associationRules_RS(products_df, coPurchase_versionName, product_identifier, load_remotely)

    # Shared settings first, then the options specific to this engine
    params = {
        'unique_product_identifier': main_product_identifier,
        'excluded_products': app_settings['excluded_products'],
        'excluded_link_types': app_settings["excluded_link_types"],
    }
    engine_settings = app_settings['associationRules_RS']
    for option in ('force_perfect_match', 'hide_raw_scores', 'filter_source_platform', 'verbose'):
        params[option] = engine_settings[option]

    engine.set_params(**params)
    return engine
def run_associationRules(assRules_recom_system, recommendations_info, recommendations_byProduct, enhanced_version=False):
    """Run the association-rules engine and merge its output into the shared results.

    Args:
        assRules_recom_system: Engine exposing
            ``generate_assRulesBased_recommendations(flag_enhanced_assRules=...)``
            plus the attributes ``rs_name`` and ``category_based``.
        recommendations_info: Mapping indexed by the engine's ``rs_name``;
            its 'duration' and 'products' entries are overwritten here.
        recommendations_byProduct: Mapping from upper-cased product name to
            the list of recommended items; extended in place.
            NOTE(review): entries are accessed without an existence check,
            so this is presumably a ``defaultdict(list)`` -- confirm.
        enhanced_version: Forwarded to the engine as
            ``flag_enhanced_assRules``; also selects the banner text.

    Returns:
        None. Both mappings are updated in place.
    """
    message = 'Enhanced with collections' if enhanced_version else 'Simple'
    print(120 * "-")
    print("-" * (45 - len(message)), f"Assocation rules: {message}", "-" * (45 - len(message)))
    print(120 * "-", "\n")

    start_time = datetime.now()

    # [RS_1A] Generate recommendation for all items
    recommendations_assRules = assRules_recom_system.generate_assRulesBased_recommendations(
        flag_enhanced_assRules=enhanced_version)
    rs_codeName = assRules_recom_system.rs_name
    rs_category_based = assRules_recom_system.category_based

    # Compute computational time (seconds below one minute, minutes otherwise)
    raw_duration = datetime.now() - start_time
    computational_time = timedelta64(raw_duration, 's' if raw_duration.total_seconds() < 60 else 'm')
    print('\nDuration:', computational_time)

    # [RS_1A] Add its recommendations to the general file
    recommendations_info[rs_codeName]['duration'] = computational_time
    recommendations_info[rs_codeName]['products'] = list()

    for product_name, recommended_items in recommendations_assRules.items():
        if len(recommended_items) == 0:
            continue

        # Use one normalised key for both the check and the write; checking
        # the raw name would miss the entry stored under the upper-cased name
        # (and create a stray key in a defaultdict).
        product_key = product_name.upper()

        # Skip a category-based engine when the reference product already
        # holds more than one recommendation.
        if rs_category_based and len(recommendations_byProduct[product_key]) > 1:
            continue

        recommendations_byProduct[product_key].extend(recommended_items)
        recommendations_info[rs_codeName]['products'].append(product_name)
def initialize_collectionBased_engine(products_df, orders_df, app_settings, main_product_identifier, product_identifier):
    """Create a ``collectionBased_RS`` engine and configure it from ``app_settings``.

    Args:
        products_df: Passed to the engine as ``platform_products_df``.
        orders_df: Passed to the engine as ``orders_df``.
        app_settings: Settings mapping. Read keys: 'excluded_link_types',
            'drop_similar_categories' and, under 'collectionBased_RS',
            'force_one_linked_type', 'bundle_dimension',
            'filter_source_platform', 'merge_collection_type',
            'force_single_item' and 'verbose'.
        main_product_identifier: Passed as ``unique_product_identifier``.
        product_identifier: Passed as ``product_identifier``.

    Returns:
        The configured ``collectionBased_RS`` instance.
    """
    # [RS_2] Import collection names grouped by type
    codes_path = path.join('_library', 'INDACO_collectionCodes.json')
    grouped_names, _ = data_loader.load_collectionTypes(codes_path, verbose=True)

    # [RS_2] Initialize
    engine = collectionBased_RS(
        orders_df=orders_df,
        platform_products_df=products_df,
        grouped_collection_names=grouped_names,
        unique_product_identifier=main_product_identifier,
        product_identifier=product_identifier)

    # Collect the parameters in the same order the settings are consulted
    params = {'excluded_link_types': app_settings["excluded_link_types"]}
    engine_settings = app_settings['collectionBased_RS']
    for option in ("force_one_linked_type", "bundle_dimension", "filter_source_platform"):
        params[option] = engine_settings[option]
    params['drop_similar_categories'] = app_settings["drop_similar_categories"]
    for option in ("merge_collection_type", "force_single_item", "verbose"):
        params[option] = engine_settings[option]

    engine.set_params(**params)
    return engine
def run_collectionBased(collectionBased_recom_system, recommendations_info, recommendations_byProduct):
    """Run the collection-based engine and merge its output into the shared results.

    Args:
        collectionBased_recom_system: Engine exposing
            ``generate_collectionBased_recommendations()`` plus the
            attributes ``rs_name`` and ``category_based``. The generated
            result maps each product name to a mapping whose values are
            lists of recommended items.
        recommendations_info: Mapping indexed by the engine's ``rs_name``;
            its 'duration' and 'products' entries are overwritten here.
        recommendations_byProduct: Mapping from upper-cased product name to
            the list of recommended items; extended in place.
            NOTE(review): entries are accessed without an existence check,
            so this is presumably a ``defaultdict(list)`` -- confirm.

    Returns:
        None. Both mappings are updated in place.
    """
    print(120 * "-")
    print("-" * 51, "Collection based", "-" * 51)
    print(120 * "-", "\n")

    # [RS_2] Generate recommendation for all items
    start_time = datetime.now()
    recommendations_collectionBased = collectionBased_recom_system.generate_collectionBased_recommendations()

    # Compute computational time (seconds below one minute, minutes otherwise)
    raw_duration = datetime.now() - start_time
    computational_time = timedelta64(raw_duration, 's' if raw_duration.total_seconds() < 60 else 'm')
    print('\nDuration:', computational_time)

    rs_codeName = collectionBased_recom_system.rs_name
    rs_category_based = collectionBased_recom_system.category_based

    # [RS_2] Add its recommendations to the general file
    recommendations_info[rs_codeName]['duration'] = computational_time
    recommendations_info[rs_codeName]['products'] = list()

    for product_name, grouped_recommendations in recommendations_collectionBased.items():
        # Use one normalised key for both the check and the write; checking
        # the raw name would miss the entry stored under the upper-cased name
        # (and create a stray key in a defaultdict).
        product_key = product_name.upper()

        for recommended_items in grouped_recommendations.values():
            if len(recommended_items) == 0:
                continue

            # Skip a category-based engine when the reference product already
            # holds more than one recommendation (this includes items added
            # by a previous group of the same product).
            if rs_category_based and len(recommendations_byProduct[product_key]) > 1:
                continue

            recommendations_byProduct[product_key].extend(recommended_items)
            # Recorded once per contributing group
            recommendations_info[rs_codeName]['products'].append(product_name)
def initialize_typeBased_engine(products_df, product_identifier, type_attribute_name, filterSamePlatform):
    """Create the type-based recommender.

    All four arguments are forwarded positionally, in the given order, to
    the ``typeBased_RS`` constructor.

    Returns:
        The new ``typeBased_RS`` instance.
    """
    return typeBased_RS(products_df, product_identifier, type_attribute_name, filterSamePlatform)
def run_typeBased(typeBased_recom_system, recommendations_info, recommendations_byProduct):
    """Run the type-based engine and merge its output into the shared results.

    Args:
        typeBased_recom_system: Engine exposing
            ``generate_typeBased_recommendations()`` and the attribute
            ``rs_name``. The generated result maps each product name to a
            list of recommended items.
        recommendations_info: Mapping indexed by the engine's ``rs_name``;
            its 'duration' and 'products' entries are overwritten here.
        recommendations_byProduct: Mapping from upper-cased product name to
            the list of recommended items; extended in place.

    Returns:
        None. Both mappings are updated in place.
    """
    separator = 120 * "-"
    print(separator)
    print("-" * 51, "Type based", "-" * 51)
    print(separator, "\n")

    # Time the generation of the recommendations for all items
    started_at = datetime.now()
    generated = typeBased_recom_system.generate_typeBased_recommendations()
    elapsed = datetime.now() - started_at

    # Seconds below one minute, minutes otherwise
    unit = 's' if elapsed.total_seconds() < 60 else 'm'
    duration = timedelta64(elapsed, unit)
    print('\nDuration:', duration)

    # Register the run under the engine's code name
    engine_name = typeBased_recom_system.rs_name
    recommendations_info[engine_name]['duration'] = duration
    recommendations_info[engine_name]['products'] = []

    for sku, items in generated.items():
        if len(items) == 0:
            continue
        recommendations_byProduct[sku.upper()].extend(items)
        recommendations_info[engine_name]['products'].append(sku)
def initialize_vendorBased_engine(products_df, product_identifier, filterSamePlatform):
    """Create the vendor-based recommender.

    All three arguments are forwarded positionally, in the given order, to
    the ``vendorBased_RS`` constructor.

    Returns:
        The new ``vendorBased_RS`` instance.
    """
    return vendorBased_RS(products_df, product_identifier, filterSamePlatform)
def run_vendorBased(vendorBased_recom_system, recommendations_info, recommendations_byProduct):
    """Run the vendor-based engine and merge its output into the shared results.

    Args:
        vendorBased_recom_system: Engine exposing
            ``generate_vendorBased_recommendations()`` and the attribute
            ``rs_name``. The generated result maps each product name to a
            list of recommended items.
        recommendations_info: Mapping indexed by the engine's ``rs_name``;
            its 'duration' and 'products' entries are overwritten here.
        recommendations_byProduct: Mapping from upper-cased product name to
            the list of recommended items; extended in place.

    Returns:
        None. Both mappings are updated in place.
    """
    separator = 120 * "-"
    print(separator)
    print("-" * 51, "Vendor based", "-" * 51)
    print(separator, "\n")

    engine_name = vendorBased_recom_system.rs_name

    # Time the generation of the recommendations for all items
    started_at = datetime.now()
    generated = vendorBased_recom_system.generate_vendorBased_recommendations()
    elapsed = datetime.now() - started_at

    # Seconds below one minute, minutes otherwise
    unit = 's' if elapsed.total_seconds() < 60 else 'm'
    duration = timedelta64(elapsed, unit)
    print('\nDuration:', duration)

    # Register the run under the engine's code name
    recommendations_info[engine_name]['duration'] = duration
    recommendations_info[engine_name]['products'] = []

    for sku, items in generated.items():
        if len(items) == 0:
            continue
        recommendations_byProduct[sku.upper()].extend(items)
        recommendations_info[engine_name]['products'].append(sku)
def run_randomProducts(allProducts_df, recommendations_byProduct, recommendations_info,
                       indacoCategories, filter_source_platform, num_target_item=4):
    """Top up products that have too few recommendations with semi-random items.

    For every entry of ``recommendations_byProduct`` the distinct
    (upper-cased) 'item_sku' values already recommended are counted. When
    that count is at most ``num_target_item``, the ``semiRandom_RS`` engine
    is asked for ``num_target_item - count + 1`` additional items, which are
    appended to the product's recommendations.

    Args:
        allProducts_df: Forwarded to the ``semiRandom_RS`` constructor.
        recommendations_byProduct: Mapping from product SKU to a list of
            recommended items (each indexable by 'item_sku'); extended in
            place.
        recommendations_info: Mapping in which the entry for the engine's
            ``rs_codeName`` is (re)created with 'products' and 'duration'.
        indacoCategories: Forwarded to the ``semiRandom_RS`` constructor.
        filter_source_platform: Forwarded to the ``semiRandom_RS`` constructor.
        num_target_item: Target number of distinct recommended SKUs per
            product.

    Returns:
        The same ``recommendations_byProduct`` object, updated in place.
    """
    print(120 * "-")
    print("-" * 51, "Random products", "-" * 51)
    print(120 * "-", "\n")

    # Initialize the approach
    semiRandomBased_recom_system = semiRandom_RS(allProducts_df, indacoCategories, filter_source_platform)
    rs_codeName = semiRandomBased_recom_system.rs_codeName

    # Initialize
    start_time = datetime.now()
    recommendations_info[rs_codeName] = {'products': list()}

    # Iterate over a snapshot: writing to the upper-cased key below may insert
    # a new entry, and adding keys while iterating the live view raises
    # "RuntimeError: dictionary changed size during iteration".
    for product_sku, recommended_items in list(recommendations_byProduct.items()):
        # Retrieve the recommendations from the previous methods
        previous_recommendedSKUs = np.unique([recom['item_sku'].upper() for recom in recommended_items])
        num_missing_products = num_target_item - len(previous_recommendedSKUs)

        # Trigger the approach only if the item has equal (add an extra item)
        # or fewer linked products than the target threshold
        if num_missing_products < 0:
            continue

        # Generate the connected products semi-randomly
        random_items = semiRandomBased_recom_system.generate_connectedProducts(
            product_sku, previous_recommendedSKUs, num_randomItems=num_missing_products + 1)

        # Attach the new items
        recommendations_byProduct[product_sku.upper()].extend(random_items)
        recommendations_info[rs_codeName]['products'].append(product_sku)

    # Compute the elapsed time
    raw_duration = datetime.now() - start_time
    recommendations_info[rs_codeName]['duration'] = timedelta64(raw_duration, 's')
    print("Duration:", recommendations_info[rs_codeName]['duration'])
    return recommendations_byProduct