# Current Path : /home/deltalab/PMS/recommendations/recommender-system-batch/
# Current File : //home/deltalab/PMS/recommendations/recommender-system-batch/app.py
#----------------------------------------------------------------------------
# Created By: Marco Bronzini
# Company: Delta Informatica S.p.A. (DeltaLAB)
# Project: INDACO
# Task: Recommender system
# Created Date: Dec. 2022
# ---------------------------------------------------------------------------
from os.path import join
import hybrid_RS
from _library import launcher_utils
from _library.data_utils.data_loader import load_indacoData
from _library.data_utils.io_toolkit import read_settings
from _library.data_utils.mongodb_utils import write_recommendations_to_mongodb, write_recommendations_to_mongodb_legacy, simplified_SKUs
def main():
    """Run the batch recommender pipeline end to end.

    Steps, in order:
      1. read the app settings and load the INDACO data (products, orders,
         categories and user profiles);
      2. compute the simplified SKUs and attach them to the orders (and to the
         user profiles, when ``app_settings['profile_customers']`` is set);
      3. run every recommendation engine listed in ``app_settings['rs_priority']``,
         followed by ``hybrid_RS.run_randomProducts``;
      4. re-arrange the recommendations for each user profile and save them;
      5. write the results to MongoDB when ``app_settings['write_on_mongoDB']`` is set.

    NOTE(review): the nesting below was reconstructed from a copy of this script
    whose indentation had been lost. In particular, confirm that the type-based,
    vendor-based and random engines are meant to run once (outside the
    per-identifier loop) and that the legacy MongoDB write belongs under the
    ``write_on_mongoDB`` switch.
    """
    # ----------------------------------- APP SETTINGS -----------------------------------
    # 0.A) Read the app settings
    app_settings = read_settings()
    # 0.B) Visualize the intro
    launcher_utils.visualize_intro(app_settings)
    # 0.C) Set the main attribute names
    main_product_identifier = app_settings['unique_product_identifier']
    type_attribute_name = 'Product Type'

    # -------------------------------------- INDACO --------------------------------------
    # [INDACO] Read the INDACO products, orders, categories and user profiles
    (db_service, indacoProducts_df, indacoOrders_df,
     indacoCategories, indacoUserProfiles) = load_indacoData("etl", visualize_aggregated_territories=True)

    # ---------------------------- OLD ORDERS FROM INTRENTINO ----------------------------
    # [Old inTrentino] Loading the old data of the platform inTrentino is currently disabled:
    # inTrentino_orders_df, inTrentino_products_df, inTrentino_productNames = hybrid_RS.load_data(
    #     main_product_identifier = main_product_identifier,
    #     excluded_products = app_settings['excluded_products'])

    # ---------------------------------- PRE-PROCESSING ----------------------------------
    # [PRE-PROCESSING A] Simplify the SKUs (the mapping with the old inTrentino SKUs is disabled):
    # products_df, product_names, sku_mapping = map_indacoSKUs(indacoProducts_df, inTrentino_products_df,
    #                                                          main_product_identifier)
    products_df, product_names, sku_mapping = simplified_SKUs(indacoProducts_df, main_product_identifier)

    # [PRE-PROCESSING B] Add the simplified SKUs to the orders.
    # `sku_mapping` is keyed by simplified SKU; the orders carry INDACO SKUs, hence the inverted lookup.
    reversed_skuMapping = {indaco_sku: simplified_sku for simplified_sku, indaco_sku in sku_mapping.items()}
    simplified_order_skus = indacoOrders_df['sku'].apply(lambda indaco_sku: reversed_skuMapping[indaco_sku])
    indacoOrders_df.insert(loc=4, column='SKU', value=simplified_order_skus)
    indacoOrders_df.rename(columns={'sku': 'indaco_sku'}, inplace=True)

    # [PRE-PROCESSING C] Merging the two lists of orders (INDACO and old inTrentino) is currently disabled:
    # if app_settings['merge_old_inTrentinoOrders']:
    #     indacoOrders_df = launcher_utils.merge_order_dfs(indacoOrders_df, inTrentino_orders_df, sku_mapping)

    # -------------------------------- PROFILING CUSTOMERS --------------------------------
    if app_settings['profile_customers']:
        userProfiles = indacoUserProfiles
        # Add the general profile (empty profile stored under the key -1)
        userProfiles[-1] = {}
        print("\nUSER PROFILES:")
        for user_id, user_profile in userProfiles.items():
            # Empty profiles (e.g., the general one) have nothing to map or visualize
            if not user_profile:
                continue
            # Map the INDACO SKUs with the simplified SKUs
            user_profile['unique_products'] = [reversed_skuMapping[product]
                                               for product in user_profile['unique_products']]
            user_profile['shopping_baskets'] = {
                transactionId: [reversed_skuMapping[product] for product in shopping_basket]
                for transactionId, shopping_basket in user_profile['shopping_baskets'].items()}
            # Visualize the user profile
            launcher_utils.visualize_user_profile(user_id, user_profile)
        # Order the profiles by user id
        userProfiles = dict(sorted(userProfiles.items(), key=lambda dictItem: dictItem[0]))
    else:
        # Only the general profile
        userProfiles = {-1: {}}

    # ----------------------------------- INITIALIZE APP -----------------------------------
    # Visualize the products
    launcher_utils.visualize_products(products_df, all_columns=True)
    # Visualize the orders
    launcher_utils.visualize_orders(indacoOrders_df, last_k_orders=10)
    # Initialize the container for the recommendations (one list per upper-cased product name)
    recommendations_byProduct = {name.upper(): [] for name in sorted(product_names)}
    # Initialize the container for the information of the generated recommendations
    rs_engines = launcher_utils.generate_rsEngine_names(app_settings)
    recommendations_info = {name: {} for name in rs_engines}

    # -------------------------------------------------------------------------------------
    # -------------------------------- Recommender engines --------------------------------
    # -------------------------------------------------------------------------------------
    # The engines in the loop run once per product identifier:
    #   (1) item-based (main product identifier)
    #   (2) category-based (product type), only if 'add_categoryBased_variants' is set
    if app_settings['add_categoryBased_variants']:
        item_product_identifiers = [main_product_identifier, type_attribute_name]
    else:
        item_product_identifiers = [main_product_identifier]

    for product_identifier in item_product_identifiers:
        print(120 * "-")
        print("-" * 51, f'RS using "{product_identifier}"', "-" * 51)
        print(120 * "-", "\n")

        # ------------------------------ (1) Association rules ------------------------------
        if 'simple_assRules' in app_settings['rs_priority'] or 'enhanced_assRules' in app_settings['rs_priority']:
            # Select the version of co-purchase
            #   1) [NEW] INDACO DATA --> 'indaco'
            #   2) [OLD] INTRENTINO DATA --> 'OLDinTrentino'
            coPurchase_versionName = app_settings['associationRules_RS']['coPurchase_versionName']
            # Initialize the engine (shared by the simple and the enhanced variants)
            assRules_recom_system = hybrid_RS.initialize_assRules_engine(products_df, app_settings,
                                                                         coPurchase_versionName,
                                                                         main_product_identifier,
                                                                         product_identifier,
                                                                         app_settings['loadRemotely'])
            # A) Simple association rules
            if 'simple_assRules' in app_settings['rs_priority']:
                hybrid_RS.run_associationRules(assRules_recom_system, recommendations_info,
                                               recommendations_byProduct, enhanced_version=False)
            # B) Enhanced association rules [i.e., with explanation]
            if 'enhanced_assRules' in app_settings['rs_priority']:
                hybrid_RS.run_associationRules(assRules_recom_system, recommendations_info,
                                               recommendations_byProduct, enhanced_version=True)

        # ----------------------------- (2) Collection-based RS -----------------------------
        if 'collectionBased' in app_settings['rs_priority']:
            # The engine receives its own copy of the orders
            collectionBased_recom_system = hybrid_RS.initialize_collectionBased_engine(products_df,
                                                                                       indacoOrders_df.copy(),
                                                                                       app_settings,
                                                                                       main_product_identifier,
                                                                                       product_identifier)
            hybrid_RS.run_collectionBased(collectionBased_recom_system, recommendations_info,
                                          recommendations_byProduct)

    # ------------------------------ (3) Simple type-based RS ------------------------------
    filter_source_platform = app_settings['collectionBased_RS']['filter_source_platform']
    if 'typeBased' in app_settings['rs_priority']:
        # Initialize the method
        typeBased_recom_system = hybrid_RS.initialize_typeBased_engine(products_df, main_product_identifier,
                                                                       type_attribute_name, filter_source_platform)
        # Run the method
        hybrid_RS.run_typeBased(typeBased_recom_system, recommendations_info, recommendations_byProduct)

    # ----------------------------- (4) Simple vendor-based RS -----------------------------
    if 'vendorBased' in app_settings['rs_priority']:
        # Initialize the method
        vendorBased_recom_system = hybrid_RS.initialize_vendorBased_engine(products_df, main_product_identifier,
                                                                           filter_source_platform)
        # Run the method
        hybrid_RS.run_vendorBased(vendorBased_recom_system, recommendations_info, recommendations_byProduct)

    # -------------------------- (5) Last chance, random products --------------------------
    recommendations_byProduct = hybrid_RS.run_randomProducts(products_df, recommendations_byProduct,
                                                             recommendations_info, indacoCategories,
                                                             filter_source_platform)
    # ----------------------------- [END] Recommender engines -----------------------------

    # -------------------------------------------------------------------------------------
    # ----------------------------- Personalized product order -----------------------------
    # -------------------------------------------------------------------------------------
    recommendations_byUser = {}
    for user_id, user_profile in userProfiles.items():
        print("\n" + 110 * "-")
        print("-" * 45, "RECOMMENDATION FOR", "-" * 45)
        print(110 * "-")
        launcher_utils.visualize_user_profile(user_id, user_profile)
        print(90 * "-")

        # ---------------------------- Re-arrange recommendations ----------------------------
        # A fresh list and a (shallow) copy are handed over for every user
        rs_priorities = list(recommendations_info.keys())
        sorted_recommendations_byProduct = launcher_utils.rearrange_recommendations(
            recommendations_byProduct.copy(), products_df, app_settings,
            rs_priorities, user_profile, main_product_identifier)
        # Save the personalized order
        recommendations_byUser[user_id] = sorted_recommendations_byProduct

        # ----------------------------- Visualize recommendations ----------------------------
        # launcher_utils.visualize_recommendations(recommendations_byProduct)

        # ------------------------ Item coverage of the RS methods ---------------------------
        info_df = launcher_utils.compute_methodsCoverage(sorted_recommendations_byProduct, recommendations_info)

        # ---------------------------- Save recommendations (JSON) ---------------------------
        saving_folder = launcher_utils.save_recommendations(sorted_recommendations_byProduct, info_df,
                                                            app_settings,
                                                            base_saving_folder='recommendations',
                                                            user_id=user_id,
                                                            base_file_name='raccomandazioni_inTrentino')
        if main_product_identifier == 'SKU':
            # Export the product list next to the saved recommendations
            relevant_columns = ['Product Type', 'Title', 'Vendor', 'Seller', 'SKU',
                                'production_areas', 'inTrentino_source']
            products_bySKU = products_df[relevant_columns].sort_values(by=['Product Type', 'Title', 'SKU'])
            products_bySKU.to_excel(join(saving_folder, 'SKU_prodotti.xlsx'),
                                    index=False, sheet_name="Products", freeze_panes=(1, 1))

    # Save all the linked products (not personalized) under the key -2
    recommendations_byUser[-2] = {product_sku: [product['item_sku'] for product in linked_products]
                                  for product_sku, linked_products in recommendations_byProduct.items()}

    # -------------------------------------------------------------------------------------
    # --------------------------------- Saving to MongoDB ---------------------------------
    # -------------------------------------------------------------------------------------
    if app_settings['write_on_mongoDB']:
        write_recommendations_to_mongodb(db_service, recommendations_byUser, sku_mapping)

        # ------ LEGACY SAVING METHOD ------------
        # Collection "Products": add the attributes "linkedProducts" & "recomExplaination"
        print('\n[LEGACY] Saving the recommendations in "Products"\n')
        # The legacy writer only receives the recommendations of the general profile (key -1)
        generalUserRecommendations = recommendations_byUser[-1]
        write_recommendations_to_mongodb_legacy(generalUserRecommendations, sku_mapping, verbose=False)

    print("\n" + 90 * "-")
    print("-" * 42, "END", "-" * 43)
    print(90 * "-")


if __name__ == '__main__':
    main()