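# NOTE: the lines below are file-viewer output captured together with the source; they are not part of the program.
# Your IP : 216.73.217.13
#
#
# Current Path : /home/deltalab/PMS/recommendations/recommender-system-batch/
# Upload File :
# Current File : //home/deltalab/PMS/recommendations/recommender-system-batch/app.py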

#----------------------------------------------------------------------------
# Created By: Marco Bronzini
# Company: Delta Informatica S.p.A. (DeltaLAB)
# Project: INDACO
# Task: Recommender system
# Created Date: Dic. 2022
# ---------------------------------------------------------------------------

from os.path import join

import hybrid_RS
from _library import launcher_utils
from _library.data_utils.data_loader import load_indacoData
from _library.data_utils.io_toolkit import read_settings
from _library.data_utils.mongodb_utils import write_recommendations_to_mongodb, write_recommendations_to_mongodb_legacy, simplified_SKUs

def invert_sku_mapping(sku_mapping):
    """Return the inverse of *sku_mapping*, i.e. ``{INDACO SKU: simplified SKU}``.

    ``sku_mapping`` is iterated as ``simplified SKU -> INDACO SKU``. If several
    simplified SKUs point to the same INDACO SKU, the last one iterated wins.
    """
    return {indaco_sku: simplified_sku for simplified_sku, indaco_sku in sku_mapping.items()}


def simplify_profile_skus(user_profile, reversed_skuMapping):
    """Replace, in place, the INDACO SKUs of *user_profile* with simplified SKUs.

    Both ``user_profile['unique_products']`` (a list of SKUs) and
    ``user_profile['shopping_baskets']`` (transaction id -> list of SKUs) are rewritten.

    Raises:
        KeyError: if a SKU of the profile is not a key of *reversed_skuMapping*.
    """
    user_profile['unique_products'] = [reversed_skuMapping[product]
                                       for product in user_profile['unique_products']]
    user_profile['shopping_baskets'] = {
        transactionId: [reversed_skuMapping[product] for product in shopping_basket]
        for transactionId, shopping_basket in user_profile['shopping_baskets'].items()}


def collect_linked_skus(recommendations_byProduct):
    """Return ``{product SKU: [item_sku of every linked product]}``, keeping the given order."""
    return {product_sku: [product['item_sku'] for product in linked_products]
            for product_sku, linked_products in recommendations_byProduct.items()}


def main():
    """Run the batch recommender: load the data, run the engines, personalize, save.

    Steps, in order:
      1. read the app settings and load the INDACO products, orders, categories and user profiles;
      2. simplify the SKUs and add them to the orders (and to the user profiles, if profiling is on);
      3. run the recommendation engines listed in ``app_settings['rs_priority']``;
         they fill ``recommendations_byProduct`` and ``recommendations_info``;
      4. for every user profile (``-1`` is the general profile), re-arrange the
         recommendations and save them to disk;
      5. if ``app_settings['write_on_mongoDB']`` is set, write the result to MongoDB.
    """

    # ----------------------------------- APP SETTING ------------------------------------------------

    # 0.A) Read App settings
    app_settings = read_settings()

    # 0.B) Visualize intro
    launcher_utils.visualize_intro(app_settings)

    # 0.C) Set the main attribute names
    main_product_identifier = app_settings['unique_product_identifier']
    type_attribute_name = 'Product Type'

    # ------------------------------------ INDACO ----------------------------------------------------

    # [INDACO] Read indaco products and orders
    db_service, indacoProducts_df, indacoOrders_df, indacoCategories, indacoUserProfiles = load_indacoData(
        "etl", visualize_aggregated_territories = True)

    # ---------------------------- OLD ORDERS FROM INTRENTINO --------------------------------------------

    # [Old inTrentino] Loading the old data of the platform inTrentino is currently disabled
    #inTrentino_orders_df, inTrentino_products_df, inTrentino_productNames = hybrid_RS.load_data(
    #    main_product_identifier = main_product_identifier,
    #    excluded_products = app_settings['excluded_products'])

    # ----------------------------------- PRE-PROCESSING ----------------------------------------------

    # [PRE-PROCESSING A] Simplify (keep only the first part of) the SKUs.
    # The mapping with the old inTrentino SKUs is disabled together with the old data:
    #products_df, product_names, sku_mapping = map_indacoSKUs(indacoProducts_df, inTrentino_products_df, main_product_identifier)
    products_df, product_names, sku_mapping = simplified_SKUs(indacoProducts_df, main_product_identifier)

    # [PRE-PROCESSING B] Add the simplified SKUs to the orders.
    # An order whose 'sku' is missing from the mapping raises a KeyError here.
    reversed_skuMapping = invert_sku_mapping(sku_mapping)
    indacoOrders_df.insert(loc = 4, column = 'SKU', value = indacoOrders_df['sku'].apply(
        lambda indaco_sku: reversed_skuMapping[indaco_sku]))
    indacoOrders_df.rename(columns = {'sku': 'indaco_sku'}, inplace = True)

    # [PRE-PROCESSING C] Merging the two lists of orders (INDACO and old inTrentino) is currently disabled
    #if app_settings['merge_old_inTrentinoOrders']:
    #    indacoOrders_df = launcher_utils.merge_order_dfs(indacoOrders_df, inTrentino_orders_df, sku_mapping)

    # ------------------------------------- PROFILING CUSTOMERS ---------------------------------------------------
    if app_settings['profile_customers']:

        # Same dict object as the loaded profiles: the additions below modify it in place
        userProfiles = indacoUserProfiles

        # Add the general profile (empty, stored under the key -1)
        userProfiles[-1] = {}

        print("\nUSER PROFILES:")
        for user_id, user_profile in userProfiles.items():

            # Empty profiles (e.g., the general one) have nothing to map or visualize
            if not user_profile:
                continue

            # Map the indaco SKUs with the simplified SKUs
            simplify_profile_skus(user_profile, reversed_skuMapping)

            # Visualize the user profiles
            launcher_utils.visualize_user_profile(user_id, user_profile)

        # Process the profiles in ascending order of user id
        userProfiles = dict(sorted(userProfiles.items(), key = lambda dictItem: dictItem[0]))
    else:

        # Only the general profile
        userProfiles = {-1: {}}

    # --------------------------------------- INITIALIZE APP -------------------------------------------------------

    # Visualize the products
    launcher_utils.visualize_products(products_df, all_columns = True)

    # Visualize the orders
    launcher_utils.visualize_orders(indacoOrders_df, last_k_orders = 10)

    # Initialize the container for the recommendations: one empty list per (upper-cased) product name
    recommendations_byProduct = {name.upper(): [] for name in sorted(product_names)}

    # Initialize the container for the information of the generated recommendations: one dict per engine
    rs_engines = launcher_utils.generate_rsEngine_names(app_settings)
    recommendations_info = {name: {} for name in rs_engines}

    # ------------------------------------------------------------------------------------------
    # ----------------------------------- Recommend engines ------------------------------------
    # ------------------------------------------------------------------------------------------
    # ----------------------------------- (1) Item-based ---------------------------------------
    # --------------------------------- (2) Category-based -------------------------------------
    # ------------------------------------------------------------------------------------------
    # The engines of the loop below run once per identifier: always with the main
    # product identifier and, if selected, also with the product type.
    if app_settings['add_categoryBased_variants']:
        item_product_identifiers = [main_product_identifier, type_attribute_name]
    else:
        item_product_identifiers = [main_product_identifier]

    for product_identifier in item_product_identifiers:
        print(120 * "-")
        print("-" * 51, f'RS using "{product_identifier}"', "-" * 51)
        print(120 * "-", "\n")

        # ------------------------------------------------------------------------------------------
        # -------------------------------- (1) Association rules -----------------------------------
        # ------------------------------------------------------------------------------------------
        # The engine is shared by the simple and the enhanced variants, so it is
        # initialized as soon as at least one of them is enabled.
        if 'simple_assRules' in app_settings['rs_priority'] or 'enhanced_assRules' in app_settings['rs_priority']:
            # Select the version of co-purchase
            # 1) [NEW] INDACO DATA     --> 'indaco'
            # 2) [OLD] INTRENTINO DATA --> 'OLDinTrentino'
            coPurchase_versionName = app_settings['associationRules_RS']['coPurchase_versionName']

            # Initialize the engine
            assRules_recom_system = hybrid_RS.initialize_assRules_engine(products_df, app_settings,
                                                                         coPurchase_versionName,
                                                                         main_product_identifier,
                                                                         product_identifier,
                                                                         app_settings['loadRemotely'])

        # ----------------------------- A) Simple association rules --------------------------------
        if 'simple_assRules' in app_settings['rs_priority']:
            hybrid_RS.run_associationRules(assRules_recom_system, recommendations_info,
                                           recommendations_byProduct, enhanced_version = False)

        # ----------------- B) Enhanced association rules [i.e., with explanation] -----------------
        if 'enhanced_assRules' in app_settings['rs_priority']:
            hybrid_RS.run_associationRules(assRules_recom_system, recommendations_info,
                                           recommendations_byProduct, enhanced_version = True)

        # ------------------------------------------------------------------------------------------
        # ------------------------------- (2) Collection-based RS  ---------------------------------
        # ------------------------------------------------------------------------------------------
        if 'collectionBased' in app_settings['rs_priority']:
            # The engine receives a copy of the orders
            collectionBased_recom_system = hybrid_RS.initialize_collectionBased_engine(products_df,
                                                                                       indacoOrders_df.copy(),
                                                                                       app_settings,
                                                                                       main_product_identifier,
                                                                                       product_identifier)
            hybrid_RS.run_collectionBased(collectionBased_recom_system, recommendations_info,
                                          recommendations_byProduct)

    # ------------------------------------------------------------------------------------------
    # ------------------------------- (3) Simple type-based RS  --------------------------------
    # ------------------------------------------------------------------------------------------
    # The platform filter is read from the collection-based settings, but it is
    # also used by the type-based, vendor-based and random-products steps below.
    filter_source_platform = app_settings['collectionBased_RS']['filter_source_platform']
    if 'typeBased' in app_settings['rs_priority']:

        # Initialize the method
        typeBased_recom_system = hybrid_RS.initialize_typeBased_engine(products_df, main_product_identifier,
                                                                       type_attribute_name, filter_source_platform)

        # Run the method
        hybrid_RS.run_typeBased(typeBased_recom_system, recommendations_info, recommendations_byProduct)

    # ------------------------------------------------------------------------------------------
    # ------------------------------- (4) Simple vendor-based RS  ------------------------------
    # ------------------------------------------------------------------------------------------
    if 'vendorBased' in app_settings['rs_priority']:

        # Initialize the method
        vendorBased_recom_system = hybrid_RS.initialize_vendorBased_engine(products_df, main_product_identifier,
                                                                           filter_source_platform)

        # Run the method
        hybrid_RS.run_vendorBased(vendorBased_recom_system, recommendations_info, recommendations_byProduct)

    # ------------------------------------------------------------------------------------------
    # ------------------------- (5) Last chance, random products  ------------------------------
    # ------------------------------------------------------------------------------------------
    # Always executed (it is not controlled by 'rs_priority')
    recommendations_byProduct = hybrid_RS.run_randomProducts(products_df, recommendations_byProduct,
                                                             recommendations_info, indacoCategories,
                                                             filter_source_platform)

    # ------------------------------------------------------------------------------------------
    # -------------------------------- [END] Recommend engines ---------------------------------
    # ------------------------------------------------------------------------------------------

    # ------------------------------------------------------------------------------------------
    # -------------------------------- Personalized product order ------------------------------
    # ------------------------------------------------------------------------------------------
    recommendations_byUser = {}

    # Personalize the product order
    for user_id, user_profile in userProfiles.items():

        print("\n" + 110 * "-")
        print("-" * 45, "RECOMMENDATION FOR", "-" * 45)
        print(110 * "-")
        launcher_utils.visualize_user_profile(user_id, user_profile)
        print(90 * "-")

        # ------------------------------ Re-arrange recommendations --------------------------------
        # NOTE(review): .copy() is shallow, so the per-product lists are shared between users.
        # If rearrange_recommendations modifies those lists in place, one user's ordering
        # leaks into the next ones -- confirm against launcher_utils.
        rs_priorities = list(recommendations_info.keys())
        sorted_recommendations_byProduct = launcher_utils.rearrange_recommendations(recommendations_byProduct.copy(),
                                                                                    products_df, app_settings,
                                                                                    rs_priorities, user_profile,
                                                                                    main_product_identifier)

        # Save the personalized order
        recommendations_byUser[user_id] = sorted_recommendations_byProduct

        # ------------------------------- Visualize recommendations --------------------------------
        # (disabled)
        #launcher_utils.visualize_recommendations(recommendations_byProduct)

        # --------------------- Compute the item coverage of the RS methods  -----------------------
        info_df = launcher_utils.compute_methodsCoverage(sorted_recommendations_byProduct, recommendations_info)

        # ------------------------------ Save recommendations (JSON) -------------------------------
        saving_folder = launcher_utils.save_recommendations(sorted_recommendations_byProduct, info_df,
                                                            app_settings,
                                                            base_saving_folder = 'recommendations',
                                                            user_id = user_id,
                                                            base_file_name = 'raccomandazioni_inTrentino')

        # When the products are identified by SKU, also save the product list next to the recommendations
        if main_product_identifier == 'SKU':
            relevant_columns = ['Product Type', 'Title', 'Vendor', 'Seller', 'SKU', 'production_areas', 'inTrentino_source']
            products_bySKU = products_df[relevant_columns].sort_values(by = ['Product Type', 'Title', 'SKU'])
            products_bySKU.to_excel(join(saving_folder, 'SKU_prodotti.xlsx'),
                                    index = False, sheet_name = "Products", freeze_panes = (1, 1))

    # Save all the linked products (not personalized) under the key -2
    recommendations_byUser[-2] = collect_linked_skus(recommendations_byProduct)

    # ------------------------------------------------------------------------------------------
    # ------------------------------ Saving to MongoDB -----------------------------------------
    # ------------------------------------------------------------------------------------------
    if app_settings['write_on_mongoDB']:
        write_recommendations_to_mongodb(db_service, recommendations_byUser, sku_mapping)

        # ------ LEGACY SAVING METHOD ------------
        # Collection: "Products": add the attributes "linkedProducts" & "recomExplaination"
        print('\n[LEGACY] Saving the recommendations in "Products"\n')

        # The legacy method only receives the recommendations of the general profile (key -1)
        generalUserRecommendations = recommendations_byUser[-1]
        write_recommendations_to_mongodb_legacy(generalUserRecommendations, sku_mapping, verbose = False)

    print("\n" + 90 * "-")
    print("-" * 42,"END", "-" * 43)
    print(90 * "-")


if __name__ == '__main__':
    main()