Your IP : 216.73.217.13


Current Path : /home/deltalab/PMS/recommendations/recommender-system-batch/
Upload File :
Current File : //home/deltalab/PMS/recommendations/recommender-system-batch/hybrid_RS.py

from datetime import datetime
from os import path

import numpy as np
from numpy import timedelta64

from _library.data_utils import data_loader
from components.collectionBased_RS import collectionBased_RS
from components.coPurchase_RS import associationRules_RS
from components.semiRandom_RS import semiRandom_RS
from components.typeBased_RS import typeBased_RS
from components.vendorBased_RS import vendorBased_RS

# "enhanced_assRules", "collectionBased", "simple_assRules", "vendorBased", "typeBased"

def load_data(relevant_columns = None, 
              main_product_identifier = 'SKU', excluded_products = None, debug = False): 
    """Load the orders and build the platform product table.

    Parameters
    ----------
    relevant_columns : list of str, optional
        Product attributes forwarded to ``data_loader.generate_platformData``
        as ``col_names``. When omitted, the default attribute set
        ('Vendor', 'SKU', 'Title', 'Product Type', 'Type id', 'Linked regions')
        is used.
    main_product_identifier : str
        Column of the product table from which the product names are read.
    excluded_products : list, optional
        Forwarded to ``data_loader.generate_platformData``; defaults to an
        empty list.
    debug : bool
        Currently unused; kept so existing callers keep working.

    Returns
    -------
    tuple
        ``(orders_df, products_df, product_names)`` where ``product_names`` is
        the ``main_product_identifier`` column of ``products_df`` as a list.
    """
    # Build fresh lists on every call: list literals used as default arguments
    # are created once and shared by all calls, so a mutation performed by a
    # downstream loader would silently leak into later calls.
    if relevant_columns is None:
        relevant_columns = ['Vendor', 'SKU', 'Title', 'Product Type', 'Type id', 'Linked regions']
    if excluded_products is None:
        excluded_products = []
        
    print("\n" + 120 * "-") 
    print("-" * 26, "Loading the old inTrentino data (i.e., orders, platform products)", "-" * 27)
    print(120 * "-", "\n")
    
    # Load the orders
    orders_df = data_loader.load_orders()
    
    # 0) Load all the products of inTrentino
    products_df = data_loader.generate_platformData(orders_df, col_names = relevant_columns,
                                                    inTrentino_flag = True, 
                                                    excluded_products = excluded_products)
    print(f"PRODUCT ATTRIBUTES ({len(products_df.columns)}):", ', '.join(products_df.columns), '\n')
        
    # 0) ALTERNATIVE: load the pre-computed platform products
    # products_df, product_names = data_loader.load_inTrentino_products(app_settings['excluded_products'])
     
    # Extract the product names    
    product_names = products_df[main_product_identifier].tolist()
    
    return orders_df, products_df, product_names

def initialize_assRules_engine(products_df, app_settings, coPurchase_versionName, main_product_identifier, product_identifier,load_remotely):
    """Create and configure the association-rules recommender.

    The engine is built from the product table, the co-purchase version name,
    the product identifier and the ``load_remotely`` flag; its parameters are
    then read from ``app_settings`` (the top-level exclusion lists plus the
    ``'associationRules_RS'`` section) and applied through ``set_params``.

    Returns the configured ``associationRules_RS`` instance.
    """
    # [RS_1A] Initialization
    engine = associationRules_RS(products_df, coPurchase_versionName, product_identifier,load_remotely)

    # Settings shared by all the recommenders
    params = {
        'unique_product_identifier': main_product_identifier,
        'excluded_products': app_settings['excluded_products'],
        'excluded_link_types': app_settings["excluded_link_types"],
    }

    # Settings specific to this recommender
    rs_settings = app_settings['associationRules_RS']
    for option in ('force_perfect_match', 'hide_raw_scores', 'filter_source_platform', 'verbose'):
        params[option] = rs_settings[option]

    engine.set_params(**params)
    return engine
    
def run_associationRules(assRules_recom_system, recommendations_info, recommendations_byProduct, enhanced_version = False):
    """Run the association-rules recommender and merge its output.

    Parameters
    ----------
    assRules_recom_system
        Engine exposing ``generate_assRulesBased_recommendations``,
        ``rs_name`` and ``category_based``.
    recommendations_info : mapping
        Per-recommender bookkeeping; the entry ``recommendations_info[rs_name]``
        receives the keys ``'duration'`` and ``'products'``.
    recommendations_byProduct : dict
        Recommendations accumulated so far, keyed by the UPPER-CASED product
        name. It is updated in place.
    enhanced_version : bool
        Forwarded to the engine as ``flag_enhanced_assRules``; it also selects
        the banner text.

    Returns
    -------
    None
    """
    message = 'Enhanced with collections' if enhanced_version else 'Simple'
    
    print(120 * "-")
    print("-" * (45 - len(message)), f"Assocation rules: {message}", "-" * (45 - len(message)))
    print(120 * "-", "\n")
    
    start_time = datetime.now()
    
    # [RS_1A] Generate recommendation for all items
    recommendations_assRules = assRules_recom_system.generate_assRulesBased_recommendations(
        flag_enhanced_assRules = enhanced_version)
    
    rs_codeName = assRules_recom_system.rs_name
    rs_category_based = assRules_recom_system.category_based
    
    # Compute computational time (reported in seconds below one minute, in minutes otherwise)
    raw_duration = datetime.now() - start_time
    computational_time = timedelta64(raw_duration, 's' if raw_duration.total_seconds() < 60 else 'm')
    print('\nDuration:', computational_time)
    
    # [RS_1A] Add its recommendations to the general file
    recommendations_info[rs_codeName]['duration'] = computational_time
    recommendations_info[rs_codeName]['products'] = list()  
    for product_name, recommended_items in recommendations_assRules.items():
        if len(recommended_items) == 0:
            continue
        
        # The shared dictionary is keyed by the upper-cased name, so the same
        # normalised key must be used both for the check and for the update.
        product_key = product_name.upper()
        
        # Avoid adding the recommended items if 
        # a) the recommender is category based 
        # b) the reference product already has more than one recommendation
        # `.get` keeps the check from inserting an empty entry in a defaultdict.
        if rs_category_based and len(recommendations_byProduct.get(product_key, ())) > 1:
            continue
        
        recommendations_byProduct.setdefault(product_key, []).extend(recommended_items)
        recommendations_info[rs_codeName]['products'].append(product_name)

def initialize_collectionBased_engine(products_df, orders_df, app_settings, main_product_identifier, product_identifier):
    """Create and configure the collection-based recommender.

    The grouped collection names are loaded from
    ``_library/INDACO_collectionCodes.json``; the engine is then built from the
    orders and the platform products, and configured from ``app_settings``
    (top-level options plus the ``'collectionBased_RS'`` section).

    Returns the configured ``collectionBased_RS`` instance.
    """
    # [RS_2] Import collection names grouped by type
    # (the second value returned by the loader is not used here)
    codes_path = path.join('_library', 'INDACO_collectionCodes.json')
    grouped_names, _ = data_loader.load_collectionTypes(codes_path, verbose = True)

    # [RS_2] Initialize
    engine = collectionBased_RS(
        orders_df = orders_df,
        platform_products_df = products_df,
        grouped_collection_names = grouped_names,
        unique_product_identifier = main_product_identifier,
        product_identifier = product_identifier)

    # [RS_2] Collect the parameters, in the order expected by set_params
    params = {'excluded_link_types': app_settings["excluded_link_types"]}
    rs_settings = app_settings['collectionBased_RS']
    params['force_one_linked_type'] = rs_settings["force_one_linked_type"]
    params['bundle_dimension'] = rs_settings["bundle_dimension"]
    params['filter_source_platform'] = rs_settings["filter_source_platform"]
    params['drop_similar_categories'] = app_settings["drop_similar_categories"]
    params['merge_collection_type'] = rs_settings["merge_collection_type"]
    params['force_single_item'] = rs_settings["force_single_item"]
    params['verbose'] = rs_settings["verbose"]

    engine.set_params(**params)
    return engine

def run_collectionBased(collectionBased_recom_system, recommendations_info, recommendations_byProduct):
    """Run the collection-based recommender and merge its output.

    Parameters
    ----------
    collectionBased_recom_system
        Engine exposing ``generate_collectionBased_recommendations``,
        ``rs_name`` and ``category_based``. The generator is expected to
        return ``{product_name: {group: [recommended items]}}``.
    recommendations_info : mapping
        Per-recommender bookkeeping; the entry ``recommendations_info[rs_name]``
        receives the keys ``'duration'`` and ``'products'``.
    recommendations_byProduct : dict
        Recommendations accumulated so far, keyed by the UPPER-CASED product
        name. It is updated in place.

    Returns
    -------
    None
    """
    print(120 * "-")
    print("-" * 51, "Collection based", "-" * 51)
    print(120 * "-", "\n")

    # [RS_2] Generate recommendation for all items
    start_time = datetime.now()
    recommendations_collectionBased = collectionBased_recom_system.generate_collectionBased_recommendations()
    
    # Compute computational time (reported in seconds below one minute, in minutes otherwise)
    raw_duration = datetime.now() - start_time
    computational_time = timedelta64(raw_duration, 's' if raw_duration.total_seconds() < 60 else 'm')
    print('\nDuration:', computational_time)
    
    rs_codeName = collectionBased_recom_system.rs_name
    rs_category_based = collectionBased_recom_system.category_based
    
    # [RS_2] Add its recommendations to the general file
    recommendations_info[rs_codeName]['duration'] = computational_time                   
    recommendations_info[rs_codeName]['products'] = list()  
    for product_name, grouped_recommendations in recommendations_collectionBased.items():
        
        # The shared dictionary is keyed by the upper-cased name, so the same
        # normalised key must be used both for the check and for the update.
        product_key = product_name.upper()
        
        for recommended_items in grouped_recommendations.values():
            if len(recommended_items) == 0:
                continue
                
            # Avoid adding the recommended items if 
            # a) the recommender is category based 
            # b) the reference product already has more than one recommendation
            # `.get` keeps the check from inserting an empty entry in a defaultdict.
            if rs_category_based and len(recommendations_byProduct.get(product_key, ())) > 1:
                continue
        
            recommendations_byProduct.setdefault(product_key, []).extend(recommended_items)
            # NOTE(review): the product is recorded once per contributing group,
            # so it can appear several times in this list - confirm it is intended.
            recommendations_info[rs_codeName]['products'].append(product_name)  
                
def initialize_typeBased_engine(products_df, product_identifier, type_attribute_name, filterSamePlatform):
    """Build the type-based recommender.

    All the arguments are forwarded, positionally and unchanged, to the
    ``typeBased_RS`` constructor; the new instance is returned.
    """
    # Initialization
    return typeBased_RS(products_df, product_identifier, type_attribute_name, filterSamePlatform)
    
def run_typeBased(typeBased_recom_system, recommendations_info, recommendations_byProduct):
    """Run the type-based recommender and merge its output.

    Every product with at least one recommended item gets those items appended
    to ``recommendations_byProduct`` (under the upper-cased product name) and
    is listed in ``recommendations_info[rs_name]['products']``; the elapsed
    time is stored in ``recommendations_info[rs_name]['duration']``.

    Returns None; both dictionaries are updated in place.
    """
    banner = 120 * "-"
    print(banner)
    print("-" * 51, "Type based", "-" * 51)
    print(banner, "\n")

    # Generate the recommendations for all the items, timing the call
    started_at = datetime.now()
    type_recommendations = typeBased_recom_system.generate_typeBased_recommendations()
    elapsed = datetime.now() - started_at

    # Seconds for runs shorter than one minute, minutes otherwise
    if elapsed.total_seconds() < 60:
        time_unit = 's'
    else:
        time_unit = 'm'
    duration = timedelta64(elapsed, time_unit)
    print('\nDuration:', duration)

    engine_name = typeBased_recom_system.rs_name

    # Add the recommendations to the general file
    recommendations_info[engine_name]['duration'] = duration
    recommendations_info[engine_name]['products'] = []
    for sku, items in type_recommendations.items():
        if len(items) == 0:
            continue

        recommendations_byProduct[sku.upper()].extend(items)
        recommendations_info[engine_name]['products'].append(sku)
            
def initialize_vendorBased_engine(products_df, product_identifier, filterSamePlatform):
    """Build the vendor-based recommender.

    All the arguments are forwarded, positionally and unchanged, to the
    ``vendorBased_RS`` constructor; the new instance is returned.
    """
    # Initialization
    return vendorBased_RS(products_df, product_identifier, filterSamePlatform)
    
def run_vendorBased(vendorBased_recom_system, recommendations_info, recommendations_byProduct):
    """Run the vendor-based recommender and merge its output.

    Every product with at least one recommended item gets those items appended
    to ``recommendations_byProduct`` (under the upper-cased product name) and
    is listed in ``recommendations_info[rs_name]['products']``; the elapsed
    time is stored in ``recommendations_info[rs_name]['duration']``.

    Returns None; both dictionaries are updated in place.
    """
    banner = 120 * "-"
    print(banner)
    print("-" * 51, "Vendor based", "-" * 51)
    print(banner, "\n")

    engine_name = vendorBased_recom_system.rs_name

    # Generate the recommendations for all the items, timing the call
    started_at = datetime.now()
    vendor_recommendations = vendorBased_recom_system.generate_vendorBased_recommendations()
    elapsed = datetime.now() - started_at

    # Seconds for runs shorter than one minute, minutes otherwise
    if elapsed.total_seconds() < 60:
        time_unit = 's'
    else:
        time_unit = 'm'
    duration = timedelta64(elapsed, time_unit)
    print('\nDuration:', duration)

    # Add the recommendations to the general file
    recommendations_info[engine_name]['duration'] = duration
    recommendations_info[engine_name]['products'] = []
    for sku, items in vendor_recommendations.items():
        if len(items) == 0:
            continue

        recommendations_byProduct[sku.upper()].extend(items)
        recommendations_info[engine_name]['products'].append(sku)
            
def run_randomProducts(allProducts_df, recommendations_byProduct, recommendations_info,
                       indacoCategories, filter_source_platform, num_target_item = 4):
    """Top up the recommendations with semi-random products.

    For every product already present in ``recommendations_byProduct`` the
    distinct (upper-cased) recommended SKUs are counted; when there are at most
    ``num_target_item`` of them, the semi-random engine is asked for the
    missing items plus one extra.

    Parameters
    ----------
    allProducts_df, indacoCategories, filter_source_platform
        Forwarded to the ``semiRandom_RS`` constructor.
    recommendations_byProduct : dict
        Maps a product SKU to its list of recommendation entries; each entry
        must expose an ``'item_sku'`` string. It is updated in place.
    recommendations_info : dict
        Per-recommender bookkeeping; the entry of this recommender is
        (re)created with the keys ``'products'`` and ``'duration'``.
    num_target_item : int
        Target number of distinct recommended SKUs per product.

    Returns
    -------
    dict
        The same ``recommendations_byProduct`` object that was passed in.
    """
    print(120 * "-")
    print("-" * 51, "Random products", "-" * 51)
    print(120 * "-", "\n")
    
    # Initialize the approach
    semiRandomBased_recom_system = semiRandom_RS(allProducts_df, indacoCategories, filter_source_platform)
    # NOTE(review): the other engines in this file expose `rs_name`; this one is
    # read through `rs_codeName` - confirm against the semiRandom_RS class.
    rs_codeName = semiRandomBased_recom_system.rs_codeName
    
    # Initialize 
    start_time = datetime.now()
    recommendations_info[rs_codeName] = {'products': list()}

    # Generate the connected products if necessary.
    # Iterate over a snapshot: the loop body may insert a new (upper-cased) key,
    # and changing the size of a dictionary while iterating its live view raises
    # a RuntimeError. The snapshot holds the same list objects, so items added
    # to an entry are still visible if that entry is visited later.
    for product_sku, recommended_items in list(recommendations_byProduct.items()):
        
        # Retrieve the recommendations from the previous methods
        previous_recommendedSKUs = np.unique([recom['item_sku'].upper() for recom in recommended_items]) 
        
        num_missing_products = num_target_item - len(previous_recommendedSKUs)
          
        # Trigger the approach only if the item has equal (add an extra item) 
        # or fewer linked products than the target threshold
        if num_missing_products < 0:
            continue

        # Generate the connected products semi-randomly 
        random_items = semiRandomBased_recom_system.generate_connectedProducts(
            product_sku, previous_recommendedSKUs, num_randomItems = num_missing_products + 1)
        
        # Attach the new items (setdefault also works when the upper-cased key
        # is missing from a plain dict)
        recommendations_byProduct.setdefault(product_sku.upper(), []).extend(random_items)
        recommendations_info[rs_codeName]['products'].append(product_sku)  
            
    # Compute the elapsed time
    raw_duration = datetime.now() - start_time
    recommendations_info[rs_codeName]['duration'] = timedelta64(raw_duration, 's')
    
    print("Duration:", recommendations_info[rs_codeName]['duration'])
    
    return recommendations_byProduct