import json
from collections import defaultdict
from datetime import date
from os import path
from string import ascii_uppercase

import numpy as np
from pandas import DataFrame, ExcelWriter, concat
from tabulate import tabulate

from _library.data_utils import io_toolkit
from _library.recom_utils.collectionBased_utils import (
    build_bundles, minorChanges_recomAttributes)
from _library.toolkit import jaccard_similarity


def visualize_intro(app_settings):
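    """Print a banner with the application version and dump every configuration
    parameter (name, type and value) read from the settings file."""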
    print("\n" + 120 * "-")
    print(120 * "-")
    print("-" * 43, f"Hybrid recommender system (v{app_settings['app_version']})", "-" * 43)
    print(120 * "-")
    print(120 * "-" + "\n" )
    
    print("\n" + 80 * "-")
    print(80 * "-")
    print("-" * 17, f"The setting file has been read ({len(app_settings.keys())} params)", "-" * 20)
    print(80 * "-")
    print(80 * "-" + "\n" )
 
    for idk, (key, value) in enumerate(app_settings.items()):
        info = type(value).__name__
        if isinstance(value, dict):
            info += ':' + str(len(value.keys()))
            value = json.dumps(value, indent = 8)
        elif isinstance(value, list):
            info += ':' + str(len(value))
            value = ' | '.join(map(str, value))

        print('-' * 90)
        print(f" |{ascii_uppercase[idk]}| {key.upper()} ({info}) --> {value}")
        print('-' * 90)
        
def generate_rsEngine_names(app_settings):
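    """Build the ordered list of recommender-engine names from
    app_settings['rs_priority'], optionally inserting the '_cat'
    (category-based) variants, and print the resulting list."""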
    
    # Generate the names
    rs_engines = app_settings['rs_priority']

    if app_settings['add_categoryBased_variants']:

        # Retrieve the methods that have a category-based version available
        methods_with_catVersion = ["enhanced_assRules", "collectionBased", "simple_assRules"]
        methods_with_catVersion = [item for item in methods_with_catVersion if item in rs_engines]

        if len(methods_with_catVersion) > 0:

            # Generate the names for the categoryBased versions
            catVersion_names = [engine_name + '_cat' for engine_name in rs_engines
                                if engine_name in methods_with_catVersion]
            
            # Insert the new names
            entryPoint = np.max([rs_engines.index(item) for item in methods_with_catVersion]) + 1
            for idk, engineName_variant in enumerate(catVersion_names):
                rs_engines.insert(entryPoint + idk, engineName_variant)
        
    # Visualize the engines
    print("\n" + "-" * 30)
    print(f"Recommender engines ({len(rs_engines)}):")
    print("-" * 30)
    for idk, engine_name in enumerate(rs_engines):
        print(f"--> ({idk + 1}) {engine_name}")
    print("-" * 30, "\n")
    
    return rs_engines

def visualize_recommendations(recommendations):
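    """Pretty-print the recommendations produced for every reference product."""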
    for idk, (item_name, item_recommendations) in enumerate(recommendations.items()):

        print("\n" + "-" * 60)
        print(f"({idk + 1}) REFERENCE PRODUCT: {item_name}")
        print(f"--> Recommendations: {len(item_recommendations)}\n")
        print("-" * 60)

        if len(item_recommendations) == 0:
            print("There are no items that can be recommended! Sorry\n")
        else:
            for recommendation in item_recommendations:
                for attribute, value in recommendation.items():
                    if isinstance(value, (list, set)):
                        value = ' | '.join(map(str, value))

                    if isinstance(value, str):
                        value = value.upper()

                    print(f"--> {attribute} [{type(value).__name__}] --> {value}")
                print("-" * 60)

def merge_order_dfs(indacoOrders_df, inTrentinoOrders_df, sku_mapping):
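    """Append the inTrentino orders to the Indaco orders.

    The inTrentino rows are renamed to match the Indaco schema, mapped to the
    Indaco SKUs through sku_mapping and given negative transaction ids so that
    they never collide with the Indaco ones. Returns the merged dataframe."""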
    
    # Select the necessary columns
    selected_columns = ['Transaction id', 'SKU', 'Order Month', 'Title', 'Product Type', 'Quantity']
    orders_to_attach = inTrentinoOrders_df[selected_columns].dropna(subset = ['SKU']).copy()
    orders_to_attach = orders_to_attach.reset_index(drop = True)
    
    # Minor changes
    orders_to_attach['SKU'] = orders_to_attach['SKU'].str.upper()
    orders_to_attach['customer_id'] = -1
    #orders_to_attach['Order Month'] = orders_to_attach['Order Month'].apply(lambda ts: ts.to_timestamp())
    orders_to_attach.rename(
        columns = {
            'Title': 'product_name', 
            'Quantity': 'quantity',
            'Order Month' : 'timestamp'}, 
        inplace = True)

    orders_to_attach['indaco_sku'] = orders_to_attach['SKU'].apply(
        lambda simplified_sku: sku_mapping.get(simplified_sku, ""))

    # Avoid overlaps with the Indaco transaction ids (shift the inTrentino ids and make them negative)
    orders_to_attach['Transaction id'] += 1
    orders_to_attach['Transaction id'] *= -1

    # Merge them
    merged_df = concat([indacoOrders_df, orders_to_attach], axis = 0)
    merged_df = merged_df.sort_values(by = ['Transaction id'], ascending = True).reset_index(drop = True)

    return merged_df

def rearrange_recommendations(recommendations_byProduct, platfrom_products, app_settings, rs_priorities, user_profile,  product_identifier):
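    """Sort, de-duplicate and trim the recommendations of every product.

    Recommendations are ordered either with a user-profile-based key (when
    user_profile is not empty) or with a generic key based on the engine
    priority and on the Jaccard similarity with the reference product, then
    bundled, re-ordered, re-arranged attribute-wise and finally truncated to
    app_settings['max_recommendations'] items."""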
    
    print("\n" + 90 * "-")
    print("-" * 27,"Sort and re-arrange recommendations", "-" * 26)
    print(90 * "-")
    
    print(f"[ORDERS] Recommender engines ({len(rs_priorities)}):", "\n" + "-" * 30)
    for idk, engine_name in enumerate(rs_priorities):
        print(f"--> ({idk + 1}) {engine_name}")
    print("-" * 30, "\n")
    
    # Sorting function
    userBased = len(user_profile) > 0

    # --> (A) Profile-based
    if userBased:
        print("\nSORTING MODE: User-based\n")

        normalizeNames = lambda items: list(map(str.lower, items))

        if 'brands' not in user_profile:
            user_profile['brands'] = []

        # Low is better (ascending)
        sortingProfileBased_func = lambda recom: (
            rs_priorities.index(recom['rs_source']),
            recom['item_type'].lower() not in normalizeNames(user_profile['categories']), # PREFERRED VALUE: false (0)
            recom['item_vendor'].lower() not in normalizeNames(user_profile['brands']), # PREFERRED VALUE: false (0)
            recom['item_sku'].lower() in normalizeNames(user_profile['unique_products'])) # PREFERRED VALUE: false (0)
            # TODO: maybe also take user_profile['bio_percentage'] into account (when it is not -1)
    # --> (B) Generic approach: (1) rs method (2) type dissimilarity and (3) name dissimilarity
    else:
        print("\nSORTING MODE: Generic sorting\n")
        
        # Low is better (ascending)
        genericSorting = lambda recom, referenceName, referenceType: (
            rs_priorities.index(recom['rs_source']),
            1 - jaccard_similarity(recom['item_type'], referenceType), 
            jaccard_similarity(recom['item_name'], referenceName))

    for product_sku in recommendations_byProduct.keys():
        
        # Find the product type
        cond = platfrom_products[product_identifier] == product_sku
        product_type = platfrom_products.loc[cond, 'Product Type'].values[0]
        product_name = platfrom_products.loc[cond, 'Title'].values[0]
        
        # Set the sorting function according to the modality: Generic or user-based
        sorting_func = lambda recom: sortingProfileBased_func(recom) if userBased else genericSorting(recom, product_name, product_type)
        
        # Pre-sort recommendations (according to 'rs_priority' and 'rs_source')
        recommendations_byProduct[product_sku] = sorted(recommendations_byProduct[product_sku], key = sorting_func)
        
        #print(f"[{product_sku}] ORIGINAL ({len(recommendations_byProduct[product_sku])}):", json.dumps(recommendations_byProduct[product_sku], indent = 4))
        
        # Filter recommended items: (a) build bundles (optionally dropping similar categories) or (b) simply drop duplicate items
        rs_methods = [recom['rs_source'] for recom in recommendations_byProduct[product_sku]]
            
        if len(recommendations_byProduct[product_sku]) > 4 and 'randomProducts' not in rs_methods:
            bundles = build_bundles(
                reference_products = None, 
                recommendations_byCollectionType = {'all_items': recommendations_byProduct[product_sku]}, 
                products_df = platfrom_products,
                product_identifier = product_identifier,
                drop_similar_categories = app_settings['drop_similar_categories'], 
                merge_collection_type = False, 
                output_recom_with_one_linktype = app_settings['output_recom_with_one_linktype'],
                verbose = False)        
            recommendations_byProduct[product_sku] = bundles['all_items']
        else:
            
            # Drop duplicate items
            recommendations = []
            for recom in recommendations_byProduct[product_sku]:
                previous_items = [item['item_sku'] for item in recommendations]
                if recom['item_sku'] not in previous_items:
                    recommendations.append(recom)
            recommendations_byProduct[product_sku] = recommendations
                   
        # Sort recommendations after bundles generation
        recommendations_byProduct[product_sku] = sorted(recommendations_byProduct[product_sku], key = sorting_func)
        #print(f"AFTER BUILD ({len(recommendations_byProduct[product_sku])}):", json.dumps(recommendations_byProduct[product_sku], indent = 4))
        
        # Minor changes in the attributes of the recommendations
        attribute_order = ['rank', 'item_sku', "indaco_sku",  'item_name', 'item_frequency', 'item_type', 'item_vendor',
                           "similar_products", 'linked_production_areas', 'linked_regions', 'linked_recipes', 
                           'linked_experiences','explaination', "seller", 'inTrentino_source', "merged_source", 'rs_source'] 
        
        recommendations_byProduct[product_sku] = minorChanges_recomAttributes(
            recommendations = recommendations_byProduct[product_sku], 
            attribute_order = attribute_order)
        
        #print(f"MINOR CHANGES ({len(recommendations_byProduct[product_sku])}):", json.dumps(recommendations_byProduct[product_sku], indent = 4))
        
        if app_settings['max_recommendations'] != -1:
            partial_recommendations = recommendations_byProduct[product_sku][:app_settings['max_recommendations']]
            recommendations_byProduct[product_sku] = partial_recommendations
            
        #print(f"OUTPUT ({len(recommendations_byProduct[product_sku])}):", json.dumps(recommendations_byProduct[product_sku], indent = 4))
        
        if len(recommendations_byProduct[product_sku]) < 4:
            print("SMALL BUNDLE:", product_sku)
            #raise Exception()
        
    return recommendations_byProduct

def compute_methodsCoverage(recommendations_byProduct, info_recommendations):
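    """Compute and print the item coverage (overall and per RS method) and the
    execution time, returning the collected statistics as a dataframe."""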
    
    info_df = DataFrame(index = ['Overview'] + list(info_recommendations.keys()), 
                        columns = ['Duration', 'Item coverage (percentage)', 
                                   'Item coverage', '[AVG] recommended items for product']) 
    
    # [Overview] Compute general items coverage
    items_coverage = np.array(list(map(len, recommendations_byProduct.values()))) 
    
    total_items = len(recommendations_byProduct.keys())
    items_covered = np.nonzero(items_coverage)[0].size
    items_covered_percentage = round((items_covered / len(items_coverage)) * 100, 1)
    total_duration = np.sum([info['duration'] for info in info_recommendations.values() if 'duration' in info.keys()])    
    total_duration = np.timedelta64(total_duration, 's' if total_duration.astype(int) <= 60 else 'm')
    info_df.loc['Overview', 'Duration'] = total_duration
    info_df.loc['Overview', 'Item coverage'] = items_covered
    info_df.loc['Overview', 'Item coverage (percentage)'] = items_covered_percentage
    info_df.loc['Overview', '[AVG] recommended items for product'] = np.round(np.mean(items_coverage), 0)
    
    # [Overview] Visualize general items coverage
    print("\n" + 90 * "-")
    print("-" * 24,f"[Overview] ITEM COVERAGE: {items_covered}/{len(items_coverage)} "\
        f"({items_covered_percentage} %)", "-" * 24)
    print('-' * 35, "DURATION: ~", total_duration, '-' * 35)
    print(90 * "-")
    
    # [Details] Visualize items coverage for each RS method
    coveredItems_counter = set()
    for idk, (method_name, info) in enumerate(info_recommendations.items()):
        
        # The RS method has been skipped
        if len(info.keys()) == 0:
            continue
        
        # Compute the method coverage
        products_covered = info['products']
        coveredItems_counter.update(products_covered)
        
        products_covered_percentage = round((len(products_covered) / total_items) * 100, 1)
        products_covered_cumulativePercentage = round((len(coveredItems_counter) / total_items) * 100, 1)

        # Add the coverage information
        info_df.loc[method_name, 'Duration'] = info['duration']
        info_df.loc[method_name, 'Item coverage'] = len(products_covered)
        info_df.loc[method_name, 'Item coverage (percentage)'] = products_covered_percentage

        # Add the cumulative coverage
        info_df.loc[method_name, 'Item coverage (cumulative)'] = len(coveredItems_counter)
        info_df.loc[method_name, 'Item coverage (cumulative percentage)'] = products_covered_cumulativePercentage

        info_df = info_df.reindex(
            columns = [
                '[AVG] recommended items for product', 'Duration', 
                'Item coverage', 'Item coverage (cumulative)',
                'Item coverage (percentage)', 'Item coverage (cumulative percentage)']
            )
        
        # Visualize the outcomes
        print("\n" + 60 * "-")
        print(f"RS METHOD {idk + 1}: {method_name} --> item coverage: "\
            f"{products_covered_percentage} % ({len(products_covered)})")
        print("\t\t\tDURATION:", info['duration'])
        print(60 * "-")
        if len(products_covered) > 0:
            print("-->", "\n--> ".join(products_covered[:5]))
            if len(products_covered) > 5:
                print("--> ...")
            print(40 * "-")
    print(60 * "-" + "\n")
    
    return info_df

def generate_params_df(app_settings):
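    """Flatten the application settings into a single-column dataframe
    (one row per parameter, nested dictionaries expanded)."""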
    app_settings_stringify = dict()
    for param_name, param_value in app_settings.items():
        if isinstance(param_value, list):
            app_settings_stringify[param_name] = ', '.join(map(str, param_value))
        elif isinstance(param_value, dict):
            for sub_name, sub_value in param_value.items():
                app_settings_stringify[param_name + ":" + sub_name] = sub_value
        else:
            app_settings_stringify[param_name] = param_value

    app_setting_df = DataFrame.from_dict(app_settings_stringify, orient = 'index', columns = ['Value'])
    
    return app_setting_df

def visualize_orders(orders_df, last_k_orders = -1):
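    """Print the order transactions as a table, optionally limited to the
    last_k_orders most recent transaction ids."""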
    print("\n" + 100 * "-") 
    print("-" * 38, f"Transaction ({len(orders_df['Transaction id'].unique())})", "-" * 37)
    print(100 * "-", "\n")
    
    if last_k_orders != -1:
        transaction_ids = sorted(orders_df['Transaction id'].unique(), reverse = True)
        orders_df = orders_df[orders_df['Transaction id'].isin(transaction_ids[:last_k_orders])]

        print("\n" + 150 * "-") 
        print("-" * 67, f"LAST {last_k_orders} orders", "-" * 68)
        print(150 * "-")

    # Minor changes
    orders_df = orders_df.drop(columns = ['indaco_sku'])
    orders_df = orders_df.sort_values(by = ['Transaction id', 'Product Type', 'product_name', 'quantity'],
                                      ascending = [True, True, True, False]).reset_index(drop = True)
        
    print(tabulate(orders_df, headers = orders_df.columns, tablefmt = "pretty"))
    
def visualize_products(products_df, all_columns = False):
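    """Print the product catalogue as a table, either with every column or
    with a reduced set of descriptive columns."""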
    if all_columns:
        cols_to_visualize = products_df.columns
    else:
        cols_to_visualize = ['Title', 'SKU', 'Product Type', 'Vendor', 'Seller',
                             'production_areas', 'inTrentino_source', 'Frequency']
    
    print("\n" + 120 * "-") 
    print("-" * 52, "INDACO products", "-" * 51)
    print(120 * "-", "\n")
    
    df_to_visualize = products_df[cols_to_visualize].sort_values(by = ['Title', "Product Type"]).reset_index(drop = True)

    print(f"ALL COLUMNS ({len(products_df.columns)}):", ' | '.join(products_df.columns), "\n")
    print(tabulate(df_to_visualize, headers = cols_to_visualize, tablefmt = "pretty"))
    print("\n")
    
def visualize_user_profile(user_id, user_profile):
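    """Print the attributes of a user profile (lists/sets, dictionaries of
    lists and scalar values) and return the profile unchanged."""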
    print("\n" + "-" * 140 +  "\n" + "-" * 64, f'USER ID: {user_id}', "-" * 63 + "\n" + "-" * 140, "\n")
    chart_divider = ' | '
    
    for attribute_name, attribute_values in user_profile.items(): 
               
        # CASE A: list/set
        if isinstance(attribute_values, set) or isinstance(attribute_values, list):
            print(attribute_name.upper(), f"({len(attribute_values)}):", chart_divider.join(sorted(map(str, attribute_values))))
        
        # CASE B: dictionary of lists
        elif isinstance(attribute_values, defaultdict):
            print(attribute_name.upper(), f"({len(attribute_values)}):\n\t",
                  '\n\t '.join([f'{key} ({len(values)} items) --> ' + chart_divider.join(sorted(values)) for key, values in attribute_values.items()]))
        
        # GENERAL CASE
        else: 
            print(str(attribute_name).upper() + ":", attribute_values)
        print("-" * 80)
    return user_profile

def save_recommendations(recommendations_byProduct, info_df, app_settings, 
                         base_saving_folder = 'recommendations', 
                         user_id = None,
                         base_file_name = 'raccomandazioni_inTrentino'):
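    """Save the recommendations of every product as a JSON file and the
    execution/parameter information as an Excel workbook, returning the
    folder in which the files were written."""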
    
    # Generate the dataframe for the application setting
    app_setting_df = generate_params_df(app_settings)
    
    # Add version
    saving_folder = path.join(base_saving_folder, f"v{app_settings['app_version']}")
    if user_id:
        subfolder = f'user_{user_id}' if user_id != -1 else 'generic_user'
        saving_folder = path.join(saving_folder, subfolder)
    
    # [file name] Extra info
    recom_file_name = base_file_name
    recom_file_name += '_' + date.today().strftime("%b%Y").lower()
    if app_settings['drop_similar_categories']:
        recom_file_name += '_tipiProdottiAggregati'
        
    # [file name] Extension
    recom_file_name += '.json'

    # Save the recommendations for each product as a JSON file
    io_toolkit.save_recommendations(recommendations_byProduct, saving_folder, recom_file_name)
    
    # Save the further information
    extra_info_file_name = 'info'
    if app_settings['drop_similar_categories']:
        extra_info_file_name += '_tipiProdottiAggregati'
    extra_info_file_name += '.xlsx'
    
    with ExcelWriter(path.join(saving_folder, extra_info_file_name), mode = 'w') as excelWriter:
        info_df.to_excel(excelWriter, sheet_name = 'Execution info')
        app_setting_df.to_excel(excelWriter, sheet_name = 'Application params')
        
    return saving_folder