Your IP : 216.73.217.13


Current Path : /home/deltalab/PMS/recommendations/user_profiling/_library/
Upload File :
Current File : //home/deltalab/PMS/recommendations/user_profiling/_library/profiling_utils.py

from collections import defaultdict

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd

from components.DbService import DbService
from _library.io_toolkit import load_collectionTypes

from os import path


def retrive_userCategories(df_row, user_profiles,):
    """Fold one transaction row into the per-user profile accumulator.

    Args:
        df_row: Mapping-like row (it is only indexed by key) exposing
            'customer_id', 'Transaction id' and every source column listed
            in ``columns`` below.
        user_profiles: Accumulator updated in place.
            ``user_profiles[user_id][attribute]`` must yield an object with
            ``.add`` -- presumably a defaultdict of defaultdict(set);
            TODO confirm against the caller.

    Returns:
        None. The result is the mutation of ``user_profiles``.
    """

    # 0) Retrieve the user identity
    user_id = df_row['customer_id']

    # 0) Retrieve transaction id
    transaction_id = df_row['Transaction id']

    # Source column of the row -> attribute name inside the profile
    columns = {
        'Product Type' : 'categories',
        'Vendor': 'brands',
        'SKU': 'unique_products',
        'production_areas': 'production_areas',
        'weight [grams]': 'product_weights',
        'refrigerated': 'refrigerated_products'
        }

    for colName, attributeName in columns.items():
        value = df_row[colName]

        # Skip missing values: empty string or None ...
        if value in ['', None]:
            continue
        # ... and NaN, which is the only float that differs from itself.
        # Without this check a missing cell would be stored as a real value.
        if isinstance(value, (float, np.floating)) and value != value:
            continue

        user_profiles[user_id][attributeName].add(value)

        if attributeName == 'unique_products':
            # The first access may hand back a non-dict placeholder (e.g. an
            # empty set from a defaultdict(set)); replace it by a
            # transaction_id -> set-of-SKUs mapping.
            if not isinstance(user_profiles[user_id]['shopping_baskets'], dict):
                user_profiles[user_id]['shopping_baskets'] = defaultdict(set)
            user_profiles[user_id]['shopping_baskets'][transaction_id].add(value)
    
def visualize_profile_stats(user_profiles):
    """Print or plot summary statistics for every attribute of the profiles.

    Values are pooled per attribute across all users: sets are flattened,
    any other value is kept as a single item. Numeric attributes are shown
    as a boxplot, all other attributes as a frequency table sorted by
    descending count.

    Args:
        user_profiles: Mapping user id -> mapping attribute name -> values.

    Returns:
        None. Output goes to stdout and to matplotlib.
    """

    # Put together all values according to the attributes
    profile_values = defaultdict(list)
    for user_profile in user_profiles.values():
        for attribute_name, values in user_profile.items():

            if isinstance(values, set):
                profile_values[attribute_name].extend(values)
            else:
                profile_values[attribute_name].append(values)

    # Visualize statistics
    for attribute_name, all_values in profile_values.items():

        print("-" * 100 +  "\n" + "-" * 40, attribute_name, "-" * 40 + "\n" + "-" * 100)

        # An attribute made only of empty sets has nothing to summarise
        if not all_values:
            print("No values collected for this attribute.")
            continue

        # Discover the type of the attribute from the dtype of the pooled
        # values rather than from a randomly drawn element, so that the
        # outcome is reproducible.
        values_array = np.asarray(all_values)
        is_numeric_attribute = (
            values_array.ndim == 1
            and np.issubdtype(values_array.dtype, np.number)
        )

        # Visualize statistics
        if is_numeric_attribute:
            descr_stats_df = pd.DataFrame(data = all_values, columns = [attribute_name])
            descr_stats_df.boxplot(column = attribute_name)
            plt.show()
        else:
            try:
                unique_items, counts = np.unique(all_values, return_counts = True)
            except TypeError:
                # np.unique sorts its input; values without an ordering
                # (e.g. dicts) cannot be counted this way.
                print("Values are not sortable; frequency table skipped.")
                continue
            item_frequencies = pd.DataFrame(data = counts, index = unique_items, columns = ['Frequency'])
            item_frequencies.sort_values(by = 'Frequency', inplace = True, ascending = False)
            print(item_frequencies)
            
def write_profiles(db_e,user_profiles, sku_mapping,collectionName, overwriteCollection = True,channel=None):
    """Convert the user profiles to their database form and store them.

    For every profile the product SKUs, production areas and categories are
    replaced by the identifiers/codes obtained from ``db_e`` and from the
    collection-codes file, remaining sets are turned into descending sorted
    lists, remaining dicts get string keys, and the result is written to
    ``collectionName``.

    NOTE: the profiles inside ``user_profiles`` are modified in place.

    Args:
        db_e: Service object providing ``ENCRYPTING_KEY``,
            ``get_decryptedUsers``, ``get_dBproduct`` and
            ``get_productTypeInfo``.
        user_profiles: Mapping user id -> profile (attribute -> values).
        sku_mapping: Mapping from the SKUs stored in the profiles to the
            SKUs passed to ``db_e.get_dBproduct``.
        collectionName: Name of the target collection.
        overwriteCollection: When true the collection is dropped first.
        channel: Optional channel; forwarded to the product lookups and
            stored on each written item when not None.
    """

    # Product lookups are flagged with product=True only for "userprofiles"
    get_product_id = collectionName == "userprofiles"

    def _lookup_db_id(sku):
        # Database '_id' of the product registered under the given SKU
        return db_e.get_dBproduct(sku, channel=channel, as_dict=True, product=get_product_id)['_id']

    # Retrieve the collections: lower-cased production area label -> code
    codes_path = path.join('_library', 'INDACO_collectionCodes.json')
    _, rawCollectionTypes = load_collectionTypes(codes_path, verbose=False)
    productionArea_codes = {}
    for code, label in rawCollectionTypes['production_areas'].items():
        productionArea_codes[label.lower()] = code

    # Decrypting customers
    db = DbService("mongodb")
    db.ENCRYPTING_KEY = db_e.ENCRYPTING_KEY
    decryptedCustomers = db_e.get_decryptedUsers(user_profiles.keys())

    if overwriteCollection:
        db.db_service.dropCollection(collectionName)

    # Write the collection
    print(f"[COLLECTION: {collectionName}] Writing {len(user_profiles.keys())} user profiles...")
    for user_id, user_profile in user_profiles.items():

        # Create the database item
        db_profile = {'user_id': decryptedCustomers[user_id]}
        if channel is not None:
            db_profile['channel'] = channel

        # Replace the purchased SKUs by database product ids
        if 'unique_products' in user_profile:
            user_profile['unique_products'] = [
                _lookup_db_id(sku_mapping[sku])
                for sku in user_profile['unique_products']
            ]

        # Same replacement inside every shopping basket
        if 'shopping_baskets' in user_profile:
            converted_baskets = {}
            for basket, products in user_profile['shopping_baskets'].items():
                converted_baskets[basket] = [_lookup_db_id(sku_mapping[sku]) for sku in products]
            user_profile['shopping_baskets'] = converted_baskets

        # Cast the values for saving (only existing keys are reassigned, so
        # iterating over the profile while updating it is safe)
        for att, values in user_profile.items():
            att_lower = att.lower()

            if 'recommendation' in att_lower:
                if isinstance(values, dict):
                    values['recommendations'] = [
                        _lookup_db_id(item['indaco_sku'])
                        for item in values['recommendations']
                    ]
                    if 'production_area' in values:
                        area_label = str(values['production_area'])
                        values['production_area'] = productionArea_codes[area_label.lower()]
                    user_profile[att] = values
                else:
                    user_profile[att] = [_lookup_db_id(item['indaco_sku']) for item in values]

            elif 'production_areas' in att_lower:
                area_codes = [productionArea_codes[area.lower()] for area in values]
                area_codes.sort(reverse=True)
                user_profile[att] = area_codes

            elif 'categories' in att_lower:
                user_profile[att] = [db_e.get_productTypeInfo(category)['_id'] for category in values]

            elif isinstance(values, set):
                user_profile[att] = sorted(values, reverse=True)

            elif isinstance(values, dict):
                # Descending order by key, with the keys stored as strings
                user_profile[att] = {
                    str(key): item
                    for key, item in sorted(values.items(), reverse=True)
                }

        db_profile.update(user_profile)

        # Write the item to the database
        db.db_service.writeNewDbItem(collectionName, db_profile)


# def write_recommendations(db_e,recoms, sku_mapping, overwriteCollection = True):
#     # Retrieve the collections
#     file_path = path.join('_library', 'INDACO_collectionCodes.json')
#     _, rawCollectionTypes  = load_collectionTypes(file_path, verbose = False)
#     productionArea_codes = {value.lower(): att for att, value in rawCollectionTypes['production_areas'].items()}
    
#     # Decrypting customers
#     db = DbService("mongodb")
#     db.ENCRYPTING_KEY = db_e.ENCRYPTING_KEY
#     decryptedCustomers = db_e.get_decryptedUsers(user_profiles.keys())
    
#     if overwriteCollection:
#         #db.drop_existingTable(collectionName)
#         db.db_service.dropCollection("userprofiles")
#     # Write the collection
#     print(f"[COLLECTION: userprofiles] Writing {len(recoms.keys())} user profiles...")
#     for user_id, user_profile in user_profiles.items():
        
#         # Create the database item
#         db_profile = {'user_id': decryptedCustomers[user_id]}
        
#         if 'unique_products' in user_profile.keys():
#             user_profile['unique_products'] = [db.get_dBproduct(sku_mapping[product], as_dict = True)['_id'] 
#                                                for product in user_profile['unique_products']]
        
#         if 'shopping_baskets' in user_profile.keys():
#             user_profile['shopping_baskets'] = {basket: [db.get_dBproduct(sku_mapping[product], as_dict = True)['_id'] for product in products] 
#                                                 for basket, products in user_profile['shopping_baskets'].items()}
        
#         # Cast the values for saving
#         for att, values in user_profile.items():
#             # if 'recommendation' in att.lower():
#             #     if isinstance(values, dict):
#             #         values['recommendations'] = [db.get_dBproduct(product['indaco_sku'], as_dict = True)['_id']  for product in values['recommendations']]
#             #         if 'production_area' in values.keys():
#             #             production_area = str(values['production_area'])
#             #             values['production_area'] = productionArea_codes[production_area.lower()]
#             #         recommended_SKUs = values
#             #     else:
#             #         recommended_SKUs = [db.get_dBproduct(product['indaco_sku'], as_dict = True)['_id']  for product in values]
#             #     user_profile[att] = recommended_SKUs
#             #elif 'production_areas' in att.lower():
#             if 'production_areas' in att.lower():
#                 user_profile[att] = sorted([productionArea_codes[production_area.lower()] for production_area in values], reverse = True)
#             elif 'categories' in att.lower():
#                 user_profile[att] = [db.get_productTypeInfo(category)['_id'] for category in values]
#             else:
              
#                 if isinstance(values, set):
#                     user_profile[att] = sorted(values, reverse = True)
#                 elif isinstance(values, dict):
#                     values = dict(sorted(values.items(), reverse = True))

#                     keys = list(map(str, values.keys()))
#                     values = list(values.values())
#                     user_profile[att] = dict(zip(keys, values))
#         db_profile.update(user_profile)
#         # Write the item to the database
#         #db.write_newDbItem(collectionName, db_profile)
#         db.db_service.writeNewDbItem("userprofiles", db_profile)