# Current Path : /home/deltalab/PMS/recommendations/user_profiling/_library/
# Current File : /home/deltalab/PMS/recommendations/user_profiling/_library/profiling_utils.py
from collections import defaultdict
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
from components.DbService import DbService
from _library.io_toolkit import load_collectionTypes
from os import path
def _is_missing_value(value):
    """Return True when a row cell is empty/missing and must not be profiled."""
    if value is None:
        return True
    if isinstance(value, str):
        return value == ''
    try:
        # pd.isna recognises float/numpy NaN, pd.NA and NaT for scalar cells
        return bool(pd.isna(value))
    except (TypeError, ValueError):
        # Non-scalar cells (pd.isna yields an array) are considered present
        return False


def retrive_userCategories(df_row, user_profiles,):
    """Accumulate the attributes of one transaction row into the user's profile.

    Parameters
    ----------
    df_row : mapping-like (e.g. a pandas row)
        Must expose 'customer_id', 'Transaction id' and the source columns
        listed in ``columns`` below.
    user_profiles : mapping
        Updated in place. Indexed as ``user_profiles[user_id][attribute]`` and
        each attribute must support ``.add`` -- presumably a
        ``defaultdict(lambda: defaultdict(set))``; verify against the caller.

    Returns
    -------
    None

    Notes
    -----
    Empty strings, ``None`` and NaN-like values (NaN, ``pd.NA``, ``NaT``) are
    skipped so that missing cells never end up inside a profile set.
    """
    # 0) Retrieve the user identity
    user_id = df_row['customer_id']

    # 0) Retrieve transaction id
    transaction_id = df_row['Transaction id']

    # Source column -> profile attribute
    columns = {
        'Product Type': 'categories',
        'Vendor': 'brands',
        'SKU': 'unique_products',
        'production_areas': 'production_areas',
        'weight [grams]': 'product_weights',
        'refrigerated': 'refrigerated_products',
    }

    for colName, attributeName in columns.items():
        value = df_row[colName]
        if _is_missing_value(value):
            continue

        # The profile is only touched when there is something to store
        user_profile = user_profiles[user_id]
        user_profile[attributeName].add(value)

        if attributeName == 'unique_products':
            # Baskets group the purchased SKUs by transaction; the slot is
            # (re)initialised when it does not hold a dict yet
            if not isinstance(user_profile['shopping_baskets'], dict):
                user_profile['shopping_baskets'] = defaultdict(set)
            user_profile['shopping_baskets'][transaction_id].add(value)
def visualize_profile_stats(user_profiles):
    """Print/plot summary statistics for every attribute found in the profiles.

    Values of the same attribute are pooled across all users: set-valued
    attributes contribute each of their members, any other value is pooled as
    a single item. Numeric attributes are shown as a box plot, the others as a
    frequency table sorted by decreasing frequency.

    Parameters
    ----------
    user_profiles : mapping
        ``{user_id: {attribute_name: values}}``.

    Returns
    -------
    None

    Notes
    -----
    * The attribute type is decided from all pooled values (numeric only when
      every value is a non-boolean number) instead of one random sample, so
      the output is deterministic.
    * Attributes without any value are reported instead of raising.
    * Container values (dict/list/set, e.g. per-transaction baskets) are
      counted through their string representation because they cannot be
      ordered by ``np.unique``.
    """
    # Put together all values according to the attributes
    profile_values = defaultdict(list)
    for user_profile in user_profiles.values():
        for attribute_name, values in user_profile.items():
            if isinstance(values, set):
                profile_values[attribute_name].extend(values)
            else:
                profile_values[attribute_name].append(values)

    # Visualize statistics
    for attribute_name, all_values in profile_values.items():
        print("-" * 100 + "\n" + "-" * 40, attribute_name, "-" * 40 + "\n" + "-" * 100)

        if not all_values:
            # e.g. an attribute that only ever held empty sets
            print("(no values)")
            continue

        # Discover the type of the attribute (bool is excluded on purpose:
        # flags are better summarised by their frequencies)
        is_numeric_attribute = all(
            isinstance(value, (int, float, np.number)) and not isinstance(value, bool)
            for value in all_values
        )

        if is_numeric_attribute:
            descr_stats_df = pd.DataFrame(data=all_values, columns=[attribute_name])
            descr_stats_df.boxplot(column=attribute_name)
            plt.show()
        else:
            countable_values = [
                str(value) if isinstance(value, (dict, list, set)) else value
                for value in all_values
            ]
            unique_items, counts = np.unique(countable_values, return_counts=True)
            item_frequencies = pd.DataFrame(data=counts, index=unique_items, columns=['Frequency'])
            item_frequencies.sort_values(by='Frequency', inplace=True, ascending=False)
            print(item_frequencies)
def write_profiles(db_e,user_profiles, sku_mapping,collectionName, overwriteCollection = True,channel=None):
    """Convert the user profiles to their database form and store them.

    Parameters
    ----------
    db_e : database service
        Used for ``get_decryptedUsers``, ``get_dBproduct`` and
        ``get_productTypeInfo``; its ``ENCRYPTING_KEY`` is copied onto the
        writer connection.
    user_profiles : dict
        ``{user_id: {attribute_name: values}}``. The profiles are NOT modified:
        each one is converted on a copy, so the same profiles can be written
        again (e.g. to another collection or channel).
    sku_mapping : mapping
        Maps the SKUs stored in 'unique_products' / 'shopping_baskets' to the
        identifier expected by ``db_e.get_dBproduct``.
    collectionName : str
        Target collection. ``product=True`` is passed to ``get_dBproduct``
        only when this is "userprofiles".
    overwriteCollection : bool
        When true, the collection is dropped before writing.
    channel : optional
        Forwarded to ``get_dBproduct`` and, when not None, stored in every
        written item under 'channel'.

    Returns
    -------
    None

    Raises
    ------
    KeyError
        If a user, SKU or production area has no entry in the corresponding
        lookup table.
    """
    get_product_id = collectionName == "userprofiles"

    def product_dbId(sku):
        # Database '_id' of the product identified by ``sku``
        return db_e.get_dBproduct(sku, channel=channel, as_dict=True, product=get_product_id)['_id']

    # Retrieve the collections (NOTE: path is relative to the working directory)
    file_path = path.join('_library', 'INDACO_collectionCodes.json')
    _, rawCollectionTypes = load_collectionTypes(file_path, verbose = False)
    # Reverse lookup: lower-cased production area name -> code
    productionArea_codes = {value.lower(): att for att, value in rawCollectionTypes['production_areas'].items()}

    # Decrypting customers
    db = DbService("mongodb")
    db.ENCRYPTING_KEY = db_e.ENCRYPTING_KEY
    decryptedCustomers = db_e.get_decryptedUsers(user_profiles.keys())

    if overwriteCollection:
        db.db_service.dropCollection(collectionName)

    # Write the collection
    print(f"[COLLECTION: {collectionName}] Writing {len(user_profiles.keys())} user profiles...")
    for user_id, user_profile in user_profiles.items():
        # Create the database item
        db_profile = {'user_id': decryptedCustomers[user_id]}
        if channel is not None:
            db_profile['channel'] = channel

        # Convert a shallow copy: the caller's profile must keep its raw SKUs
        profile = dict(user_profile)

        if 'unique_products' in profile:
            profile['unique_products'] = [product_dbId(sku_mapping[product])
                                          for product in profile['unique_products']]

        if 'shopping_baskets' in profile:
            profile['shopping_baskets'] = {basket: [product_dbId(sku_mapping[product]) for product in products]
                                           for basket, products in profile['shopping_baskets'].items()}

        # Cast the values for saving
        for att, values in list(profile.items()):
            att_lower = att.lower()

            if 'recommendation' in att_lower:
                if isinstance(values, dict):
                    # Copy before editing so the nested dict of the caller is untouched
                    values = dict(values)
                    values['recommendations'] = [product_dbId(product['indaco_sku'])
                                                 for product in values['recommendations']]
                    if 'production_area' in values:
                        production_area = str(values['production_area'])
                        values['production_area'] = productionArea_codes[production_area.lower()]
                    profile[att] = values
                else:
                    profile[att] = [product_dbId(product['indaco_sku']) for product in values]

            elif 'production_areas' in att_lower:
                profile[att] = sorted([productionArea_codes[production_area.lower()] for production_area in values],
                                      reverse = True)

            elif 'categories' in att_lower:
                profile[att] = [db_e.get_productTypeInfo(category)['_id'] for category in values]

            elif isinstance(values, set):
                profile[att] = sorted(values, reverse = True)

            elif isinstance(values, dict):
                # Keys are stringified after sorting them in descending order
                profile[att] = {str(key): value for key, value in sorted(values.items(), reverse = True)}

        db_profile.update(profile)

        # Write the item to the database
        db.db_service.writeNewDbItem(collectionName, db_profile)
# def write_recommendations(db_e,recoms, sku_mapping, overwriteCollection = True):
# # Retrieve the collections
# file_path = path.join('_library', 'INDACO_collectionCodes.json')
# _, rawCollectionTypes = load_collectionTypes(file_path, verbose = False)
# productionArea_codes = {value.lower(): att for att, value in rawCollectionTypes['production_areas'].items()}
# # Decrypting customers
# db = DbService("mongodb")
# db.ENCRYPTING_KEY = db_e.ENCRYPTING_KEY
# decryptedCustomers = db_e.get_decryptedUsers(user_profiles.keys())
# if overwriteCollection:
# #db.drop_existingTable(collectionName)
# db.db_service.dropCollection("userprofiles")
# # Write the collection
# print(f"[COLLECTION: userprofiles] Writing {len(recoms.keys())} user profiles...")
# for user_id, user_profile in user_profiles.items():
# # Create the database item
# db_profile = {'user_id': decryptedCustomers[user_id]}
# if 'unique_products' in user_profile.keys():
# user_profile['unique_products'] = [db.get_dBproduct(sku_mapping[product], as_dict = True)['_id']
# for product in user_profile['unique_products']]
# if 'shopping_baskets' in user_profile.keys():
# user_profile['shopping_baskets'] = {basket: [db.get_dBproduct(sku_mapping[product], as_dict = True)['_id'] for product in products]
# for basket, products in user_profile['shopping_baskets'].items()}
# # Cast the values for saving
# for att, values in user_profile.items():
# # if 'recommendation' in att.lower():
# # if isinstance(values, dict):
# # values['recommendations'] = [db.get_dBproduct(product['indaco_sku'], as_dict = True)['_id'] for product in values['recommendations']]
# # if 'production_area' in values.keys():
# # production_area = str(values['production_area'])
# # values['production_area'] = productionArea_codes[production_area.lower()]
# # recommended_SKUs = values
# # else:
# # recommended_SKUs = [db.get_dBproduct(product['indaco_sku'], as_dict = True)['_id'] for product in values]
# # user_profile[att] = recommended_SKUs
# #elif 'production_areas' in att.lower():
# if 'production_areas' in att.lower():
# user_profile[att] = sorted([productionArea_codes[production_area.lower()] for production_area in values], reverse = True)
# elif 'categories' in att.lower():
# user_profile[att] = [db.get_productTypeInfo(category)['_id'] for category in values]
# else:
# if isinstance(values, set):
# user_profile[att] = sorted(values, reverse = True)
# elif isinstance(values, dict):
# values = dict(sorted(values.items(), reverse = True))
# keys = list(map(str, values.keys()))
# values = list(values.values())
# user_profile[att] = dict(zip(keys, values))
# db_profile.update(user_profile)
# # Write the item to the database
# #db.write_newDbItem(collectionName, db_profile)
# db.db_service.writeNewDbItem("userprofiles", db_profile)