# | Current Path : /home/deltalab/PMS/recommendations/user_profiling/_library/ |
# | Current File : //home/deltalab/PMS/recommendations/user_profiling/_library/choquetIntegral_utils.py |
import numpy as np
from json import dumps,load
from itertools import combinations
import os
# Module-level caches — nothing in this module assigns them, so they are
# presumably populated by the importing code; they stay None until then.
# TODO confirm against callers.
PRECOMPUTED_SUBSET_MOBIUSVALUES = None  # presumably {attribute-name tuple -> Mobius value}, cf. preComputed_mobiusRepresentation
CAPACITY_DICT = None  # presumably the capacity dictionary built by generate_capacityDict
CAPACITY_DICT_NAME = None  # presumably the type_recommendation key of the cached dictionary
def generate_capacityDict(all_attributes,type_recommendation,user_attributes = None):
    """Build the capacity dictionary used by the Choquet integral.

    The construction recipe is read from ``capacity_dictionaries.json``
    (located next to this module), under the ``type_recommendation`` key:
      1.A) every attribute gets a uniform capacity 1/n (rounded to 2 decimals);
           attributes listed in ``unnecessary_attributes`` are zeroed;
      1.B) entries listed under ``lessImportant_attributes`` have their
           capacity multiplied by the given reduction factor;
      2)   each coalition gets the capacity declared in the JSON file, keyed
           by its sorted attribute names joined with '|'.

    Args:
        all_attributes: list of all attribute names; must match the
            ``all_attributes`` list serialized in the JSON file.
        type_recommendation: key selecting the recipe inside the JSON file.
        user_attributes: unused in this function — presumably kept for
            interface compatibility with callers; TODO confirm.

    Returns:
        dict mapping attribute name (or '|'-joined coalition) to its
        capacity, sorted by capacity then key length, descending.

    Raises:
        ValueError: when ``all_attributes`` does not match the serialized set.
        KeyError: when ``type_recommendation`` is missing from the JSON file.
    """
    # Load information on how to build the capacity dictionary from the .json file
    with open(os.path.join(os.path.dirname(__file__), "capacity_dictionaries.json"), encoding = "utf-8") as f:
        dictionaries = load(f)
    dictionary_info = dictionaries[type_recommendation]
    # Check that the attributes serialized in the json match the given attributes
    if set(all_attributes) != set(dictionary_info['all_attributes']):
        # BUGFIX: raise a specific exception type instead of a bare Exception
        raise ValueError("Attributes given as parameter don't match with attributes in dictionary")
    # 1.A) Generate the single capacities (1/n)
    uniform_singleCapacity = np.round(1 / len(all_attributes), 2)
    capacityDict = dict.fromkeys(all_attributes, uniform_singleCapacity)
    # Zero out unnecessary attributes (i.e., useless attributes when taken alone)
    for att in dictionary_info['unnecessary_attributes']:
        capacityDict[att] = 0
    # 1.B) Reduce the capacity for some selected attributes (i.e., less important attributes)
    if 'lessImportant_attributes' in dictionary_info:
        lessImportant_attributes = dictionary_info['lessImportant_attributes']
        for reductionFactor in lessImportant_attributes:
            # NOTE(review): the '|'-joined key must already exist in capacityDict, so
            # each entry presumably names a single attribute at this point — confirm
            # against the JSON file (coalition keys are only added below).
            capacityDict['|'.join(lessImportant_attributes[reductionFactor])] *= float(reductionFactor)
    # 2) Generate the coalitions ('|'-joined, sorted attribute names)
    coalitions = dictionary_info['coalitions']
    for coalition_info in coalitions.values():
        conditions = sorted(coalition_info['attributes'])
        capacityDict['|'.join(conditions)] = coalition_info['capacity']
    # Sort the dictionary of capacities (by capacity, then key length, descending)
    capacityDict = dict(sorted(capacityDict.items(), key = lambda dict_item: (dict_item[1], len(dict_item[0])), reverse = True))
    print('-' * 50)
    # BUGFIX: fixed "CAPACITIY" typo in the printed header
    print(f"CAPACITY VALUES ({len(capacityDict)} with {len(coalitions)} coalitions):")
    print('-' * 50)
    print(dumps(capacityDict, indent = 4))
    print('-' * 50)
    return capacityDict
def compute_subsetCapacity(attribute_subset, capacityDict, verbose):
    """Compute the capacity of a subset of attributes.

    Single-attribute capacities are summed, but whenever ALL the members of a
    coalition (a '|'-joined key of ``capacityDict``) are present in the
    subset, the coalition capacity replaces the members' individual
    capacities. Larger coalitions take precedence over smaller ones.

    Args:
        attribute_subset: iterable of attribute names; names missing from
            ``capacityDict`` are silently skipped.
        capacityDict: mapping from attribute name (or '|'-joined coalition)
            to its capacity.
        verbose: when True, print the retained partial capacities.

    Returns:
        The subset capacity (numpy scalar): sum of the retained capacities.
    """
    # Retrieve the single capacities (idiom: dict comprehension + direct membership
    # test instead of looping over `capacityDict.keys()`)
    partial_capacities = {att: capacityDict[att] for att in attribute_subset if att in capacityDict}
    # Retrieve the coalitions (keys made of two or more '|'-joined attributes),
    # sorted largest-first so bigger coalitions are matched before smaller ones
    coalitions = [key.split('|') for key in capacityDict if '|' in key]
    coalitions.sort(key = len, reverse = True)
    # Check the presence of each coalition
    for coalitionAttributes in coalitions:
        # A coalition applies only when ALL of its attributes are still present
        # (set check replaces the original np.intersect1d on string arrays)
        if set(coalitionAttributes) <= partial_capacities.keys():
            coalition_key = '|'.join(coalitionAttributes)
            # Remove the capacities of the single members...
            for member in coalitionAttributes:
                partial_capacities.pop(member)
            # ...and keep the coalition capacity instead
            partial_capacities[coalition_key] = capacityDict[coalition_key]
    # Visualize capacities of the subset (sorted by capacity, descending)
    partial_capacities = dict(sorted(partial_capacities.items(), key = lambda item: item[1], reverse = True))
    if verbose:
        print(f"SUBSET CAPACITIES ({len(attribute_subset)}):", dumps(partial_capacities, indent = 4))
    # Sum all the retained capacities
    return np.sum(list(partial_capacities.values()))
def generate_mobiusRepresentation(subset_attributeNames, capacityDict, verbose):
    """Compute the Mobius representation of a set of attributes.

    Implements m(S) = sum over every non-empty B subset of S of
    (-1)^|S \\ B| * capacity(B), where capacity(B) comes from
    ``compute_subsetCapacity``.

    Args:
        subset_attributeNames: sequence of attribute names forming S.
        capacityDict: mapping from attribute (or '|'-joined coalition) to capacity.
        verbose: when True, print every intermediate subset term.

    Returns:
        The Mobius value m(S) (numpy scalar).
    """
    if verbose:
        print(f"ATTRIBUTES ({len(subset_attributeNames)})", subset_attributeNames)
        print("\nGenerate subsets\n" + "-" * 40)
    # 0) Generate every non-empty subset B of S, from largest dimension down to 1
    mobius_subsets = []
    for dim in range(len(subset_attributeNames), 0, -1):
        singleDim_combinations = list(combinations(subset_attributeNames, r = dim))
        mobius_subsets.extend(singleDim_combinations)
        if verbose:
            print(f"SUBSET DIMENSION: {dim} --> COMBINATIONS: {len(singleDim_combinations)}")
    if verbose:
        print("-" * 40 + f"\n\t\tTOTAL: {len(mobius_subsets)}\n")
    # 1) Compute the Mobius term of each subset
    mobius_values = []
    for idx, subsetNames in enumerate(mobius_subsets):
        # 1.A) |S \ B| --> cardinality of the elements of S that are not in B
        diff_subset_cardinality = np.setdiff1d(subset_attributeNames, subsetNames).shape[0]
        # 1.B) Capacity of the subset B
        subset_capacity = compute_subsetCapacity(subsetNames, capacityDict, verbose = False)
        # 1.C) Sign-weighted term of the Mobius representation
        subset_mobiusValue = np.power(-1, diff_subset_cardinality) * subset_capacity
        mobius_values.append(subset_mobiusValue)
        if verbose:
            print(f"\nSUBSET (id:{idx}, dim = {len(subsetNames)}):", " | ".join(subsetNames))
            print("--> Diff subset cardinality:", diff_subset_cardinality)
            print("--> [SUBSET] Capacity measure:", subset_capacity)
            # BUGFIX: fixed "Mabius" typo in the printed label
            print("--> [SUBSET] Mobius score", subset_mobiusValue)
    # Sum all the subset terms
    mobius_value = np.sum(mobius_values)
    if verbose:
        print(f"\nMobius representation --> {mobius_value}")
        print("Standard capacity: ", compute_subsetCapacity(subset_attributeNames, capacityDict, verbose = False))
        print("\nMIN:", np.min(mobius_values), "MAX:", np.max(mobius_values))
        print(f"MOBIUS VALUES ({len(mobius_values)}):", mobius_values, "\n")
    return mobius_value
def preComputed_mobiusRepresentation(allAttributes, capacityDict, verbose = True):
    """Pre-compute the Mobius representation of every non-empty attribute subset.

    Args:
        allAttributes: sequence of all attribute names.
        capacityDict: mapping from attribute (or '|'-joined coalition) to capacity.
        verbose: NOTE(review) — accepted but currently unused; every inner call
            runs with verbose=False. Confirm whether it should be forwarded.

    Returns:
        dict mapping each attribute-name tuple to its Mobius value.
    """
    preComputed_mobiusValues = {}
    # Enumerate every non-empty subset, from the full set down to singletons
    for dim in range(len(allAttributes), 0, -1):
        for subset in combinations(allAttributes, r = dim):
            preComputed_mobiusValues[subset] = generate_mobiusRepresentation(subset, capacityDict, verbose = False)
    return preComputed_mobiusValues
def weightedSum(attributes, capacityDict, verbose):
    """Compute the capacity-weighted sum of the attribute values.

    Args:
        attributes: assumed to be a pandas Series whose index holds attribute
            names and whose values hold their scores — TODO confirm callers.
        capacityDict: mapping from attribute name to its capacity (weight);
            every index entry must be present (KeyError otherwise).
        verbose: when True, print values, capacities, and the result.

    Returns:
        The inner product sum_i value_i * capacity_i (numpy scalar).
    """
    values = attributes.values
    attributeCapacities = [capacityDict[attribute] for attribute in attributes.index]
    # BUGFIX: local renamed — it previously shadowed the function name
    weighted_sum = np.inner(values, attributeCapacities)
    if verbose:
        print("VALUES:", values)
        print("ATTRIBUTE CAPACITIES:", attributeCapacities)
        print("--> Weighted sum:", weighted_sum)
    return weighted_sum
def choquet_integral(attributes, capacityDict, verbose):
    """Compute the Choquet integral of the attribute values.

    Sorts the values ascending, then computes
    sum_i (x(i) - x(i-1)) * capacity({attributes from i to n}), with x(0) = 0.

    Args:
        attributes: assumed to be a pandas Series of attribute scores indexed
            by attribute name — TODO confirm callers.
        capacityDict: mapping from attribute (or '|'-joined coalition) to capacity.
        verbose: when True, print all intermediate steps.

    Returns:
        The Choquet integral rounded to 4 decimals (numpy scalar).
    """
    # 0) Sort the condition values (X) ascending
    attributes = attributes.sort_values(ascending = True)
    if verbose:
        print('-' * 20, f"ALL ATTRIBUTES ({len(attributes)}):", '-' * 20)
        print(dumps(attributes.to_dict(), indent = 2), "\n")
    # 1) [PART 1] Compute the differences --> x(i) - x(i - 1), with x(0) = 0
    diff = np.diff(attributes.values, axis = 0, prepend = [0])
    # 2) [PART 2] Compute the partial capacities of every suffix subset [i:n]
    partial_capacities = []
    for i in range(len(attributes)):
        # BUGFIX: these debug prints previously ran unconditionally, ignoring
        # the verbose flag used everywhere else in this function
        if verbose:
            print("\n" + '-' * 40 + f"\nCompute SUBSET CAPACITY [{i + 1}:{len(attributes)}]\n" + '-' * 40)
        # Select a subset of conditions [i:n]
        cond_subset = attributes[i:].index
        # Compute the capacity of the subset
        partialCapacity = compute_subsetCapacity(cond_subset, capacityDict, verbose = False)
        partial_capacities.append(partialCapacity)
        if verbose:
            print("\n\t" + '-' * 35 + f"\n\t Partial capacity [{i + 1}:{len(attributes)}]: {np.round(partialCapacity, 2)}\n\t" + '-' * 35)
    # 3) Choquet integral --> inner product (sum(a[:] * b[:])), rounded once
    # (local renamed: it previously shadowed the function name)
    integral_value = np.round(np.inner(diff, partial_capacities), 4)
    # Visualize the outputs
    if verbose:
        print("\n" + '-' * 20, "OUTPUT", '-' * 20)
        print("(Choquet Integral - PART A) DIFF):", np.round(diff, 4))
        print("(Choquet Integral - PART B) PARTIAL CAPACITIES:", np.round(partial_capacities, 4))
        print("--> CHOQUET INTEGRAL (inner product)", integral_value)
        print('-' * 60, "\n")
    return integral_value
def choquet_integral_mobius(attributes, preComputed_subsetMobiusValues, verbose, relevance_threshold = 0.1):
    """Compute the Choquet integral via pre-computed Mobius values.

    The integral is the sum, over every non-empty attribute subset, of
    m(subset) * min(values in subset). Subsets whose score reaches
    ``relevance_threshold`` contribute their attributes to the returned
    list of relevant attributes (deduplicated, first-occurrence order).

    Args:
        attributes: assumed to be a pandas Series of attribute scores indexed
            by attribute name — TODO confirm callers.
        preComputed_subsetMobiusValues: dict mapping attribute-name tuples to
            their Mobius value (see preComputed_mobiusRepresentation).
        verbose: when True, print each subset's intermediate values.
        relevance_threshold: minimum subset score for its attributes to be
            reported as relevant (default 0.1, the previously hard-coded value).

    Returns:
        (choquet_integral rounded to 4 decimals, relevant attributes —
        a numpy array, or an empty list when no subset qualifies).
    """
    # Split the attribute series into names (array of str) and values (array of float)
    attribute_names = np.array(attributes.index)
    attribute_values = attributes.values
    # Generate all index subsets, from the full set down to singletons
    indices_subsets = []
    for dim in range(len(attributes), 0, -1):
        all_indices = range(len(attributes))
        indices_subsets.extend(np.array(list(combinations(all_indices, r = dim))))
    # For each subset compute its score
    subset_values = {}
    for subset_indices in indices_subsets:
        # Get the subset attributes and their values
        attributeNames_subset = attribute_names[subset_indices]
        attributeValues_subset = attribute_values[subset_indices]
        # PART A: (pre-computed) Mobius representation of the subset attributes
        subset_mobiusRepresentation = preComputed_subsetMobiusValues[tuple(attributeNames_subset)]
        # PART B: minimum value of the subset
        min_value = np.min(attributeValues_subset)
        # Subset score: A * B
        subset_values[tuple(attributeNames_subset)] = subset_mobiusRepresentation * min_value
        if verbose:
            print("\n\nINDICES:", subset_indices)
            print(f"[SUBSET] Attributes names ({len(attributeNames_subset)}):", attributeNames_subset)
            print(f"[SUBSET] Attributes values ({len(attributeValues_subset)}):", attributeValues_subset)
            print("[A] Mobius:", subset_mobiusRepresentation)
            print("[B] Min value:", min_value)
    # Retrieve the most important attributes (subsets whose score reaches the threshold)
    subset_scores = np.array(list(subset_values.values()))
    idx_selectedSubsets = np.argwhere(subset_scores >= relevance_threshold).flatten()  # ALT: np.flatnonzero
    if len(idx_selectedSubsets) > 0:
        relevantAttributes = np.array(list(subset_values.keys()), dtype = 'object')[idx_selectedSubsets]
        # Flatten the tuples, deduplicate, and restore first-occurrence order
        relevantAttributes, indices = np.unique(list(np.concatenate(relevantAttributes).flat), return_index = True)
        relevantAttributes = relevantAttributes[np.argsort(indices)]
    else:
        relevantAttributes = []
    # Compute the Choquet integral (i.e., sum all the subset scores)
    # (local renamed: it previously shadowed the sibling function name)
    integral_value = np.round(np.sum(subset_scores), 4)
    if verbose:
        print("CHOQUET INTEGRAL:", integral_value)
    return (integral_value, relevantAttributes)
def buildUpExplainationString(relevantAttributes, collectionName):
    """Assemble the (Italian) explanation string for a recommendation.

    Args:
        relevantAttributes: collection of relevant attribute names (list or
            numpy array, as produced by choquet_integral_mobius).
        collectionName: name of the product area; upper-cased in the message.

    Returns:
        The explanation string; optional fragments are appended only when all
        the attributes of the corresponding group are relevant.
    """
    def _all_relevant(attribute_group):
        # True when every attribute of the group appears among the relevant ones
        return all(np.isin(attribute_group, relevantAttributes))

    parts = ["Ti potrebbero interessare prodotti"]
    if _all_relevant(['y_relativeFreq']):
        parts.append(" acquistati frequentemente")
    parts.append(f" provenienti dall'area {collectionName.upper()}")
    if _all_relevant(['x_userNovelty', 'x_userCategories', 'x_userBrands']):
        parts.append(', in linea con il tuo profilo')
    if _all_relevant(['y_sameWarehouse', 'y_sameConservationMethod']):
        parts.append(" ed aggregati per ridurre l'impatto ambientale")
    return ''.join(parts)