Your IP : 216.73.217.13


Current Path : /home/deltalab/PMS/recommendations/user_profiling/_library/
Upload File :
Current File : //home/deltalab/PMS/recommendations/user_profiling/_library/choquetIntegral_utils.py

import numpy as np
from json import dumps,load
from itertools import combinations
import os

# Module-level caches shared across calls.
# NOTE(review): none of these globals are assigned anywhere in this chunk —
# presumably they are populated by code elsewhere in the project; confirm before relying on them.
PRECOMPUTED_SUBSET_MOBIUSVALUES = None  # cached output of preComputed_mobiusRepresentation — TODO confirm
CAPACITY_DICT = None  # cached capacity dictionary — TODO confirm
CAPACITY_DICT_NAME = None  # identifier of the cached dictionary (presumably the recommendation type) — TODO confirm

def generate_capacityDict(all_attributes,type_recommendation,user_attributes = None):
    """Build the capacity (fuzzy-measure) dictionary for a recommendation type.

    Loads the dictionary blueprint from ``capacity_dictionaries.json`` (stored
    next to this module), assigns a uniform capacity of 1/n to every attribute,
    zeroes the attributes marked as unnecessary, reduces the capacity of the
    less important ones, and finally adds the coalition capacities.

    Parameters
    ----------
    all_attributes : iterable of str
        All attribute names; must match the set serialized in the JSON file.
    type_recommendation : str
        Key selecting which blueprint to use inside the JSON file.
    user_attributes : unused
        Accepted but never read — kept for backward compatibility with callers.

    Returns
    -------
    dict
        Mapping attribute (or '|'-joined coalition) -> capacity, sorted by
        decreasing capacity and then by key length.

    Raises
    ------
    Exception
        If ``all_attributes`` does not match the attributes stored in the JSON.
    """
    # Load information on how to build the capacity dictionary from the .json file
    with open(os.path.join(os.path.dirname(__file__), "capacity_dictionaries.json")) as f:
        dictionaries = load(f)
    dictionary_info = dictionaries[type_recommendation]

    # Check that the attributes serialized in the JSON match the ones given as parameter
    if set(all_attributes) != set(dictionary_info['all_attributes']):
        raise Exception("Attributes given as parameter don't match with attributes in dictionary")

    # 1A) Generate the single capacities (uniform: 1/n)
    uniform_singleCapacity = np.round(1 / len(all_attributes), 2)
    capacityDict = dict.fromkeys(all_attributes, uniform_singleCapacity)

    # Remove unnecessary attributes (i.e., useless attributes when taken alone)
    for att in dictionary_info['unnecessary_attributes']:
        capacityDict[att] = 0

    # 1B) Reduce the capacity of the less important attributes.
    # BUGFIX: the original joined the attribute list with '|' and used the joined
    # string as the dict key; at this point only single-attribute keys exist, so
    # any list with more than one attribute raised a KeyError. Reduce each
    # attribute individually instead (identical behavior for 1-element lists).
    if 'lessImportant_attributes' in dictionary_info:
        for reductionFactor, attributes in dictionary_info['lessImportant_attributes'].items():
            for att in attributes:
                capacityDict[att] *= float(reductionFactor)

    # Generate the coalitions (keys are the sorted attribute names joined by '|')
    coalitions = dictionary_info['coalitions']
    for coalition_info in coalitions.values():
        conditions = sorted(coalition_info['attributes'])
        capacityDict['|'.join(conditions)] = coalition_info['capacity']

    # Sort the dictionary of capacities (descending capacity, then key length)
    capacityDict = dict(sorted(capacityDict.items(), key = lambda dict_item: (dict_item[1], len(dict_item[0])), reverse = True))

    print('-' * 50)
    print(f"CAPACITY VALUES ({len(capacityDict)} with {len(coalitions)} coalitions):")
    print('-' * 50)
    print(dumps(capacityDict, indent = 4))
    print('-' * 50)

    return capacityDict

def compute_subsetCapacity(attribute_subset, capacityDict, verbose):
    """Compute the capacity of a subset of attributes.

    The capacities of the single attributes are summed, except that whenever a
    coalition (a '|'-joined key of ``capacityDict``) is fully contained in the
    subset, its capacity replaces the individual capacities of its members.
    Attributes missing from ``capacityDict`` are silently ignored.
    """
    # Capacities of the single attributes that appear in the dictionary
    selected = {att: capacityDict[att] for att in attribute_subset if att in capacityDict}

    # Coalition keys expanded to attribute lists, largest coalitions first
    coalition_groups = sorted(
        (key.split('|') for key in capacityDict if '|' in key),
        key = len, reverse = True)

    for members in coalition_groups:
        # A coalition applies only when every member still has a pending capacity
        matched = np.intersect1d(members, list(selected.keys()))
        if len(matched) != len(members):
            continue

        # Replace the members' individual capacities with the coalition capacity
        coalition_key = '|'.join(members)
        selected[coalition_key] = capacityDict[coalition_key]
        for member in members:
            selected.pop(member)

    # Sort by descending capacity (for readability of the verbose output)
    selected = dict(sorted(selected.items(), key = lambda item: item[1], reverse = True))

    if verbose:
        print(f"SUBSET CAPACITIES ({len(attribute_subset)}):", dumps(selected, indent = 4))

    # Total capacity of the subset
    return np.sum(list(selected.values()))

def generate_mobiusRepresentation(subset_attributeNames, capacityDict, verbose):
    """Compute the Mobius representation m(S) of the attribute set S.

    m(S) = sum over every non-empty B subset of S of (-1)^{|S \\ B|} * capacity(B).
    The empty subset is skipped: its capacity is 0, so it contributes nothing.
    """
    if verbose:
        print(f"ATTRIBUTES ({len(subset_attributeNames)})", subset_attributeNames)
        print("\nGenerate subsets\n" + "-" * 40)

    # 0) Enumerate every non-empty subset, from the largest dimension down to 1
    mobius_subsets = []
    for dim in range(len(subset_attributeNames), 0, -1):
        dim_subsets = list(combinations(subset_attributeNames, r = dim))
        mobius_subsets += dim_subsets

        if verbose:
            print(f"SUBSET DIMENSION: {dim} --> COMBINATIONS: {len(dim_subsets)}")

    if verbose:
        print("-" * 40 + f"\n\t\tTOTAL: {len(mobius_subsets)}\n")

    # 1) Accumulate the signed capacity of every subset
    mobius_values = []
    for idk, subsetNames in enumerate(mobius_subsets):
        # |S \ B| --> number of attributes of S that are missing from this subset B
        missing_count = np.setdiff1d(subset_attributeNames, subsetNames).shape[0]

        # Capacity of the subset B
        subset_capacity = compute_subsetCapacity(subsetNames, capacityDict, verbose = False)

        # Signed term of the Mobius representation: (-1)^{|S \ B|} * capacity(B)
        subset_mobiusValue = np.power(-1, missing_count) * subset_capacity
        mobius_values.append(subset_mobiusValue)

        if verbose:
            print(f"\nSUBSET (id:{idk}, dim = {len(subsetNames)}):", " | ".join(subsetNames))
            print("--> Diff subset cardinality:", missing_count)
            print("--> [SUBSET] Capacity measure:", subset_capacity)
            print("--> [SUBSET] Mabius score", subset_mobiusValue)

    # Sum all the signed terms
    mobius_value = np.sum(mobius_values)

    if verbose:
        print(f"\nMobius representation --> {mobius_value}")
        print("Standard capacity: ", compute_subsetCapacity(subset_attributeNames, capacityDict, verbose = False))
        print("\nMIN:", np.min(mobius_values), "MAX:", np.max(mobius_values))
        print(f"MOBIUS VALUES ({len(mobius_values)}):", mobius_values, "\n")

    return mobius_value

def preComputed_mobiusRepresentation(allAttributes, capacityDict, verbose = True):
    """Pre-compute the Mobius representation of every non-empty subset of
    ``allAttributes`` so that ``choquet_integral_mobius`` can look them up.

    Returns a dict mapping each subset (as a tuple of attribute names, in the
    order produced by itertools.combinations) to its Mobius value.

    NOTE(review): ``verbose`` is accepted but never used — kept for backward
    compatibility with existing callers.
    """
    # Enumerate every non-empty subset, from the largest dimension down to 1
    all_subsets = []
    for dim in range(len(allAttributes), 0, -1):
        all_subsets += list(combinations(allAttributes, r = dim))

    # Compute and cache the Mobius value of each subset
    return {
        tuple(subset): generate_mobiusRepresentation(subset, capacityDict, verbose = False)
        for subset in all_subsets
    }

def weightedSum(attributes, capacityDict, verbose):
    """Plain weighted sum of the attribute values, weighted by their capacities.

    ``attributes`` is a pandas Series (index = attribute names, values =
    scores); every index entry must be a key of ``capacityDict``.
    """
    scores = attributes.values
    weights = [capacityDict[name] for name in attributes.index]

    # Inner product: sum_i value_i * capacity_i
    result = np.inner(scores, weights)

    if verbose:
        print("VALUES:", scores)
        print("ATTRIBUTE CAPACITIES:", weights)
        print("--> Weighted sum:", result)

    return result
    
def choquet_integral(attributes, capacityDict, verbose):
    """Compute the discrete Choquet integral of the attribute values.

    Implements C(x) = sum_i (x_(i) - x_(i-1)) * capacity({j : x_j >= x_(i)}),
    where x_(i) are the values sorted in ascending order and x_(0) = 0.

    Parameters
    ----------
    attributes : pandas.Series
        Index = attribute names, values = attribute scores.
    capacityDict : dict
        Capacity of every attribute and '|'-joined coalition
        (see ``compute_subsetCapacity``).
    verbose : bool
        When True, print the intermediate computations.

    Returns
    -------
    The Choquet integral, rounded to 4 decimals.
    """
    # 0) Sort the condition values (X) in ascending order
    attributes = attributes.sort_values(ascending = True)

    if verbose:
        print('-' * 20, f"ALL ATTRIBUTES ({len(attributes)}):", '-' * 20)
        print(dumps(attributes.to_dict(), indent=2), "\n")

    # 1) [PART 1] Compute the differences --> x(i) - x(i - 1), with x(0) = 0
    diff = np.diff(attributes.values, axis = 0, prepend = [0])

    # 2) [PART 2] Compute the partial capacities of the tail subsets [i:n]
    partial_capacities = list()
    for i in range(len(attributes)):
        # BUGFIX: these progress prints were emitted unconditionally; they are
        # now gated behind ``verbose`` like every other diagnostic in this file.
        if verbose:
            print("\n" + '-' * 40 + f"\nCompute SUBSET CAPACITY [{i + 1}:{len(attributes)}]\n" + '-' * 40)

        # Select a subset of conditions [i:n]
        cond_subset = attributes[i:].index

        # Compute the capacity of the subset
        partialCapacity = compute_subsetCapacity(cond_subset, capacityDict, verbose = False)
        partial_capacities.append(partialCapacity)

        if verbose:
            print("\n\t" + '-' * 35 + f"\n\t   Partial capacity [{i + 1}:{len(attributes)}]: {np.round(partialCapacity, 2)}\n\t" +  '-' * 35)

    # 3) Compute the choquet integral --> inner product (sum(a[:] * b[:]))
    choquet_integral = np.inner(diff, partial_capacities)
    choquet_integral = np.round(choquet_integral, 4)

    # Visualize the outputs
    if verbose:
        print("\n" + '-' * 20, f"OUTPUT", '-' * 20)
        print("(Choquet Integral - PART A) DIFF):", np.round(diff, 4))
        print("(Choquet Integral - PART B) PARTIAL CAPACITIES:", np.round(partial_capacities, 4))
        print("--> CHOQUET INTEGRAL (inner product)", np.round(choquet_integral, 4))
        print('-' * 60, "\n")

    return choquet_integral

def choquet_integral_mobius(attributes, preComputed_subsetMobiusValues, verbose):
    """Choquet integral in Mobius form: sum over subsets T of m(T) * min_{i in T} x_i.

    ``attributes`` is a pandas Series; ``preComputed_subsetMobiusValues`` maps
    each subset (tuple of attribute names) to its pre-computed Mobius value.
    Returns a tuple ``(integral, relevantAttributes)`` where the relevant
    attributes are those belonging to subsets contributing >= 0.1, deduplicated
    and kept in order of first appearance.
    """
    # Split the attribute Series into names and values as numpy arrays
    names = np.array(attributes.index)
    scores = attributes.values

    # Index subsets of every dimension, from the largest down to 1
    indices_subsets = []
    for dim in range(len(attributes), 0, -1):
        indices_subsets.extend(np.array(list(combinations(range(len(attributes)), r = dim))))

    # Contribution of each subset: (Mobius value) * (minimum attribute value)
    subset_values = dict()
    for idx in indices_subsets:
        subset_names = names[idx]
        subset_scores = scores[idx]

        # PART A: (pre-computed) Mobius representation of this subset
        mobius = preComputed_subsetMobiusValues[tuple(subset_names)]

        # PART B: minimum attribute value of the subset
        lowest = np.min(subset_scores)

        # Subset contribution: A * B
        subset_values[tuple(subset_names)] = mobius * lowest

        if verbose:
            print("\n\nINDICES:", idx)
            print(f"[SUBSET] Attributes names ({len(subset_names)}):", subset_names)
            print(f"[SUBSET] Attributes values ({len(subset_scores)}):", subset_scores)
            print("[A] Mobius:", mobius)
            print("[B] Min value:", lowest)

    # Subsets contributing at least 0.1 are considered relevant
    contribution_scores = np.array(list(subset_values.values()))
    idx_selected = np.argwhere(contribution_scores >= 0.1).flatten()

    if len(idx_selected) > 0:
        selected_keys = np.array(list(subset_values.keys()),  dtype='object')[idx_selected]
        # Deduplicate the attribute names while preserving first-appearance order:
        # np.unique sorts but returns first-occurrence indices; argsort restores order
        flat_names = list(np.concatenate(selected_keys).flat)
        uniques, first_positions = np.unique(flat_names, return_index = True)
        relevantAttributes = uniques[np.argsort(first_positions)]
    else:
        relevantAttributes = []

    # Choquet integral = sum of all the subset contributions
    choquet = np.round(np.sum(contribution_scores), 4)

    if verbose:
        print("CHOQUET INTEGRAL:", choquet)

    return (choquet, relevantAttributes)

def buildUpExplainationString(relevantAttributes, collectionName):
    """Compose the (Italian) explanation sentence for a recommendation, based on
    which attribute groups drove it and the product collection it comes from.
    """
    def _all_present(candidates):
        # True when every candidate attribute is among the relevant ones
        return all(np.isin(candidates, relevantAttributes))

    parts = [f"Ti potrebbero interessare prodotti"]

    if _all_present(['y_relativeFreq']):
        parts.append(" acquistati frequentemente")

    parts.append(f" provenienti dall'area {collectionName.upper()}")

    if _all_present(['x_userNovelty', 'x_userCategories', 'x_userBrands']):
        parts.append(', in linea con il tuo profilo')

    if _all_present(['y_sameWarehouse', 'y_sameConservationMethod']):
        parts.append(" ed aggregati per ridurre l'impatto ambientale")

    return ''.join(parts)