# --- File-manager artefact (not Python code); kept as a comment for provenance ---
# Your IP : 216.73.217.13
# Current Path : /home/deltalab/PMS/recommendations/recommender-system-batch/_library/recom_utils/
# Upload File :
# Current File : /home/deltalab/PMS/recommendations/recommender-system-batch/_library/recom_utils/assRules_utils.py

import numpy as np

from _library.toolkit import buildUp_explaination


def preProcessing_coPurchases(association_rules_df, name_mapping = None):
    """Pre-process a co-purchase association-rules dataframe in place.

    Optionally renames columns via ``name_mapping`` (case-insensitive
    substring match against the existing column names), then converts every
    non-numeric column from a ``'|'``-delimited string into a list of
    whitespace-stripped item names; NaN entries become empty lists.

    Parameters
    ----------
    association_rules_df : pandas.DataFrame
        Association rules with string-encoded item columns. Mutated in place.
    name_mapping : dict or None
        Maps an original column (sub)name to its replacement text.

    Returns
    -------
    pandas.DataFrame
        The same (mutated) dataframe, returned for chaining.
    """
    delimiter = '|'

    if name_mapping:
        # Expand the user-supplied mapping to the actual column names.
        # Matching is case-insensitive, but str.replace is case-sensitive, so
        # the key must match the column's casing for the rename to take effect.
        column_renames = {colName: colName.replace(originalcolName, newColName)
                          for originalcolName, newColName in name_mapping.items()
                          for colName in association_rules_df.columns
                          if originalcolName.lower() in colName.lower()}

        if column_renames:
            association_rules_df.rename(columns = column_renames, inplace = True)

    def _strip_items(item_list):
        # Remove surrounding whitespace from each item; pass non-lists (NaN) through.
        return [item.strip() for item in item_list] if isinstance(item_list, list) else item_list

    def _nan_to_empty(item_list):
        # Series.str.split leaves NaN (a float) untouched; normalize it to an empty list.
        return list() if isinstance(item_list, float) else item_list

    for col_name in association_rules_df.select_dtypes(exclude = np.number).columns:
        # Split the '|'-delimited string into items, then clean each entry.
        association_rules_df[col_name] = (association_rules_df[col_name]
                                          .str.split(delimiter)
                                          .apply(_strip_items)
                                          .apply(_nan_to_empty))

    return association_rules_df

def find_recommendations(association_rules, products_df, product_identifier, reference_items, 
                         excluded_products = [], excluded_link_types = [],
                         force_perfect_match = False, drop_scores = True, filter_source_platform = True, verbose = True):
    """Build a ranked list of recommended products for the given reference items.

    Pipeline: (A) keep the association rules whose antecedents contain all the
    reference item names, (B) explode the consequents into one row per
    recommended item, (C) enrich rows with product metadata from
    ``products_df``, (D) optionally keep only the reference items' source
    platform, (E) rank by confidence/lift/support and attach an Italian
    explanation string, then return the rows as a list of record dicts.

    NOTE(review): mutates its inputs in place -- ``association_rules``
    ('antecedents' upper-cased) and ``products_df`` (identifier column
    upper-cased).

    Parameters
    ----------
    association_rules : pandas.DataFrame
        Mined rules with list-valued 'antecedents'/'consequents' columns and
        'confidence', 'support', 'lift' score columns.
    products_df : pandas.DataFrame
        Product catalogue; expected to expose `product_identifier`,
        'Frequency' and the columns renamed below (Title, Product Type,
        Vendor, SKU, production_areas) -- TODO confirm against the caller.
    product_identifier : str
        Catalogue column joined against the recommended item name
        (e.g. 'Title' or 'Product Type').
    reference_items : list of dict
        Each item needs 'item_name' and 'vendor'; 'inTrentino_source' is read
        when `filter_source_platform` is set.
    excluded_products : list
        Item names to drop from the recommendations.
        NOTE(review): mutable default argument -- never mutated here, but a
        `None` sentinel would be safer.
    excluded_link_types : list
        Linked-collection types whose 'Common - *' columns and items are dropped.
    force_perfect_match : bool
        Keep only rules whose antecedents equal the reference basket exactly.
    drop_scores : bool
        Remove the confidence/support/lift columns from the returned records.
    filter_source_platform : bool
        Keep only items from the reference items' (unique) source platform.
    verbose : bool
        Print the full dataframe instead of a compact one-line-per-item view.

    Returns
    -------
    list of dict
        One record per recommended item; attributes holding empty lists are
        stripped from each record. Empty list when no rules are available.
    """

    # Normalize item names by upper-casing them
    normalize_string = lambda item_list: list(map(str.upper, item_list))
    reference_item_names = normalize_string([item['item_name'] for item in reference_items])
    
    # Guard: nothing can be recommended without mined rules
    if len(association_rules) == 0:
        print(f"[{', '.join(reference_item_names)}] ERROR! No association rules have been found!")
        
        return list()
    association_rules['antecedents'] = association_rules['antecedents'].map(normalize_string)
    
    # PART A: Filter the association rules concerning this reference items
    # check_intersect_cond is True when every reference item appears in the
    # rule's antecedents (the intersection cannot exceed the reference size)
    intersect_cond = lambda item_list: np.intersect1d(item_list, reference_item_names)
    check_intersect_cond = lambda item_list: True if len(intersect_cond(item_list)) >= len(reference_item_names) else False
    
    # Find the minimum number of matching items
    # (i.e., the size of the smallest antecedent basket that still matches)
    test_cond = association_rules['antecedents'].apply(check_intersect_cond)
    min_matching_items = association_rules.loc[test_cond, 'antecedents'].apply(len).min()
    smallest_matching_basket_cond = lambda item_list: len(item_list) == min_matching_items
    
    # Define the final filtering condition
    match_cond = lambda item_list: check_intersect_cond(item_list) and smallest_matching_basket_cond(item_list)
    
    # Force the matching baskets to include only the refererence items
    if force_perfect_match:
        same_item_cond = lambda item_list: np.array_equal(sorted(item_list), sorted(reference_item_names))
        match_cond = lambda item_list: check_intersect_cond(item_list) \
            and smallest_matching_basket_cond(item_list) \
                and same_item_cond(item_list)
    
    # Filtering the association rules    
    filtering_cond = association_rules['antecedents'].apply(match_cond)
    basic_item_ass_rules_df = association_rules[filtering_cond]

    # Generate rows for each consequent item (A --> B & C) --> (A --> B | A --> C)
    item_ass_rules_df = basic_item_ass_rules_df.explode('consequents').reset_index(drop = True)
    item_ass_rules_df = item_ass_rules_df.drop_duplicates(subset = 'consequents')
    
    # Transform the dataset (i.e., rename)
    recommendations = item_ass_rules_df.rename(columns = {'consequents': 'item_name'})
    recommendations['item_name'] = recommendations['item_name'].str.upper()
    
    # Remove excluded products
    cond = ~recommendations['item_name'].isin(excluded_products)
    recommendations = recommendations[cond]
    
    # Map each 'Common - <collection>' column to its bare collection name
    assRule_collectionNames = {assRule_col: assRule_col.split('-')[1].strip()
                               for assRule_col in recommendations.columns 
                               if 'common' in assRule_col.lower()}
            
    # Remove items of excluded link types
    if len(assRule_collectionNames.keys()) > 0:
        
        # Capitalize the items inside every linked-collection column
        for colName in assRule_collectionNames.keys():
            recommendations[colName] = recommendations[colName].apply(
                lambda items: list(map(str.capitalize, items)))
        
        if len(excluded_link_types) > 0:
 
            # Resolve each excluded link type to its full column name(s)
            excluded_col_link = [full_colName for excluded_col in excluded_link_types
                                for full_colName, collection_name in assRule_collectionNames.items() 
                                if excluded_col.lower() in collection_name.lower().split(' ')]
            remaining_links = np.setdiff1d(list(assRule_collectionNames.keys()), excluded_col_link)
            
            # Drop columns concerning excluded links
            recommendations = recommendations.drop(columns = excluded_col_link)
            
            # Keep observations having linked collection in the remaining linked collection columns 
            # NOTE(review): DataFrame.applymap is deprecated since pandas 2.1
            # (renamed to DataFrame.map) -- verify against the pinned version
            dim_linkedCollections = recommendations[remaining_links].applymap(len) > 0
            cond = dim_linkedCollections.any(axis = 1)
            recommendations = recommendations[cond]
            
            # Number of rows discarded by the link filter above
            dropped_items = len(cond) - len(cond[cond == True])
            
            if dropped_items > 0:
                print(f"\nDropping {dropped_items} items concerning the excluded link "\
                    f"(i.e., {', '.join(excluded_col_link)})\n")
    
    # Retrieve further information about the recommended products 
    #linkedCollection_colNames = ['production_areas']
    #products_df = products_df.drop(columns = linkedCollection_colNames)
    products_df[product_identifier] = products_df[product_identifier].str.upper()
    recommendations = recommendations.merge(products_df, how = 'left', left_on = 'item_name', right_on = product_identifier)
    recommendations = recommendations.rename(
        columns = {
            'Title': 'product_name',
            'Product Type': 'item_type', 
            'Vendor': 'item_vendor', 
            'SKU': 'item_sku',
            'production_areas' : 'linked_production_area'}
        )
    # Select only information infer from one product in case the identifier used is the product type
    recommendations = recommendations.sort_values(by = 'Frequency', ascending = False)
    recommendations = recommendations.drop_duplicates(subset = 'item_name').reset_index(drop = True)
    
    # Drop unnecessary columns 
    if len(recommendations) > 0:
        cols_to_delete = ['Type id', 'Frequency', "indaco_sku", 'refrigerated', 'weight [grams]', 'warehouses']
        if product_identifier == 'Title':
            cols_to_delete.append('product_name')
        elif product_identifier == 'Product Type':
            cols_to_delete.extend(['item_sku', 'product_name', 'item_vendor', 'inTrentino_source', 'linked_production_area'])
        # Only drop the columns actually present after the merge
        actual_cols_to_delate = [col for col in recommendations.columns if col in cols_to_delete]

        recommendations = recommendations.drop(columns = actual_cols_to_delate)

    if filter_source_platform and product_identifier != 'Product Type':
        # np.unique yields a sorted array of the distinct source platforms
        reference_sourcePlatforms = np.unique([item['inTrentino_source'] for item in reference_items])
        
        # Filter recommended items if there is a unique source platform among the reference products
        if len(reference_sourcePlatforms) == 1:
            reference_sourcePlatform = reference_sourcePlatforms[0]
          
            original_recommendation = recommendations.copy()            
            cond = recommendations['inTrentino_source'] == reference_sourcePlatform
            recommendations = recommendations[cond]

            # Items that did not join against the catalogue (NaN source platform)
            unknown_products = original_recommendation[original_recommendation['inTrentino_source'].isna()]
            if len(unknown_products) > 0:
                print(f"\n\t[INFO] {len(unknown_products)} products have not been found. So sad :/")
                print('\t-->', '\n\t--> '.join(unknown_products['item_name'].values))
                
            drop_items = original_recommendation.drop(index = recommendations.index)
            if len(drop_items) - len(unknown_products) > 0:
                print(f"\n\t[INFO] Dropped {len(original_recommendation) - len(recommendations)} "\
                    "associated items which come from the other platform.")
                print('\t-->', '\n\t--> '.join(drop_items['item_name'].values))
                    
    # Sort dataframe
    recommendations = recommendations.sort_values(by = ['confidence', 'lift', 'support'], ascending = False).reset_index(drop = True)
    
    # Create the metadata column 
    recommendations.insert(0, 'rank', recommendations.index + 1)

    # Explainations statements
    # (Italian explanation fragments keyed by linked-collection column name)
    linked_collection_types = {
        'Common - production_areas'  : ' e sono tipici del territorio',
    }
    
    # 'linked_production_area':  ' e sono tipici del territorio'

    recommendations['explaination'] = recommendations.apply(
        func = lambda df_row: buildUp_explaination(linked_collection_types, df_row),
        axis = 1)
   
    # Antecedent/consequent bookkeeping columns are internal to the mining step
    unnecessary_columns  = [col_name for col_name in recommendations.columns 
                            if ('antecedent' in col_name.lower()) or ('consequent' in col_name.lower())]
    
    if drop_scores:
        unnecessary_columns.extend(['confidence', 'support', 'lift'])
    recommendations.drop(columns = unnecessary_columns, inplace = True)
    
    # Remove artefacts from the column names
    # ('Common - X' becomes 'linked_x'; everything else is simply lower-cased)
    cleaned_columns = []
    for col_name in recommendations.columns:
        if col_name == 'inTrentino_source':
            new_colName = col_name
        elif 'common' in col_name.lower():
            if 'linked' in col_name.lower():
                new_colName = col_name.replace('Common - ', '').replace(' ', '_').lower()
            else:
                new_colName = col_name.replace('Common -', 'Linked').replace(' ', '_').lower() # 
        else:
            new_colName = col_name.lower()
        cleaned_columns.append(new_colName)
    recommendations.columns = cleaned_columns
    
    # Visualize outcome
    # (e.g., "ITEM NAME [by VENDOR]")
    item_stringfy = [item['item_name'] + (' [by ' + item['vendor'].upper()+ ']' 
                                          if isinstance(item['vendor'], str) else ' ') 
                     for item in reference_items]
    
    if len(recommendations) > 0:
        print("\n" + "-" * 110)
        print(f"[Assrules] REFERENCE ITEMS ({len(reference_items)}):", ' | '.join(item_stringfy), "-->",
            len(recommendations), "association rules")
        
        if not force_perfect_match and len(basic_item_ass_rules_df) > 0:
            print(f"Min matching antecedents items: {min_matching_items} "\
                f"(e.g., {' | '.join(basic_item_ass_rules_df.iloc[0].loc['antecedents'])})")
        print("-" * 110)
    
        if verbose: 
            print(recommendations)
        else:
            print("-->", '\n--> '.join(recommendations.apply(
                func = lambda df_row: f"({int(df_row.name) + 1}) [{df_row['item_type']}] {df_row['item_name']} " + 
                    (f"--> inTrentino: {str(df_row['inTrentino_source']).upper()}" if 'inTrentino_source' in df_row.index else ''), 
                axis = 1)))
    
    # Return a dictionary
    recommendations = recommendations.to_dict(orient = 'records')
    
    # Remove unnecesary attributes
    # (any attribute holding an empty list is stripped from the record)
    for idk, recommendation in enumerate(recommendations.copy()):
        for key, attribute in recommendation.copy().items():
            if isinstance(attribute, list) and (len(attribute) == 0):
                recommendations[idk].pop(key) 
    
    return recommendations

def find_products_byCategory(all_products, item_type, unique_product_identifier, reference_vendors, 
                             reference_sourcePlatforms, collection_items = None):
    """Retrieve the catalogue products belonging to a given category.

    Filter conditions are combined with AND:
      (1) 'Product Type' == item_type              [always]
      (2) 'Vendor' in reference_vendors            [if given; relaxed when it
                                                    yields zero products]
      (3) 'Title' in collection_items              [if given]
      (4) 'inTrentino_source' == the platform      [if exactly one is given]

    Parameters
    ----------
    all_products : pandas.DataFrame
        Catalogue with 'Product Type', 'Vendor', 'Title', 'SKU', 'Frequency',
        'Type id', 'inTrentino_source' and 'production_areas' columns.
    item_type : str
        Category to retrieve.
    unique_product_identifier : str
        Kept for interface compatibility; not used by the current logic.
    reference_vendors : list or None
        Vendors to prefer; ignored when they carry no products in the category.
    reference_sourcePlatforms : sequence or None
        Source platforms of the reference items; applied only when unique.
    collection_items : list or None
        Optional whitelist of product titles.

    Returns
    -------
    tuple(list of dict, list of str)
        Linked products (sorted by historical 'Frequency' descending,
        de-duplicated on Title+SKU, renamed to item_* keys) and the names of
        the filter conditions actually applied.
    """
    # Condition (1): Type-based condition (always applied)
    typeBased_cond = all_products['Product Type'] == item_type
    filtering_method = typeBased_cond
    filtering_condition_names = ['type_based']

    # Condition (2): Vendor-based condition.
    # NOTE: `is not None` -- `!= None` is elementwise (hence ambiguous) when a
    # numpy array is passed, as the callers do via np.unique.
    if reference_vendors is not None:
        vendorBased_cond = all_products['Vendor'].isin(reference_vendors)
        filtering_method = typeBased_cond & vendorBased_cond
        filtering_condition_names.append('vendor_based')

    # Intermediate step (TEST: Cond 1 & cond 2)
    linked_items = all_products.loc[filtering_method, :]

    # In case the vendor has zero products for this category
    # --> Retrieve products also from other vendors
    if (len(linked_items) == 0) and (reference_vendors is not None):
        filtering_condition_names.remove('vendor_based')
        filtering_method = typeBased_cond

    # Condition (3): Item-based condition
    if collection_items is not None:
        itemBased_cond = all_products['Title'].isin(collection_items)
        filtering_method = filtering_method & itemBased_cond
        filtering_condition_names.append('collectionItem_based')

    # Condition (4): source platform condition (only when unambiguous)
    if reference_sourcePlatforms is not None and len(reference_sourcePlatforms) == 1:
        sourcePlatform_cond = all_products['inTrentino_source'] == reference_sourcePlatforms[0]
        filtering_method = filtering_method & sourcePlatform_cond
        filtering_condition_names.append('sourcePlatform_based')

    # Filter the products and rank them by historical purchase frequency
    linked_items = all_products.loc[filtering_method, :]
    linked_items = linked_items.sort_values(by = 'Frequency', ascending = False)

    # Save the list of linked items (deduplicated, internal columns dropped)
    linked_items = linked_items.drop_duplicates(subset = ['Title', 'SKU'])
    linked_items = linked_items.drop(columns = ['Type id', 'Frequency', 'Product Type'])
    linked_items = linked_items.rename(columns = {
        'Title': 'item_name', 
        'SKU': 'item_sku', 
        'Vendor': 'item_vendor',
        'production_areas': 'linked_production_area'})
    linked_items = linked_items.to_dict(orient = 'records')

    return linked_items, filtering_condition_names

def addItems_byCategory(recommendations, reference_items, all_products, unique_product_identifier,
                        filter_source_platform = False, single_item = False):
    """Attach concrete products to category-level recommendations.

    For each recommendation (whose 'item_type' names a category), looks up the
    linked products via ``find_products_byCategory``; recommendations with no
    linked products are removed from the list. Each recommendation dict is
    mutated in place: either merged with its top product (``single_item``) or
    extended with a 'category_items' list.

    Parameters
    ----------
    recommendations : list of dict
        Category recommendations; mutated and returned.
    reference_items : list of dict
        Reference items providing 'vendor' and (optionally) 'inTrentino_source'.
    all_products : pandas.DataFrame
        Product catalogue forwarded to ``find_products_byCategory``.
    unique_product_identifier : str
        Forwarded to ``find_products_byCategory``.
    filter_source_platform : bool
        Restrict linked products to the reference items' source platform(s).
    single_item : bool
        Merge only the most frequent linked product into each recommendation.

    Returns
    -------
    list of dict
        The (possibly shortened) recommendations list.
    """
    reference_vendors = [item['vendor'] for item in reference_items]
    if filter_source_platform:
        # Cast to a plain list: a numpy array has ambiguous truthiness in the
        # `!= None` / `is not None` checks performed downstream.
        reference_sourcePlatforms = list(np.unique([item['inTrentino_source'] for item in reference_items]))
    else:
        reference_sourcePlatforms = None

    # Initialized up front so the summary print below cannot raise a
    # NameError when `recommendations` is empty.
    filtering_methods = []

    for recommendation in recommendations.copy():

        # Delete artefact fields: the item "name" is actually a category here
        recommendation.pop('item_name', None)
        recommendation.pop('product_name', None)

        linked_items, filtering_methods = find_products_byCategory(all_products, recommendation['item_type'], 
                                                                   unique_product_identifier, reference_vendors, 
                                                                   reference_sourcePlatforms)
        if len(linked_items) == 0:
            recommendations.remove(recommendation)
            print(f"[INFO] Dropping '{recommendation['item_type']}' due to a lack of linked items.")
            continue

        if single_item:
            # Keep only the most frequent linked product
            recommendation.update(linked_items[0])
        else:
            recommendation['category_items'] = linked_items

    print("\n[INFO] Filtering approach: Products have been filtered according to:", 
              ' | '.join(filtering_methods))

    if single_item:
        print('[INFO] The list of products have been shrinked to only one product')

    return recommendations