Your IP : 216.73.217.13


Current Path : /home/deltalab/PMS/recommendations/recommender-system-batch/_library/
Upload File :
Current File : //home/deltalab/PMS/recommendations/recommender-system-batch/_library/toolkit.py

import numpy as np
import pandas as pd

# sys.path.append(join(abspath(__file__), '..', '..'))

# ---------------------------
# STRUCTURE: {
    # 'vendor': '', 
    # 'item_name': '', 
    # 'item_type': '', 
    # 'type_id': , 
    # 'Frequency': , 
    # 'inTrentino_source': }
# ----------------------------
def extract_referenceProduct(product, product_identifier):
    """Turn a product record (pd.Series) into a normalized reference-item dict.

    Parameters
    ----------
    product : pd.Series
        A single product record; must contain a 'Title' entry.
    product_identifier : str
        Label of the entry to use as item identifier (e.g. 'SKU');
        its value is copied into 'item_name' and the original entry dropped.

    Returns
    -------
    dict
        The product with normalized keys: 'Product Type' -> 'item_type',
        'Title' -> 'product_name', spaces replaced with underscores and
        everything lower-cased except the 'inTrentino_source' flag.
    """
    # Normalize the title
    product['Title'] = product['Title'].capitalize()

    if product_identifier in product.index:
        # Promote the identifier value and drop the original entry
        product['item_name'] = product[product_identifier]
        product.drop(index = product_identifier, inplace = True)
    else:
        # FIX: typo in the diagnostic message ("to to" -> "to do")
        print(f"What am I supposed to do? --> product_identifier: {product_identifier}")

    # Rename the columns
    product = product.rename(index = {'Product Type': 'item_type', 'Title': 'product_name'})
    product.index = [col.replace(' ', '_') for col in product.index]
    # Keep the mixed-case 'inTrentino_source' flag untouched while
    # lower-casing every other label
    product.index = [col.lower() if col != 'inTrentino_source' else col
                     for col in product.index]

    # Turn the item into a dictionary
    return product.to_dict()

def buildUp_explaination(linked_collection_types, recommendation, evidence_coPurchases = True):
    """Build the Italian explanation string for a recommendation.

    Parameters
    ----------
    linked_collection_types : dict
        Maps a collection-type name to the sentence fragment describing it.
    recommendation : dict
        Recommendation record; keys are matched as substrings of the
        collection-type names, values are a collection name (str), a list
        of names, or NaN when missing.
    evidence_coPurchases : bool
        When True, mention that the products are typically bought together.

    Returns
    -------
    str
        The assembled explanation (collection names are upper-cased).
    """
    # Base string
    explaination_string = 'Prodotti che'
    if evidence_coPurchases:
        explaination_string += ' sono tipicamente acquistati insieme'

    for collection_type, type_explaination in linked_collection_types.items():

        # A recommendation key matches when it is a substring of the type name
        matching_keys = [key for key in recommendation.keys() if key in collection_type]
        if not matching_keys:
            continue
        actual_key = matching_keys[0]

        linked_collections = recommendation[actual_key]

        # BUG FIX: pd.isnull(<list>) returns an ndarray whose truth value is
        # ambiguous for lists of length != 1 — only null-check scalar values.
        if not isinstance(linked_collections, list) and pd.isnull(linked_collections):
            continue

        # Generate a string that includes the collection references
        if len(linked_collections) > 0:
            explaination_string += type_explaination

            if isinstance(linked_collections, list):
                collection_names = ', '.join([collection.upper() for collection in linked_collections])
            else:
                collection_names = linked_collections
            explaination_string += ' ' + collection_names

    return explaination_string
                
def add_recommendationSource(recommendations, source_name):
    """Tag each recommendation dict with the recommender system it came from.

    Every dict in `recommendations` gets an 'rs_source' entry set to
    `source_name`. The list is modified in place and returned.
    """
    if not recommendations:
        return recommendations

    for entry in recommendations:
        entry.update(rs_source=source_name)

    return recommendations

def jaccard_similarity(stringA, stringB, verbose = False):
    """Compute the Jaccard similarity of two space-separated strings.

    The strings are tokenized on single spaces; the score is
    |intersection| / |union| of the two token sets, in [0, 1].
    Set `verbose` to print the intermediate sets.
    """
    tokens_a = np.array(stringA.split(' '))
    tokens_b = np.array(stringB.split(' '))

    # Shared vocabulary of the two strings
    intersection = np.intersect1d(tokens_a, tokens_b)

    # Combined vocabulary of the two strings
    union = np.union1d(tokens_a, tokens_b)

    # Ratio of the two set sizes
    similarity = len(intersection) / len(union)

    if verbose:
        print("\nString A:", tokens_a)
        print("String B:", tokens_b)
        print("Intersection:", intersection)
        print("Union", union)
        print("Jaccard similarity", similarity)

    return similarity

def extract_baskets_of_orders(orders_df, product_identifier):
    """Aggregate individual order lines into unique baskets with frequencies.

    Parameters
    ----------
    orders_df : pd.DataFrame
        Order lines; must contain `product_identifier` and 'Transaction id'.
    product_identifier : str
        Column holding the product identifier (upper-cased before grouping).

    Returns
    -------
    pd.DataFrame
        One row per unique basket with columns 'Items' (list of upper-cased
        identifiers), 'Frequency' and 'Basket dimension', sorted by
        frequency and then basket size, both descending.
    """
    # Pre-processing (take a copy so the .str.upper() assignment cannot
    # trigger a SettingWithCopy warning on the sliced frame)
    products_df = orders_df.dropna(subset = [product_identifier])
    products_df = products_df[[product_identifier, 'Transaction id']].copy()
    products_df[product_identifier] = products_df[product_identifier].str.upper()

    # Group the single ordered items according to the transaction
    grouped_products_df = products_df.groupby(by = 'Transaction id', as_index = False).agg(lambda x: set(x))

    # Group and count the unique baskets of orders.
    # FIX: name the index via rename_axis so reset_index() yields an 'Items'
    # column on every pandas version — on pandas >= 2 the old
    # rename(columns={'index': 'Items'}) no longer matched (the column was
    # named after the original series), breaking the 'Items' lookups below.
    baskets_df = grouped_products_df[product_identifier].map('|'.join).value_counts()
    baskets_df = baskets_df.rename('Frequency').rename_axis('Items').reset_index()
    baskets_df['Items'] = baskets_df['Items'].str.split('|')
    baskets_df['Basket dimension'] = baskets_df['Items'].apply(len)
    baskets_df.sort_values(by = ['Frequency', 'Basket dimension'], inplace = True, ascending = False)
    baskets_df = baskets_df.reset_index(drop = True)

    return baskets_df

def generate_fake_products(products_df):
    """Append four synthetic products to `products_df` for testing purposes.

    Two "martian" products get a random historical purchase frequency drawn
    between the observed min and max; two "terrestrial" products simulate
    brand-new items (Frequency 0). Only the columns already present in
    `products_df` are filled in for the fakes.

    Returns a new DataFrame; the input frame is not modified.
    """

    def _fake_product(title, sku, product_type, type_id, in_trentino, frequency):
        # Build a record restricted to the columns of the original frame
        record = {}
        if 'Vendor' in products_df.columns:
            record['Vendor'] = 'Mars S.p.A.'
        if 'SKU' in products_df.columns:
            record['SKU'] = sku
        if 'Title' in products_df.columns:
            record['Title'] = title
        if 'Product Type' in products_df.columns:
            record['Product Type'] = product_type
        if 'Type id' in products_df.columns:
            record['Type id'] = type_id
        if 'inTrentino_source' in products_df.columns:
            record['inTrentino_source'] = in_trentino
        if 'Frequency' in products_df.columns:
            record['Frequency'] = frequency
        return record

    fake_products = []

    # Generate fake products with a plausible (random) purchase frequency
    for idx, product_name in enumerate(['Pomodori marziani', 'Insalata marziana']):
        frequency = None
        if 'Frequency' in products_df.columns:
            frequency = np.random.default_rng().uniform(
                low = products_df['Frequency'].min(),
                high = products_df['Frequency'].max())
        fake_products.append(_fake_product(
            product_name, f'Mrsverd-0{idx + 1}', 'Frutta e verdura', 430, False, frequency))

    # Simulate brand-new products (never purchased yet)
    for idx, product_name in enumerate(['Pomodori terrestri', 'Insalata terrestri']):
        fake_products.append(_fake_product(
            product_name, f'Mrsearth-0{idx + 1}', 'Prodotti terrestri', 0, True, 0))

    # Append the new products to the original set
    fake_products_df = pd.DataFrame([pd.Series(fake_product) for fake_product in fake_products])
    products_df = pd.concat([products_df, fake_products_df], axis = 0, ignore_index = True)

    return products_df

def preProcessing_collectionColumns(orders_df, cols_to_split = ('Linked regions', 'Linked experiences', 'Linked recipes'),
                                    delimiter = '|'):
    """Split delimiter-joined collection columns into normalized name lists.

    For each column of `cols_to_split` that exists in `orders_df`, the
    string value is split on `delimiter`, NaN becomes an empty list, and
    every item is whitespace-stripped and capitalized. The frame is
    modified in place and returned.

    Parameters
    ----------
    orders_df : pd.DataFrame
        Orders with optional pipe-joined collection columns.
    cols_to_split : sequence of str
        Candidate column names. NOTE: the default is a tuple (not a list)
        to avoid the shared mutable-default-argument pitfall.
    delimiter : str
        Separator used inside the collection strings.
    """
    actual_colsToSplit = [col for col in cols_to_split if col in list(orders_df.columns)]
    for col_name in actual_colsToSplit:

        # Split the strings into items (missing values stay NaN)
        orders_df[col_name] = orders_df[col_name].str.split(delimiter)

        # NaN (a float) means "no linked collections" -> empty list;
        # otherwise strip whitespace and normalize each item name.
        # (Single pass replaces the three separate .apply() passes; the
        # old lambda also shadowed the builtin `list`.)
        orders_df[col_name] = orders_df[col_name].apply(
            lambda items: [] if isinstance(items, float)
            else [item.strip().capitalize() for item in items])
    return orders_df