Your IP : 216.73.217.13


Current Path : /home/deltalab/PMS/recommendations/recomsys-datapreparation-batch/_library/
Upload File :
Current File : //home/deltalab/PMS/recommendations/recomsys-datapreparation-batch/_library/toolkit.py

from tabulate import tabulate

import numpy as np

def visualize_orders(orders_df, last_k_orders = -1):
    """Print the orders table, optionally limited to the highest transaction ids.

    Args:
        orders_df: DataFrame of ordered items. The columns 'Transaction id',
            'Product Type', 'product_name' and 'quantity' are read; the
            'indaco_sku' column is removed from the printed table when present.
        last_k_orders: How many transactions to keep, taken from the highest
            'Transaction id' downwards (presumably the most recent ones --
            confirm that ids are chronological). ``-1`` (the default), any
            other negative value, or ``None`` shows every transaction.

    Returns:
        None. Everything is written to stdout.
    """
    transaction_ids = orders_df['Transaction id'].unique()

    print("\n" + 100 * "-")
    print("-" * 41, f"Transaction ({len(transaction_ids)})", "-" * 41)
    print(100 * "-", "\n")

    # Only a non-negative count is a real limit. Slicing with another negative
    # value would silently drop transactions from the wrong end of the list.
    if last_k_orders is not None and last_k_orders >= 0:
        kept_ids = sorted(transaction_ids, reverse = True)[:last_k_orders]
        orders_df = orders_df[orders_df['Transaction id'].isin(kept_ids)]

        print("\n" + 150 * "-")
        print("-" * 67, f"LAST {last_k_orders} orders", "-" * 68)
        print(150 * "-")

    # The SKU is not shown; tolerate frames that never had the column.
    orders_df = orders_df.drop(columns = ['indaco_sku'], errors = 'ignore')

    # Group rows of one transaction together, largest quantity first on ties.
    orders_df = orders_df.sort_values(
        by = ['Transaction id', 'Product Type', 'product_name', 'quantity'],
        ascending = [True, True, True, False]).reset_index(drop = True)

    print(tabulate(orders_df, headers = orders_df.columns, tablefmt = "pretty"))
    
def visualize_products(indaco_products, num_products = -1):
    """Print the product catalogue, optionally only the most frequent items.

    Args:
        indaco_products: DataFrame of products. It must contain the columns
            'Title', 'SKU', 'Product Type', 'Vendor', 'Seller',
            'production_areas', 'inTrentino_source' and 'Frequency'.
        num_products: Number of rows to show, ranked by descending
            'Frequency'. ``-1`` (the default), any other negative value, or
            ``None`` prints the whole catalogue ordered by title.

    Returns:
        None. Everything is written to stdout.
    """
    # Visualize
    cols_to_visualize = ['Title',  'SKU', 'Product Type', 'Vendor', 'Seller', 'production_areas',
                         'inTrentino_source', 'Frequency']

    print("\n" + 120 * "-")
    print("-" * 49, f"INDACO products ({len(indaco_products)})", "-" * 49)
    print(120 * "-", "\n")

    products_view = (indaco_products[cols_to_visualize]
                     .sort_values(by = ['Title', "Product Type"])
                     .reset_index(drop = True))

    # Only a non-negative count is a real limit. Slicing with another negative
    # value would cut rows off the end instead of selecting the top items.
    if num_products is not None and num_products >= 0:
        print("-" * 15, "\n" + "-" * 15)
        print(f"TOP {num_products} items")
        print("-" * 15, "\n"  + "-" * 15, "\n")
        # mergesort is stable, so products with equal frequency keep the
        # title ordering established above and the output is reproducible.
        products_view = products_view.sort_values(
            by = ['Frequency'], ascending = False, kind = "mergesort")[:num_products]

    print(tabulate(products_view, headers = cols_to_visualize, tablefmt = "pretty"))
    print("\n")
    
def visualize_collection(df, collection_type):
    """Print how many items fall in each (collection, vendor, product type) group.

    Rows whose ``collection_type`` value is the empty string are ignored. The
    remaining rows are grouped by the collection column, 'VENDOR' and
    'PRODUCT TYPE' (column names are compared after upper-casing) and the
    number of rows per group is printed, followed by the share of ``df``
    that those rows represent.

    Args:
        df: DataFrame with string column labels, including ``collection_type``
            and columns that upper-case to 'VENDOR' and 'PRODUCT TYPE'.
        collection_type: Name of the column holding the collection value. If
            it is not a column of ``df`` a notice listing the available
            columns is printed and nothing else happens.

    Returns:
        None. Everything is written to stdout.
    """
    if collection_type not in df.columns:
        print(f'\n"{collection_type}" not found in the columns!')
        print('-->', ' | '.join(df.columns), "\n")
        return

    # Keep only the items that belong to some collection.
    collection_items = df[df[collection_type] != '']

    # Minor visual change: upper-case headers. rename() returns a new frame,
    # so neither the caller's frame nor the filtered slice is modified.
    collection_items = collection_items.rename(columns = str.upper)

    # dict.fromkeys drops duplicates while keeping order, for the case where
    # the collection column is itself the vendor or product-type column.
    group_keys = list(dict.fromkeys(
        [collection_type.upper(), 'VENDOR', 'PRODUCT TYPE']))

    # size() counts rows per group, so the figure does not depend on another
    # column being present or free of missing values.
    aggregated = (collection_items
                  .groupby(by = group_keys)
                  .size()
                  .reset_index(name = 'ITEMS')
                  .sort_values(by = group_keys, ascending = True))

    print("\n" + 70 * "-")
    print("-" * 20, f'Aggregated {collection_type.lower()}', "-" * 21)
    print(70 * "-", "\n")

    print(61 * "-")
    print(61 * "-")
    print(tabulate(aggregated,
                   headers = aggregated.columns,
                   tablefmt = 'pretty'))
    print(61 * "-")
    print(61 * "-", "\n")

    total_items = aggregated['ITEMS'].sum()
    total_rows = len(df)
    # An empty input frame would otherwise divide by zero.
    if total_rows:
        percentage = np.round((total_items / total_rows) * 100, 2)
    else:
        percentage = 0.0

    print('\t\t' + 30 * "-")
    print('\t\t' + 30 * "-")
    print(f"\t\tTOTAL ITEMS: {total_items}/{total_rows} "\
        f"({percentage} %)")
    print('\t\t' + 30 * "-")
    print('\t\t' + 30 * "-", "\n")