# Current Path : /home/deltalab/PMS/recommendations/recomsys-datapreparation-batch/_library/
# Current File : //home/deltalab/PMS/recommendations/recomsys-datapreparation-batch/_library/toolkit.py
from tabulate import tabulate
import numpy as np
def visualize_orders(orders_df, last_k_orders = -1):
    """Print the orders as a formatted table.

    Args:
        orders_df: DataFrame of order rows. The columns 'Transaction id',
            'Product Type', 'product_name' and 'quantity' are used for
            sorting; 'indaco_sku' is dropped before printing.
        last_k_orders: Number of transactions to keep, taken from the
            largest transaction ids downwards. The default -1 keeps all
            transactions.

    Returns:
        None. Everything is written to stdout.
    """
    header_rule = 100 * "-"
    print("\n" + header_rule)
    # The count in the banner always refers to the unfiltered input.
    n_transactions = len(orders_df['Transaction id'].unique())
    print("-" * 41, f"Transaction ({n_transactions})", "-" * 41)
    print(header_rule, "\n")

    if last_k_orders != -1:
        ids_descending = sorted(orders_df['Transaction id'].unique(), reverse=True)
        kept_ids = ids_descending[:last_k_orders]
        keep_mask = orders_df['Transaction id'].isin(kept_ids)
        orders_df = orders_df[keep_mask]

        subset_rule = 150 * "-"
        print("\n" + subset_rule)
        print("-" * 67, f"LAST {last_k_orders} orders", "-" * 68)
        print(subset_rule)

    # Presentation tweaks: hide the internal SKU and order the rows so that,
    # within a transaction / product type / product, larger quantities come first.
    sort_keys = ['Transaction id', 'Product Type', 'product_name', 'quantity']
    sort_directions = [True, True, True, False]
    table = orders_df.drop(columns=['indaco_sku'])
    table = table.sort_values(by=sort_keys, ascending=sort_directions)
    table = table.reset_index(drop=True)

    print(tabulate(table, headers=table.columns, tablefmt="pretty"))
def visualize_products(indaco_products, num_products = -1):
    """Print the product catalogue as a formatted table.

    Args:
        indaco_products: DataFrame containing at least the columns listed
            in ``cols_to_visualize`` below.
        num_products: When different from -1, only the ``num_products``
            rows with the highest 'Frequency' are printed. The default -1
            prints every product ordered by 'Title' and 'Product Type'.

    Returns:
        None. Everything is written to stdout.
    """
    cols_to_visualize = [
        'Title', 'SKU', 'Product Type', 'Vendor', 'Seller', 'production_areas',
        'inTrentino_source', 'Frequency',
    ]

    banner_rule = 120 * "-"
    print("\n" + banner_rule)
    print("-" * 49, f"INDACO products ({len(indaco_products)})", "-" * 49)
    print(banner_rule, "\n")

    # Default view: alphabetical by title, then product type.
    products_view = indaco_products[cols_to_visualize]
    products_view = products_view.sort_values(by=['Title', "Product Type"])
    products_view = products_view.reset_index(drop=True)

    if num_products != -1:
        short_rule = "-" * 15
        print(short_rule, "\n" + short_rule)
        print(f"TOP {num_products} items")
        print(short_rule, "\n" + short_rule, "\n")
        # Top-N view: re-rank by frequency; the index keeps the positions
        # assigned by the alphabetical ordering above.
        by_frequency = products_view.sort_values(by=['Frequency'], ascending=False)
        products_view = by_frequency[:num_products]

    print(tabulate(products_view, headers=cols_to_visualize, tablefmt="pretty"))
    print("\n")
def visualize_collection(df, collection_type):
    """Print per-group item counts for one collection column.

    Rows whose ``collection_type`` value is the empty string are discarded.
    The remaining rows are grouped by the collection column, vendor and
    product type, and the number of non-null 'FREQUENCY' values per group
    is printed as 'ITEMS'. A final summary shows how many rows of ``df``
    were covered by the aggregation.

    Args:
        df: DataFrame holding ``collection_type`` plus columns whose
            upper-cased names are 'VENDOR', 'PRODUCT TYPE' and 'FREQUENCY'.
            The frame is not modified.
        collection_type: Name (string) of the column to aggregate on.

    Returns:
        None. Everything is written to stdout. If ``collection_type`` is
        not a column of ``df``, the available columns are listed and the
        function returns without printing any table.
    """
    if collection_type not in df.columns:
        print(f'\n"{collection_type}" not found in the columns!')
        # map(str, ...) keeps this hint working when some labels are not strings.
        print('-->', ' | '.join(map(str, df.columns)), "\n")
        return

    group_keys = [collection_type.upper(), 'VENDOR', 'PRODUCT TYPE']

    # Keep only the rows that actually belong to a collection. The explicit
    # copy lets us relabel the columns without touching the caller's frame
    # and without pandas chained-assignment warnings.
    collection_items = df[df[collection_type] != ''].copy()
    # Upper-case headers purely for display; str() tolerates non-string labels.
    collection_items.columns = [str(label).upper() for label in collection_items.columns]

    # count() yields, per group, the number of non-null values of every other
    # column; only the FREQUENCY count is kept and shown as ITEMS.
    aggregated = collection_items.groupby(by=group_keys, as_index=False).count()
    aggregated = aggregated[group_keys + ['FREQUENCY']]
    aggregated = aggregated.rename(columns={'FREQUENCY': 'ITEMS'})
    aggregated = aggregated.sort_values(by=group_keys, ascending=True)

    title_rule = 70 * "-"
    table_rule = 61 * "-"
    summary_rule = '\t\t' + 30 * "-"

    print("\n" + title_rule)
    print("-" * 20, f'Aggregated {collection_type.lower()}', "-" * 21)
    print(title_rule, "\n")

    print(table_rule)
    print(table_rule)
    print(tabulate(aggregated, headers=aggregated.columns, tablefmt='pretty'))
    print(table_rule)
    print(table_rule, "\n")

    total_items = aggregated['ITEMS'].sum()
    total_rows = len(df)
    # An empty input frame would otherwise evaluate 0/0 and print "nan %".
    if total_rows:
        coverage = np.round((total_items / total_rows) * 100, 2)
    else:
        coverage = 0.0

    print(summary_rule)
    print(summary_rule)
    print(f"\t\tTOTAL ITEMS: {total_items}/{total_rows} ({coverage} %)")
    print(summary_rule)
    print(summary_rule, "\n")