| Current Path : /proc/thread-self/root/home/deltalab/PMS/recommendations/user_profiling/_library/ |
| Current File : //proc/thread-self/root/home/deltalab/PMS/recommendations/user_profiling/_library/data_loader.py |
from tabulate import tabulate
import numpy as np
from _library.mongodb_utils import enhanced_products_df
from components.DbService import DbService
def _print_production_area_report(products_df):
    """Print a console summary of the production areas found in the products table.

    Three things are printed: a table counting items per
    (production area, vendor, product type), the share of products covered by
    that table, and the list of titles whose ``production_areas`` is the empty
    string.

    Args:
        products_df: Enhanced products DataFrame. The code reads the columns
            ``production_areas`` and ``Title`` and, after upper-casing the
            column names, ``VENDOR``, ``PRODUCT TYPE`` and ``FREQUENCY``.
    """
    group_keys = ['PRODUCTION_AREAS', 'VENDOR', 'PRODUCT TYPE']

    # Only products with a non-empty production area take part in the table.
    with_area = products_df[products_df['production_areas'] != ''].copy()
    with_area.columns = list(map(str.upper, with_area.columns))

    # count() yields, per group, the number of non-null values of every
    # remaining column; the FREQUENCY count is the one reported as ITEMS.
    aggregated = with_area.groupby(by=group_keys, as_index=False).count()
    aggregated = aggregated[group_keys + ['FREQUENCY']]
    # Plain reassignment instead of inplace=True: renaming a column selection
    # in place can trigger pandas' chained-assignment warning.
    aggregated = aggregated.rename(columns={'FREQUENCY': 'ITEMS'})
    aggregated = aggregated.sort_values(by=group_keys, ascending=True)

    print("\n" + 70 * "-")
    print("-" * 20, 'Aggregated production areas', "-" * 21)
    print(70 * "-", "\n")

    print(61 * "-")
    print(61 * "-")
    print(tabulate(aggregated, headers=aggregated.columns, tablefmt='pretty'))
    print(61 * "-")
    print(61 * "-", "\n")

    total_items = aggregated['ITEMS'].sum()
    total_products = len(products_df)
    # An empty products table would make the ratio a division by zero.
    if total_products:
        percentage = np.round((total_items / total_products) * 100, 2)
    else:
        percentage = 0.0

    print('\t\t' + 30 * "-")
    print('\t\t' + 30 * "-")
    print(f"\t\tTOTAL ITEMS: {total_items}/{total_products} ({percentage} %)")
    print('\t\t' + 30 * "-")
    print('\t\t' + 30 * "-", "\n")

    without_area = products_df.loc[products_df['production_areas'] == '', 'Title'].unique()
    print(f"ITEMS WITHOUT A PRODUCTION AREA ({len(without_area)}):", "\n" + "-" * 50)
    # str() keeps the listing from crashing on a missing or non-string title.
    print('\n'.join(map(str, without_area)))


def _attach_product_type(orders, products_df):
    """Add a ``Product Type`` column to the orders and drop unmatched rows.

    Each order ``sku`` is matched case-insensitively against the products'
    ``indaco_sku``. When several products share a SKU, the product type of the
    first one (in ``products_df`` row order) is used.

    Args:
        orders: Orders DataFrame with a ``sku`` column. The new column is
            inserted into this object in place, at position 2.
        products_df: Products DataFrame with ``indaco_sku`` and
            ``Product Type`` columns.

    Returns:
        A new DataFrame holding only the orders for which a non-null product
        type was found.
    """
    # Build the lookup once instead of rescanning the products for every order.
    type_by_sku = {}
    sku_keys = products_df['indaco_sku'].str.upper()
    for sku_key, product_type in zip(sku_keys, products_df['Product Type']):
        # Missing SKUs come back from .str.upper() as non-strings; skip them.
        if isinstance(sku_key, str):
            type_by_sku.setdefault(sku_key, product_type)

    def lookup(sku):
        # A missing / non-string order SKU cannot match any product.
        if not isinstance(sku, str):
            return np.nan
        return type_by_sku.get(sku.upper(), np.nan)

    orders.insert(loc=2, column='Product Type', value=orders['sku'].apply(lookup))

    # Ordered products that do not exist any more have no type: drop them.
    return orders.dropna(subset=['Product Type'])


def load_indacoProducts(db_type, visualize_aggregated_territories=True):
    """Load products and orders through ``DbService`` and link orders to product types.

    Args:
        db_type: Backend identifier passed to ``DbService``. The value
            ``"mongodb"`` selects the module-level ``enhanced_products_df``
            helper; any other value uses ``db.enhanced_products_df``.
        visualize_aggregated_territories: When true, print the production-area
            report to stdout.

    Returns:
        A tuple ``(db, indacoProducts_df, orders, categories)``: the service
        instance, the enhanced products table, the orders that could be
        matched to a product type, and the product types as returned by
        ``db.get_productTypes()``.
    """
    # -------- Connecting to the database ---------------
    db = DbService(db_type)

    categories = db.get_productTypes()
    sellers = db.get_sellers()
    warehouses = db.get_warehouses()
    orders = db.get_orders()
    # NOTE(review): keyword spelling ("delated") is kept exactly as the
    # original call used it; DbService.get_products is not visible here.
    indacoProducts_df = db.get_products(consider_delatedProducts=False)
    # --------------------------------------------

    # Enhance product df
    if db_type == "mongodb":
        indacoProducts_df = enhanced_products_df(
            indacoProducts_df, categories, sellers, warehouses, orders)
    else:
        indacoProducts_df = db.enhanced_products_df(
            indacoProducts_df, categories, sellers, warehouses, orders)

    # Visualize production_areas
    if visualize_aggregated_territories:
        _print_production_area_report(indacoProducts_df)

    # Add the category for each product in the orders
    orders = _attach_product_type(orders, indacoProducts_df)

    return db, indacoProducts_df, orders, categories