Your IP : 216.73.216.158


Current Path : /proc/thread-self/root/home/deltalab/PMS/recommendations/user_profiling/_library/
Upload File :
Current File : //proc/thread-self/root/home/deltalab/PMS/recommendations/user_profiling/_library/data_loader.py

from tabulate import tabulate
import numpy as np

from _library.mongodb_utils import enhanced_products_df
from components.DbService import DbService

def _print_production_area_report(products_df):
    """Print how many products fall in each (area, vendor, type) group.

    Also prints the share of products that have a production area and the
    titles of the products whose ``production_areas`` value is the empty
    string. This is purely diagnostic output; nothing is returned and
    ``products_df`` is not modified (the aggregation works on a copy).
    """
    group_keys = ['PRODUCTION_AREAS', 'VENDOR', 'PRODUCT TYPE']

    with_area = products_df[products_df['production_areas'] != ''].copy()
    # Headers are upper-cased for display; the grouping keys above refer to
    # these upper-cased names.
    with_area.columns = list(map(str.upper, with_area.columns))

    summary = with_area.groupby(by=group_keys, as_index=False).count()
    # After count() each remaining column holds a per-group non-null count;
    # the FREQUENCY column is the one reported as the number of items.
    summary = summary[group_keys + ['FREQUENCY']].rename(columns={'FREQUENCY': 'ITEMS'})
    summary = summary.sort_values(by=group_keys, ascending=True)

    print("\n" + 70 * "-")
    print("-" * 20, f'Aggregated production areas', "-" * 21)
    print(70 * "-", "\n")

    print(61 * "-")
    print(61 * "-")
    print(tabulate(summary, headers=summary.columns, tablefmt='pretty'))
    print(61 * "-")
    print(61 * "-", "\n")

    total_items = summary['ITEMS'].sum()
    n_products = len(products_df)
    # Guard the ratio so an empty catalogue reports 0.0 % instead of nan.
    coverage = np.round((total_items / n_products) * 100, 2) if n_products else 0.0
    print('\t\t' + 30 * "-")
    print('\t\t' + 30 * "-")
    print(f"\t\tTOTAL ITEMS: {total_items}/{n_products} "
          f"({coverage} %)")
    print('\t\t' + 30 * "-")
    print('\t\t' + 30 * "-", "\n")

    without_area = products_df.loc[products_df['production_areas'] == '', 'Title'].unique()
    print(f"ITEMS WITHOUT A PRODUCTION AREA ({len(without_area)}):", "\n" + "-" * 50)
    # str() keeps the report from crashing on a missing / non-string title.
    print('\n'.join(map(str, without_area)))


def _attach_product_types(orders, products_df):
    """Insert a 'Product Type' column into ``orders`` and drop unmatched rows.

    Each order's ``sku`` is matched case-insensitively against the products'
    ``indaco_sku``. When several products share a SKU, the first one in
    ``products_df`` order wins. Orders whose SKU matches no product, whose
    matched product type is missing, or whose SKU is not a string are dropped.

    ``orders`` is modified in place by the column insertion (at position 2);
    the returned frame is the filtered result of ``dropna``.
    """
    # Build the SKU -> product type lookup once instead of rescanning the
    # whole products frame for every order row.
    sku_to_type = {}
    for raw_sku, product_type in zip(products_df['indaco_sku'], products_df['Product Type']):
        if isinstance(raw_sku, str):
            # setdefault keeps the first occurrence of a duplicated SKU.
            sku_to_type.setdefault(raw_sku.upper(), product_type)

    def lookup(sku):
        # Non-string SKUs (e.g. NaN) cannot match any product.
        if not isinstance(sku, str):
            return np.nan
        return sku_to_type.get(sku.upper(), np.nan)

    orders.insert(loc=2, column='Product Type', value=orders['sku'].apply(lookup))

    # Dealing with ordered products that do not exist any more
    return orders.dropna(subset=['Product Type'])


def load_indacoProducts(db_type, visualize_aggregated_territories=True):
    """Load products, orders and categories from the database.

    Parameters
    ----------
    db_type :
        Passed to ``DbService``. When equal to ``"mongodb"`` the products are
        enriched with the module-level ``enhanced_products_df``; otherwise
        with the ``enhanced_products_df`` method of the service itself.
    visualize_aggregated_territories : bool, default True
        When true, print a report of product counts per production area,
        vendor and product type, followed by the products without an area.

    Returns
    -------
    tuple
        ``(db, indacoProducts_df, orders, categories)`` where ``db`` is the
        ``DbService`` instance, ``indacoProducts_df`` the enriched products,
        ``orders`` the orders with an added 'Product Type' column (orders
        that match no current product are removed) and ``categories`` the
        value returned by ``db.get_productTypes()``.
    """
    # -------- Connecting to the database ---------------
    db = DbService(db_type)
    categories = db.get_productTypes()
    sellers = db.get_sellers()
    warehouses = db.get_warehouses()
    orders = db.get_orders()
    # NOTE: the keyword spelling below is the one the DbService API expects.
    indacoProducts_df = db.get_products(consider_delatedProducts=False)
    # --------------------------------------------

    # Enhance product df
    enhance = enhanced_products_df if db_type == "mongodb" else db.enhanced_products_df
    indacoProducts_df = enhance(indacoProducts_df, categories, sellers, warehouses, orders)

    # Visualize production_areas
    if visualize_aggregated_territories:
        _print_production_area_report(indacoProducts_df)

    # Add the category for each product in the orders
    orders = _attach_product_types(orders, indacoProducts_df)
    return db, indacoProducts_df, orders, categories