# Current Path : /home/deltalab/PMS/recommendations/recomsys-datapreparation-batch/
# Current File : //home/deltalab/PMS/recommendations/recomsys-datapreparation-batch/app.py
from _library.data_utils.data_loader import load_indacoOrders, load_collectionTypes
from _library import toolkit
from _library.data_utils.io_toolkit import read_settings
from components.mine_assRules import MineAssociationRules
from numpy import nan
import numpy as np
from os import path
from pandas import DataFrame
from sqlalchemy import create_engine,text
import pandas as pd
if __name__ == '__main__':
    # Batch entry point: load the orders, attach the collection columns to them,
    # then mine and persist association rules once per product identifier.
    # NOTE(review): the nesting below was reconstructed from statement order;
    # confirm that the save steps (C)/(D) are meant to run for every identifier.
    RULE = 120 * "-"

    def _banner(title, left, right, leading_newline=True):
        """Print a three-line section banner around ``title``.

        ``left`` and ``right`` are the numbers of dashes printed before and
        after the title; ``leading_newline`` prepends a newline to the first
        rule line.
        """
        print(("\n" + RULE) if leading_newline else RULE)
        print("-" * left, title, "-" * right)
        print(RULE, "\n")

    appSettings = read_settings()

    # ------------------------------ (0) Load the orders ------------------------------
    _banner("(0) Loading orders", 50, 50)

    # Products, orders, categories and the SKU mapping come from a single loader call
    indaco_products, indaco_orders, indaco_categories, sku_mapping = load_indacoOrders(
        "etl",
        consider_delatedProducts=True,
        consider_unavailableProducts=True,
    )

    # Show the loaded orders and products
    toolkit.visualize_orders(indaco_orders, last_k_orders=-1)
    toolkit.visualize_products(indaco_products, num_products=20)

    # Read the collection definitions
    collections_file = path.join('_library', 'INDACO_collectionCodes.json')
    collectionTypes, rawCollectionTypes = load_collectionTypes(
        filepath=collections_file,
        verbose=True,
    )
    collections = list(collectionTypes.keys())

    # Attach each collection column to the orders (joined on SKU), turn empty
    # strings into NaN, and visualize the collection.
    for collection_type in collections:
        indaco_orders = indaco_orders.merge(
            indaco_products[['SKU', collection_type]],
            how='inner',
            on='SKU',
        )
        cleaned_column = indaco_orders[collection_type].replace('', nan)
        indaco_orders[collection_type] = cleaned_column
        toolkit.visualize_collection(indaco_products, collection_type)

    # ------------------------------------------------------------------------------------------
    # ---------------------------- (A) Mine the association rules ------------------------------
    # ------------------------------------------------------------------------------------------
    _banner("(A) Mine the association rules", 44, 44)

    for identifier in ('SKU', 'Product Type'):
        _banner(f"(A0) Ass. rules based on {identifier.upper()}", 43, 43)

        # Initialize the miner for this identifier
        miner = MineAssociationRules(indaco_orders, product_identifier=identifier)

        # (B1) Frequent patterns -- this banner has no leading newline
        _banner("(B1) Find frequent patterns", 46, 45, leading_newline=False)
        patterns = miner.find_frequentPatterns(min_support=0.01, limit_dim_pattern=6)

        # Threshold for the minimum confidence: lowered by 0.3 for 'Product Type'.
        # The subtraction is kept explicit so the resulting float is unchanged.
        min_confidence = 0.7 - 0.3 if identifier == 'Product Type' else 0.7

        # (B2) Simple association rules
        _banner("(B2) Generate association rules", 44, 43)
        rules = miner.mine_associationRules(patterns, min_confidence, min_lift=2)

        # (B3) Enhanced association rules (i.e., with linked collections)
        _banner("(B3) Generate enhanced association rules", 39, 39)
        enhanced_rules = miner.mine_enhancedAssociationRules(
            associationRules=rules,
            grouped_collections=collectionTypes,
        )

        # (C) Save the outcomes locally
        _banner("(C) Saving the findings locally", 43, 44)
        miner.save_outcomes_asExcel(
            patterns,
            rules,
            enhanced_rules,
            timestamp_col='timestamp',
            folder_path="_tmp",
            version_name=appSettings['versionName'],
        )

        # (D) Save remotely, only when enabled in the settings
        if not appSettings['saveRemotely']:
            continue
        _banner("(D) Saving the findings remotely", 43, 43)
        miner.save_excel_remotely()