# Current Path : /home/deltalab/PMS/recommendations/recomsys-datapreparation-batch/components/
# Current File : //home/deltalab/PMS/recommendations/recomsys-datapreparation-batch/components/mine_assRules.py
from _library import mine_assRules_utils
from _library.data_utils.remoteConnection_utils import move_file_to_remote
import numpy as np
from os import path, makedirs, remove
class MineAssociationRules:
    """Mine co-purchase association rules from an orders table.

    The class groups the orders into per-transaction baskets at construction
    time and then delegates the individual mining / saving steps to the
    ``mine_assRules_utils`` helpers. The methods are meant to be called in
    this order, because later steps read thresholds recorded by earlier ones::

        miner = MineAssociationRules(orders, product_identifier)
        patterns = miner.find_frequentPatterns()
        rules = miner.mine_associationRules(patterns)
        enhanced = miner.mine_enhancedAssociationRules(rules, grouped_collections)
        miner.save_outcomes_asExcel(patterns, rules, enhanced, timestamp_col, folder)
        miner.save_excel_remotely()
    """

    def __init__(self, orders, product_identifier, transaction_identifier='Transaction id'):
        """Store the inputs and build the per-transaction baskets.

        Args:
            orders: pandas DataFrame of order lines. It must contain the
                ``product_identifier`` and ``transaction_identifier`` columns.
            product_identifier: Name of the column identifying the purchased
                product.
            transaction_identifier: Name of the column identifying the
                transaction each order line belongs to. Defaults to
                ``'Transaction id'``, the column name previously hard-coded.
        """
        self.orders = orders
        self.product_identifier = product_identifier
        self.transaction_identifier = transaction_identifier
        # Separator handed to the helpers whenever items are joined / split.
        self.item_separator = ' | '
        self.baskets = self.retrieve_baskets()

    def retrieve_baskets(self):
        """Group the orders into one basket of distinct products per transaction.

        Order lines whose product identifier is missing are ignored.

        Returns:
            A single-column DataFrame (column ``self.product_identifier``,
            default integer index) with one row per transaction; each cell is
            the sorted array of unique products returned by ``numpy.unique``.
        """
        product_col = self.product_identifier
        cleaned_df = self.orders.dropna(subset=[product_col])
        # Only the product column is needed downstream, so aggregate just that
        # column instead of building Python lists for every column in `orders`.
        products_per_transaction = (
            cleaned_df
            .groupby(by=self.transaction_identifier)[product_col]
            .agg(lambda products: products.tolist())
            .reset_index(drop=True)
        )
        return products_per_transaction.apply(np.unique).to_frame()

    def _require_attributes(self, names, producer_hint):
        """Raise a descriptive AttributeError if any of `names` is not set yet."""
        missing = [name for name in names if not hasattr(self, name)]
        if missing:
            raise AttributeError(
                "{} has no attribute(s) {} yet: call {} first.".format(
                    type(self).__name__, ", ".join(missing), producer_hint
                )
            )

    def find_frequentPatterns(self, min_support=0.05, limit_dim_pattern=6):
        """Compute the frequent patterns of the baskets.

        Args:
            min_support: Support threshold forwarded to the helper; it is also
                remembered on the instance for ``save_outcomes_asExcel``.
            limit_dim_pattern: Forwarded to the helper (presumably the maximum
                number of items in a pattern -- confirm against
                ``mine_assRules_utils.compute_frequentPatterns``).

        Returns:
            Whatever ``mine_assRules_utils.compute_frequentPatterns`` returns.
        """
        self.min_support = min_support

        # Data preparation
        transactions = mine_assRules_utils.preProcessing_transaction(
            self.baskets, self.item_separator, self.product_identifier
        )

        # Compute frequent patterns
        return mine_assRules_utils.compute_frequentPatterns(
            transactions, self.item_separator, self.min_support, limit_dim_pattern
        )

    def mine_associationRules(self, frequentPatterns, min_confidence=0.7, min_lift=2):
        """Generate association rules from previously computed frequent patterns.

        Args:
            frequentPatterns: DataFrame with (at least) the columns
                ``'Patterns'`` and ``'PercentageSupport'``. It is not modified.
            min_confidence: Confidence threshold forwarded to the helper and
                remembered on the instance.
            min_lift: Lift threshold forwarded to the helper and remembered on
                the instance.

        Returns:
            Whatever ``mine_assRules_utils.generate_associationRules`` returns.
        """
        self.min_confidence = min_confidence
        self.min_lift = min_lift

        # Rename columns --> the rule-generation library wants these column names.
        library_mapping = {'Patterns': 'itemsets', 'PercentageSupport': 'support'}
        # `rename` already returns a new frame, so no explicit copy is needed;
        # then keep only the two relevant columns.
        fp = frequentPatterns.rename(columns=library_mapping)[['itemsets', 'support']]

        # Generate the association rules
        return mine_assRules_utils.generate_associationRules(
            fp, self.min_confidence, self.min_lift
        )

    def mine_enhancedAssociationRules(self, associationRules, grouped_collections,
                                      name_mapping=None, keep_all_cols=False):
        """Attach the linked collections to each rule and filter the result.

        Args:
            associationRules: Rules as produced by ``mine_associationRules``.
            grouped_collections: Mapping keyed by collection type.
            name_mapping: Optional mapping from collection type to the name to
                use for it. When missing or empty, every collection type maps
                to itself.
            keep_all_cols: Forwarded to
                ``mine_assRules_utils.filter_enhancedAssRules``.

        Returns:
            Whatever ``mine_assRules_utils.filter_enhancedAssRules`` returns.
        """
        if not name_mapping:
            name_mapping = {collection_type: collection_type
                            for collection_type in grouped_collections.keys()}

        # Preliminary data: the two rule columns the helpers operate on.
        assRules_colNames = ['antecedents', 'consequents']

        # Extract linked collections for each association rule
        enhancedAssociationRules = mine_assRules_utils.assRules_extractLinkedCollections(
            associationRules, grouped_collections, self.orders,
            self.product_identifier, assRules_colNames, name_mapping
        )

        # Filter the association rules
        return mine_assRules_utils.filter_enhancedAssRules(
            enhancedAssociationRules, grouped_collections, assRules_colNames,
            name_mapping, keep_all_cols
        )

    def save_outcomes_asExcel(self, frequentPatterns, associationRules, enhanced_associationRules,
                              timestamp_col, folder_path, version_name='v2'):
        """Save the mining outcomes and the thresholds used into ``folder_path``.

        The path of the written file is stored in ``self.coPurchases_filePath``
        so that ``save_excel_remotely`` can pick it up.

        Args:
            frequentPatterns: Output of ``find_frequentPatterns``.
            associationRules: Output of ``mine_associationRules``.
            enhanced_associationRules: Output of ``mine_enhancedAssociationRules``.
            timestamp_col: Forwarded to the helper that builds the file name.
            folder_path: Destination folder; created if it does not exist.
            version_name: Optional prefix of the file name (``None`` disables
                the prefix).

        Raises:
            AttributeError: If ``find_frequentPatterns`` or
                ``mine_associationRules`` have not been called on this
                instance, so the thresholds to report are unknown.
        """
        # Fail early, before touching the file system, with an actionable message.
        self._require_attributes(('min_support',), 'find_frequentPatterns()')
        self._require_attributes(('min_confidence', 'min_lift'), 'mine_associationRules()')

        # `exist_ok` avoids the check-then-create race of `path.exists` + `makedirs`.
        makedirs(folder_path, exist_ok=True)

        base_file_name = "coPurchases"
        if version_name is not None:
            base_file_name = str(version_name) + "_" + base_file_name

        # Get preliminary data
        saving_name = mine_assRules_utils.generate_saving_fileName(
            self.orders, self.product_identifier, timestamp_col, base_file_name
        )
        self.coPurchases_filePath = path.join(folder_path, saving_name)

        # Create a dataframe for the parameters
        params = mine_assRules_utils.create_paramsDF(
            self.min_support, self.min_confidence, self.min_lift
        )

        # Save outcomes
        mine_assRules_utils.save_findings(
            frequentPatterns, associationRules, enhanced_associationRules,
            params, self.coPurchases_filePath, self.item_separator
        )

    def save_excel_remotely(self):
        """Send the last saved file to the remote storage and drop the local copy.

        Raises:
            AttributeError: If ``save_outcomes_asExcel`` has not been called on
                this instance, so there is no file to transfer.
        """
        self._require_attributes(('coPurchases_filePath',), 'save_outcomes_asExcel()')

        move_file_to_remote(local_file_path=path.abspath(self.coPurchases_filePath),
                            remote_sub_folder='co-purchases')

        # The transfer helper may leave the local file in place: clean it up.
        if path.exists(self.coPurchases_filePath):
            remove(self.coPurchases_filePath)
            print("--> OK: The local file has been removed.\n")