Your IP : 216.73.217.13


Current Path : /home/deltalab/PMS/recommendations/recomsys-datapreparation-batch/components/
Upload File :
Current File : //home/deltalab/PMS/recommendations/recomsys-datapreparation-batch/components/mine_assRules.py

from _library import mine_assRules_utils
from _library.data_utils.remoteConnection_utils import move_file_to_remote

import numpy as np
from os import path, makedirs, remove

class MineAssociationRules:
    """Mine co-purchase patterns and association rules from an orders table.

    The heavy lifting is delegated to ``mine_assRules_utils``; this class keeps
    the shared state (orders, product column, thresholds, output path) and
    wires the steps together.

    Expected call order, as implied by the attributes each step reads:

    1. ``find_frequentPatterns``          -> sets ``self.min_support``
    2. ``mine_associationRules``          -> sets ``self.min_confidence`` / ``self.min_lift``
    3. ``mine_enhancedAssociationRules``  (optional enrichment step)
    4. ``save_outcomes_asExcel``          -> reads the three thresholds, sets
       ``self.coPurchases_filePath``
    5. ``save_excel_remotely``            -> reads ``self.coPurchases_filePath``
    """

    def __init__(self, orders, product_identifier):
        """Store the inputs and build the per-transaction baskets.

        Args:
            orders: pandas DataFrame of order lines. It must contain a
                ``'Transaction id'`` column and the ``product_identifier`` column.
            product_identifier: Name of the column that identifies a product.
        """
        self.orders = orders
        self.product_identifier = product_identifier
        # Separator handed to the utility functions when handling item lists.
        self.item_separator = ' | '

        self.baskets = self.retrieve_baskets()

    def retrieve_baskets(self):
        """Group the order lines into one basket of distinct products per transaction.

        Rows with a missing product identifier are dropped first.

        Returns:
            A single-column DataFrame (column named ``self.product_identifier``)
            with a fresh 0..n-1 index; each cell holds the sorted array of
            distinct products (``np.unique``) bought in one transaction.
        """
        cleaned_df = self.orders.dropna(subset=[self.product_identifier])

        # Only the product column is used downstream, so aggregate just that
        # column instead of building per-transaction lists for every column.
        products_per_transaction = (
            cleaned_df.groupby(by='Transaction id')[self.product_identifier]
            .agg(lambda x: x.tolist())
            # Drop the transaction-id index to keep a plain positional index.
            .reset_index(drop=True)
        )

        return products_per_transaction.apply(np.unique).to_frame()

    def find_frequentPatterns(self, min_support=0.05, limit_dim_pattern=6):
        """Compute the frequent patterns of the baskets.

        Args:
            min_support: Minimum support threshold; stored on the instance so
                it can be reported by ``save_outcomes_asExcel``.
            limit_dim_pattern: Forwarded to
                ``mine_assRules_utils.compute_frequentPatterns`` (presumably the
                maximum pattern size -- confirm against the utility).

        Returns:
            Whatever ``mine_assRules_utils.compute_frequentPatterns`` returns.
        """
        self.min_support = min_support

        # Data preparation
        transactions = mine_assRules_utils.preProcessing_transaction(
            self.baskets, self.item_separator, self.product_identifier
        )

        # Compute frequent patterns
        return mine_assRules_utils.compute_frequentPatterns(
            transactions, self.item_separator, self.min_support, limit_dim_pattern
        )

    def mine_associationRules(self, frequentPatterns, min_confidence=0.7, min_lift=2):
        """Generate association rules from previously computed frequent patterns.

        Args:
            frequentPatterns: DataFrame with at least the columns ``'Patterns'``
                and ``'PercentageSupport'``. It is not modified.
            min_confidence: Minimum confidence threshold (stored on the instance).
            min_lift: Minimum lift threshold (stored on the instance).

        Returns:
            Whatever ``mine_assRules_utils.generate_associationRules`` returns.
        """
        self.min_confidence = min_confidence
        self.min_lift = min_lift

        # Rename columns --> the library wants these column names
        library_mapping = {'Patterns': 'itemsets', 'PercentageSupport': 'support'}
        fp = frequentPatterns.copy().rename(columns=library_mapping)

        # Select only the two relevant columns
        fp = fp[['itemsets', 'support']]

        # Generate the association rules
        return mine_assRules_utils.generate_associationRules(
            fp, self.min_confidence, self.min_lift
        )

    def mine_enhancedAssociationRules(self, associationRules, grouped_collections,
                                      name_mapping=None, keep_all_cols=False):
        """Enrich association rules with their linked collections, then filter them.

        Args:
            associationRules: Rules as produced by ``mine_associationRules``.
            grouped_collections: Mapping keyed by collection type (only its
                keys are read here; the values are consumed by the utilities).
            name_mapping: Optional mapping collection type -> display name.
                When falsy, every collection type maps to itself.
            keep_all_cols: Forwarded to
                ``mine_assRules_utils.filter_enhancedAssRules``.

        Returns:
            Whatever ``mine_assRules_utils.filter_enhancedAssRules`` returns.
        """
        if not name_mapping:
            name_mapping = {collectionType: collectionType for collectionType in grouped_collections}

        # Preliminary data
        assRules_colNames = ['antecedents', 'consequents']

        # Extract linked collections for each association rule
        enhancedAssociationRules = mine_assRules_utils.assRules_extractLinkedCollections(
            associationRules,
            grouped_collections,
            self.orders,
            self.product_identifier,
            assRules_colNames,
            name_mapping,
        )

        # Filter the association rules
        return mine_assRules_utils.filter_enhancedAssRules(
            enhancedAssociationRules,
            grouped_collections,
            assRules_colNames,
            name_mapping,
            keep_all_cols,
        )

    def save_outcomes_asExcel(self, frequentPatterns, associationRules, enhanced_associationRules,
                              timestamp_col, folder_path, version_name='v2'):
        """Write patterns, rules and the used thresholds to a local file.

        Must be called after ``find_frequentPatterns`` and
        ``mine_associationRules``, because it reads the thresholds they store.
        The resulting path is kept in ``self.coPurchases_filePath``.

        Args:
            frequentPatterns: Output of ``find_frequentPatterns``.
            associationRules: Output of ``mine_associationRules``.
            enhanced_associationRules: Output of ``mine_enhancedAssociationRules``.
            timestamp_col: Forwarded to
                ``mine_assRules_utils.generate_saving_fileName``.
            folder_path: Destination folder; created if it does not exist.
            version_name: Optional prefix for the file name; ``None`` disables it.
        """
        # exist_ok avoids the check-then-create race of `exists` + `makedirs`.
        makedirs(folder_path, exist_ok=True)

        base_file_name = "coPurchases"
        if version_name is not None:
            base_file_name = str(version_name) + "_" + base_file_name

        # Get preliminary data
        saving_name = mine_assRules_utils.generate_saving_fileName(
            self.orders, self.product_identifier, timestamp_col, base_file_name
        )
        self.coPurchases_filePath = path.join(folder_path, saving_name)

        # Create a dataframe for the parameters
        params = mine_assRules_utils.create_paramsDF(self.min_support, self.min_confidence, self.min_lift)

        # Save outcomes
        mine_assRules_utils.save_findings(frequentPatterns, associationRules, enhanced_associationRules,
                                          params, self.coPurchases_filePath, self.item_separator)

    def save_excel_remotely(self):
        """Send the saved file to the remote ``'co-purchases'`` folder and clean up.

        Must be called after ``save_outcomes_asExcel``, which sets
        ``self.coPurchases_filePath``. If the local file still exists after the
        transfer call returns, it is deleted.
        """
        move_file_to_remote(local_file_path=path.abspath(self.coPurchases_filePath),
                            remote_sub_folder='co-purchases')

        if path.exists(self.coPurchases_filePath):
            remove(self.coPurchases_filePath)
            print("--> OK: The local file has been removed.\n")