Your IP : 216.73.217.13


Current Path : /home/deltalab/PMS/recommendations/recommender-system-batch/components/
Upload File :
Current File : //home/deltalab/PMS/recommendations/recommender-system-batch/components/collectionBased_RS.py

from collections import defaultdict
import json

from _library import toolkit
from _library.recom_utils import collectionBased_utils

from components.collection_analyzer import RecomSystemBatchAnalyzer

class collectionBased_RS:
    
    def enhance_with_collectionItems(self, grouped_collection_names):
        """Attach the collection items to the grouped collection names.

        Delegates to ``collectionBased_utils.retrieve_collection_items``,
        handing over the stored orders, the stored platform products and the
        unique product identifier.

        Args:
            grouped_collection_names: Collection names grouped by type. The
                structure is not inspected here; it is forwarded as-is.

        Returns:
            Whatever ``retrieve_collection_items`` returns.
        """
        # Trace the identifier used for the lookup
        print("\n[INIT] unique_product_identifier:", self.unique_product_identifier, "\n")

        # Hand the lookup over to the shared utilities and return its result directly
        return collectionBased_utils.retrieve_collection_items(
            orders_df=self.orders,
            indacoProducts=self.platfrom_products,
            grouped_collections=grouped_collection_names,
            product_identifier=self.unique_product_identifier,
        )
        
    def __init__(self, orders_df, platform_products_df, grouped_collection_names, 
                 unique_product_identifier,  product_identifier):
        """Set up the collection-based recommender.

        Args:
            orders_df: Orders table. It is passed through
                ``toolkit.preProcessing_collectionColumns`` before being stored.
            platform_products_df: Platform products table, stored as-is.
            grouped_collection_names: Collection names grouped by type. A
                shallow copy is handed to ``enhance_with_collectionItems`` and
                the returned value is what gets stored.
            unique_product_identifier: Name of the platform-products column
                used to match collection items.
            product_identifier: Identifier used to extract the order baskets;
                the value ``'Product Type'`` enables the category-based mode.
        """
        # Save the product identifiers
        self.unique_product_identifier = unique_product_identifier
        self.product_identifier = product_identifier
        self.category_based = self.product_identifier == 'Product Type'

        # Name of this recommender ('_cat' marks the category-based variant)
        self.rs_name = 'collectionBased'
        if self.category_based:
            self.rs_name += '_cat'

        # No parameters have been assigned yet. This flag must exist because
        # generate_collectionBased_recommendations() reads it to decide whether
        # set_params() still has to be called with the default values.
        self.flag_params = False

        # Pre-process and save the orders
        self.orders = toolkit.preProcessing_collectionColumns(orders_df)

        # Retrieve the unique baskets of orders
        self.order_baskets = toolkit.extract_baskets_of_orders(self.orders, self.product_identifier)

        # Save the platform products
        self.platfrom_products = platform_products_df

        # Save the collection names grouped by type, enriched with their items.
        # The enrichment reads self.orders and self.platfrom_products, so it
        # has to run after both have been assigned.
        self.grouped_collection_names = grouped_collection_names
        self.grouped_collection_names = self.enhance_with_collectionItems(self.grouped_collection_names.copy())
        
        
    def set_params(self, excluded_link_types = [], force_one_linked_type = False, 
                   bundle_dimension = -1, filter_source_platform = True, drop_similar_categories = False, 
                   merge_collection_type = True, force_single_item = True, verbose = False):
        
        # Set the parameters
        self.excluded_link_types = excluded_link_types
        self.force_one_linked_type = force_one_linked_type
        self.bundle_dimension = bundle_dimension
        self.filter_source_platform = filter_source_platform
        self.drop_similar_categories = drop_similar_categories
        self.merge_collection_type = merge_collection_type
        
        # That's relevant only when the system is category-based 
        self.force_single_item = force_single_item

        self.verbose = verbose
        self.flag_params = True
        
    def generate_recommendations(self, reference_items, verbose = False):
        """Build the recommendation bundles for a group of reference items.

        The collections linked to the reference items are scored one by one
        with ``RecomSystemBatchAnalyzer``; the best-scoring collection is turned
        into recommendations, which are then passed to
        ``collectionBased_utils.build_bundles``.

        The attributes assigned by ``set_params`` must be available.

        Args:
            reference_items: Reference products. When ``filter_source_platform``
                is enabled, each item is indexed with ``'inTrentino_source'``.
            verbose: If True, print the ranked collections, the selected
                collection and the recommendations (as JSON).

        Returns:
            The output of ``collectionBased_utils.build_bundles``, or an empty
            ``defaultdict(list)`` when no collection is linked to the reference
            items or none of the linked collections has matching items.
        """
        # Find linked collections for the reference items
        linked_collections = collectionBased_utils.find_linked_collections(
            reference_products = reference_items, 
            collections = self.grouped_collection_names, 
            product_identifier = self.product_identifier,
            excluded_link_types = self.excluded_link_types)

        if len(linked_collections) == 0:
            print('There are no selected collections. Check out at the part called "Find and select linked collections".')
            return defaultdict(list)

        # Values that do not depend on the collection are computed once
        max_bundleDim = self.bundle_dimension if self.bundle_dimension != -1 else 10

        # Filtering the source platform (inTrentino or INDACO)
        platformCond = None
        if self.filter_source_platform:
            colName = 'inTrentino_source'

            # Retrieve the platforms of the reference items
            reference_platforms = [item[colName] for item in reference_items]
            platformCond = self.platfrom_products[colName].isin(reference_platforms)

        ranked_collections = []
        for collection in linked_collections:

            # Retrieve collection information
            linkType = collection['linked_type']
            collectionName = collection['collection_name']

            # Retrieve the identifiers of the collection items
            collectionItemIds = self.grouped_collection_names[linkType][collectionName]['all_items']

            # Retrieve the product information (optionally restricted by platform)
            itemCond = self.platfrom_products[self.unique_product_identifier].isin(collectionItemIds)
            if platformCond is not None:
                itemCond = itemCond & platformCond
            collectionItems = self.platfrom_products[itemCond]

            # Nothing to analyze for this collection
            if len(collectionItems) == 0:
                continue

            # Analyze the collection and its products
            collectionAnalyzer = RecomSystemBatchAnalyzer(
                collection_name = collectionName, 
                collection_items = collectionItems, 
                reference_items = reference_items,
                orders = self.orders, 
                user_profile = None, 
                limit_bundleDim = max_bundleDim,
                product_identifier = self.product_identifier,
                type_recommendation="product_based_sustainability")
            analyzed_collection = collectionAnalyzer.analyze_collections(verbose = False)

            # Save the findings
            collection_info = dict(zip(['score', 'items', 'explaination'], analyzed_collection))
            collection_info['linkType'] = linkType
            collection_info['collectionName'] = collectionName

            if self.category_based: 
                collection_info['allCollectionItems'] = collectionItems

            ranked_collections.append(collection_info)

        # Every linked collection may have been skipped above; without this
        # guard the selection below would fail with an IndexError.
        if not ranked_collections:
            print('None of the linked collections contains items that can be recommended.')
            return defaultdict(list)

        # Sort the ranked collections (best score first)
        ranked_collections = sorted(ranked_collections, key = lambda item: item['score'], reverse = True)

        # Select the best collection according to the score
        selected_collection = ranked_collections[0]
        selected_type = selected_collection['linkType']

        # Report the collection that was actually selected, not the last one analyzed
        print(f"\nAll linked collections ({len(linked_collections)}):", linked_collections)
        print(f"--> Selected collection: [{selected_collection['linkType']}] {selected_collection['collectionName']}\n")

        # [IF CATEGORY BASED] If the recommendations are based on categories
        if self.category_based: 

            # Find collection item by category
            recommendations = collectionBased_utils.find_collection_item_byCategory(
                selected_collection = selected_collection,
                reference_items = reference_items, 
                force_single_item = self.force_single_item,
                verbose = verbose)
        else:

            # Generate the recommendations
            recommendations = collectionBased_utils.generateRecommendations(
                selected_collection, self.platfrom_products, self.product_identifier, 
                reference_items, self.bundle_dimension)

        # Wrap up the recommendations in a dictionary for compatibility with the follow-up steps
        selectedCollection_recommendations = {selected_type: recommendations}

        # Filter recommended items: (a) drop similar products  || (b) self.drop_similar_categories
        bundles_byCollectionType = collectionBased_utils.build_bundles(
            reference_products = reference_items, 
            recommendations_byCollectionType = selectedCollection_recommendations, 
            products_df = self.platfrom_products, 
            product_identifier = self.product_identifier,
            drop_similar_categories = self.drop_similar_categories, 
            merge_collection_type =  self.merge_collection_type, 
            output_recom_with_one_linktype = False, 
            verbose = False)

        if verbose:
            print(f"All collections ({len(ranked_collections)}):", ranked_collections)
            print("\n" + "-" * 100)
            print(f"Selected collection [{selected_collection['collectionName']}]:", selected_collection)
            print("\n" + "-" * 100)
            print(f"Recommendations ({len(recommendations)}):", json.dumps(recommendations, indent = 4))
            print("\n" + "-" * 100)

        return bundles_byCollectionType
        
    def itemWise_collectionBased_recommendations(self, reference_items):
        
        # Slight artefact
        if (not isinstance(reference_items, list)) or (not isinstance(reference_items, set)):
            reference_items = [reference_items]
        
        # Generate the recommendations 
        recommendedItems_byLink = self.generate_recommendations(reference_items)
        
        # Add the name of this method to the recommendations
        for recommendations in recommendedItems_byLink.values():
            recommendations = toolkit.add_recommendationSource(recommendations, self.rs_name)
            
        return recommendedItems_byLink
       
        
    def generate_collectionBased_recommendations(self):
        """Compute the collection-based recommendations for every platform product.

        Each row of the platform products is converted into a reference
        product with ``toolkit.extract_referenceProduct`` and passed to
        ``itemWise_collectionBased_recommendations``.

        Returns:
            A dictionary whose keys are read from each reference product
            (``'item_name'`` by default; ``'product_name'`` or ``'sku'`` in
            category-based mode when the unique product identifier is
            ``'Title'`` or ``'SKU'``) and whose values are the recommendations
            computed for that product.
        """
        # Assign the default params when set_params() has not been called.
        # getattr covers instances on which the flag was never created, which
        # would otherwise fail with an AttributeError right here.
        if not getattr(self, 'flag_params', False):
            self.set_params()

        # Pre-processing the products: one reference product per row
        reference_products = self.platfrom_products.apply(
            func = lambda df_row: toolkit.extract_referenceProduct(df_row, self.product_identifier), 
            axis = 1)

        # Compute the recommendations for each product
        recommendations = reference_products.apply(self.itemWise_collectionBased_recommendations)

        # Improve the data representation: index the results by product identifier
        identifier = 'item_name'
        if self.category_based:
            if self.unique_product_identifier == 'Title':
                identifier = 'product_name'
            elif self.unique_product_identifier == 'SKU':
                identifier = 'sku'
        recommendations.index = reference_products.apply(lambda product: product[identifier])

        return recommendations.to_dict()