# Current Path : /home/deltalab/PMS/recommendations/recommender-system-batch/components/
# Current File : //home/deltalab/PMS/recommendations/recommender-system-batch/components/collectionBased_RS.py
from collections import defaultdict
import json
from _library import toolkit
from _library.recom_utils import collectionBased_utils
from components.collection_analyzer import RecomSystemBatchAnalyzer
class collectionBased_RS:
    """Recommender that suggests items taken from collections linked to a product.

    The flow visible in this class is:

    1. ``__init__`` pre-processes the orders, extracts the order baskets and
       enriches the grouped collection names with their items.
    2. ``set_params`` stores the tuning parameters (it is called automatically
       with its defaults when the caller never configured the instance).
    3. ``generate_collectionBased_recommendations`` runs the recommender for
       every row of the platform products.

    NOTE(review): ``platform_products_df`` is indexed with column names and
    boolean masks and uses ``.apply(axis=1)``, so it is presumably a pandas
    DataFrame -- confirm against the caller.
    """

    def __init__(self, orders_df, platform_products_df, grouped_collection_names,
                 unique_product_identifier, product_identifier):
        """Store the input data and prepare the collection lookup.

        Args:
            orders_df: Orders data; passed through
                ``toolkit.preProcessing_collectionColumns`` before being stored.
            platform_products_df: Products available on the platform.
            grouped_collection_names: Collection names grouped by link type;
                a ``.copy()`` of it is enriched with the collection items.
            unique_product_identifier: Column of ``platform_products_df`` used
                to match collection items (compared against ``'Title'`` and
                ``'SKU'`` when building the output index).
            product_identifier: Identifier handed to the helper libraries; the
                value ``'Product Type'`` switches the system to category mode.
        """
        # Save the product identifiers
        self.unique_product_identifier = unique_product_identifier
        self.product_identifier = product_identifier
        self.category_based = bool(self.product_identifier == 'Product Type')
        self.rs_name = 'collectionBased_cat' if self.category_based else 'collectionBased'

        # No parameters have been configured yet; set_params() flips this flag.
        # Without this initialisation the later ``self.flag_params`` check
        # raised AttributeError on instances that were never configured.
        self.flag_params = False

        # Pre-process and save the orders
        self.orders = toolkit.preProcessing_collectionColumns(orders_df)

        # Retrieve the unique baskets of orders
        self.order_baskets = toolkit.extract_baskets_of_orders(self.orders, self.product_identifier)

        # Save the platform products (attribute spelling kept for compatibility)
        self.platfrom_products = platform_products_df

        # Save the collection names grouped by type, enriched with their items
        self.grouped_collection_names = self.enhance_with_collectionItems(
            grouped_collection_names.copy())

    def enhance_with_collectionItems(self, grouped_collection_names):
        """Return the grouped collections enriched with their items.

        Delegates to ``collectionBased_utils.retrieve_collection_items`` using
        the stored orders, platform products and unique product identifier.
        """
        print("\n[INIT] unique_product_identifier:", self.unique_product_identifier, "\n")
        return collectionBased_utils.retrieve_collection_items(
            orders_df=self.orders,
            indacoProducts=self.platfrom_products,
            grouped_collections=grouped_collection_names,
            product_identifier=self.unique_product_identifier)

    def set_params(self, excluded_link_types = None, force_one_linked_type = False,
                   bundle_dimension = -1, filter_source_platform = True, drop_similar_categories = False,
                   merge_collection_type = True, force_single_item = True, verbose = False):
        """Store the tuning parameters of the recommender.

        Args:
            excluded_link_types: Link types forwarded to
                ``find_linked_collections``; ``None`` means an empty list.
            force_one_linked_type: Stored on the instance (not read in this class).
            bundle_dimension: Bundle size; ``-1`` makes the analyzer limit
                fall back to 10.
            filter_source_platform: When true, collection items are restricted
                to the ``'inTrentino_source'`` values of the reference items.
            drop_similar_categories: Forwarded to ``build_bundles``.
            merge_collection_type: Forwarded to ``build_bundles``.
            force_single_item: Only used when the system is category-based.
            verbose: Stored on the instance (not read in this class).
        """
        # A fresh list per call: a literal ``[]`` default would be one shared
        # object mutated across every instance that relied on the default.
        self.excluded_link_types = [] if excluded_link_types is None else excluded_link_types
        self.force_one_linked_type = force_one_linked_type
        self.bundle_dimension = bundle_dimension
        self.filter_source_platform = filter_source_platform
        self.drop_similar_categories = drop_similar_categories
        self.merge_collection_type = merge_collection_type
        # That's relevant only when the system is category-based
        self.force_single_item = force_single_item
        self.verbose = verbose
        self.flag_params = True

    def _ensure_params(self):
        """Apply the default parameters when ``set_params`` was never called."""
        if not getattr(self, 'flag_params', False):
            self.set_params()

    def _rank_linked_collections(self, linked_collections, reference_items):
        """Analyze every linked collection and return them sorted by score (best first).

        Collections with no items left after the item/platform filters are skipped.
        """
        # Loop-invariant values are computed once, outside the loop.
        max_bundleDim = self.bundle_dimension if self.bundle_dimension != -1 else 10
        platform_mask = None
        if self.filter_source_platform:
            # Filtering the source platform (inTrentino or INDACO)
            source_column = 'inTrentino_source'
            reference_platforms = [item[source_column] for item in reference_items]
            platform_mask = self.platfrom_products[source_column].isin(reference_platforms)

        ranked_collections = []
        for collection in linked_collections:
            # Retrieve collection information
            linkType = collection['linked_type']
            collectionName = collection['collection_name']

            # Retrieve the identifiers of the collection items
            item_ids = self.grouped_collection_names[linkType][collectionName]['all_items']

            # Retrieve the product information of those items
            item_mask = self.platfrom_products[self.unique_product_identifier].isin(item_ids)
            if platform_mask is not None:
                item_mask = item_mask & platform_mask
            collectionItems = self.platfrom_products[item_mask]
            if len(collectionItems) == 0:
                continue

            # Analyze the collection and its products.
            # NOTE(review): verbosity is hard-coded to False here, as in the
            # original code; the caller's ``verbose`` flag is not forwarded.
            collectionAnalyzer = RecomSystemBatchAnalyzer(
                collection_name=collectionName,
                collection_items=collectionItems,
                reference_items=reference_items,
                orders=self.orders,
                user_profile=None,
                limit_bundleDim=max_bundleDim,
                product_identifier=self.product_identifier,
                type_recommendation="product_based_sustainability")
            analyzed_collection = collectionAnalyzer.analyze_collections(verbose=False)

            # Save the findings
            collection_info = dict(zip(['score', 'items', 'explaination'], analyzed_collection))
            collection_info['linkType'] = linkType
            collection_info['collectionName'] = collectionName
            if self.category_based:
                collection_info['allCollectionItems'] = collectionItems
            ranked_collections.append(collection_info)

        # Sort the ranked collections, highest score first
        return sorted(ranked_collections, key=lambda item: item['score'], reverse=True)

    def generate_recommendations(self, reference_items, verbose = False):
        """Build the recommendation bundles for the given reference items.

        Args:
            reference_items: Iterable of reference products; each one is
                indexed by ``'inTrentino_source'`` when platform filtering is on.
            verbose: When true, print the ranked collections, the selected
                collection and the raw recommendations.

        Returns:
            The result of ``collectionBased_utils.build_bundles`` for the best
            scoring collection, or an empty ``defaultdict(list)`` when no
            linked collection is usable.
        """
        self._ensure_params()

        # Find linked collections for the reference items
        linked_collections = collectionBased_utils.find_linked_collections(
            reference_products=reference_items,
            collections=self.grouped_collection_names,
            product_identifier=self.product_identifier,
            excluded_link_types=self.excluded_link_types)
        if len(linked_collections) == 0:
            print('There are no selected collections. Check out at the part called "Find and select linked collections".')
            return defaultdict(list)

        ranked_collections = self._rank_linked_collections(linked_collections, reference_items)
        if not ranked_collections:
            # Every linked collection was filtered out; indexing [0] below
            # would otherwise raise IndexError.
            print('None of the linked collections has items left after filtering.')
            return defaultdict(list)

        # Select the best collection according to the score
        selected_collection = ranked_collections[0]
        selected_type = selected_collection['linkType']
        print(f"\nAll linked collections ({len(linked_collections)}):", linked_collections)
        # Report the collection that was actually selected (not the last one analyzed)
        print(f"--> Selected collection: [{selected_collection['linkType']}] {selected_collection['collectionName']}\n")

        if self.category_based:
            # [IF CATEGORY BASED] Find collection items by category
            recommendations = collectionBased_utils.find_collection_item_byCategory(
                selected_collection=selected_collection,
                reference_items=reference_items,
                force_single_item=self.force_single_item,
                verbose=verbose)
        else:
            # Generate the recommendations
            recommendations = collectionBased_utils.generateRecommendations(
                selected_collection, self.platfrom_products, self.product_identifier,
                reference_items, self.bundle_dimension)

        # Wrap up the recommendations in a dictionary for compatibility with the follow-up steps
        selectedCollection_recommendations = {selected_type: recommendations}

        # Filter recommended items: (a) drop similar products || (b) self.drop_similar_categories
        bundles_byCollectionType = collectionBased_utils.build_bundles(
            reference_products=reference_items,
            recommendations_byCollectionType=selectedCollection_recommendations,
            products_df=self.platfrom_products,
            product_identifier=self.product_identifier,
            drop_similar_categories=self.drop_similar_categories,
            merge_collection_type=self.merge_collection_type,
            output_recom_with_one_linktype=False,
            verbose=False)

        if verbose:
            separator = "\n" + "-" * 100
            print(f"All collections ({len(ranked_collections)}):", ranked_collections)
            print(separator)
            print(f"Selected collection [{selected_collection['collectionName']}]:", selected_collection)
            print(separator)
            print(f"Recommendations ({len(recommendations)}):", json.dumps(recommendations, indent = 4))
            print(separator)
        return bundles_byCollectionType

    def itemWise_collectionBased_recommendations(self, reference_items):
        """Generate the recommendations for one reference item (or a list/set of them).

        A single item is wrapped in a list before being processed. The
        recommendations of every link type are tagged with ``self.rs_name``
        through ``toolkit.add_recommendationSource``.
        """
        # Wrap a single item; the former ``or`` of two negated isinstance
        # checks was always true and re-wrapped lists as well.
        if not isinstance(reference_items, (list, set)):
            reference_items = [reference_items]

        # Generate the recommendations
        recommendedItems_byLink = self.generate_recommendations(reference_items)

        # Add the name of this method to the recommendations. The helper's
        # result is written back so the tagging is kept even if the helper
        # returns a new object; a ``None`` result (presumably an in-place
        # update -- TODO confirm) leaves the stored value untouched.
        for link_type in list(recommendedItems_byLink):
            tagged = toolkit.add_recommendationSource(recommendedItems_byLink[link_type], self.rs_name)
            if tagged is not None:
                recommendedItems_byLink[link_type] = tagged
        return recommendedItems_byLink

    def generate_collectionBased_recommendations(self):
        """Compute the recommendations for every platform product.

        Returns:
            A dictionary mapping each product identifier to the
            recommendations produced for it.
        """
        # Assign the default params when none were configured
        self._ensure_params()

        # Pre-processing the products
        reference_products = self.platfrom_products.apply(
            func=lambda df_row: toolkit.extract_referenceProduct(df_row, self.product_identifier),
            axis=1)

        # Compute the recommendations for each product
        recommendations = reference_products.apply(self.itemWise_collectionBased_recommendations)

        # Improve the data representation: index the results by product identifier
        identifier = 'item_name'
        if self.category_based:
            if self.unique_product_identifier == 'Title':
                identifier = 'product_name'
            elif self.unique_product_identifier == 'SKU':
                identifier = 'sku'
        recommendations.index = reference_products.apply(lambda product: product[identifier])
        return recommendations.to_dict()