# Current Path : /home/deltalab/PMS/recommendations/recomsys-datapreparation-batch/
# Current File : /home/deltalab/PMS/recommendations/recomsys-datapreparation-batch/app_inTrentinoOrders.py
from _library.data_utils.data_loader import load_orders, load_groupedCollectionNames
from components.extract_collections import ExtractCollections
from components.mine_assRules import MineAssociationRules
# Feature switch read by the main script: when True, save_excel_remotely() is
# called on the association-rule miner after the outcomes have been saved locally.
SAVE_REMOTELY = True
# Version label: used as the prefix of the enhanced-orders file name and passed
# as `version_name` to save_outcomes_asExcel().
VERSION_NAME = 'OLDinTrentino'
# Width (in characters) of the horizontal rules framing every console banner.
BANNER_WIDTH = 120


def print_banner(title, left, right, *, leading_newline=False):
    """Print a three-line console banner around *title*.

    The banner is a full-width rule, a title line padded with dashes, and a
    second full-width rule followed by a blank line.

    Args:
        title: Text shown in the middle line.
        left: Number of dashes printed before the title.
        right: Number of dashes printed after the title.
        leading_newline: When True, an empty line precedes the first rule.
    """
    rule = BANNER_WIDTH * "-"
    print("\n" + rule if leading_newline else rule)
    print("-" * left, title, "-" * right)
    print(rule, "\n")


def build_enhanced_file_name(file_name, version_name, keyword="enhanced", marker="anonymized"):
    """Derive the file name under which the enhanced orders are stored.

    The keyword is inserted right before *marker* when the marker occurs in
    *file_name*; otherwise it is appended to the stem, just before the
    extension. The result is always prefixed with ``version_name + "_"``.

    Args:
        file_name: Name of the original orders file.
        version_name: Version label used as prefix.
        keyword: Tag to insert into the name.
        marker: Substring before which the keyword is placed, if present.

    Returns:
        The new file name, e.g. ``Orders_Set20_Giu22_anonymized.xlsx`` with
        version ``V`` becomes ``V_Orders_Set20_Giu22_enhanced_anonymized.xlsx``.
    """
    marker_pos = file_name.find(marker)
    if marker_pos != -1:
        return f"{version_name}_{file_name[:marker_pos]}{keyword}_{file_name[marker_pos:]}"

    # Split on the LAST dot so multi-dot names keep their real extension.
    stem, dot, extension = file_name.rpartition(".")
    if not dot:
        # No extension at all: append the keyword instead of slicing at -1,
        # which would cut the last character off the stem.
        return f"{version_name}_{file_name}_{keyword}"
    return f"{version_name}_{stem}_{keyword}{dot}{extension}"


if __name__ == '__main__':
    # ------------------------------------------------------------------------------------------
    # ---------------------------------- (0) Load the orders -----------------------------------
    # ------------------------------------------------------------------------------------------
    print_banner("(0) Loading orders", 50, 50, leading_newline=True)
    file_name = 'Orders_Set20_Giu22_anonymized.xlsx'
    orders = load_orders(order_file_name=file_name)

    # ------------------------------------------------------------------------------------------
    # ---------------------------- (A) Extract linked collections ------------------------------
    # ------------------------------------------------------------------------------------------
    print_banner("(A) Extract linked collections", 44, 44)

    # Initialize the extractor
    collectionExtractor = ExtractCollections(orders, product_identifier='Title')

    # Preliminary steps on the categories
    print_banner("(A1) Handling categories", 47, 47)
    collectionExtractor.filling_missing_categories(drop_nan_category_items=False)
    collectionExtractor.mapping_toIndacoCategories(improve_with_manual_mapping=True, verbose=False)

    # Extract linked collections
    print_banner("(A2) Extracting the linked collections", 40, 40, leading_newline=True)
    grouped_collections = load_groupedCollectionNames()
    enhanced_orders = collectionExtractor.extract_linked_collections(grouped_collections)

    # Optional debugging export of the linked regions (disabled)
    #linked_regions = enhanced_orders.sort_values(by = ['Transaction id'], ascending = False)
    #linked_regions = linked_regions[['Title', 'SKU', 'Vendor', 'Linked regions']]
    #linked_regions = linked_regions.drop_duplicates(subset = ['SKU'])
    #linked_regions = linked_regions.sort_values(by = ['Title'])
    #print(linked_regions)
    #linked_regions.to_excel("./_tmp/Prodotti_e_territori.xlsx", index = False)

    # Save the enhanced dataframe
    # NOTE(review): this remote save is not gated by SAVE_REMOTELY, unlike step (D) below;
    # confirm whether that is intentional.
    enhanced_fileName = build_enhanced_file_name(file_name, VERSION_NAME)
    collectionExtractor.save_enhanced_orders_remotely(enhanced_fileName)

    # ------------------------------------------------------------------------------------------
    # ---------------------------- (B) Mine the association rules ------------------------------
    # ------------------------------------------------------------------------------------------
    print_banner("(B) Mine the association rules", 44, 44, leading_newline=True)

    assRules_identifiers = ['SKU', 'Product Type']
    # Each identifier gets its own miner, so mining and saving both happen inside the loop.
    for assRules_identifier in assRules_identifiers:
        # Initialize the miner
        associationRulesMiner = MineAssociationRules(enhanced_orders, product_identifier=assRules_identifier)

        # Find frequent patterns
        print_banner("(B1) Find frequent patterns", 46, 45)
        frequentPatterns = associationRulesMiner.find_frequentPatterns(min_support=0.01, limit_dim_pattern=5)

        # Define the threshold for the minimum confidence; 'Product Type' uses a lower one.
        # Kept as a subtraction so the effective threshold value is unchanged.
        min_confidence = 0.7
        if assRules_identifier == 'Product Type':
            min_confidence -= 0.3

        # Generate simple association rules
        print_banner("(B2) Generate association rules", 44, 43, leading_newline=True)
        associationRules = associationRulesMiner.mine_associationRules(frequentPatterns, min_confidence, min_lift=2)

        # Generate the enhanced association rules (i.e., with linked collections)
        print_banner("(B3) Generate enhanced association rules", 39, 39, leading_newline=True)
        name_mapping = {'territori': 'Linked regions', 'esperienze': 'Linked experiences', 'ricette': 'Linked recipes'}
        enhanced_associationRules = associationRulesMiner.mine_enhancedAssociationRules(
            associationRules, grouped_collections, name_mapping)

        # Save the outcomes locally
        print_banner("(C) Saving the findings locally", 43, 44, leading_newline=True)
        associationRulesMiner.save_outcomes_asExcel(frequentPatterns, associationRules, enhanced_associationRules,
                                                    timestamp_col='Order Month', folder_path="_tmp",
                                                    version_name=VERSION_NAME)

        # Save remotely
        if SAVE_REMOTELY:
            print_banner("(D) Saving the findings remotely", 43, 43, leading_newline=True)
            associationRulesMiner.save_excel_remotely()