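# NOTE(review): the lines below appear to be residue from a web file viewer,
# not part of the script; they are kept as comments so the module parses.
# Your IP : 216.73.217.13
#
#
# Current Path : /home/deltalab/PMS/recommendations/recomsys-datapreparation-batch/
# Upload File :
# Current File : //home/deltalab/PMS/recommendations/recomsys-datapreparation-batch/app_inTrentinoOrders.py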

import os

from _library.data_utils.data_loader import load_orders, load_groupedCollectionNames
from components.extract_collections import ExtractCollections
from components.mine_assRules import MineAssociationRules

# When True, the findings of each mining run are also saved remotely
# (in addition to the local Excel export, which always runs).
SAVE_REMOTELY = True
# Label of this run: it prefixes the enhanced-orders file name and is passed
# as `version_name` when the findings are exported to Excel.
VERSION_NAME = 'OLDinTrentino'

def print_banner(title, left, right, *, blank_line_before=False):
    """Print a three-line section banner on stdout.

    The banner is a 120-dash rule, a line with *title* framed by dashes, and
    a second rule followed by an empty line.

    Args:
        title: Text shown in the middle line.
        left: Number of dashes printed before the title.
        right: Number of dashes printed after the title.
        blank_line_before: When True, an empty line precedes the banner.
    """
    rule = 120 * "-"
    print("\n" + rule if blank_line_before else rule)
    print("-" * left, title, "-" * right)
    print(rule, "\n")


def build_enhanced_file_name(file_name, version_name, keyword="enhanced"):
    """Return the name under which the enhanced copy of *file_name* is saved.

    When the name contains the "anonymized" marker, the keyword is inserted
    right before it; otherwise it is appended to the stem, just before the
    extension. The result is always prefixed with "<version_name>_".

    Args:
        file_name: Name of the original orders file.
        version_name: Label prefixed to the generated name.
        keyword: Tag that identifies the enhanced variant.

    Returns:
        The generated file name, e.g.
        'V_Orders_enhanced_anonymized.xlsx' for 'Orders_anonymized.xlsx'.
    """
    marker_pos = file_name.find("anonymized")
    if marker_pos != -1:
        tagged = f"{file_name[:marker_pos]}{keyword}_{file_name[marker_pos:]}"
    else:
        # splitext splits at the LAST dot and returns an empty extension when
        # there is none; searching for the first "." would mangle names such
        # as 'Orders.v2.xlsx' or extension-less names.
        stem, extension = os.path.splitext(file_name)
        tagged = f"{stem}_{keyword}{extension}"
    return f"{version_name}_{tagged}"


def main():
    """Run the batch: enhance the orders, then mine the association rules.

    Steps: (0) load the orders, (A) fill/map the categories and extract the
    linked collections, (B) for each product identifier mine frequent
    patterns, association rules and enhanced association rules, (C) save the
    findings locally and (D), when SAVE_REMOTELY is set, remotely.
    """
    # Load the orders
    print_banner("(0) Loading orders", 50, 50, blank_line_before=True)
    file_name = 'Orders_Set20_Giu22_anonymized.xlsx'
    orders = load_orders(order_file_name=file_name)

    # ------------------------------------------------------------------------------------------
    # ---------------------------- (A) Extract linked collections ------------------------------
    # ------------------------------------------------------------------------------------------
    print_banner("(A) Extract linked collections", 44, 44)

    # Initialize the extractor
    collection_extractor = ExtractCollections(orders, product_identifier='Title')

    # Preliminary steps to the categories
    print_banner("(A1) Handling categories", 47, 47)
    collection_extractor.filling_missing_categories(drop_nan_category_items=False)
    collection_extractor.mapping_toIndacoCategories(improve_with_manual_mapping=True, verbose=False)

    # Extract linked collections
    print_banner("(A2) Extracting the linked collections", 40, 40, blank_line_before=True)
    grouped_collections = load_groupedCollectionNames()
    enhanced_orders = collection_extractor.extract_linked_collections(grouped_collections)

    # Visualize linked regions
    #linked_regions = enhanced_orders.sort_values(by = ['Transaction id'], ascending = False)
    #linked_regions = linked_regions[['Title', 'SKU', 'Vendor', 'Linked regions']]
    #linked_regions = linked_regions.drop_duplicates(subset = ['SKU'])
    #linked_regions = linked_regions.sort_values(by = ['Title'])
    #print(linked_regions)
    #linked_regions.to_excel("./_tmp/Prodotti_e_territori.xlsx", index = False)

    # Save the enhanced dataframe,
    # e.g. as '<VERSION_NAME>_Orders_Set20_Giu22_enhanced_anonymized.xlsx'
    enhanced_file_name = build_enhanced_file_name(file_name, VERSION_NAME)
    # Honour SAVE_REMOTELY here as well, consistently with the upload of the
    # findings below, so that a run with the flag off saves nothing remotely.
    if SAVE_REMOTELY:
        collection_extractor.save_enhanced_orders_remotely(enhanced_file_name)

    # ------------------------------------------------------------------------------------------
    # ---------------------------- (B) Mine the association rules ------------------------------
    # ------------------------------------------------------------------------------------------
    print_banner("(B) Mine the association rules", 44, 44, blank_line_before=True)

    for ass_rules_identifier in ('SKU', 'Product Type'):

        # Initialize the miner
        rules_miner = MineAssociationRules(enhanced_orders, product_identifier=ass_rules_identifier)

        # Find frequent patterns
        print_banner("(B1) Find frequent patterns", 46, 45)
        frequent_patterns = rules_miner.find_frequentPatterns(min_support=0.01, limit_dim_pattern=5)

        # Define the threshold for the minimum confidence: it is lowered for
        # the 'Product Type' identifier.
        # NOTE: 0.7 - 0.3 evaluates to 0.39999999999999997 in floating point
        # (marginally below 0.4); the arithmetic is kept as it was.
        min_confidence = 0.7
        if ass_rules_identifier == 'Product Type':
            min_confidence -= 0.3

        # Generate simple association rules
        print_banner("(B2) Generate association rules", 44, 43, blank_line_before=True)
        association_rules = rules_miner.mine_associationRules(frequent_patterns, min_confidence, min_lift=2)

        # Generate the enhanced association rules (i.e., with linked collections)
        print_banner("(B3) Generate enhanced association rules", 39, 39, blank_line_before=True)
        name_mapping = {'territori': 'Linked regions', 'esperienze': 'Linked experiences', 'ricette': 'Linked recipes'}
        enhanced_association_rules = rules_miner.mine_enhancedAssociationRules(
            association_rules, grouped_collections, name_mapping)

        # Save the outcomes
        print_banner("(C) Saving the findings locally", 43, 44, blank_line_before=True)
        rules_miner.save_outcomes_asExcel(frequent_patterns, association_rules, enhanced_association_rules,
                                          timestamp_col='Order Month', folder_path="_tmp",
                                          version_name=VERSION_NAME)

        # Save remotely
        if SAVE_REMOTELY:
            print_banner("(D) Saving the findings remotely", 43, 43, blank_line_before=True)
            rules_miner.save_excel_remotely()


if __name__ == '__main__':
    main()