# Your IP : 216.73.217.13


# Current Path : /home/deltalab/PMS/recommendations/recommender-system-batch/components/
# Upload File :
# Current File : //home/deltalab/PMS/recommendations/recommender-system-batch/components/mongoDbService.py

import re

import pandas as pd
from cryptography.fernet import Fernet
from _library.data_utils import mongodb_utils
from numpy import isnan

class MongoDB:
    
    ENCRYPTING_KEY = None
        
    def __init__(self, hostname, port, user, password, db_name):
        """Connect to MongoDB and keep the resulting handle in ``self.db``.

        Every argument is forwarded unchanged, by keyword, to
        ``mongodb_utils.connect_to_mongodb``.
        """
        connection_args = {
            'hostname': hostname,
            'port': port,
            'user': user,
            'password': password,
            'db_name': db_name,
        }
        self.db = mongodb_utils.connect_to_mongodb(**connection_args)
        
    def getProductTypes(self):
        """Return the product categories read from the 'categories' collection.

        The raw query result is handed to ``mongodb_utils.extractCategories``;
        the shape of the returned value is whatever that helper produces.
        """
        category_cursor = self.db['categories'].find()
        return mongodb_utils.extractCategories(category_cursor)
    
    def getWarehouses(self):
        warehouses = pd.DataFrame(self.db['warehouses'].find({}, {'assignments' : 0})) 
        # NO assignments --> due to a temporary parsing error (year 275760??)
        
        return warehouses
    
    def getProducts(self, consider_delatedProducts = False, consider_unavailableProducts = False):
        """Return the products extracted from the 'products' collection.

        All product documents are queried and passed, together with the two
        flags, to ``mongodb_utils.extractProducts``.

        Args:
            consider_delatedProducts: forwarded as-is to the helper; presumably
                keeps deleted products when true (confirm in mongodb_utils).
            consider_unavailableProducts: forwarded as-is to the helper;
                presumably keeps unavailable products when true (confirm in
                mongodb_utils).
        """
        return mongodb_utils.extractProducts(
            self.db['products'].find(),
            consider_delatedProducts,
            consider_unavailableProducts,
        )
    
    def getProduct(self, item_sku, as_dict):
        item_sku = re.compile(item_sku, re.IGNORECASE)
        query = {"sku": item_sku}
       
        if as_dict:
            product = self.db['products'].find_one(query)
        else: 
            product = self.db['products'].find(query)
        return product
    
    def getCategoryInfo(self, category_name):
        
        # Retrieve all the categories
        categories_df = pd.DataFrame(self.db['categories'].find())
        retrive_italianName = lambda names: [name['label'] for name in names if name['code'] == 'it-IT'][0]
        categories_df['name'] = categories_df['name'].apply(retrive_italianName)
        
        # Retrieve the info of the category
        category_info = categories_df[categories_df['name'] == category_name].iloc[0]
        category_info = category_info.to_dict()
    
        return category_info
    
    def getSellers(self, sellerId = None):
        if sellerId:
            sellers = self.db['partners'].find_one({"_id": sellerId})
        else:
            sellers = pd.DataFrame(self.db['partners'].find())[['_id','companyName']]
        
        return sellers
    
    def _getRawOrders(self):
        orders = pd.DataFrame(self.db['orders'].find())
        
        return orders
    
    def __encryptCustomers(self, unique_customers):
        """Encrypt customer identifiers and give each a stable integer alias.

        Two mappings are maintained:

        * ``self.__encrypted_customers``: customer string -> Fernet token
          (kept on this instance);
        * ``MongoDB.anonymized_customers``: Fernet token -> integer id,
          starting at 1 (kept on the class).

        Both mappings are extended rather than rebuilt. Fernet tokens are
        randomised, so re-encrypting a customer that was already seen would
        produce a different token, and rebuilding the id mapping would hand
        the same customer a different integer on every call. Keeping the
        earlier entries means ids produced by one call stay valid after the
        next one, and can still be decrypted.

        Args:
            unique_customers: iterable of customer strings (each must support
                ``.encode()``).
        """
        # Lazily create the key shared by every instance of the class
        if not MongoDB.ENCRYPTING_KEY:
            MongoDB.ENCRYPTING_KEY = Fernet.generate_key()
        fernet = Fernet(MongoDB.ENCRYPTING_KEY)

        # Fetch (or create on first use) the two mappings
        try:
            encrypted_customers = self.__encrypted_customers
        except AttributeError:
            encrypted_customers = self.__encrypted_customers = {}
        anonymized_customers = getattr(MongoDB, 'anonymized_customers', None)
        if anonymized_customers is None:
            anonymized_customers = MongoDB.anonymized_customers = {}

        for customer in unique_customers:
            # Encrypt only the customers this instance has not seen before
            token = encrypted_customers.get(customer)
            if token is None:
                token = fernet.encrypt(customer.encode())
                encrypted_customers[customer] = token

            # Map the encryption onto a simple integer, in order of first appearance
            if token not in anonymized_customers:
                anonymized_customers[token] = len(anonymized_customers) + 1
        
    def decryptCustomers(self, customer_ids):
        
        # Get the encrypting key
        fernet = Fernet(MongoDB.ENCRYPTING_KEY)
        
        # Reverse the mapping functin
        anonymity_reverseMapping = dict(map(reversed, MongoDB.anonymized_customers.items()))
        
        # Map the encryped user names
        decryptedUsers = dict()
        for customer_id in customer_ids:
            if customer_id in anonymity_reverseMapping.keys():
                decryptedUsers[customer_id] = fernet.decrypt(anonymity_reverseMapping[customer_id]).decode() 

        return decryptedUsers          

    def getOrders(self):
        orders = self._getRawOrders()
        orders = orders[['_id', 'name', 'createdAt', 'items', 'customer']]
        
        # Simpify the ids
        orders['_id'] = orders['name'].apply(lambda name: name.lstrip('0'))
        orders = orders.drop(columns = ['name'])
        
        # Get the customer emails
        orders['customer'] = orders['customer'].apply(lambda attributes: attributes['email'])
        
        # Encrypt emails 
        self.__encryptCustomers(orders['customer'].unique())
        
         # Assign an anoymous mapping
        customer_ids = orders['customer'].apply(
            lambda customer: MongoDB.anonymized_customers[self.__encrypted_customers[customer]])
        orders.insert(2, column = 'customer_id', value = customer_ids)
        
        # EXAMPLE: Remap the customer id to its original form
        # --> orders['remapped customer'] = self.__decryptCustomers(orders['customer_id'])
        
        # Keep only the relevant attributes
        orders['items'] = orders['items'].apply(
            lambda items: [{'product_name': item['name'], 'sku': item['sku'], 'quantity': item['quantity']} 
                           for item in items])
        
        # Explode the attributes
        orders = orders.explode(column = "items", ignore_index = True)
        orders = orders.merge(orders['items'].apply(pd.Series), left_index = True, right_index = True)
        
        # Minor artefacts
        orders.drop(columns = ["items", "customer"], inplace = True)
        orders = orders.rename(columns = {'_id': 'Transaction id', 'createdAt': 'timestamp'})
        orders = orders.sort_values(by = ['Transaction id', 'product_name', 'quantity'], 
                                    ascending = [True, True, False]).reset_index(drop = True)
        return orders
        
    
    def getCustomerProfiles(self):
        
        # Retrive the collections
        userCollectionName = 'user' + 'profiles'
        user_profiles = pd.DataFrame(self.db[userCollectionName].find())
        
        if len(user_profiles) == 0:
            print(f"No user profiles found [collection: {userCollectionName}]")
            return dict()
        
        # Retrieve and map information
        user_profiles = mongodb_utils.enhanceCustomerProfiles(user_profiles, 
            platfromProducts =  self.getProducts(consider_delatedProducts = True, consider_unavailableProducts = True), 
            platfromCategories = self.getProductTypes()
        )
        
        # Anonymized users 
        self.__encryptCustomers(user_profiles['user_id'].unique())
        
        # Save the anonymized users
        user_profiles.index = user_profiles['user_id'].apply(
            lambda email: MongoDB.anonymized_customers[self.__encrypted_customers[email]])
        user_profiles = user_profiles.drop(columns = ['_id', 'user_id'])
        
        # Transform the dataframe as a dictionary
        user_profiles = user_profiles.to_dict(orient = 'index')
        
        # Delate empty attributes
        for user_id, user_profile in user_profiles.items():
            for att, values in user_profile.copy().items():
                if isinstance(values, float) and isnan(values):
                    user_profiles[user_id].pop(att)

        return user_profiles         
    
    def dropCollection(self, collection_name):
        if collection_name in self.db.list_collection_names():
            self.db.drop_collection(collection_name)
        else:
            print("Collection not found!")
            
    def writeNewDbItem(self, collection_name, item): 
        self.db[collection_name].insert_one(item)
    
    def setNewProductAttribute(self, reference_product, attribute_name, value):
        reference_product = re.compile(reference_product, re.IGNORECASE)
        query = {"sku": reference_product}
        
        set_value = {"$set": {attribute_name: value}}
        
        self.db['products'].update_one(query, set_value)