# Current Path : /home/deltalab/PMS/recommendations/recomsys-datapreparation-batch/components/
# Current File : //home/deltalab/PMS/recommendations/recomsys-datapreparation-batch/components/mongoDbService.py
from _library.data_utils import mongoDB_utils
import re
import pandas as pd
import numpy as np
class MongoDB:
    """Read helpers around the collections of one MongoDB database.

    The connection handle is obtained through
    ``mongoDB_utils.connect_to_mongodb`` and stored in ``self.db``; every
    method reads one of the ``categories``, ``products``, ``partners`` or
    ``orders`` collections from it.
    """

    # Per-item fields kept by getOrders(), in output order.
    _ORDER_ITEM_COLUMNS = ("product_name", "sku", "quantity")
    # Full column layout of the frame returned by getOrders().
    _ORDER_COLUMNS = ("Transaction id", "timestamp") + _ORDER_ITEM_COLUMNS

    def __init__(self, hostname, port, username, password, db_name):
        """Connect to the database and keep the handle in ``self.db``.

        All arguments are forwarded unchanged to
        ``mongoDB_utils.connect_to_mongodb``.
        """
        self.db = mongoDB_utils.connect_to_mongodb(
            hostname=hostname,
            user=username,
            password=password,
            db_name=db_name,
            port=port,
        )

    def getProductTypes(self):
        """Return the ``categories`` collection as processed by
        ``mongoDB_utils.extractCategories``."""
        raw_types = self.db["categories"].find()
        return mongoDB_utils.extractCategories(raw_types)

    def getProducts(self, consider_delatedProducts, consider_unavailableProducts):
        """Return the ``products`` collection as processed by
        ``mongoDB_utils.extractProducts``.

        Both flags are passed straight through to the helper (presumably they
        control whether deleted / unavailable products are kept -- confirm
        against ``mongoDB_utils``).
        """
        raw_products = self.db["products"].find()
        return mongoDB_utils.extractProducts(
            raw_products,
            consider_delatedProducts,
            consider_unavailableProducts,
            verbose=False,
        )

    def getProduct(self, item_sku, as_dict):
        """Look up product(s) whose ``sku`` matches ``item_sku``, ignoring case.

        Args:
            item_sku: Text compiled with ``re.compile(..., re.IGNORECASE)``
                and used as the ``sku`` filter.
            as_dict: When truthy, return the result of ``find_one`` (a single
                document or ``None``); otherwise return the ``find`` cursor.
        """
        # NOTE(review): item_sku is compiled verbatim, so it acts as an
        # unanchored regular expression: "AB-1" also matches "AB-10", and
        # regex metacharacters in a SKU are interpreted rather than matched
        # literally. Left unchanged because callers may rely on it; use
        # re.escape() plus ^...$ anchors if an exact match is intended.
        sku_pattern = re.compile(item_sku, re.IGNORECASE)
        query = {"sku": sku_pattern}
        products = self.db["products"]
        if as_dict:
            return products.find_one(query)
        return products.find(query)

    def getSellers(self, sellerId=None):
        """Return one seller document, or every seller as a DataFrame.

        Args:
            sellerId: ``_id`` of the partner to fetch. Any falsy value
                (``None``, ``""``, ``0``) selects the "all sellers" branch.

        Returns:
            The ``find_one`` result when ``sellerId`` is truthy, otherwise a
            ``pandas.DataFrame`` built from the whole ``partners`` collection.
        """
        partners = self.db["partners"]
        if sellerId:
            return partners.find_one({"_id": sellerId})
        return pd.DataFrame(partners.find())

    def _getRawOrders(self):
        """Load the whole ``orders`` collection into a DataFrame."""
        return pd.DataFrame(self.db["orders"].find())

    @staticmethod
    def _slimOrderItems(items):
        """Reduce each order item to the fields listed in _ORDER_ITEM_COLUMNS.

        A value that is not a list/tuple (e.g. NaN for an order stored
        without an ``items`` field) is treated as "no items".
        """
        if not isinstance(items, (list, tuple)):
            return []
        return [
            {
                "product_name": item["name"],
                "sku": item["sku"],
                "quantity": item["quantity"],
            }
            for item in items
        ]

    def getOrders(self):
        """Return the orders flattened to one row per ordered item.

        Returns:
            A ``pandas.DataFrame`` with the columns ``Transaction id`` (the
            order ``name`` with leading zeros stripped), ``timestamp`` (the
            order ``createdAt``), ``product_name``, ``sku`` and ``quantity``,
            sorted by transaction id, product name and descending quantity.
            An order without items contributes a single row whose item
            columns are NaN. An empty collection yields an empty frame with
            the same columns.
        """
        raw_orders = self._getRawOrders()
        if raw_orders.empty:
            # pd.DataFrame([]) has no columns at all; selecting the order
            # fields from it would raise KeyError.
            return pd.DataFrame(columns=list(self._ORDER_COLUMNS))

        # Build a fresh frame instead of mutating a column slice of the raw
        # one (avoids SettingWithCopyWarning).
        orders = pd.DataFrame(
            {
                # Simplify the ids: the human-readable order name replaces _id.
                "_id": raw_orders["name"].map(lambda name: name.lstrip("0")),
                "createdAt": raw_orders["createdAt"],
                # Keep only the relevant item attributes.
                "items": raw_orders["items"].map(self._slimOrderItems),
            }
        )

        # One row per item; an empty item list explodes to a single NaN row.
        orders = orders.explode(column="items", ignore_index=True)

        # Expand the item dicts into columns in one pass. NaN placeholders
        # become all-NaN rows rather than a stray column named 0.
        item_rows = [
            item if isinstance(item, dict) else {} for item in orders["items"]
        ]
        item_frame = pd.DataFrame(
            item_rows,
            index=orders.index,
            columns=list(self._ORDER_ITEM_COLUMNS),
        )
        orders = pd.concat([orders.drop(columns=["items"]), item_frame], axis=1)

        orders = orders.rename(
            columns={"_id": "Transaction id", "createdAt": "timestamp"}
        )
        orders = orders.sort_values(
            by=["Transaction id", "product_name", "quantity"],
            ascending=[True, True, False],
        )
        return orders.reset_index(drop=True)