# -----------------------------------------------------------------------
#
# Copyright (c) 2019, NVIDIA CORPORATION. All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.


import numpy as np
import cupy as cp

def generate_negatives(neg_users, true_mat, item_range, sort=False, use_trick=False):
    """
    Generate negative samples for data augmentation.

    Draws one random item id in ``[0, item_range)`` for every entry of
    ``neg_users``. Unless ``use_trick`` is set, a drawn (user, item) pair is
    kept only when ``true_mat[user, item]`` is truthy; rejected users get a
    fresh draw until every one of them has an accepted item.

    Args:
        neg_users: 1-D array of user ids, one entry per negative sample wanted.
        true_mat: 2-D mask indexed as ``true_mat[user, item]``; truthy entries
            mark acceptable negatives. Ignored when ``use_trick`` is set.
            NOTE(review): the looked-up values are used directly as a mask,
            so this is presumably a boolean array -- confirm at the caller.
            Every user needs at least one truthy entry or the loop never ends.
        item_range: Exclusive upper bound of the drawn item ids.
        sort: When true, return the pairs ordered by user id. Has no effect
            together with ``use_trick``.
        use_trick: When true, skip the ``true_mat`` check entirely.

    Returns:
        Tuple ``(users, items)`` of equally long 1-D arrays. With
        ``use_trick`` the users are ``neg_users`` itself. Otherwise the pairs
        are grouped by the sampling round in which they were accepted (so not
        in input order) unless ``sort`` is requested.
    """
    # If using the shortcut, generate negative items without checking if the associated
    # user has interacted with it. Speeds up training significantly with very low impact
    # on accuracy.
    if use_trick:
        neg_items = cp.random.randint(0, high=item_range, size=neg_users.shape[0])
        return neg_users, neg_items

    # Nothing requested: the loop below would not run and concatenating an
    # empty list raises, so hand back empty results explicitly.
    if len(neg_users) == 0:
        return neg_users, cp.zeros((0,), dtype=int)

    # Otherwise, generate negative items, check if associated user has interacted with it,
    # then generate a new one if true
    accepted_users = []
    accepted_items = []
    pending = neg_users
    while len(pending) > 0:
        candidates = cp.random.randint(0, high=item_range, size=pending.shape[0])
        is_valid = true_mat[pending, candidates]
        accepted_users.append(pending[is_valid])
        accepted_items.append(candidates[is_valid])

        # Only the rejected users go through another round.
        pending = pending[cp.logical_not(is_valid)]

    neg_users = cp.concatenate(accepted_users)
    neg_items = cp.concatenate(accepted_items)

    if not sort:
        return neg_users, neg_items

    # A single argsort orders both arrays; no separate sort of the users needed.
    order = cp.argsort(neg_users)
    return neg_users[order], neg_items[order]


class DataGenerator:
    """
    Class to handle data augmentation.

    Stores the training and evaluation samples and augments them with
    randomly drawn negative items. Training arrays are kept as CuPy arrays on
    the GPU selected in ``__init__``; evaluation batches are NumPy arrays.

    Results are published through these attributes, all ``None`` until the
    corresponding ``prepare_*`` method has run:

    * ``eval_users``, ``eval_items``, ``dup_mask`` -- lists of flat NumPy
      arrays, one entry per evaluation batch (``prepare_eval_data``).
    * ``train_users_batches``, ``train_items_batches``,
      ``train_labels_batches`` -- lists of training batches
      (``prepare_train_data``).
    """
    def __init__(self,
                 seed,
                 hvd_rank,
                 num_users,                 # type: int
                 num_items,                 # type: int
                 neg_mat,                   # type: np.ndarray
                 train_users,               # type: np.ndarray
                 train_items,               # type: np.ndarray
                 train_labels,              # type: np.ndarray
                 train_batch_size,          # type: int
                 train_negative_samples,    # type: int
                 pos_eval_users,            # type: np.ndarray
                 pos_eval_items,            # type: np.ndarray
                 eval_users_per_batch,      # type: int
                 eval_negative_samples,     # type: int
                ):
        """
        Validate the inputs, bind the GPU and copy the training data to it.

        Args:
            seed: Seed for both the NumPy and the CuPy random generators.
            hvd_rank: Horovod rank; also used as the index of the GPU to use.
            num_users: Stored as ``self.num_users``; not used by this class.
            num_items: Exclusive upper bound for sampled item ids.
            neg_mat: Mask indexed ``[user, item]`` that is truthy where the
                item may serve as a negative for the user (only used for the
                evaluation data).
            train_users: User id of every training sample.
            train_items: Item id of every training sample.
            train_labels: Label of every training sample; samples with a
                falsy label get a freshly drawn item on each
                ``prepare_train_data`` call.
            train_batch_size: Number of samples per training batch.
            train_negative_samples: Stored only; not used by this class.
            pos_eval_users: User id of every positive evaluation sample.
            pos_eval_items: Item id of every positive evaluation sample.
            eval_users_per_batch: Number of users per evaluation batch.
            eval_negative_samples: Negatives drawn per evaluation user.

        Raises:
            ValueError: If the user and item arrays of the training or of the
                evaluation set differ in shape.
        """
        # Check input data
        if train_users.shape != train_items.shape:
            raise ValueError(
                "Train shapes mismatch! {} Users vs {} Items!".format(
                    train_users.shape, train_items.shape))
        if pos_eval_users.shape != pos_eval_items.shape:
            raise ValueError(
                "Eval shapes mismatch! {} Users vs {} Items!".format(
                    pos_eval_users.shape, pos_eval_items.shape))

        # Use GPU assigned to the horovod rank. This has to happen before
        # seeding: CuPy keeps one random generator per device and
        # cp.random.seed() only resets the generator of the current device.
        self.hvd_rank = hvd_rank
        cp.cuda.Device(self.hvd_rank).use()

        np.random.seed(seed)
        cp.random.seed(seed)

        self.num_users = num_users
        self.num_items = num_items
        self._neg_mat = neg_mat
        self._train_users = cp.array(train_users)
        self._train_items = cp.array(train_items)
        self._train_labels = cp.array(train_labels)
        self.train_batch_size = train_batch_size
        self._train_negative_samples = train_negative_samples
        self._pos_eval_users = pos_eval_users
        self._pos_eval_items = pos_eval_items
        self.eval_users_per_batch = eval_users_per_batch
        self._eval_negative_samples = eval_negative_samples

        # Eval data
        self.eval_users = None
        self.eval_items = None
        self.dup_mask = None

        # Training data
        self.train_users_batches = None
        self.train_items_batches = None
        self.train_labels_batches = None

    # Augment test data with negative samples
    def prepare_eval_data(self):
        """
        Build the evaluation batches and the matching duplicate mask.

        For every positive evaluation sample ``eval_negative_samples``
        verified negatives are drawn on the GPU. The result is arranged as a
        table with one row per user: the negatives first, the positive item in
        the last column. The table is then flattened and cut into batches of
        ``eval_users_per_batch`` rows, stored in ``self.eval_users``,
        ``self.eval_items`` and ``self.dup_mask``.

        ``dup_mask`` is 1.0 for every entry whose item id occurs again further
        right in the same row and 0.0 otherwise, so only the last occurrence
        of an item (the positive one, if it collides) stays unmarked.

        NOTE(review): rows are aligned by sorting the negatives by user id, so
        this presumably expects ``pos_eval_users`` to be sorted and free of
        duplicates -- confirm against the caller.
        """
        negatives_per_user = self._eval_negative_samples

        pos_eval_users = cp.array(self._pos_eval_users)
        neg_mat = cp.array(self._neg_mat)
        neg_eval_users_base = cp.repeat(pos_eval_users, negatives_per_user)

        # Generate negative samples
        test_u_neg, test_i_neg = generate_negatives(neg_users=neg_eval_users_base, true_mat=neg_mat,
                                                    item_range=self.num_items, sort=True, use_trick=False)

        # Move the results to the host; the device copies are dropped by the rebinding.
        test_u_neg = test_u_neg.reshape((-1, negatives_per_user)).get()
        test_i_neg = test_i_neg.reshape((-1, negatives_per_user)).get()

        # Free GPU memory to make space for Tensorflow. The pool can only
        # release blocks that are no longer referenced, so drop the remaining
        # device arrays first.
        del pos_eval_users, neg_mat, neg_eval_users_base
        cp.get_default_memory_pool().free_all_blocks()

        test_users = self._pos_eval_users.reshape((-1, 1))
        test_items = self._pos_eval_items.reshape((-1, 1))
        # Combine positive and negative samples
        test_users = np.concatenate((test_u_neg, test_users), axis=1)
        test_items = np.concatenate((test_i_neg, test_items), axis=1)

        num_rows, num_cols = test_users.shape

        # Generate duplicate mask
        ## Stable sort so that equal items keep their left-to-right order
        sorted_indices = np.argsort(test_items, axis=1, kind="stable")
        ## Inverse permutation: position of every original column in the sorted row
        sorted_order = np.argsort(sorted_indices, axis=1)
        sorted_items = np.sort(test_items, axis=1)
        ## An entry is a duplicate when its right neighbour in sorted order is equal
        dup_mask = np.equal(sorted_items[:, 0:-1], sorted_items[:, 1:])
        dup_mask = np.concatenate((dup_mask, np.zeros((num_rows, 1))), axis=1)
        ## Map the mask back from sorted order to the original column order
        r_indices = np.arange(num_rows).reshape((-1, 1))
        dup_mask = dup_mask[r_indices, sorted_order].astype(np.float32)

        # Reshape all to (-1) and split into chunks
        batch_size = self.eval_users_per_batch * num_cols
        split_indices = np.arange(batch_size, num_rows * num_cols, batch_size)
        self.eval_users = np.split(test_users.reshape(-1), split_indices)
        self.eval_items = np.split(test_items.reshape(-1), split_indices)
        self.dup_mask = np.split(dup_mask.reshape(-1), split_indices)

    # Augment training data with negative samples
    def prepare_train_data(self):
        """
        Re-draw the negative items, shuffle the training set and batch it.

        Every sample with a falsy label gets a new random item id (drawn with
        the unchecked shortcut, i.e. without consulting the negatives mask).
        Users, items and labels are then shuffled with one common permutation
        and cut into chunks of ``train_batch_size`` samples (the last chunk
        may be shorter), stored in ``self.train_users_batches``,
        ``self.train_items_batches`` and ``self.train_labels_batches``.
        """
        batch_size = self.train_batch_size

        is_neg = cp.logical_not(self._train_labels)

        # If there are no negative samples in the local portion of the training data, do nothing
        if cp.any(is_neg):
            # The shortcut never reads the verification matrix, so none is passed.
            self._train_users[is_neg], self._train_items[is_neg] = generate_negatives(
                self._train_users[is_neg], None, self.num_items, use_trick=True
            )

        # One permutation for all three arrays keeps the samples aligned.
        shuffled_order = cp.random.permutation(self._train_users.shape[0])
        self._train_users = self._train_users[shuffled_order]
        self._train_items = self._train_items[shuffled_order]
        self._train_labels = self._train_labels[shuffled_order]

        # Manually create batches
        split_indices = np.arange(batch_size, self._train_users.shape[0], batch_size)
        self.train_users_batches = np.split(self._train_users, split_indices)
        self.train_items_batches = np.split(self._train_items, split_indices)
        self.train_labels_batches = np.split(self._train_labels, split_indices)