# TestDataGenerator.py

import csv
import itertools
import xlsxwriter
import errno
import os
import logging
import faker
from random import sample, randint
import baangt.base.GlobalConstants as GC
import re
from openpyxl import load_workbook
import sys
import pandas as pd
from CloneXls import CloneXls
import json

logger = logging.getLogger("pyC")

class Writer:
    """
    This class is used to update an existing excel file.
    First it opens the file in python; we can then perform multiple writes and, once everything is updated, call the
    save method to write the changes back to disk. Keeping the workbook open between writes saves time when updating
    excel files.
    """
    def __init__(self, path):
        self.path = path
        self.workbook = load_workbook(path)

    def write(self, row, data, sht):
        # Update the values using row and column number.
        # Note: we are using openpyxl, so row & column indices start from 1 instead of 0.
        column = 0
        sheet = self.workbook[sht]
        headers = next(sheet.rows)
        for header in headers:  # checks if a usecount header is present in the sheet
            if "usecount" in str(header.value).lower():
                column = headers.index(header) + 1
        if column:
            sheet.cell(row, column).value = data

    def save(self):
        # Call this method once all updates are written, to save the file.
        self.workbook.save(self.path)
        self.workbook.close()
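
# Minimal usage sketch for Writer (illustrative only; "data.xlsx" and "Sheet1"
# are placeholder names, and the sheet is assumed to have a usecount column):
#
#     writer = Writer("data.xlsx")
#     writer.write(2, 1, "Sheet1")   # set the usecount cell of row 2 to 1
#     writer.write(3, 4, "Sheet1")   # several writes against the open workbook
#     writer.save()                  # a single save persists all updates
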
class TestDataGenerator:
    """
    TestDataGenerator class is used to create a TestData file from a raw excel file containing all possible values.

    Formats accepted in the input excel file:
    1. Value = ``<value>``
    2. List of values = ``[<value1>,<value2>]``
    3. Range = ``<start>-<end>,<step>``
    4. Random = ``RND_[list]``
    5. Random from range = ``RND_<start>-<end>,<step>``
    6. List of headers = ``[<title1>, <title2>, <title3>]``
    7. Faker prefix = ``FKR_(<type>, <locale>, <number_of_data>)``
    8. RRD prefix = ``RRD_(<sheetName>,<TargetData>,[<Header1>:[<Value1>],<Header2>:[<Value1>,<Value2>]])``
    9. RRE prefix = ``RRE_(<fileName>,<sheetName>,<TargetData>,[<Header1>:[<Value1>],<Header2>:[<Value1>,<Value2>]])``
    10. Renv prefix = ``RENV_(<environment_variable>,<default_value>)``

    :param rawExcelPath: Input path of the xlsx file containing the raw data.
    :param sheetName: Name of the sheet where the base data is located.
    :method write: Writes the final processed data to an excel/csv file.
    """
    def __init__(self, rawExcelPath=GC.TESTDATAGENERATOR_INPUTFILE, sheetName="",
                 from_handleDatabase=False, noUpdate=False):
        self.path = os.path.abspath(rawExcelPath)
        self.sheet_name = sheetName
        if not os.path.isfile(self.path):
            raise FileNotFoundError(errno.ENOENT, os.strerror(errno.ENOENT), self.path)
        self.sheet_dict, self.raw_data_json = self.read_excel(self.path, self.sheet_name)
        self.rre_sheets = {}
        self.isUsecount = {}
        self.remove_header = []
        self.usecount_dict = {}  # keeps the usecount limit records and verifies that none of the data exceeds its limit
        self.done = {}
        self.noUpdateFiles = noUpdate
        self.writers = {}
        if not from_handleDatabase:
            self.processed_datas = self.__process_data(self.raw_data_json)
            self.headers = [x for x in list(self.processed_datas[0].keys()) if x not in self.remove_header]
            self.headers = [x for x in self.headers if 'usecount' not in x.lower()]
            self.final_data = self.__generateFinalData(self.processed_datas)
            if self.isUsecount:
                if not self.noUpdateFiles:
                    self.save_usecount()  # saving the source input file once everything is done

    def write(self, OutputFormat=GC.TESTDATAGENERATOR_OUTPUT_FORMAT, batch_size=0, outputfile=None):
        """
        Writes the generated data to the output file.

        :param OutputFormat: "xlsx" or "csv"
        :param batch_size: Number of rows to be written to the output file. Rows are selected randomly.
        :param outputfile: Name and path of the output file.
        :return:
        """
        if OutputFormat.lower() == "xlsx":
            if outputfile is None:
                outputfile = GC.TESTDATAGENERATOR_OUTPUTFILE_XLSX
            self.__write_excel(batch_size=batch_size, outputfile=outputfile)
        elif OutputFormat.lower() == "csv":
            if outputfile is None:
                outputfile = GC.TESTDATAGENERATOR_OUTPUTFILE_CSV
            self.__write_csv(batch_size=batch_size, outputfile=outputfile)
        else:
            logger.debug("Incorrect file format")

    def __write_excel(self, outputfile=GC.TESTDATAGENERATOR_OUTPUTFILE_XLSX, batch_size=0):
        """
        Writes the TestData file with the final processed data.

        :param outputfile: Name and path of the output file.
        :param batch_size: Number of rows to be randomly selected and written to the output file.
        :return: None
        """
        if batch_size > 0:
            if len(self.final_data) > batch_size:
                data_lis = sample(self.final_data, batch_size)
            else:
                data_lis = self.final_data
                logger.debug("Total final data is smaller than batch size.")
        else:
            data_lis = self.final_data
        with xlsxwriter.Workbook(outputfile) as workbook:
            worksheet = workbook.add_worksheet()
            worksheet.write_row(0, 0, self.headers)
            for row_num, data in enumerate(data_lis):
                worksheet.write_row(row_num + 1, 0, data)

    def __write_csv(self, outputfile=GC.TESTDATAGENERATOR_OUTPUTFILE_CSV, batch_size=0):
        """
        Writes the final data to csv.

        :param outputfile: Name and path of the output file.
        :param batch_size: Number of rows to be randomly selected and written to the output file.
        :return:
        """
        if batch_size > 0:
            if len(self.final_data) > batch_size:
                data_lis = sample(self.final_data, batch_size)
            else:
                data_lis = self.final_data
        else:
            data_lis = self.final_data
        with open(outputfile, 'w', newline='\n', encoding='utf-8-sig') as file:
            fl = csv.writer(file)
            fl.writerow(self.headers)
            for dt in data_lis:
                fl.writerow(list(dt))

    def __generateFinalData(self, processed_data):
        """
        This method does the final processing of ``processed_data``. ``processed_data`` is a list of dictionaries;
        each dictionary is a row of the input file, already converted into python types as per the requirements.

        The first loop iterates over ``processed_data``.
        The second loop iterates over one dictionary (row); each key:value pair of that dictionary is
        header:processed_data. The method first checks the data type of the value:
        If it is a string, it is wrapped in a list (i.e. ["string"]).
        If it is a tuple, it is data with a prefix, so it is sent to the ``__prefix_data_processing`` method for
        further processing.
        Otherwise the value is already of type list.
        All these lists are collected in one list (which can be treated as a row). This list contains the values of
        the cells in evaluated form, i.e. ranges are converted to lists, strings are wrapped in lists & lists are
        already lists. To generate all possible combinations from it we use the ``itertools`` module.
        Once this list of lists containing all possible combinations is created, the
        ``__update_prefix_data_in_final_list`` method is called. That method inserts the processed prefix data along
        with the data from the combinations list into the final list, with every value in its correct position.
        Finally it returns the list of lists, completely processed and ready to be written to the output file.

        :param processed_data:
        :return: Final_data_list
        """
        final_data = []
        for lis in processed_data:
            index = {}
            data_lis = []
            for key in lis:
                if isinstance(lis[key], str):
                    data = [lis[key]]
                elif isinstance(lis[key], tuple):
                    if len(lis[key]) > 0:
                        self.__prefix_data_processing(lis, key, index)
                        continue
                    else:
                        data = ['']
                else:
                    data = lis[key]
                data_lis.append(data)
            datas = list(itertools.product(*data_lis))
            self.__update_prefix_data_in_final_list(datas, index, final_data)
        logger.info(f"Total generated data = {len(final_data)}")
        return final_data

    def __update_prefix_data_in_final_list(self, data_list, dictionary, final_list):
        """
        This method inserts the data from ``dictionary`` into ``final_list``, so it can then be written to the output.

        ``data_list`` is the list where all possible combinations generated from the input lists and ranges are stored.
        ``dictionary`` is where the prefixed data is stored along with its index value (used to place the data).
        ``final_list`` is the list where the final data is stored after merging values from data_list and dictionary.

        The first loop iterates through data_list, which is a list of lists (you can also take it as a list of rows).
        The second loop goes through dictionary and checks the data type of each value. Pairs inside dictionary are of
        the form ``index: value``, where index is the position the value was picked from, so it is used to place the
        data back in its correct position.

        List:
        =====
        If the value is a list, then it is data with the ``FKR_`` prefix and a data count of 0 (i.e. create new fake
        data for every output row). We create a faker instance as per the input and generate fake data for every row,
        inserting it at the position of index (the key of the dictionary).

        Dictionary:
        ==========
        If the value is not a list, we check whether it is of type dict. If yes, then this is data with the
        ``RRD_`` prefix and we have to select random data here, so we start looping through this dictionary.
        (Remember, this is the third loop.) This dictionary contains header:value pairs (TargetDatas from matched
        data). On every iteration it first checks whether the same header is already stored in the ``done``
        dictionary. If yes, it gets that value from ``done``. It then builds a new list from the TargetData list
        containing only the data with the same value for that header as stored in ``done``,
        i.e. if the matching header has value x, then from the TargetDatas list only data where header == x is
        considered for the random pick.
        The random value is then selected from that list.
        If none of the headers was processed before (for the same row), it picks random data from the list and
        stores the header:value pair in the ``done`` dictionary, so it can be used in the checks above.
        It also checks whether the ``self.headers`` list contains all the headers present in the randomly selected
        data. If not, the missing headers are added there.
        At last we look up the position of each header inside the self.headers list, insert the value at the same
        position, and append the row to the final_data list.

        Tuple:
        ======
        If the type is tuple, we simply pick a random value from it and insert it at the position of
        index (the dictionary key for the current value).

        :param data_list:
        :param dictionary:
        :param final_list:
        :return: None
        """
        for data in data_list:
            success = True  # when usecount data is present, tracks whether the whole row may still be added to the final output
            data = list(data)
            done = {}
            for ind in dictionary:
                if isinstance(dictionary[ind], list):
                    fake = faker.Faker(dictionary[ind][2])
                    data.insert(ind, getattr(fake, dictionary[ind][1])())
                else:
                    if isinstance(dictionary[ind][0], dict):
                        sorted_data = False
                        for header in dictionary[ind][0]:
                            if header in done:
                                match = done[header]
                                sorted_data = [x for x in dictionary[ind] if x[header] == match]
                                break
                        if not sorted_data:
                            sorted_data = list(dictionary[ind])
                        remove_data = []  # used to remove data that has reached its usecount limit
                        for dtt in sorted_data:  # checks whether data has reached the usecount limit and marks it for removal
                            if self.usecount_dict[repr(dtt)]['limit'] == 0:
                                continue
                            if self.usecount_dict[repr(dtt)]['use'] >= self.usecount_dict[repr(dtt)]['limit']:
                                remove_data.append(dtt)
                        for dtt in remove_data:  # removing data from the main data list
                            logger.debug(f"UseCount limit of {dtt} is exceeded : {str(self.usecount_dict[repr(dtt)]['limit'])}")
                            sorted_data.remove(dtt)
                        if len(sorted_data) == 0:  # if all candidates reached their usecount, the whole row is skipped in the final output
                            success = False
                            break
                        elif len(sorted_data) == 1:
                            data_to_insert = sorted_data[0]
                        else:
                            data_to_insert = sorted_data[randint(0, len(sorted_data) - 1)]
                        self.usecount_dict[repr(data_to_insert)]['use'] += 1
                        for keys in data_to_insert:
                            if "usecount" in keys.lower():  # removing the usecount header from the headers in the final output
                                self.update_usecount_in_source(data_to_insert)
                                continue
                            if keys not in self.headers:
                                self.headers.append(keys)
                            if keys not in done:
                                data.insert(self.headers.index(keys), data_to_insert[keys])
                                done[keys] = data_to_insert[keys]
                    else:
                        data_to_insert = dictionary[ind][randint(0, len(dictionary[ind]) - 1)]
                        data.insert(ind, data_to_insert)
            if success:
                final_list.append(data)

    def __prefix_data_processing(self, dic, key, dictionary: dict):
        """
        This method processes the data with prefixes.

        ``dic`` is the dictionary where all the data currently in the final process is stored.
        ``key`` is the header of the current data, used to look the data up.
        ``dictionary`` is where the values are inserted after processing.

        First the method checks the first value of the tuple.
        If it is ``Faker``, it continues processing and checks the 4th value of the tuple.
        If the 4th value (which determines the number of fake data items to generate and store in a list)
        is ``0``, the method stores the values as they are in a list, because ``0`` means we have to generate
        new fake data for every output row, which is done later.
        If it is greater than ``0``, the method creates a tuple with the given number of fake data items and stores it.
        (If no number is given, the default is 5.)
        If the first value is not ``Faker``, no processing is done.
        Finally the data is inserted into the dictionary.

        :param dic:
        :param key:
        :param dictionary:
        :return:
        """
        ltuple = dic[key]
        if ltuple[0] == "Faker":
            fake = faker.Faker(ltuple[2])
            fake_lis = []
            if len(ltuple) == 4:
                if int(ltuple[3]) == 0:
                    dictionary[list(dic.keys()).index(key)] = list(ltuple)
                    return True
                else:
                    for x in range(int(ltuple[3])):
                        fake_lis.append(getattr(fake, ltuple[1])())
            else:
                for x in range(5):
                    fake_lis.append(getattr(fake, ltuple[1])())
            dictionary[list(dic.keys()).index(key)] = tuple(fake_lis)
            return True
        else:
            dictionary[list(dic.keys()).index(key)] = ltuple
            return True

    def __process_data(self, raw_json):
        """
        This method processes all the raw unprocessed data read from the excel file.

        It first sends the header to ``data_generators`` so that, if it is a list, it is converted into individual
        headers.
        Then it processes the values using ``data_generators``.
        It then checks the returned iterable type: if it is a tuple, the input value had a prefix, so it further
        checks whether the tuple contains a dict. If True, the prefix was RRD_. In that case we have to deal with
        the original header of the input value, because if the original value's header is not in the TargetData, that
        header would contain no value in the output file and may cause errors too. So the header is added to the
        ``self.remove_header`` list, which is later used to remove it from the main header list.
        Finally it returns a list of dictionaries. Each dictionary contains the processed data of one row of the
        input file. Processed data is the raw data converted into python data types and iterables; ranges are
        converted into lists.

        :param raw_json:
        :return:
        """
        processed_datas = []
        raw_json = json.loads(raw_json.to_json(orient="records"))
        for raw_data in raw_json:
            if not list(raw_data.values())[0]:
                continue
            processed_data = {}
            for key in raw_data:
                keys = self.data_generators(key)
                for ke in keys:
                    processed_data[ke] = self.data_generators(raw_data[key])
                    if isinstance(processed_data[ke], tuple) and len(processed_data[ke]) > 0:
                        if isinstance(processed_data[ke][0], dict):
                            if ke not in processed_data[ke][0]:
                                self.remove_header.append(ke)
            processed_datas.append(processed_data)
        return processed_datas

    def data_generators(self, raw_data_old):
        """
        This method first sends the data to the ``__raw_data_string_process`` method to split the data and remove
        unwanted spaces.
        It then uses other methods to convert all the different data types from strings into their respective
        python data types, i.e. string lists into python lists, etc.
        According to the prefix of the data and the assigned data_type it converts them:
        Simple lists and strings are converted into ``list`` type.
        Data with a prefix is converted into ``tuple`` type, which is helpful in distinguishing it later. If the
        prefix is ``FKR_``, the prefix name is inserted as the first value of the tuple to help further processing.
        Finally it returns the iterable for further processing.

        :param raw_data_old:
        :return: List or Tuple containing the necessary data
        """
        raw_data, prefix, data_type = self.__raw_data_string_process(raw_data_old)
        if len(raw_data) <= 1:
            return [""]
        if raw_data[0] == "[" and raw_data[-1] == "]" and prefix == "":
            processed_datas = self.__splitList(raw_data)
            processed_datas = data_type(processed_datas)
        elif prefix == "Faker":
            processed_datas = [data.strip() for data in raw_data[1:-1].split(",")]
            processed_datas.insert(0, "Faker")
            processed_datas = data_type(processed_datas)
        elif prefix == "Rrd":
            first_value = raw_data[1:-1].split(',')[0].strip()
            second_value = raw_data[1:-1].split(',')[1].strip()
            if second_value[0] == "[":
                second_value = ','.join(raw_data[1:-1].split(',')[1:]).strip()
                second_value = second_value[:second_value.index(']') + 1]
                third_value = [x.strip() for x in ']'.join(raw_data[1:-1].split(']')[1:]).split(',')[1:]]
            else:
                third_value = [x.strip() for x in raw_data[1:-1].split(',')[2:]]
            evaluated_list = ']],'.join(','.join(third_value)[1:-1].strip().split('],')).split('],')
            if evaluated_list[0] == "":
                evaluated_dict = {}
            else:
                evaluated_dict = {
                    splited_data.split(':')[0]: self.__splitList(splited_data.split(':')[1]) for splited_data in evaluated_list
                }
            if second_value[0] == "[" and second_value[-1] == "]":
                second_value = self.__splitList(second_value)
            try:
                processed_datas = self.__processRrdRre(first_value, second_value, evaluated_dict)
                processed_datas = data_type(processed_datas)
            except KeyError:
                sys.exit(f"Please check that the source files contain all the headers mentioned in: {raw_data_old}")
        elif prefix == "Rre":
            file_name = raw_data[1:-1].split(',')[0].strip()
            first_value = raw_data[1:-1].split(',')[1].strip()
            second_value = raw_data[1:-1].split(',')[2].strip()
            if second_value[0] == "[":
                second_value = ','.join(raw_data[1:-1].split(',')[2:]).strip()
                second_value = second_value[:second_value.index(']') + 1]
                third_value = [x.strip() for x in ']'.join(raw_data[1:-1].split(']')[1:]).split(',')[1:]]
            else:
                third_value = [x.strip() for x in raw_data[1:-1].split(',')[3:]]
            evaluated_list = ']],'.join(','.join(third_value)[1:-1].strip().split('],')).split('],')
            if evaluated_list[0] == "":
                evaluated_dict = {}
            else:
                evaluated_dict = {
                    splited_data.split(':')[0]: self.__splitList(splited_data.split(':')[1]) for splited_data in evaluated_list
                }
            if second_value[0] == "[" and second_value[-1] == "]":
                second_value = self.__splitList(second_value)
            try:
                processed_datas = self.__processRrdRre(first_value, second_value, evaluated_dict, filename=file_name)
            except KeyError:
                sys.exit(f"Please check that the source files contain all the headers mentioned in: {raw_data_old}")
            processed_datas = data_type(processed_datas)
        elif prefix == "Renv":
            processed_datas = self.get_env_variable(raw_data)
        elif "-" in raw_data:
            raw_data = raw_data.split('-')
            start = raw_data[0].strip()
            end = raw_data[1].strip()
            step = 1
            if "," in end:
                raw_data = end.split(",")
                end = raw_data[0].strip()
                step = raw_data[1].strip()
            processed_datas = [x for x in range(int(start), int(end) + 1, int(step))]
            processed_datas = data_type(processed_datas)
        else:
            processed_datas = [raw_data.strip()]
            processed_datas = data_type(processed_datas)
        return processed_datas

    def __processRrdRre(self, sheet_name, data_looking_for, data_to_match: dict, filename=None):
        if filename:
            file_name = ".".join(filename.split(".")[:-1])
            file_extension = filename.split(".")[-1]
            file = file_name + "_baangt" + "." + file_extension
            if file not in self.rre_sheets:
                logger.debug(f"Creating clone file of: {filename}")
                if not self.noUpdateFiles:
                    filename = CloneXls(filename).update_or_make_clone()
                self.rre_sheets[filename] = {}
            filename = file
            if sheet_name in self.rre_sheets[filename]:
                df = self.rre_sheets[filename][sheet_name]
            else:
                df = pd.read_excel(filename, sheet_name, dtype=str)
                self.rre_sheets[filename][sheet_name] = df
        else:
            df = self.sheet_dict[sheet_name]
            if self.path not in self.rre_sheets:
                self.rre_sheets[self.path] = {}
            if sheet_name not in self.rre_sheets[self.path]:
                self.rre_sheets[self.path][sheet_name] = df
        df1 = df.copy()
        for key, value in data_to_match.items():
            if not isinstance(value, list):
                value = [value]
            df1 = df1.loc[df1[key].isin(value)]
        data_lis = []
        if isinstance(data_looking_for, str):
            data_looking_for = data_looking_for.split(",")
        key_name = repr(sheet_name) + repr(data_looking_for) + repr(data_to_match) + repr(filename)
        if key_name in self.done:
            logger.debug("Data gathered from previously saved data.")
            return self.done[key_name]
        usecount, limit, usecount_header = self.check_usecount(df.columns.values.tolist())
        if not filename:
            if self.path not in self.isUsecount:
                self.isUsecount[self.path] = usecount_header
            if not self.isUsecount[self.path]:
                self.isUsecount[self.path] = usecount_header
        else:
            if filename not in self.isUsecount:
                self.isUsecount[filename] = usecount_header
            if not self.isUsecount[filename]:
                self.isUsecount[filename] = usecount_header
        for tup in df1.itertuples():
            data = dict(tup._asdict())
            if usecount_header:
                try:
                    used_limit = int(data[usecount_header])
                except (ValueError, TypeError):
                    used_limit = 0
            else:
                used_limit = 0
            if data_looking_for[0] == "*":
                index = data["Index"]
                del data["Index"]
                data_lis.append(data)
                self.usecount_dict[repr(data)] = {
                    "use": used_limit, "limit": limit, "index": index,
                    "sheet_name": sheet_name, "file_name": filename
                }
            else:
                dt = {keys: data[keys] for keys in data_looking_for}
                data_lis.append(dt)
                self.usecount_dict[repr(dt)] = {
                    "use": used_limit, "limit": limit, "index": data["Index"],
                    "sheet_name": sheet_name, "file_name": filename
                }
        if len(data_lis) == 0:
            logger.info(f"No data matching: {data_to_match}")
            sys.exit(f"No data matching: {data_to_match}")
        logger.debug("New data gathered.")
        self.done[key_name] = data_lis
        return data_lis

    def __raw_data_string_process(self, raw_string):
        """
        Returns ``string, prefix, data_type``, which are later used to decide which process to perform on the string.
        Their in-depth explanations are written in the methods where they are used.

        This method processes the value string of every cell in the input sheet.
        It first converts floats into strings, since by default xlrd converts ints to floats.
        Then it checks whether the string is longer than 4 characters. If not, it simply returns the values;
        otherwise it processes further.
        If the string has more than 4 characters, this method checks whether the fourth character is "_". If not, it
        returns the values; otherwise there is a prefix in the string and processing continues.
        It then splits the prefix from the value and defines the data_type according to the string.
        If there is no matching prefix, the data type will be list; otherwise it will be tuple.

        :param raw_string:
        :return: String of values, prefix, data_type
        """
        if isinstance(raw_string, float):
            raw_string = int(raw_string)
        raw_string = str(raw_string).strip()
        prefix = ""
        if len(raw_string) > 4:
            if raw_string[3] == "_":
                if raw_string[:4].lower() == "rnd_":  # Random
                    raw_string = raw_string[4:]
                    data_type = tuple
                elif raw_string[:4].lower() == "fkr_":
                    prefix = "Faker"
                    raw_string = raw_string[4:]
                    data_type = tuple
                elif raw_string[:4].lower() == "rrd_":  # Remote Random (Remote = other sheet)
                    prefix = "Rrd"
                    raw_string = self.__process_rrd_string(raw_string)
                    raw_string = raw_string[4:]
                    data_type = tuple
                elif raw_string[:4].lower() == "rre_":  # Remote Random (Remote = other file)
                    prefix = "Rre"
                    raw_string = self.__process_rre_string(raw_string)
                    raw_string = raw_string[4:]
                    data_type = tuple
                else:
                    data_type = list
            else:
                if raw_string[:5].lower() == "renv_":
                    prefix = "Renv"
                    raw_string = raw_string[5:]
                data_type = list
        else:
            data_type = list
        return raw_string, prefix, data_type

    def get_str_sheet(self, excel, sheet):
        columns = excel.parse(sheet).columns
        converters = {column: str for column in columns}
        data = excel.parse(sheet, converters=converters)
        return data

    def read_excel(self, path, sheet_name="", return_json=False):
        """
        This method reads the input excel file.
        It reads all the sheets inside this excel file and creates a dictionary of dictionaries containing all the
        data of every sheet, i.e. {"sheetName": {headers**: data**}}.
        It also looks for a base sheet whose name must be given when creating the instance. If no sheet name is
        given, the first sheet of the file is taken as the base sheet.
        Finally it returns a dictionary containing sheetName:data for all sheets & the dictionary of the base sheet.

        :param path: Path to the raw data xlsx file.
        :param sheet_name: Name of the base sheet where the main input data is located. Default is the first sheet.
        :return: Dictionary of all sheets and data, dictionary of the base sheet.
        """
        wb = pd.ExcelFile(path)
        sheet_lis = wb.sheet_names
        sheet_df = {}
        for sheet in sheet_lis:
            sheet_df[sheet] = self.get_str_sheet(wb, sheet)
            sheet_df[sheet].fillna("", inplace=True)
        if return_json:
            for df in sheet_df.keys():
                sheet_df[df] = json.loads(sheet_df[df].to_json(orient="records"))
        if sheet_name == "":
            base_sheet = sheet_df[sheet_lis[0]]
        else:
            assert sheet_name in sheet_df, f"Excel file doesn't contain {sheet_name} sheet. Please recheck."
            base_sheet = sheet_df[sheet_name]
        return sheet_df, base_sheet

    @staticmethod
    def __splitList(raw_data):
        """
        Converts a string list to a python list,
        i.e. "[value1,value2,value3]" ==> ["value1","value2","value3"]

        :param raw_data: string of list
        :return: Python list
        """
        processed_datas = [data.strip() for data in raw_data[1:-1].split(",")]
        return processed_datas

    def check_usecount(self, data):
        # Finds and returns whether there is a usecount header in the input file, and its limit.
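        # e.g. a header named "UseCount_3" yields (True, 3, "UseCount_3"), while a
        # plain "UseCount" header yields (True, 0, "UseCount"), i.e. unlimited use.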
        usecount = False
        limit = 0
        usecount_header = None
        for header in data:
            if "usecount" in header.lower():
                usecount = True
                usecount_header = header
                if "usecount_" in header.lower():
                    try:
                        limit = int(header.lower().strip().split("count_")[1])
                    except ValueError:
                        limit = 0
        return usecount, limit, usecount_header

    def save_usecount(self):
        if self.noUpdateFiles:
            return
        for filename in self.isUsecount:
            logger.debug(f"Updating file {filename} with usecounts.")
            sheet_dict = self.rre_sheets[filename]
            ex = pd.ExcelFile(filename)
            for sheet in ex.sheet_names:
                if sheet in sheet_dict:
                    continue
                df = self.get_str_sheet(ex, sheet)
                sheet_dict[sheet] = df
            # The ExcelWriter context manager saves the workbook on exit, so no explicit save() call is needed.
            with pd.ExcelWriter(filename) as writer:
                for sheetname in sheet_dict:
                    sheet_dict[sheetname].to_excel(writer, sheet_name=sheetname, index=False)
            logger.debug(f"File updated {filename}.")

    def update_usecount_in_source(self, data):
        if self.noUpdateFiles:
            return
        filename = self.usecount_dict[repr(data)]["file_name"]
        if not filename:
            filename = self.path
        if filename not in self.isUsecount:
            return
        if not self.isUsecount[filename]:
            return
        self.rre_sheets[filename][self.usecount_dict[repr(data)]["sheet_name"]][
            self.isUsecount[filename]][self.usecount_dict[repr(data)]["index"]] = self.usecount_dict[repr(data)]["use"]

    def __process_rrd_string(self, rrd_string):
        """
        This method is used to validate the rrd_strings provided by the user.
        If there is any error in the string, this function immediately raises an error and stops further execution.
        It also removes empty spaces around the commas in the string.

        Formats supported by the regex in this method:
        ``RRD_[sheetName,TargetData,[Header:[values**],Header:[values**]]]``
        ``RRD_[sheetName,[TargetData**],[Header:[values**],Header:[values**]]]``
        ``RRD_(sheetName,[TargetData**],[Header:[values**],Header:[values**]])``
        ``RRD_[sheetName,*,[Header:[values**],Header:[values**]]]``
        ``RRD_(sheetName,*,[Header:[values**],Header:[values**]])``
        ``RRD_[sheetName,TargetData,[]]``
        ``RRD_(sheetName,TargetData,[])``
        ``RRD_(sheetName,*,[])``
        ``RRD_[sheetName,*,[]]``

        :param rrd_string:
        :return:
        """
        processed_string = ','.join([word.strip() for word in rrd_string.split(', ')])
        match = re.match(
            r"(RRD_(\(|\[))[a-zA-Z0-9\s]+,(\[?[a-zA-Z\s,]+\]?|)|\*,\[([a-zA-Z0-9\s]+:\[[a-zA-Z0-9,\s]+\](,?))*\]",
            processed_string
        )
        err_string = f"{rrd_string} not matching pattern RRD_(sheetName,TargetData," \
                     f"[Header1:[Value1],Header2:[Value1,Value2]])"
        assert match, err_string
        return processed_string

    def __process_rre_string(self, rrd_string):
        """
        This method is used to validate the rre_strings provided by the user.
        If there is any error in the string, this function immediately raises an error and stops further execution.
        It also removes empty spaces around the commas in the string.

        Formats supported by the regex in this method:
        ``RRE_[fileName,sheetName,TargetData,[Header:[values**],Header:[values**]]]``
        ``RRE_[fileName,sheetName,[TargetData**],[Header:[values**],Header:[values**]]]``
        ``RRE_(fileName,sheetName,[TargetData**],[Header:[values**],Header:[values**]])``
        ``RRE_[fileName,sheetName,*,[Header:[values**],Header:[values**]]]``
        ``RRE_(fileName,sheetName,*,[Header:[values**],Header:[values**]])``
        ``RRE_[fileName,sheetName,TargetData,[]]``
        ``RRE_(fileName,sheetName,TargetData,[])``
        ``RRE_(fileName,sheetName,*,[])``
        ``RRE_[fileName,sheetName,*,[]]``

        :param rrd_string:
        :return:
        """
        processed_string = ','.join([word.strip() for word in rrd_string.split(', ')])
        match = re.match(
            r"(RRE_(\(|\[))[\w\d\s\-./\\]+\.(xlsx|xls),[a-zA-Z0-9\s]+,(\[?[a-zA-Z\s,]+\]?|)|\*,\[([a-zA-Z0-9\s]+:\[[a-zA-Z0-9,\s]+\](,?))*\]",
            processed_string
        )
        err_string = f"{rrd_string} not matching pattern RRE_(fileName, sheetName, TargetData," \
                     f"[Header1:[Value1],Header2:[Value1,Value2]])"
        assert match, err_string
        return processed_string

    @staticmethod
    def get_env_variable(string):
        variable = string[1:-1].strip().split(',')[0].strip()
        data = os.environ.get(variable)
        try:
            if not data:
                data = string[1:-1].strip().split(',')[1].strip()
                logger.info(f"{variable} not found in environment, using {data} instead")
        except IndexError:
            raise BaseException(f"Can't find {variable} in environment & default value is also not set")
        return data

if __name__ == "__main__":
    lTestDataGenerator = TestDataGenerator("../../tests/0TestInput/RawTestData.xlsx")
    lTestDataGenerator.write()
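    # An illustrative alternative (same input file, hypothetical call): write a
    # CSV containing a random sample of 10 generated rows instead of xlsx:
    #     lTestDataGenerator.write(OutputFormat="csv", batch_size=10)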