# TestDataGenerator.py

import csv
import itertools
import xlsxwriter
import errno
import os
import logging
import faker
from random import sample, randint
import baangt.base.GlobalConstants as GC
import re
from openpyxl import load_workbook
import sys
import pandas as pd
from CloneXls import CloneXls
import json

logger = logging.getLogger("pyC")

class Writer:
    """
    This class is used to update an existing excel file.
    First it opens the file in python; we can then perform multiple writes and, once everything is updated, call the
    save method to write the changes back to disk. Keeping the workbook open between writes saves time when updating
    excel files.
    """
    def __init__(self, path):
        self.path = path
        self.workbook = load_workbook(path)

    def write(self, row, data, sht):
        # Update the values using row and column number.
        # Note: we are using openpyxl, so row & column indices start from 1 instead of 0.
        column = 0
        sheet = self.workbook[sht]
        headers = next(sheet.rows)
        for header in headers:  # checks if a usecount header is present in the sheet
            if "usecount" in str(header.value).lower():
                column = headers.index(header) + 1
        if column:
            sheet.cell(row, column).value = data

    def save(self):
        # Call this method once all updates are written, to save the file.
        self.workbook.save(self.path)
        self.workbook.close()
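
# Minimal usage sketch for Writer (illustrative only; "data.xlsx" and "Sheet1"
# are placeholder names, and the sheet is assumed to have a usecount column):
#
#     writer = Writer("data.xlsx")
#     writer.write(2, 1, "Sheet1")   # set the usecount cell of row 2 to 1
#     writer.write(3, 4, "Sheet1")   # several writes against the open workbook
#     writer.save()                  # a single save persists all updates
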
class TestDataGenerator:
    """
    TestDataGenerator class is used to create a TestData file from a raw excel file containing all possible values.

    Formats accepted in the input excel file:
    1. Value = ``<value>``
    2. List of values = ``[<value1>,<value2>]``
    3. Range = ``<start>-<end>,<step>``
    4. Random = ``RND_[list]``
    5. Random from range = ``RND_<start>-<end>,<step>``
    6. List of headers = ``[<title1>, <title2>, <title3>]``
    7. Faker prefix = ``FKR_(<type>, <locale>, <number_of_data>)``
    8. RRD prefix = ``RRD_(<sheetName>,<TargetData>,[<Header1>:[<Value1>],<Header2>:[<Value1>,<Value2>]])``
    9. RRE prefix = ``RRE_(<fileName>,<sheetName>,<TargetData>,[<Header1>:[<Value1>],<Header2>:[<Value1>,<Value2>]])``
    10. Renv prefix = ``RENV_(<environment_variable>,<default_value>)``

    :param rawExcelPath: Input path of the xlsx file containing the raw data.
    :param sheetName: Name of the sheet where the base data is located.
    :method write: Writes the final processed data to an excel/csv file.
    """
    def __init__(self, rawExcelPath=GC.TESTDATAGENERATOR_INPUTFILE, sheetName="",
                 from_handleDatabase=False, noUpdate=False):
        self.path = os.path.abspath(rawExcelPath)
        self.sheet_name = sheetName
        if not os.path.isfile(self.path):
            raise FileNotFoundError(errno.ENOENT, os.strerror(errno.ENOENT), self.path)
        self.sheet_dict, self.raw_data_json = self.read_excel(self.path, self.sheet_name)
        self.rre_sheets = {}
        self.isUsecount = {}
        self.remove_header = []
        self.usecount_dict = {}  # keeps the usecount limit records and verifies that none of the data exceeds its limit
        self.done = {}
        self.noUpdateFiles = noUpdate
        self.writers = {}
        if not from_handleDatabase:
            self.processed_datas = self.__process_data(self.raw_data_json)
            self.headers = [x for x in list(self.processed_datas[0].keys()) if x not in self.remove_header]
            self.headers = [x for x in self.headers if 'usecount' not in x.lower()]
            self.final_data = self.__generateFinalData(self.processed_datas)
            if self.isUsecount:
                if not self.noUpdateFiles:
                    self.save_usecount()  # saving the source input file once everything is done

    def write(self, OutputFormat=GC.TESTDATAGENERATOR_OUTPUT_FORMAT, batch_size=0, outputfile=None):
        """
        Writes the generated data to the output file.

        :param OutputFormat: "xlsx" or "csv"
        :param batch_size: Number of rows to be written to the output file. Rows are selected randomly.
        :param outputfile: Name and path of the output file.
        :return:
        """
        if OutputFormat.lower() == "xlsx":
            if outputfile is None:
                outputfile = GC.TESTDATAGENERATOR_OUTPUTFILE_XLSX
            self.__write_excel(batch_size=batch_size, outputfile=outputfile)
        elif OutputFormat.lower() == "csv":
            if outputfile is None:
                outputfile = GC.TESTDATAGENERATOR_OUTPUTFILE_CSV
            self.__write_csv(batch_size=batch_size, outputfile=outputfile)
        else:
            logger.debug("Incorrect file format")

    def __write_excel(self, outputfile=GC.TESTDATAGENERATOR_OUTPUTFILE_XLSX, batch_size=0):
        """
        Writes the TestData file with the final processed data.

        :param outputfile: Name and path of the output file.
        :param batch_size: Number of rows to be randomly selected and written to the output file.
        :return: None
        """
        if batch_size > 0:
            if len(self.final_data) > batch_size:
                data_lis = sample(self.final_data, batch_size)
            else:
                data_lis = self.final_data
                logger.debug("Total final data is smaller than batch size.")
        else:
            data_lis = self.final_data
        with xlsxwriter.Workbook(outputfile) as workbook:
            worksheet = workbook.add_worksheet()
            worksheet.write_row(0, 0, self.headers)
            for row_num, data in enumerate(data_lis):
                worksheet.write_row(row_num + 1, 0, data)

    def __write_csv(self, outputfile=GC.TESTDATAGENERATOR_OUTPUTFILE_CSV, batch_size=0):
        """
        Writes the final data to csv.

        :param outputfile: Name and path of the output file.
        :param batch_size: Number of rows to be randomly selected and written to the output file.
        :return:
        """
        if batch_size > 0:
            if len(self.final_data) > batch_size:
                data_lis = sample(self.final_data, batch_size)
            else:
                data_lis = self.final_data
        else:
            data_lis = self.final_data
        with open(outputfile, 'w', newline='\n', encoding='utf-8-sig') as file:
            fl = csv.writer(file)
            fl.writerow(self.headers)
            for dt in data_lis:
                fl.writerow(list(dt))

    def __generateFinalData(self, processed_data):
        """
        This method does the final processing of ``processed_data``. ``processed_data`` is a list of dictionaries;
        each dictionary is a row of the input file, already converted into python types as per the requirements.

        The first loop iterates over ``processed_data``.
        The second loop iterates over one dictionary (row); each key:value pair of that dictionary is
        header:processed_data. The method first checks the data type of the value:
        If it is a string, it is wrapped in a list (i.e. ["string"]).
        If it is a tuple, it is data with a prefix, so it is sent to the ``__prefix_data_processing`` method for
        further processing.
        Otherwise the value is already of type list.
        All these lists are collected in one list (which can be treated as a row). This list contains the values of
        the cells in evaluated form, i.e. ranges are converted to lists, strings are wrapped in lists & lists are
        already lists. To generate all possible combinations from it we use the ``itertools`` module.
        Once this list of lists containing all possible combinations is created, the
        ``__update_prefix_data_in_final_list`` method is called. That method inserts the processed prefix data along
        with the data from the combinations list into the final list, with every value in its correct position.
        Finally it returns the list of lists, completely processed and ready to be written to the output file.

        :param processed_data:
        :return: Final_data_list
        """
        final_data = []
        for lis in processed_data:
            index = {}
            data_lis = []
            for key in lis:
                if isinstance(lis[key], str):
                    data = [lis[key]]
                elif isinstance(lis[key], tuple):
                    if len(lis[key]) > 0:
                        self.__prefix_data_processing(lis, key, index)
                        continue
                    else:
                        data = ['']
                else:
                    data = lis[key]
                data_lis.append(data)
            datas = list(itertools.product(*data_lis))
            self.__update_prefix_data_in_final_list(datas, index, final_data)
        logger.info(f"Total generated data = {len(final_data)}")
        return final_data

    def __update_prefix_data_in_final_list(self, data_list, dictionary, final_list):
        """
        This method inserts the data from ``dictionary`` into ``final_list``, so it can then be written to the output.

        ``data_list`` is the list where all possible combinations generated from the input lists and ranges are stored.
        ``dictionary`` is where the prefixed data is stored along with its index value (used to place the data).
        ``final_list`` is the list where the final data is stored after merging values from data_list and dictionary.

        The first loop iterates through data_list, which is a list of lists (you can also take it as a list of rows).
        The second loop goes through dictionary and checks the data type of each value. Pairs inside dictionary are of
        the form ``index: value``, where index is the position the value was picked from, so it is used to place the
        data back in its correct position.

        List:
        =====
        If the value is a list, then it is data with the ``FKR_`` prefix and a data count of 0 (i.e. create new fake
        data for every output row). We create a faker instance as per the input and generate fake data for every row,
        inserting it at the position of index (the key of the dictionary).

        Dictionary:
        ==========
        If the value is not a list, we check whether it is of type dict. If yes, then this is data with the
        ``RRD_`` prefix and we have to select random data here, so we start looping through this dictionary.
        (Remember, this is the third loop.) This dictionary contains header:value pairs (TargetDatas from matched
        data). On every iteration it first checks whether the same header is already stored in the ``done``
        dictionary. If yes, it gets that value from ``done``. It then builds a new list from the TargetData list
        containing only the data with the same value for that header as stored in ``done``,
        i.e. if the matching header has value x, then from the TargetDatas list only data where header == x is
        considered for the random pick.
        The random value is then selected from that list.
        If none of the headers was processed before (for the same row), it picks random data from the list and
        stores the header:value pair in the ``done`` dictionary, so it can be used in the checks above.
        It also checks whether the ``self.headers`` list contains all the headers present in the randomly selected
        data. If not, the missing headers are added there.
        At last we look up the position of each header inside the self.headers list, insert the value at the same
        position, and append the row to the final_data list.

        Tuple:
        ======
        If the type is tuple, we simply pick a random value from it and insert it at the position of
        index (the dictionary key for the current value).

        :param data_list:
        :param dictionary:
        :param final_list:
        :return: None
        """
        for data in data_list:
            success = True  # when usecount data is present, tracks whether the whole row may still be added to the final output
            data = list(data)
            done = {}
            for ind in dictionary:
                if isinstance(dictionary[ind], list):
                    fake = faker.Faker(dictionary[ind][2])
                    data.insert(ind, getattr(fake, dictionary[ind][1])())
                else:
                    if isinstance(dictionary[ind][0], dict):
                        sorted_data = False
                        for header in dictionary[ind][0]:
                            if header in done:
                                match = done[header]
                                sorted_data = [x for x in dictionary[ind] if x[header] == match]
                                break
                        if not sorted_data:
                            sorted_data = list(dictionary[ind])
                        remove_data = []  # used to remove data that has reached its usecount limit
                        for dtt in sorted_data:  # checks whether data has reached the usecount limit and marks it for removal
                            if self.usecount_dict[repr(dtt)]['limit'] == 0:
                                continue
                            if self.usecount_dict[repr(dtt)]['use'] >= self.usecount_dict[repr(dtt)]['limit']:
                                remove_data.append(dtt)
                        for dtt in remove_data:  # removing data from the main data list
                            logger.debug(f"UseCount limit of {dtt} is exceeded : {str(self.usecount_dict[repr(dtt)]['limit'])}")
                            sorted_data.remove(dtt)
                        if len(sorted_data) == 0:  # if all candidates reached their usecount, the whole row is skipped in the final output
                            success = False
                            break
                        elif len(sorted_data) == 1:
                            data_to_insert = sorted_data[0]
                        else:
                            data_to_insert = sorted_data[randint(0, len(sorted_data) - 1)]
                        self.usecount_dict[repr(data_to_insert)]['use'] += 1
                        for keys in data_to_insert:
                            if "usecount" in keys.lower():  # removing the usecount header from the headers in the final output
                                self.update_usecount_in_source(data_to_insert)
                                continue
                            if keys not in self.headers:
                                self.headers.append(keys)
                            if keys not in done:
                                data.insert(self.headers.index(keys), data_to_insert[keys])
                                done[keys] = data_to_insert[keys]
                    else:
                        data_to_insert = dictionary[ind][randint(0, len(dictionary[ind]) - 1)]
                        data.insert(ind, data_to_insert)
            if success:
                final_list.append(data)

    def __prefix_data_processing(self, dic, key, dictionary: dict):
        """
        This method processes the data with prefixes.

        ``dic`` is the dictionary where all the data currently in the final process is stored.
        ``key`` is the header of the current data, used to look the data up.
        ``dictionary`` is where the values are inserted after processing.

        First the method checks the first value of the tuple.
        If it is ``Faker``, it continues processing and checks the 4th value of the tuple.
        If the 4th value (which determines the number of fake data items to generate and store in a list)
        is ``0``, the method stores the values as they are in a list, because ``0`` means we have to generate
        new fake data for every output row, which is done later.
        If it is greater than ``0``, the method creates a tuple with the given number of fake data items and stores it.
        (If no number is given, the default is 5.)
        If the first value is not ``Faker``, no processing is done.
        Finally the data is inserted into the dictionary.

        :param dic:
        :param key:
        :param dictionary:
        :return:
        """
        ltuple = dic[key]
        if ltuple[0] == "Faker":
            fake = faker.Faker(ltuple[2])
            fake_lis = []
            if len(ltuple) == 4:
                if int(ltuple[3]) == 0:
                    dictionary[list(dic.keys()).index(key)] = list(ltuple)
                    return True
                else:
                    for x in range(int(ltuple[3])):
                        fake_lis.append(getattr(fake, ltuple[1])())
            else:
                for x in range(5):
                    fake_lis.append(getattr(fake, ltuple[1])())
            dictionary[list(dic.keys()).index(key)] = tuple(fake_lis)
            return True
        else:
            dictionary[list(dic.keys()).index(key)] = ltuple
            return True

    def __process_data(self, raw_json):
        """
        This method processes all the raw unprocessed data read from the excel file.

        It first sends the header to ``data_generators`` so that, if it is a list, it is converted into individual
        headers.
        Then it processes the values using ``data_generators``.
        It then checks the returned iterable type: if it is a tuple, the input value had a prefix, so it further
        checks whether the tuple contains a dict. If True, the prefix was RRD_. In that case we have to deal with
        the original header of the input value, because if the original value's header is not in the TargetData, that
        header would contain no value in the output file and may cause errors too. So the header is added to the
        ``self.remove_header`` list, which is later used to remove it from the main header list.
        Finally it returns a list of dictionaries. Each dictionary contains the processed data of one row of the
        input file. Processed data is the raw data converted into python data types and iterables; ranges are
        converted into lists.

        :param raw_json:
        :return:
        """
        processed_datas = []
        raw_json = json.loads(raw_json.to_json(orient="records"))
        for raw_data in raw_json:
            if not list(raw_data.values())[0]:
                continue
            processed_data = {}
            for key in raw_data:
                keys = self.data_generators(key)
                for ke in keys:
                    processed_data[ke] = self.data_generators(raw_data[key])
                    if isinstance(processed_data[ke], tuple) and len(processed_data[ke]) > 0:
                        if isinstance(processed_data[ke][0], dict):
                            if ke not in processed_data[ke][0]:
                                self.remove_header.append(ke)
            processed_datas.append(processed_data)
        return processed_datas

    def data_generators(self, raw_data_old):
        """
        This method first sends the data to the ``__raw_data_string_process`` method to split the data and remove
        unwanted spaces.
        It then uses other methods to convert all the different data types from strings into their respective
        python data types, i.e. string lists into python lists, etc.
        According to the prefix of the data and the assigned data_type it converts them:
        Simple lists and strings are converted into ``list`` type.
        Data with a prefix is converted into ``tuple`` type, which is helpful in distinguishing it later. If the
        prefix is ``FKR_``, the prefix name is inserted as the first value of the tuple to help further processing.
        Finally it returns the iterable for further processing.

        :param raw_data_old:
        :return: List or Tuple containing the necessary data
        """
        raw_data, prefix, data_type = self.__raw_data_string_process(raw_data_old)
        if len(raw_data) <= 1:
            return [""]
        if raw_data[0] == "[" and raw_data[-1] == "]" and prefix == "":
            processed_datas = self.__splitList(raw_data)
            processed_datas = data_type(processed_datas)
        elif prefix == "Faker":
            processed_datas = [data.strip() for data in raw_data[1:-1].split(",")]
            processed_datas.insert(0, "Faker")
            processed_datas = data_type(processed_datas)
        elif prefix == "Rrd":
            first_value = raw_data[1:-1].split(',')[0].strip()
            second_value = raw_data[1:-1].split(',')[1].strip()
            if second_value[0] == "[":
                second_value = ','.join(raw_data[1:-1].split(',')[1:]).strip()
                second_value = second_value[:second_value.index(']') + 1]
                third_value = [x.strip() for x in ']'.join(raw_data[1:-1].split(']')[1:]).split(',')[1:]]
            else:
                third_value = [x.strip() for x in raw_data[1:-1].split(',')[2:]]
            evaluated_list = ']],'.join(','.join(third_value)[1:-1].strip().split('],')).split('],')
            if evaluated_list[0] == "":
                evaluated_dict = {}
            else:
                evaluated_dict = {
                    splited_data.split(':')[0]: self.__splitList(splited_data.split(':')[1]) for splited_data in evaluated_list
                }
            if second_value[0] == "[" and second_value[-1] == "]":
                second_value = self.__splitList(second_value)
            try:
                processed_datas = self.__processRrdRre(first_value, second_value, evaluated_dict)
                processed_datas = data_type(processed_datas)
            except KeyError:
                sys.exit(f"Please check that the source files contain all the headers mentioned in: {raw_data_old}")
        elif prefix == "Rre":
            file_name = raw_data[1:-1].split(',')[0].strip()
            first_value = raw_data[1:-1].split(',')[1].strip()
            second_value = raw_data[1:-1].split(',')[2].strip()
            if second_value[0] == "[":
                second_value = ','.join(raw_data[1:-1].split(',')[2:]).strip()
                second_value = second_value[:second_value.index(']') + 1]
                third_value = [x.strip() for x in ']'.join(raw_data[1:-1].split(']')[1:]).split(',')[1:]]
            else:
                third_value = [x.strip() for x in raw_data[1:-1].split(',')[3:]]
            evaluated_list = ']],'.join(','.join(third_value)[1:-1].strip().split('],')).split('],')
            if evaluated_list[0] == "":
                evaluated_dict = {}
            else:
                evaluated_dict = {
                    splited_data.split(':')[0]: self.__splitList(splited_data.split(':')[1]) for splited_data in evaluated_list
                }
            if second_value[0] == "[" and second_value[-1] == "]":
                second_value = self.__splitList(second_value)
            try:
                processed_datas = self.__processRrdRre(first_value, second_value, evaluated_dict, filename=file_name)
            except KeyError:
                sys.exit(f"Please check that the source files contain all the headers mentioned in: {raw_data_old}")
            processed_datas = data_type(processed_datas)
        elif prefix == "Renv":
            processed_datas = self.get_env_variable(raw_data)
        elif "-" in raw_data:
            raw_data = raw_data.split('-')
            start = raw_data[0].strip()
            end = raw_data[1].strip()
            step = 1
            if "," in end:
                raw_data = end.split(",")
                end = raw_data[0].strip()
                step = raw_data[1].strip()
            processed_datas = [x for x in range(int(start), int(end) + 1, int(step))]
            processed_datas = data_type(processed_datas)
        else:
            processed_datas = [raw_data.strip()]
            processed_datas = data_type(processed_datas)
        return processed_datas

    def __processRrdRre(self, sheet_name, data_looking_for, data_to_match: dict, filename=None):
        if filename:
            file_name = ".".join(filename.split(".")[:-1])
            file_extension = filename.split(".")[-1]
            file = file_name + "_baangt" + "." + file_extension
            if file not in self.rre_sheets:
                logger.debug(f"Creating clone file of: {filename}")
                if not self.noUpdateFiles:
                    filename = CloneXls(filename).update_or_make_clone()
                self.rre_sheets[filename] = {}
            filename = file
            if sheet_name in self.rre_sheets[filename]:
                df = self.rre_sheets[filename][sheet_name]
            else:
                df = pd.read_excel(filename, sheet_name, dtype=str)
                self.rre_sheets[filename][sheet_name] = df
        else:
            df = self.sheet_dict[sheet_name]
            if self.path not in self.rre_sheets:
                self.rre_sheets[self.path] = {}
            if sheet_name not in self.rre_sheets[self.path]:
                self.rre_sheets[self.path][sheet_name] = df
        df1 = df.copy()
        for key, value in data_to_match.items():
            if not isinstance(value, list):
                value = [value]
            df1 = df1.loc[df1[key].isin(value)]
        data_lis = []
        if isinstance(data_looking_for, str):
            data_looking_for = data_looking_for.split(",")
        key_name = repr(sheet_name) + repr(data_looking_for) + repr(data_to_match) + repr(filename)
        if key_name in self.done:
            logger.debug("Data gathered from previously saved data.")
            return self.done[key_name]
        usecount, limit, usecount_header = self.check_usecount(df.columns.values.tolist())
        if not filename:
            if self.path not in self.isUsecount:
                self.isUsecount[self.path] = usecount_header
            if not self.isUsecount[self.path]:
                self.isUsecount[self.path] = usecount_header
        else:
            if filename not in self.isUsecount:
                self.isUsecount[filename] = usecount_header
            if not self.isUsecount[filename]:
                self.isUsecount[filename] = usecount_header
        for tup in df1.itertuples():
            data = dict(tup._asdict())
            if usecount_header:
                try:
                    used_limit = int(data[usecount_header])
                except (ValueError, TypeError):
                    used_limit = 0
            else:
                used_limit = 0
            if data_looking_for[0] == "*":
                index = data["Index"]
                del data["Index"]
                data_lis.append(data)
                self.usecount_dict[repr(data)] = {
                    "use": used_limit, "limit": limit, "index": index,
                    "sheet_name": sheet_name, "file_name": filename
                }
            else:
                dt = {keys: data[keys] for keys in data_looking_for}
                data_lis.append(dt)
                self.usecount_dict[repr(dt)] = {
                    "use": used_limit, "limit": limit, "index": data["Index"],
                    "sheet_name": sheet_name, "file_name": filename
                }
        if len(data_lis) == 0:
            logger.info(f"No data matching: {data_to_match}")
            sys.exit(f"No data matching: {data_to_match}")
        logger.debug("New data gathered.")
        self.done[key_name] = data_lis
        return data_lis

    def __raw_data_string_process(self, raw_string):
        """
        Returns ``string, prefix, data_type``, which are later used to decide which process to perform on the string.
        Their in-depth explanations are written in the methods where they are used.

        This method processes the value string of every cell in the input sheet.
        It first converts floats into strings, since by default xlrd converts ints to floats.
        Then it checks whether the string is longer than 4 characters. If not, it simply returns the values;
        otherwise it processes further.
        If the string has more than 4 characters, this method checks whether the fourth character is "_". If not, it
        returns the values; otherwise there is a prefix in the string and processing continues.
        It then splits the prefix from the value and defines the data_type according to the string.
        If there is no matching prefix, the data type will be list; otherwise it will be tuple.

        :param raw_string:
        :return: String of values, prefix, data_type
        """
        if isinstance(raw_string, float):
            raw_string = int(raw_string)
        raw_string = str(raw_string).strip()
        prefix = ""
        if len(raw_string) > 4:
            if raw_string[3] == "_":
                if raw_string[:4].lower() == "rnd_":  # Random
                    raw_string = raw_string[4:]
                    data_type = tuple
                elif raw_string[:4].lower() == "fkr_":
                    prefix = "Faker"
                    raw_string = raw_string[4:]
                    data_type = tuple
                elif raw_string[:4].lower() == "rrd_":  # Remote Random (Remote = other sheet)
                    prefix = "Rrd"
                    raw_string = self.__process_rrd_string(raw_string)
                    raw_string = raw_string[4:]
                    data_type = tuple
                elif raw_string[:4].lower() == "rre_":  # Remote Random (Remote = other file)
                    prefix = "Rre"
                    raw_string = self.__process_rre_string(raw_string)
                    raw_string = raw_string[4:]
                    data_type = tuple
                else:
                    data_type = list
            else:
                if raw_string[:5].lower() == "renv_":
                    prefix = "Renv"
                    raw_string = raw_string[5:]
                data_type = list
        else:
            data_type = list
        return raw_string, prefix, data_type

    def get_str_sheet(self, excel, sheet):
        columns = excel.parse(sheet).columns
        converters = {column: str for column in columns}
        data = excel.parse(sheet, converters=converters)
        return data

    def read_excel(self, path, sheet_name="", return_json=False):
        """
        This method reads the input excel file.
        It reads all the sheets inside this excel file and creates a dictionary of dictionaries containing all the
        data of every sheet, i.e. {"sheetName": {headers**: data**}}.
        It also looks for a base sheet whose name must be given when creating the instance. If no sheet name is
        given, the first sheet of the file is taken as the base sheet.
        Finally it returns a dictionary containing sheetName:data for all sheets & the dictionary of the base sheet.

        :param path: Path to the raw data xlsx file.
        :param sheet_name: Name of the base sheet where the main input data is located. Default is the first sheet.
        :return: Dictionary of all sheets and data, dictionary of the base sheet.
        """
        wb = pd.ExcelFile(path)
        sheet_lis = wb.sheet_names
        sheet_df = {}
        for sheet in sheet_lis:
            sheet_df[sheet] = self.get_str_sheet(wb, sheet)
            sheet_df[sheet].fillna("", inplace=True)
        if return_json:
            for df in sheet_df.keys():
                sheet_df[df] = json.loads(sheet_df[df].to_json(orient="records"))
        if sheet_name == "":
            base_sheet = sheet_df[sheet_lis[0]]
        else:
            assert sheet_name in sheet_df, f"Excel file doesn't contain {sheet_name} sheet. Please recheck."
            base_sheet = sheet_df[sheet_name]
        return sheet_df, base_sheet

    @staticmethod
    def __splitList(raw_data):
        """
        Converts a string list to a python list,
        i.e. "[value1,value2,value3]" ==> ["value1","value2","value3"]

        :param raw_data: string of list
        :return: Python list
        """
        processed_datas = [data.strip() for data in raw_data[1:-1].split(",")]
        return processed_datas

    def check_usecount(self, data):
        # Finds and returns whether there is a usecount header in the input file, and its limit.
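        # e.g. a header named "UseCount_3" yields (True, 3, "UseCount_3"), while a
        # plain "UseCount" header yields (True, 0, "UseCount"), i.e. unlimited use.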
        usecount = False
        limit = 0
        usecount_header = None
        for header in data:
            if "usecount" in header.lower():
                usecount = True
                usecount_header = header
                if "usecount_" in header.lower():
                    try:
                        limit = int(header.lower().strip().split("count_")[1])
                    except ValueError:
                        limit = 0
        return usecount, limit, usecount_header

    def save_usecount(self):
        if self.noUpdateFiles:
            return
        for filename in self.isUsecount:
            logger.debug(f"Updating file {filename} with usecounts.")
            sheet_dict = self.rre_sheets[filename]
            ex = pd.ExcelFile(filename)
            for sheet in ex.sheet_names:
                if sheet in sheet_dict:
                    continue
                df = self.get_str_sheet(ex, sheet)
                sheet_dict[sheet] = df
            # The ExcelWriter context manager saves the workbook on exit, so no explicit save() call is needed.
            with pd.ExcelWriter(filename) as writer:
                for sheetname in sheet_dict:
                    sheet_dict[sheetname].to_excel(writer, sheet_name=sheetname, index=False)
            logger.debug(f"File updated {filename}.")

    def update_usecount_in_source(self, data):
        if self.noUpdateFiles:
            return
        filename = self.usecount_dict[repr(data)]["file_name"]
        if not filename:
            filename = self.path
        if filename not in self.isUsecount:
            return
        if not self.isUsecount[filename]:
            return
        self.rre_sheets[filename][self.usecount_dict[repr(data)]["sheet_name"]][
            self.isUsecount[filename]][self.usecount_dict[repr(data)]["index"]] = self.usecount_dict[repr(data)]["use"]

    def __process_rrd_string(self, rrd_string):
        """
        This method is used to validate the rrd_strings provided by the user.
        If there is any error in the string, this function immediately raises an error and stops further execution.
        It also removes empty spaces around the commas in the string.

        Formats supported by the regex in this method:
        ``RRD_[sheetName,TargetData,[Header:[values**],Header:[values**]]]``
        ``RRD_[sheetName,[TargetData**],[Header:[values**],Header:[values**]]]``
        ``RRD_(sheetName,[TargetData**],[Header:[values**],Header:[values**]])``
        ``RRD_[sheetName,*,[Header:[values**],Header:[values**]]]``
        ``RRD_(sheetName,*,[Header:[values**],Header:[values**]])``
        ``RRD_[sheetName,TargetData,[]]``
        ``RRD_(sheetName,TargetData,[])``
        ``RRD_(sheetName,*,[])``
        ``RRD_[sheetName,*,[]]``

        :param rrd_string:
        :return:
        """
        processed_string = ','.join([word.strip() for word in rrd_string.split(', ')])
        match = re.match(
            r"(RRD_(\(|\[))[a-zA-Z0-9\s]+,(\[?[a-zA-Z\s,]+\]?|)|\*,\[([a-zA-Z0-9\s]+:\[[a-zA-Z0-9,\s]+\](,?))*\]",
            processed_string
        )
        err_string = f"{rrd_string} not matching pattern RRD_(sheetName,TargetData," \
                     f"[Header1:[Value1],Header2:[Value1,Value2]])"
        assert match, err_string
        return processed_string

    def __process_rre_string(self, rrd_string):
        """
        This method is used to validate the rre_strings provided by the user.
        If there is any error in the string, this function immediately raises an error and stops further execution.
        It also removes empty spaces around the commas in the string.

        Formats supported by the regex in this method:
        ``RRE_[fileName,sheetName,TargetData,[Header:[values**],Header:[values**]]]``
        ``RRE_[fileName,sheetName,[TargetData**],[Header:[values**],Header:[values**]]]``
        ``RRE_(fileName,sheetName,[TargetData**],[Header:[values**],Header:[values**]])``
        ``RRE_[fileName,sheetName,*,[Header:[values**],Header:[values**]]]``
        ``RRE_(fileName,sheetName,*,[Header:[values**],Header:[values**]])``
        ``RRE_[fileName,sheetName,TargetData,[]]``
        ``RRE_(fileName,sheetName,TargetData,[])``
        ``RRE_(fileName,sheetName,*,[])``
        ``RRE_[fileName,sheetName,*,[]]``

        :param rrd_string:
        :return:
        """
        processed_string = ','.join([word.strip() for word in rrd_string.split(', ')])
        match = re.match(
            r"(RRE_(\(|\[))[\w\d\s\-./\\]+\.(xlsx|xls),[a-zA-Z0-9\s]+,(\[?[a-zA-Z\s,]+\]?|)|\*,\[([a-zA-Z0-9\s]+:\[[a-zA-Z0-9,\s]+\](,?))*\]",
            processed_string
        )
        err_string = f"{rrd_string} not matching pattern RRE_(fileName, sheetName, TargetData," \
                     f"[Header1:[Value1],Header2:[Value1,Value2]])"
        assert match, err_string
        return processed_string

    @staticmethod
    def get_env_variable(string):
        variable = string[1:-1].strip().split(',')[0].strip()
        data = os.environ.get(variable)
        try:
            if not data:
                data = string[1:-1].strip().split(',')[1].strip()
                logger.info(f"{variable} not found in environment, using {data} instead")
        except IndexError:
            raise BaseException(f"Can't find {variable} in environment & default value is also not set")
        return data

if __name__ == "__main__":
    lTestDataGenerator = TestDataGenerator("../../tests/0TestInput/RawTestData.xlsx")
    lTestDataGenerator.write()
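    # An illustrative alternative (same input file, hypothetical call): write a
    # CSV containing a random sample of 10 generated rows instead of xlsx:
    #     lTestDataGenerator.write(OutputFormat="csv", batch_size=10)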